From 2e9ea19207a7410f929bb908759d48cb2340f29c Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 25 Feb 2024 11:20:44 -0500 Subject: "isJapanese" check move (#730) * Move isStringPartiallyJapanese out of ClipboardMonitor * Create isStringPartiallyJapanese function * Add textMayBeTranslatable * Rename API function * Rename internal function * Add helper * Update translatable check * Pass language to TextScanner * Pass language explicitly * Use textMayBeTranslatable * No redundant translatable check * Update eslint * Remove double newline * Collapse * Rename --- ext/js/language/language-descriptors.js | 2 ++ ext/js/language/languages.js | 11 +++++++++++ ext/js/language/text-scanner.js | 12 +++++++++--- ext/js/language/text-utilities.js | 29 +++++++++++++++++++++++++++++ 4 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 ext/js/language/text-utilities.js (limited to 'ext/js/language') diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index beb1417e..d78a96e5 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -18,6 +18,7 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js'; import {eszettPreprocessor} from './de/german-text-preprocessors.js'; import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import {isStringPartiallyJapanese} from './ja/japanese.js'; import {removeLatinDiacritics} from './la/latin-text-preprocessors.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js'; @@ -114,6 +115,7 @@ const languageDescriptors = [ iso: 'ja', name: 'Japanese', exampleText: '読め', + isTextLookupWorthy: isStringPartiallyJapanese, textPreprocessors: { convertHalfWidthCharacters, 
convertNumericCharacters, diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js index f964dfec..fd58477d 100755 --- a/ext/js/language/languages.js +++ b/ext/js/language/languages.js @@ -47,3 +47,14 @@ export function getAllLanguageTextPreprocessors() { } return results; } + +/** + * @param {string} text + * @param {string} language + * @returns {boolean} + */ +export function isTextLookupWorthy(text, language) { + const descriptor = languageDescriptorMap.get(language); + if (typeof descriptor === 'undefined') { return false; } + return typeof descriptor.isTextLookupWorthy === 'undefined' || descriptor.isTextLookupWorthy(text); +} diff --git a/ext/js/language/text-scanner.js b/ext/js/language/text-scanner.js index 64333093..ad5ba12b 100644 --- a/ext/js/language/text-scanner.js +++ b/ext/js/language/text-scanner.js @@ -70,6 +70,8 @@ export class TextScanner extends EventDispatcher { this._includeSelector = null; /** @type {?string} */ this._excludeSelector = null; + /** @type {?string} */ + this._language = null; /** @type {?import('text-scanner').InputInfo} */ this._inputInfoCurrent = null; @@ -188,6 +190,10 @@ export class TextScanner extends EventDispatcher { this._excludeSelector = value; } + /** @type {?string} */ + get language() { return this._language; } + set language(value) { this._language = value; } + /** */ prepare() { this._isPrepared = true; @@ -449,7 +455,7 @@ export class TextScanner extends EventDispatcher { const result = await this._findDictionaryEntries(textSource, searchTerms, searchKanji, optionsContext); if (result !== null) { ({dictionaryEntries, sentence, type} = result); - } else if (textSource !== null && textSource instanceof TextSourceElement && await this._hasJapanese(textSource.fullContent)) { + } else if (textSource !== null && textSource instanceof TextSourceElement && await this._isTextLookupWorthy(textSource.fullContent)) { dictionaryEntries = []; sentence = {text: '', offset: 0}; } @@ -1549,9 +1555,9 @@ export 
class TextScanner extends EventDispatcher { * @param {string} text * @returns {Promise<boolean>} */ - async _hasJapanese(text) { + async _isTextLookupWorthy(text) { try { - return await this._api.textHasJapaneseCharacters(text); + return this._language !== null && await this._api.isTextLookupWorthy(text, this._language); } catch (e) { return false; } diff --git a/ext/js/language/text-utilities.js b/ext/js/language/text-utilities.js new file mode 100644 index 00000000..ca8958aa --- /dev/null +++ b/ext/js/language/text-utilities.js @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +import {isStringPartiallyJapanese} from './ja/japanese.js'; + +/** + * Returns the language that the string might be by using some heuristic checks. + * Values returned are ISO codes. `null` is returned if no language can be determined. + * @param {string} text + * @returns {?string} + */ +export function getLanguageFromText(text) { + if (isStringPartiallyJapanese(text)) { return 'ja'; } + return null; +} -- cgit v1.2.3