diff options
Diffstat (limited to 'ext/js/language')
-rw-r--r-- | ext/js/language/language-descriptors.js | 2 | ||||
-rwxr-xr-x | ext/js/language/languages.js | 11 | ||||
-rw-r--r-- | ext/js/language/text-scanner.js | 12 | ||||
-rw-r--r-- | ext/js/language/text-utilities.js | 29 |
4 files changed, 51 insertions, 3 deletions
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index beb1417e..d78a96e5 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -18,6 +18,7 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js'; import {eszettPreprocessor} from './de/german-text-preprocessors.js'; import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import {isStringPartiallyJapanese} from './ja/japanese.js'; import {removeLatinDiacritics} from './la/latin-text-preprocessors.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js'; @@ -114,6 +115,7 @@ const languageDescriptors = [ iso: 'ja', name: 'Japanese', exampleText: '読め', + isTextLookupWorthy: isStringPartiallyJapanese, textPreprocessors: { convertHalfWidthCharacters, convertNumericCharacters, diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js index f964dfec..fd58477d 100755 --- a/ext/js/language/languages.js +++ b/ext/js/language/languages.js @@ -47,3 +47,14 @@ export function getAllLanguageTextPreprocessors() { } return results; } + +/** + * @param {string} text Text passed to the descriptor's `isTextLookupWorthy` check, when one is defined. + * @param {string} language Key used to look up the descriptor in `languageDescriptorMap`. + * @returns {boolean} `false` if no descriptor is found for `language`; `true` if the descriptor defines no `isTextLookupWorthy` check; otherwise the result of calling that check with `text`. + */ +export function isTextLookupWorthy(text, language) { + const descriptor = languageDescriptorMap.get(language); + if (typeof descriptor === 'undefined') { return false; } + return typeof descriptor.isTextLookupWorthy === 'undefined' || descriptor.isTextLookupWorthy(text); +} diff --git a/ext/js/language/text-scanner.js b/ext/js/language/text-scanner.js index 64333093..ad5ba12b 100644 --- a/ext/js/language/text-scanner.js +++ b/ext/js/language/text-scanner.js @@ -70,6 +70,8 @@ export class TextScanner extends EventDispatcher { this._includeSelector 
= null; /** @type {?string} */ this._excludeSelector = null; + /** @type {?string} */ + this._language = null; /** @type {?import('text-scanner').InputInfo} */ this._inputInfoCurrent = null; @@ -188,6 +190,10 @@ export class TextScanner extends EventDispatcher { this._excludeSelector = value; } + /** @type {?string} */ + get language() { return this._language; } + set language(value) { this._language = value; } + /** */ prepare() { this._isPrepared = true; @@ -449,7 +455,7 @@ export class TextScanner extends EventDispatcher { const result = await this._findDictionaryEntries(textSource, searchTerms, searchKanji, optionsContext); if (result !== null) { ({dictionaryEntries, sentence, type} = result); - } else if (textSource !== null && textSource instanceof TextSourceElement && await this._hasJapanese(textSource.fullContent)) { + } else if (textSource !== null && textSource instanceof TextSourceElement && await this._isTextLookupWorthy(textSource.fullContent)) { dictionaryEntries = []; sentence = {text: '', offset: 0}; } @@ -1549,9 +1555,9 @@ export class TextScanner extends EventDispatcher { * @param {string} text * @returns {Promise<boolean>} */ - async _hasJapanese(text) { + async _isTextLookupWorthy(text) { try { - return await this._api.textHasJapaneseCharacters(text); + return this._language !== null && await this._api.isTextLookupWorthy(text, this._language); } catch (e) { return false; } diff --git a/ext/js/language/text-utilities.js b/ext/js/language/text-utilities.js new file mode 100644 index 00000000..ca8958aa --- /dev/null +++ b/ext/js/language/text-utilities.js @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +import {isStringPartiallyJapanese} from './ja/japanese.js'; + +/** + * Returns the language that the string might be by using some heuristic checks. + * Values returned are ISO codes. `null` is returned if no language can be determined. + * @param {string} text The text to test. + * @returns {?string} `'ja'` if `isStringPartiallyJapanese(text)` is truthy, otherwise `null`. + */ +export function getLanguageFromText(text) { + if (isStringPartiallyJapanese(text)) { return 'ja'; } + return null; +} |