diff options
Diffstat (limited to 'ext/js/language')
| -rw-r--r-- | ext/js/language/language-descriptors.js | 2 | ||||
| -rwxr-xr-x | ext/js/language/languages.js | 11 | ||||
| -rw-r--r-- | ext/js/language/text-scanner.js | 12 | ||||
| -rw-r--r-- | ext/js/language/text-utilities.js | 29 | 
4 files changed, 51 insertions, 3 deletions
| diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index beb1417e..d78a96e5 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -18,6 +18,7 @@  import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';  import {eszettPreprocessor} from './de/german-text-preprocessors.js';  import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import {isStringPartiallyJapanese} from './ja/japanese.js';  import {removeLatinDiacritics} from './la/latin-text-preprocessors.js';  import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';  import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js'; @@ -114,6 +115,7 @@ const languageDescriptors = [          iso: 'ja',          name: 'Japanese',          exampleText: '読め', +        isTextLookupWorthy: isStringPartiallyJapanese,          textPreprocessors: {              convertHalfWidthCharacters,              convertNumericCharacters, diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js index f964dfec..fd58477d 100755 --- a/ext/js/language/languages.js +++ b/ext/js/language/languages.js @@ -47,3 +47,14 @@ export function getAllLanguageTextPreprocessors() {      }      return results;  } + +/** + * @param {string} text + * @param {string} language + * @returns {boolean} + */ +export function isTextLookupWorthy(text, language) { +    const descriptor = languageDescriptorMap.get(language); +    if (typeof descriptor === 'undefined') { return false; } +    return typeof descriptor.isTextLookupWorthy === 'undefined' || descriptor.isTextLookupWorthy(text); +} diff --git a/ext/js/language/text-scanner.js b/ext/js/language/text-scanner.js index 64333093..ad5ba12b 100644 --- a/ext/js/language/text-scanner.js +++ b/ext/js/language/text-scanner.js @@ -70,6 +70,8 @@ export class TextScanner extends EventDispatcher {          this._includeSelector = null;          /** @type {?string} */          this._excludeSelector = null; +        /** @type {?string} */ +        this._language = null;          /** @type {?import('text-scanner').InputInfo} */          this._inputInfoCurrent = null; @@ -188,6 +190,10 @@ export class TextScanner extends EventDispatcher {          this._excludeSelector = value;      } +    /** @type {?string} */ +    get language() { return this._language; } +    set language(value) { this._language = value; } +      /** */      prepare() {          this._isPrepared = true; @@ -449,7 +455,7 @@ export class TextScanner extends EventDispatcher {              const result = await this._findDictionaryEntries(textSource, searchTerms, searchKanji, optionsContext);              if (result !== null) {                  ({dictionaryEntries, sentence, type} = result); -            } else if (textSource !== null && textSource instanceof TextSourceElement && await this._hasJapanese(textSource.fullContent)) { +            } else if (textSource !== null && textSource instanceof TextSourceElement && await this._isTextLookupWorthy(textSource.fullContent)) {                  dictionaryEntries = [];                  sentence = {text: '', offset: 0};              } @@ -1549,9 +1555,9 @@ export class TextScanner extends EventDispatcher {       * @param {string} text       * @returns {Promise<boolean>}       */ -    async _hasJapanese(text) { +    async _isTextLookupWorthy(text) {          try { -            return await this._api.textHasJapaneseCharacters(text); +            return this._language !== null && await this._api.isTextLookupWorthy(text, this._language);          } catch (e) {              return false;          } diff --git a/ext/js/language/text-utilities.js b/ext/js/language/text-utilities.js new file mode 100644 index 00000000..ca8958aa --- /dev/null +++ b/ext/js/language/text-utilities.js @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {isStringPartiallyJapanese} from './ja/japanese.js'; + +/** + * Returns the language that the string might be by using some heuristic checks. + * Values returned are ISO codes. `null` is returned if no language can be determined. + * @param {string} text + * @returns {?string} + */ +export function getLanguageFromText(text) { +    if (isStringPartiallyJapanese(text)) { return 'ja'; } +    return null; +} |