summary refs log tree commit diff
path: root/ext/js/language
diff options
context:
space:
mode:
Diffstat (limited to 'ext/js/language')
-rw-r--r-- ext/js/language/language-descriptors.js 2
-rwxr-xr-x ext/js/language/languages.js 11
-rw-r--r-- ext/js/language/text-scanner.js 12
-rw-r--r-- ext/js/language/text-utilities.js 29
4 files changed, 51 insertions, 3 deletions
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index beb1417e..d78a96e5 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -18,6 +18,7 @@
import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
import {eszettPreprocessor} from './de/german-text-preprocessors.js';
import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {isStringPartiallyJapanese} from './ja/japanese.js';
import {removeLatinDiacritics} from './la/latin-text-preprocessors.js';
import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js';
@@ -114,6 +115,7 @@ const languageDescriptors = [
iso: 'ja',
name: 'Japanese',
exampleText: '読め',
+ isTextLookupWorthy: isStringPartiallyJapanese,
textPreprocessors: {
convertHalfWidthCharacters,
convertNumericCharacters,
diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js
index f964dfec..fd58477d 100755
--- a/ext/js/language/languages.js
+++ b/ext/js/language/languages.js
@@ -47,3 +47,14 @@ export function getAllLanguageTextPreprocessors() {
}
return results;
}
+
+/**
+ * Decides whether `text` is worth looking up for the given language.
+ * The descriptor for `language` is fetched from `languageDescriptorMap`:
+ * - no descriptor for that key: the result is `false`;
+ * - a descriptor without an `isTextLookupWorthy` property: the result is `true` for any text;
+ * - otherwise the result is whatever the descriptor's `isTextLookupWorthy(text)` returns.
+ * @param {string} text The text to test.
+ * @param {string} language Key used to look up the descriptor (presumably the descriptor's `iso` code; confirm against how the map is built).
+ * @returns {boolean} Whether a lookup should be attempted for `text`.
+ */
+export function isTextLookupWorthy(text, language) {
+ const descriptor = languageDescriptorMap.get(language);
+ if (typeof descriptor === 'undefined') { return false; } // Unknown language key.
+ return typeof descriptor.isTextLookupWorthy === 'undefined' || descriptor.isTextLookupWorthy(text); // The check is optional per descriptor.
+}
diff --git a/ext/js/language/text-scanner.js b/ext/js/language/text-scanner.js
index 64333093..ad5ba12b 100644
--- a/ext/js/language/text-scanner.js
+++ b/ext/js/language/text-scanner.js
@@ -70,6 +70,8 @@ export class TextScanner extends EventDispatcher {
this._includeSelector = null;
/** @type {?string} */
this._excludeSelector = null;
+ /** @type {?string} */
+ this._language = null;
/** @type {?import('text-scanner').InputInfo} */
this._inputInfoCurrent = null;
@@ -188,6 +190,10 @@ export class TextScanner extends EventDispatcher {
this._excludeSelector = value;
}
+ /**
+ * Language forwarded to the API's `isTextLookupWorthy` check by `_isTextLookupWorthy`.
+ * While this is `null`, `_isTextLookupWorthy` resolves to `false` without calling the API.
+ * @type {?string}
+ */
+ get language() { return this._language; }
+ set language(value) { this._language = value; } // Stored as-is; no validation is performed here.
+
/** */
prepare() {
this._isPrepared = true;
@@ -449,7 +455,7 @@ export class TextScanner extends EventDispatcher {
const result = await this._findDictionaryEntries(textSource, searchTerms, searchKanji, optionsContext);
if (result !== null) {
({dictionaryEntries, sentence, type} = result);
- } else if (textSource !== null && textSource instanceof TextSourceElement && await this._hasJapanese(textSource.fullContent)) {
+ } else if (textSource !== null && textSource instanceof TextSourceElement && await this._isTextLookupWorthy(textSource.fullContent)) {
dictionaryEntries = [];
sentence = {text: '', offset: 0};
}
@@ -1549,9 +1555,9 @@ export class TextScanner extends EventDispatcher {
* @param {string} text
* @returns {Promise<boolean>}
*/
- async _hasJapanese(text) {
+ async _isTextLookupWorthy(text) {
try {
- return await this._api.textHasJapaneseCharacters(text);
+ return this._language !== null && await this._api.isTextLookupWorthy(text, this._language);
} catch (e) {
return false;
}
diff --git a/ext/js/language/text-utilities.js b/ext/js/language/text-utilities.js
new file mode 100644
index 00000000..ca8958aa
--- /dev/null
+++ b/ext/js/language/text-utilities.js
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {isStringPartiallyJapanese} from './ja/japanese.js';
+
+/**
+ * Returns the language that the string might be by using some heuristic checks.
+ * Values returned are ISO codes. `null` is returned if no language can be determined.
+ * The only check performed here is `isStringPartiallyJapanese`, so `'ja'` is the only code this can return.
+ * @param {string} text The text to inspect.
+ * @returns {?string} `'ja'` when `isStringPartiallyJapanese(text)` is truthy, otherwise `null`.
+ */
+export function getLanguageFromText(text) {
+ if (isStringPartiallyJapanese(text)) { return 'ja'; }
+ return null; // No other language is tested for.
+}