summary refs log tree commit diff
path: root/ext
diff options
context:
space:
mode:
Diffstat (limited to 'ext')
-rw-r--r-- ext/js/background/backend.js 13
-rw-r--r-- ext/js/background/offscreen-proxy.js 6
-rw-r--r-- ext/js/background/offscreen.js 4
-rw-r--r-- ext/js/language/en/english-transforms.json 39
-rw-r--r-- ext/js/language/ja/japanese-transforms.json (renamed from ext/data/language/japanese-transforms.json) 0
-rw-r--r-- ext/js/language/language-descriptors.js 6
-rw-r--r-- ext/js/language/language-transformer.js 50
-rwxr-xr-x ext/js/language/languages.js 4
-rw-r--r-- ext/js/language/multi-language-transformer.js 77
-rw-r--r-- ext/js/language/translator.js 34
10 files changed, 181 insertions, 52 deletions
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js
index 6340d021..dbcbdd62 100644
--- a/ext/js/background/backend.js
+++ b/ext/js/background/backend.js
@@ -274,9 +274,16 @@ export class Backend {
log.error(e);
}
- /** @type {import('language-transformer').LanguageTransformDescriptor} */
- const descriptor = await fetchJson('/data/language/japanese-transforms.json');
- void this._translator.prepare(descriptor);
+ /** @type {import('language-transformer').LanguageTransformDescriptor[]} */
+ const descriptors = [];
+ const languageSummaries = getLanguageSummaries();
+ for (const {languageTransformsFile} of languageSummaries) {
+ if (!languageTransformsFile) { continue; }
+ /** @type {import('language-transformer').LanguageTransformDescriptor} */
+ const descriptor = await fetchJson(languageTransformsFile);
+ descriptors.push(descriptor);
+ }
+ void this._translator.prepare(descriptors);
await this._optionsUtil.prepare();
this._defaultAnkiFieldTemplates = (await fetchText('/data/templates/default-anki-field-templates.handlebars')).trim();
diff --git a/ext/js/background/offscreen-proxy.js b/ext/js/background/offscreen-proxy.js
index c888fe29..102a9eed 100644
--- a/ext/js/background/offscreen-proxy.js
+++ b/ext/js/background/offscreen-proxy.js
@@ -184,10 +184,10 @@ export class TranslatorProxy {
}
/**
- * @param {import('language-transformer').LanguageTransformDescriptor} descriptor
+ * @param {import('language-transformer').LanguageTransformDescriptor[]} descriptors
*/
- async prepare(descriptor) {
- await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptor}});
+ async prepare(descriptors) {
+ await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptors}});
}
/**
diff --git a/ext/js/background/offscreen.js b/ext/js/background/offscreen.js
index 3b8b6a3e..e38d4ba3 100644
--- a/ext/js/background/offscreen.js
+++ b/ext/js/background/offscreen.js
@@ -111,8 +111,8 @@ export class Offscreen {
}
/** @type {import('offscreen').ApiHandler<'translatorPrepareOffscreen'>} */
- _prepareTranslatorHandler({descriptor}) {
- this._translator.prepare(descriptor);
+ _prepareTranslatorHandler({descriptors}) {
+ this._translator.prepare(descriptors);
}
/** @type {import('offscreen').ApiHandler<'findKanjiOffscreen'>} */
diff --git a/ext/js/language/en/english-transforms.json b/ext/js/language/en/english-transforms.json
new file mode 100644
index 00000000..623a4ddf
--- /dev/null
+++ b/ext/js/language/en/english-transforms.json
@@ -0,0 +1,39 @@
+{
+ "language": "en",
+ "conditions": {
+ "v": {
+ "name": "Verb",
+ "isDictionaryForm": true
+ },
+ "n": {
+ "name": "Noun",
+ "isDictionaryForm": true,
+ "subConditions": ["np", "ns"]
+ },
+ "np": {
+ "name": "Noun plural",
+ "isDictionaryForm": true
+ },
+ "ns": {
+ "name": "Noun singular",
+ "isDictionaryForm": true
+ },
+ "adj": {
+ "name": "Adjective",
+ "isDictionaryForm": true
+ }
+ },
+ "transforms": [
+ {
+ "name": "plural",
+ "description": "Plural form of a noun",
+ "rules": [
+ {"suffixIn": "s", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
+ {"suffixIn": "es", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
+ {"suffixIn": "ies", "suffixOut": "y", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
+ {"suffixIn": "ves", "suffixOut": "fe", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
+ {"suffixIn": "ves", "suffixOut": "f", "conditionsIn": ["np"], "conditionsOut": ["ns"]}
+ ]
+ }
+ ]
+}
diff --git a/ext/data/language/japanese-transforms.json b/ext/js/language/ja/japanese-transforms.json
index 0812f01e..0812f01e 100644
--- a/ext/data/language/japanese-transforms.json
+++ b/ext/js/language/ja/japanese-transforms.json
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index d78a96e5..6675f78e 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -57,7 +57,8 @@ const languageDescriptors = [
iso: 'en',
name: 'English',
exampleText: 'read',
- textPreprocessors: capitalizationPreprocessors
+ textPreprocessors: capitalizationPreprocessors,
+ languageTransformsFile: '/js/language/en/english-transforms.json'
},
{
iso: 'es',
@@ -122,7 +123,8 @@ const languageDescriptors = [
convertAlphabeticCharacters,
convertHiraganaToKatakana,
collapseEmphaticSequences
- }
+ },
+ languageTransformsFile: '/js/language/ja/japanese-transforms.json'
},
{
iso: 'km',
diff --git a/ext/js/language/language-transformer.js b/ext/js/language/language-transformer.js
index 08a2166f..8a82e4d2 100644
--- a/ext/js/language/language-transformer.js
+++ b/ext/js/language/language-transformer.js
@@ -77,11 +77,11 @@ export class LanguageTransformer {
this._transforms.push(transform);
}
- for (const [type, condition] of conditionEntries) {
+ for (const [type, {isDictionaryForm}] of conditionEntries) {
const flags = conditionFlagsMap.get(type);
if (typeof flags === 'undefined') { continue; } // This case should never happen
this._conditionTypeToConditionFlagsMap.set(type, flags);
- if (condition.isDictionaryForm) {
+ if (isDictionaryForm) {
this._partOfSpeechToConditionFlagsMap.set(type, flags);
}
}
@@ -116,7 +116,7 @@ export class LanguageTransformer {
* @returns {import('language-transformer-internal').TransformedText[]}
*/
transform(sourceText) {
- const results = [this._createTransformedText(sourceText, 0, [])];
+ const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])];
for (let i = 0; i < results.length; ++i) {
const {text, conditions, trace} = results[i];
for (const transform of this._transforms) {
@@ -128,7 +128,7 @@ export class LanguageTransformer {
if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
const {suffixIn, suffixOut} = rule;
if (!text.endsWith(suffixIn) || (text.length - suffixIn.length + suffixOut.length) <= 0) { continue; }
- results.push(this._createTransformedText(
+ results.push(LanguageTransformer.createTransformedText(
text.substring(0, text.length - suffixIn.length) + suffixOut,
rule.conditionsOut,
this._extendTrace(trace, {transform: name, ruleIndex: j})
@@ -140,6 +140,27 @@ export class LanguageTransformer {
}
/**
+ * @param {string} text
+ * @param {number} conditions
+ * @param {import('language-transformer-internal').Trace} trace
+ * @returns {import('language-transformer-internal').TransformedText}
+ */
+ static createTransformedText(text, conditions, trace) {
+ return {text, conditions, trace};
+ }
+
+ /**
+ * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
+ * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`.
+ * @param {number} currentConditions
+ * @param {number} nextConditions
+ * @returns {boolean}
+ */
+ static conditionsMatch(currentConditions, nextConditions) {
+ return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
+ }
+
+ /**
* @param {import('language-transformer').ConditionMapEntries} conditions
* @param {number} nextFlagIndex
* @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}}
@@ -218,16 +239,6 @@ export class LanguageTransformer {
}
/**
- * @param {string} text
- * @param {number} conditions
- * @param {import('language-transformer-internal').Trace} trace
- * @returns {import('language-transformer-internal').TransformedText}
- */
- _createTransformedText(text, conditions, trace) {
- return {text, conditions, trace};
- }
-
- /**
* @param {import('language-transformer-internal').Trace} trace
* @param {import('language-transformer-internal').TraceFrame} newFrame
* @returns {import('language-transformer-internal').Trace}
@@ -239,15 +250,4 @@ export class LanguageTransformer {
}
return newTrace;
}
-
- /**
- * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
- * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`.
- * @param {number} currentConditions
- * @param {number} nextConditions
- * @returns {boolean}
- */
- static conditionsMatch(currentConditions, nextConditions) {
- return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
- }
}
diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js
index fd58477d..63bbc791 100755
--- a/ext/js/language/languages.js
+++ b/ext/js/language/languages.js
@@ -22,8 +22,8 @@ import {languageDescriptorMap} from './language-descriptors.js';
*/
export function getLanguageSummaries() {
const results = [];
- for (const {name, iso, exampleText} of languageDescriptorMap.values()) {
- results.push({name, iso, exampleText});
+ for (const {name, iso, exampleText, languageTransformsFile} of languageDescriptorMap.values()) {
+ results.push({name, iso, exampleText, languageTransformsFile});
}
return results;
}
diff --git a/ext/js/language/multi-language-transformer.js b/ext/js/language/multi-language-transformer.js
new file mode 100644
index 00000000..692e1718
--- /dev/null
+++ b/ext/js/language/multi-language-transformer.js
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {LanguageTransformer} from './language-transformer.js';
+
+export class MultiLanguageTransformer {
+ constructor() {
+ /** @type {Map<string, LanguageTransformer>} */
+ this._languageTransformers = new Map();
+ }
+
+ /**
+ * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors
+ */
+ prepare(languageTransformDescriptors) {
+ for (const descriptor of languageTransformDescriptors) {
+ const languageTransformer = new LanguageTransformer();
+ languageTransformer.addDescriptor(descriptor);
+ this._languageTransformers.set(descriptor.language, languageTransformer);
+ }
+ }
+
+ /**
+ * @param {string} language
+ * @param {string[]} partsOfSpeech
+ * @returns {number}
+ */
+ getConditionFlagsFromPartsOfSpeech(language, partsOfSpeech) {
+ const languageTransformer = this._languageTransformers.get(language);
+ return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromPartsOfSpeech(partsOfSpeech) : 0;
+ }
+
+ /**
+ * @param {string} language
+ * @param {string[]} conditionTypes
+ * @returns {number}
+ */
+ getConditionFlagsFromConditionTypes(language, conditionTypes) {
+ const languageTransformer = this._languageTransformers.get(language);
+ return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionTypes(conditionTypes) : 0;
+ }
+
+ /**
+ * @param {string} language
+ * @param {string} conditionType
+ * @returns {number}
+ */
+ getConditionFlagsFromConditionType(language, conditionType) {
+ const languageTransformer = this._languageTransformers.get(language);
+ return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionType(conditionType) : 0;
+ }
+
+ /**
+ * @param {string} language
+ * @param {string} sourceText
+ * @returns {import('language-transformer-internal').TransformedText[]}
+ */
+ transform(language, sourceText) {
+ const languageTransformer = this._languageTransformers.get(language);
+ if (typeof languageTransformer === 'undefined') { return [LanguageTransformer.createTransformedText(sourceText, 0, [])]; }
+ return languageTransformer.transform(sourceText);
+ }
+}
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index b10af226..04319e60 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -21,6 +21,7 @@ import {TextSourceMap} from '../general/text-source-map.js';
import {isCodePointJapanese} from './ja/japanese.js';
import {LanguageTransformer} from './language-transformer.js';
import {getAllLanguageTextPreprocessors} from './languages.js';
+import {MultiLanguageTransformer} from './multi-language-transformer.js';
/**
* Class which finds term and kanji dictionary entries for text.
@@ -32,8 +33,8 @@ export class Translator {
constructor(database) {
/** @type {import('../dictionary/dictionary-database.js').DictionaryDatabase} */
this._database = database;
- /** @type {LanguageTransformer} */
- this._languageTransformer = new LanguageTransformer();
+ /** @type {MultiLanguageTransformer} */
+ this._multiLanguageTransformer = new MultiLanguageTransformer();
/** @type {import('translator').DictionaryTagCache} */
this._tagCache = new Map();
/** @type {Intl.Collator} */
@@ -46,10 +47,10 @@ export class Translator {
/**
* Initializes the instance for use. The public API should not be used until this function has been called.
- * @param {import('language-transformer').LanguageTransformDescriptor} descriptor
+ * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors
*/
- prepare(descriptor) {
- this._languageTransformer.addDescriptor(descriptor);
+ prepare(languageTransformDescriptors) {
+ this._multiLanguageTransformer.prepare(languageTransformDescriptors);
for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) {
/** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */
const optionSpace = new Map();
@@ -316,11 +317,11 @@ export class Translator {
);
if (deinflections.length === 0) { return []; }
- const {matchType} = options;
+ const {matchType, language} = options;
- await this._addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType);
+ await this._addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType);
- const dictionaryDeinflections = await this._getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType);
+ const dictionaryDeinflections = await this._getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType);
deinflections.push(...dictionaryDeinflections);
for (const deinflection of deinflections) {
@@ -335,12 +336,13 @@ export class Translator {
}
/**
+ * @param {string} language
* @param {import('translation-internal').DatabaseDeinflection[]} deinflections
* @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap
* @param {import('dictionary').TermSourceMatchType} matchType
* @returns {Promise<import('translation-internal').DatabaseDeinflection[]>}
*/
- async _getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType) {
+ async _getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType) {
/** @type {import('translation-internal').DatabaseDeinflection[]} */
const dictionaryDeinflections = [];
for (const deinflection of deinflections) {
@@ -369,23 +371,24 @@ export class Translator {
}
}
- await this._addEntriesToDeinflections(dictionaryDeinflections, enabledDictionaryMap, matchType);
+ await this._addEntriesToDeinflections(language, dictionaryDeinflections, enabledDictionaryMap, matchType);
return dictionaryDeinflections;
}
/**
+ * @param {string} language
* @param {import('translation-internal').DatabaseDeinflection[]} deinflections
* @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap
* @param {import('dictionary').TermSourceMatchType} matchType
*/
- async _addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType) {
+ async _addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType) {
const uniqueDeinflectionsMap = this._groupDeinflectionsByTerm(deinflections);
const uniqueDeinflectionArrays = [...uniqueDeinflectionsMap.values()];
const uniqueDeinflectionTerms = [...uniqueDeinflectionsMap.keys()];
const databaseEntries = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, matchType);
- this._matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap);
+ this._matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap);
}
/**
@@ -407,16 +410,17 @@ export class Translator {
}
/**
+ * @param {string} language
* @param {import('dictionary-database').TermEntry[]} databaseEntries
* @param {import('translation-internal').DatabaseDeinflection[][]} uniqueDeinflectionArrays
* @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap
*/
- _matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) {
+ _matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) {
for (const databaseEntry of databaseEntries) {
const entryDictionary = /** @type {import('translation').FindTermDictionary} */ (enabledDictionaryMap.get(databaseEntry.dictionary));
const {partsOfSpeechFilter} = entryDictionary;
- const definitionConditions = this._languageTransformer.getConditionFlagsFromPartsOfSpeech(databaseEntry.rules);
+ const definitionConditions = this._multiLanguageTransformer.getConditionFlagsFromPartsOfSpeech(language, databaseEntry.rules);
for (const deinflection of uniqueDeinflectionArrays[databaseEntry.index]) {
if (!partsOfSpeechFilter || LanguageTransformer.conditionsMatch(deinflection.conditions, definitionConditions)) {
deinflection.databaseEntries.push(databaseEntry);
@@ -475,7 +479,7 @@ export class Translator {
if (used.has(source)) { break; }
used.add(source);
const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
- for (const {text: transformedText, conditions, trace} of this._languageTransformer.transform(source)) {
+ for (const {text: transformedText, conditions, trace} of this._multiLanguageTransformer.transform(language, source)) {
/** @type {import('dictionary').InflectionRuleChainCandidate} */
const inflectionRuleChainCandidate = {
source: 'algorithm',