aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefanVukovic99 <stefanvukovic44@gmail.com>2024-02-27 13:16:21 +0100
committerGitHub <noreply@github.com>2024-02-27 12:16:21 +0000
commit0792954e345925b3e3b2ebb733367e138375978e (patch)
treea9f169cf7dc2de396d6e46c9d8c418f9870d9301
parentc4fea2251606fb0b3d0d761a544dac578f3d9048 (diff)
make deinflections language-specific (#720)
* abstract deinflections * undo redundant changes * remove cast * MultiLanguageTransformer * comments * comments
-rw-r--r--.eslintrc.json1
-rw-r--r--benches/language-transformer.bench.js2
-rw-r--r--ext/js/background/backend.js13
-rw-r--r--ext/js/background/offscreen-proxy.js6
-rw-r--r--ext/js/background/offscreen.js4
-rw-r--r--ext/js/language/en/english-transforms.json39
-rw-r--r--ext/js/language/ja/japanese-transforms.json (renamed from ext/data/language/japanese-transforms.json)0
-rw-r--r--ext/js/language/language-descriptors.js6
-rw-r--r--ext/js/language/language-transformer.js50
-rwxr-xr-xext/js/language/languages.js4
-rw-r--r--ext/js/language/multi-language-transformer.js77
-rw-r--r--ext/js/language/translator.js34
-rw-r--r--test/data/json.json7
-rw-r--r--test/fixtures/translator-test.js4
-rw-r--r--test/language-transformer-cycles.test.js17
-rw-r--r--test/language-transformer.test.js2
-rw-r--r--types/ext/language-descriptors.d.ts1
-rw-r--r--types/ext/language-transformer-internal.d.ts8
-rw-r--r--types/ext/language.d.ts1
-rw-r--r--types/ext/offscreen.d.ts2
20 files changed, 215 insertions, 63 deletions
diff --git a/.eslintrc.json b/.eslintrc.json
index 63cc6b7e..86fa06e0 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -648,6 +648,7 @@
"ext/js/language/language-descriptors.js",
"ext/js/language/language-transformer.js",
"ext/js/language/languages.js",
+ "ext/js/language/multi-language-transformer.js",
"ext/js/language/ru/russian-text-preprocessors.js",
"ext/js/language/text-preprocessors.js",
"ext/js/language/translator.js",
diff --git a/benches/language-transformer.bench.js b/benches/language-transformer.bench.js
index 8212ae86..071d5627 100644
--- a/benches/language-transformer.bench.js
+++ b/benches/language-transformer.bench.js
@@ -25,7 +25,7 @@ import {LanguageTransformer} from '../ext/js/language/language-transformer.js';
const dirname = path.dirname(fileURLToPath(import.meta.url));
/** @type {import('language-transformer').LanguageTransformDescriptor} */
-const descriptor = parseJson(fs.readFileSync(path.join(dirname, '..', 'ext', 'data/language/japanese-transforms.json'), {encoding: 'utf8'}));
+const descriptor = parseJson(fs.readFileSync(path.join(dirname, '..', 'ext', 'js/language/ja/japanese-transforms.json'), {encoding: 'utf8'}));
const languageTransformer = new LanguageTransformer();
languageTransformer.addDescriptor(descriptor);
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js
index 6340d021..dbcbdd62 100644
--- a/ext/js/background/backend.js
+++ b/ext/js/background/backend.js
@@ -274,9 +274,16 @@ export class Backend {
log.error(e);
}
- /** @type {import('language-transformer').LanguageTransformDescriptor} */
- const descriptor = await fetchJson('/data/language/japanese-transforms.json');
- void this._translator.prepare(descriptor);
+ /** @type {import('language-transformer').LanguageTransformDescriptor[]} */
+ const descriptors = [];
+ const languageSummaries = getLanguageSummaries();
+ for (const {languageTransformsFile} of languageSummaries) {
+ if (!languageTransformsFile) { continue; }
+ /** @type {import('language-transformer').LanguageTransformDescriptor} */
+ const descriptor = await fetchJson(languageTransformsFile);
+ descriptors.push(descriptor);
+ }
+ void this._translator.prepare(descriptors);
await this._optionsUtil.prepare();
this._defaultAnkiFieldTemplates = (await fetchText('/data/templates/default-anki-field-templates.handlebars')).trim();
diff --git a/ext/js/background/offscreen-proxy.js b/ext/js/background/offscreen-proxy.js
index c888fe29..102a9eed 100644
--- a/ext/js/background/offscreen-proxy.js
+++ b/ext/js/background/offscreen-proxy.js
@@ -184,10 +184,10 @@ export class TranslatorProxy {
}
/**
- * @param {import('language-transformer').LanguageTransformDescriptor} descriptor
+ * @param {import('language-transformer').LanguageTransformDescriptor[]} descriptors
*/
- async prepare(descriptor) {
- await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptor}});
+ async prepare(descriptors) {
+ await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptors}});
}
/**
diff --git a/ext/js/background/offscreen.js b/ext/js/background/offscreen.js
index 3b8b6a3e..e38d4ba3 100644
--- a/ext/js/background/offscreen.js
+++ b/ext/js/background/offscreen.js
@@ -111,8 +111,8 @@ export class Offscreen {
}
/** @type {import('offscreen').ApiHandler<'translatorPrepareOffscreen'>} */
- _prepareTranslatorHandler({descriptor}) {
- this._translator.prepare(descriptor);
+ _prepareTranslatorHandler({descriptors}) {
+ this._translator.prepare(descriptors);
}
/** @type {import('offscreen').ApiHandler<'findKanjiOffscreen'>} */
diff --git a/ext/js/language/en/english-transforms.json b/ext/js/language/en/english-transforms.json
new file mode 100644
index 00000000..623a4ddf
--- /dev/null
+++ b/ext/js/language/en/english-transforms.json
@@ -0,0 +1,39 @@
+{
+ "language": "en",
+ "conditions": {
+ "v": {
+ "name": "Verb",
+ "isDictionaryForm": true
+ },
+ "n": {
+ "name": "Noun",
+ "isDictionaryForm": true,
+ "subConditions": ["np", "ns"]
+ },
+ "np": {
+ "name": "Noun plural",
+ "isDictionaryForm": true
+ },
+ "ns": {
+ "name": "Noun singular",
+ "isDictionaryForm": true
+ },
+ "adj": {
+ "name": "Adjective",
+ "isDictionaryForm": true
+ }
+ },
+ "transforms": [
+ {
+ "name": "plural",
+ "description": "Plural form of a noun",
+ "rules": [
+ {"suffixIn": "s", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
+ {"suffixIn": "es", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
+ {"suffixIn": "ies", "suffixOut": "y", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
+ {"suffixIn": "ves", "suffixOut": "fe", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
+ {"suffixIn": "ves", "suffixOut": "f", "conditionsIn": ["np"], "conditionsOut": ["ns"]}
+ ]
+ }
+ ]
+}
diff --git a/ext/data/language/japanese-transforms.json b/ext/js/language/ja/japanese-transforms.json
index 0812f01e..0812f01e 100644
--- a/ext/data/language/japanese-transforms.json
+++ b/ext/js/language/ja/japanese-transforms.json
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index d78a96e5..6675f78e 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -57,7 +57,8 @@ const languageDescriptors = [
iso: 'en',
name: 'English',
exampleText: 'read',
- textPreprocessors: capitalizationPreprocessors
+ textPreprocessors: capitalizationPreprocessors,
+ languageTransformsFile: '/js/language/en/english-transforms.json'
},
{
iso: 'es',
@@ -122,7 +123,8 @@ const languageDescriptors = [
convertAlphabeticCharacters,
convertHiraganaToKatakana,
collapseEmphaticSequences
- }
+ },
+ languageTransformsFile: '/js/language/ja/japanese-transforms.json'
},
{
iso: 'km',
diff --git a/ext/js/language/language-transformer.js b/ext/js/language/language-transformer.js
index 08a2166f..8a82e4d2 100644
--- a/ext/js/language/language-transformer.js
+++ b/ext/js/language/language-transformer.js
@@ -77,11 +77,11 @@ export class LanguageTransformer {
this._transforms.push(transform);
}
- for (const [type, condition] of conditionEntries) {
+ for (const [type, {isDictionaryForm}] of conditionEntries) {
const flags = conditionFlagsMap.get(type);
if (typeof flags === 'undefined') { continue; } // This case should never happen
this._conditionTypeToConditionFlagsMap.set(type, flags);
- if (condition.isDictionaryForm) {
+ if (isDictionaryForm) {
this._partOfSpeechToConditionFlagsMap.set(type, flags);
}
}
@@ -116,7 +116,7 @@ export class LanguageTransformer {
* @returns {import('language-transformer-internal').TransformedText[]}
*/
transform(sourceText) {
- const results = [this._createTransformedText(sourceText, 0, [])];
+ const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])];
for (let i = 0; i < results.length; ++i) {
const {text, conditions, trace} = results[i];
for (const transform of this._transforms) {
@@ -128,7 +128,7 @@ export class LanguageTransformer {
if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
const {suffixIn, suffixOut} = rule;
if (!text.endsWith(suffixIn) || (text.length - suffixIn.length + suffixOut.length) <= 0) { continue; }
- results.push(this._createTransformedText(
+ results.push(LanguageTransformer.createTransformedText(
text.substring(0, text.length - suffixIn.length) + suffixOut,
rule.conditionsOut,
this._extendTrace(trace, {transform: name, ruleIndex: j})
@@ -140,6 +140,27 @@ export class LanguageTransformer {
}
/**
+ * @param {string} text
+ * @param {number} conditions
+ * @param {import('language-transformer-internal').Trace} trace
+ * @returns {import('language-transformer-internal').TransformedText}
+ */
+ static createTransformedText(text, conditions, trace) {
+ return {text, conditions, trace};
+ }
+
+ /**
+ * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
+ * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`.
+ * @param {number} currentConditions
+ * @param {number} nextConditions
+ * @returns {boolean}
+ */
+ static conditionsMatch(currentConditions, nextConditions) {
+ return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
+ }
+
+ /**
* @param {import('language-transformer').ConditionMapEntries} conditions
* @param {number} nextFlagIndex
* @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}}
@@ -218,16 +239,6 @@ export class LanguageTransformer {
}
/**
- * @param {string} text
- * @param {number} conditions
- * @param {import('language-transformer-internal').Trace} trace
- * @returns {import('language-transformer-internal').TransformedText}
- */
- _createTransformedText(text, conditions, trace) {
- return {text, conditions, trace};
- }
-
- /**
* @param {import('language-transformer-internal').Trace} trace
* @param {import('language-transformer-internal').TraceFrame} newFrame
* @returns {import('language-transformer-internal').Trace}
@@ -239,15 +250,4 @@ export class LanguageTransformer {
}
return newTrace;
}
-
- /**
- * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
- * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`.
- * @param {number} currentConditions
- * @param {number} nextConditions
- * @returns {boolean}
- */
- static conditionsMatch(currentConditions, nextConditions) {
- return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
- }
}
diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js
index fd58477d..63bbc791 100755
--- a/ext/js/language/languages.js
+++ b/ext/js/language/languages.js
@@ -22,8 +22,8 @@ import {languageDescriptorMap} from './language-descriptors.js';
*/
export function getLanguageSummaries() {
const results = [];
- for (const {name, iso, exampleText} of languageDescriptorMap.values()) {
- results.push({name, iso, exampleText});
+ for (const {name, iso, exampleText, languageTransformsFile} of languageDescriptorMap.values()) {
+ results.push({name, iso, exampleText, languageTransformsFile});
}
return results;
}
diff --git a/ext/js/language/multi-language-transformer.js b/ext/js/language/multi-language-transformer.js
new file mode 100644
index 00000000..692e1718
--- /dev/null
+++ b/ext/js/language/multi-language-transformer.js
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {LanguageTransformer} from './language-transformer.js';
+
+export class MultiLanguageTransformer {
+ constructor() {
+ /** @type {Map<string, LanguageTransformer>} */
+ this._languageTransformers = new Map();
+ }
+
+ /**
+ * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors
+ */
+ prepare(languageTransformDescriptors) {
+ for (const descriptor of languageTransformDescriptors) {
+ const languageTransformer = new LanguageTransformer();
+ languageTransformer.addDescriptor(descriptor);
+ this._languageTransformers.set(descriptor.language, languageTransformer);
+ }
+ }
+
+ /**
+ * @param {string} language
+ * @param {string[]} partsOfSpeech
+ * @returns {number}
+ */
+ getConditionFlagsFromPartsOfSpeech(language, partsOfSpeech) {
+ const languageTransformer = this._languageTransformers.get(language);
+ return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromPartsOfSpeech(partsOfSpeech) : 0;
+ }
+
+ /**
+ * @param {string} language
+ * @param {string[]} conditionTypes
+ * @returns {number}
+ */
+ getConditionFlagsFromConditionTypes(language, conditionTypes) {
+ const languageTransformer = this._languageTransformers.get(language);
+ return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionTypes(conditionTypes) : 0;
+ }
+
+ /**
+ * @param {string} language
+ * @param {string} conditionType
+ * @returns {number}
+ */
+ getConditionFlagsFromConditionType(language, conditionType) {
+ const languageTransformer = this._languageTransformers.get(language);
+ return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionType(conditionType) : 0;
+ }
+
+ /**
+ * @param {string} language
+ * @param {string} sourceText
+ * @returns {import('language-transformer-internal').TransformedText[]}
+ */
+ transform(language, sourceText) {
+ const languageTransformer = this._languageTransformers.get(language);
+ if (typeof languageTransformer === 'undefined') { return [LanguageTransformer.createTransformedText(sourceText, 0, [])]; }
+ return languageTransformer.transform(sourceText);
+ }
+}
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index b10af226..04319e60 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -21,6 +21,7 @@ import {TextSourceMap} from '../general/text-source-map.js';
import {isCodePointJapanese} from './ja/japanese.js';
import {LanguageTransformer} from './language-transformer.js';
import {getAllLanguageTextPreprocessors} from './languages.js';
+import {MultiLanguageTransformer} from './multi-language-transformer.js';
/**
* Class which finds term and kanji dictionary entries for text.
@@ -32,8 +33,8 @@ export class Translator {
constructor(database) {
/** @type {import('../dictionary/dictionary-database.js').DictionaryDatabase} */
this._database = database;
- /** @type {LanguageTransformer} */
- this._languageTransformer = new LanguageTransformer();
+ /** @type {MultiLanguageTransformer} */
+ this._multiLanguageTransformer = new MultiLanguageTransformer();
/** @type {import('translator').DictionaryTagCache} */
this._tagCache = new Map();
/** @type {Intl.Collator} */
@@ -46,10 +47,10 @@ export class Translator {
/**
* Initializes the instance for use. The public API should not be used until this function has been called.
- * @param {import('language-transformer').LanguageTransformDescriptor} descriptor
+ * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors
*/
- prepare(descriptor) {
- this._languageTransformer.addDescriptor(descriptor);
+ prepare(languageTransformDescriptors) {
+ this._multiLanguageTransformer.prepare(languageTransformDescriptors);
for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) {
/** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */
const optionSpace = new Map();
@@ -316,11 +317,11 @@ export class Translator {
);
if (deinflections.length === 0) { return []; }
- const {matchType} = options;
+ const {matchType, language} = options;
- await this._addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType);
+ await this._addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType);
- const dictionaryDeinflections = await this._getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType);
+ const dictionaryDeinflections = await this._getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType);
deinflections.push(...dictionaryDeinflections);
for (const deinflection of deinflections) {
@@ -335,12 +336,13 @@ export class Translator {
}
/**
+ * @param {string} language
* @param {import('translation-internal').DatabaseDeinflection[]} deinflections
* @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap
* @param {import('dictionary').TermSourceMatchType} matchType
* @returns {Promise<import('translation-internal').DatabaseDeinflection[]>}
*/
- async _getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType) {
+ async _getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType) {
/** @type {import('translation-internal').DatabaseDeinflection[]} */
const dictionaryDeinflections = [];
for (const deinflection of deinflections) {
@@ -369,23 +371,24 @@ export class Translator {
}
}
- await this._addEntriesToDeinflections(dictionaryDeinflections, enabledDictionaryMap, matchType);
+ await this._addEntriesToDeinflections(language, dictionaryDeinflections, enabledDictionaryMap, matchType);
return dictionaryDeinflections;
}
/**
+ * @param {string} language
* @param {import('translation-internal').DatabaseDeinflection[]} deinflections
* @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap
* @param {import('dictionary').TermSourceMatchType} matchType
*/
- async _addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType) {
+ async _addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType) {
const uniqueDeinflectionsMap = this._groupDeinflectionsByTerm(deinflections);
const uniqueDeinflectionArrays = [...uniqueDeinflectionsMap.values()];
const uniqueDeinflectionTerms = [...uniqueDeinflectionsMap.keys()];
const databaseEntries = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, matchType);
- this._matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap);
+ this._matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap);
}
/**
@@ -407,16 +410,17 @@ export class Translator {
}
/**
+ * @param {string} language
* @param {import('dictionary-database').TermEntry[]} databaseEntries
* @param {import('translation-internal').DatabaseDeinflection[][]} uniqueDeinflectionArrays
* @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap
*/
- _matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) {
+ _matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) {
for (const databaseEntry of databaseEntries) {
const entryDictionary = /** @type {import('translation').FindTermDictionary} */ (enabledDictionaryMap.get(databaseEntry.dictionary));
const {partsOfSpeechFilter} = entryDictionary;
- const definitionConditions = this._languageTransformer.getConditionFlagsFromPartsOfSpeech(databaseEntry.rules);
+ const definitionConditions = this._multiLanguageTransformer.getConditionFlagsFromPartsOfSpeech(language, databaseEntry.rules);
for (const deinflection of uniqueDeinflectionArrays[databaseEntry.index]) {
if (!partsOfSpeechFilter || LanguageTransformer.conditionsMatch(deinflection.conditions, definitionConditions)) {
deinflection.databaseEntries.push(databaseEntry);
@@ -475,7 +479,7 @@ export class Translator {
if (used.has(source)) { break; }
used.add(source);
const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
- for (const {text: transformedText, conditions, trace} of this._languageTransformer.transform(source)) {
+ for (const {text: transformedText, conditions, trace} of this._multiLanguageTransformer.transform(language, source)) {
/** @type {import('dictionary').InflectionRuleChainCandidate} */
const inflectionRuleChainCandidate = {
source: 'algorithm',
diff --git a/test/data/json.json b/test/data/json.json
index c7c1d3aa..16ca6f01 100644
--- a/test/data/json.json
+++ b/test/data/json.json
@@ -93,7 +93,12 @@
"type": "AjvSchema"
},
{
- "path": "ext/data/language/japanese-transforms.json",
+ "path": "ext/js/language/ja/japanese-transforms.json",
+ "typeFile": "types/ext/language-transformer.d.ts",
+ "type": "LanguageTransformDescriptor"
+ },
+ {
+ "path": "ext/js/language/en/english-transforms.json",
"typeFile": "types/ext/language-transformer.d.ts",
"type": "LanguageTransformDescriptor"
},
diff --git a/test/fixtures/translator-test.js b/test/fixtures/translator-test.js
index ff3782a7..6b6eaad8 100644
--- a/test/fixtures/translator-test.js
+++ b/test/fixtures/translator-test.js
@@ -31,7 +31,7 @@ import {DictionaryImporterMediaLoader} from '../mocks/dictionary-importer-media-
import {createDomTest} from './dom-test.js';
const extDir = join(dirname(fileURLToPath(import.meta.url)), '../../ext');
-const languageTransformDescriptorPath = join(extDir, 'data/language/japanese-transforms.json');
+const languageTransformDescriptorPath = join(extDir, 'js/language/ja/japanese-transforms.json');
vi.stubGlobal('indexedDB', indexedDB);
vi.stubGlobal('IDBKeyRange', IDBKeyRange);
@@ -65,7 +65,7 @@ export async function createTranslatorContext(dictionaryDirectory, dictionaryNam
const translator = new Translator(dictionaryDatabase);
/** @type {import('language-transformer').LanguageTransformDescriptor} */
const deinflectionReasons = parseJson(readFileSync(languageTransformDescriptorPath, {encoding: 'utf8'}));
- translator.prepare(deinflectionReasons);
+ translator.prepare([deinflectionReasons]);
return translator;
}
diff --git a/test/language-transformer-cycles.test.js b/test/language-transformer-cycles.test.js
index b778c188..751f65a6 100644
--- a/test/language-transformer-cycles.test.js
+++ b/test/language-transformer-cycles.test.js
@@ -21,6 +21,7 @@ import {fileURLToPath} from 'url';
import {describe, test} from 'vitest';
import {parseJson} from '../dev/json.js';
import {LanguageTransformer} from '../ext/js/language/language-transformer.js';
+import {getLanguageSummaries} from '../ext/js/language/languages.js';
class DeinflectionNode {
/**
@@ -103,12 +104,18 @@ function arraysAreEqual(rules1, rules2) {
return true;
}
-describe('Deinflection data', () => {
- test('Check for cycles', ({expect}) => {
- const dirname = pathDirname(fileURLToPath(import.meta.url));
+const dirname = pathDirname(fileURLToPath(import.meta.url));
+const descriptors = [];
+const languageSummaries = getLanguageSummaries();
+for (const {languageTransformsFile} of languageSummaries) {
+ if (!languageTransformsFile) { continue; }
+ /** @type {import('language-transformer').LanguageTransformDescriptor} */
+ const descriptor = parseJson(readFileSync(join(dirname, `../ext/${languageTransformsFile}`), {encoding: 'utf8'}));
+ descriptors.push(descriptor);
+}
- /** @type {import('language-transformer').LanguageTransformDescriptor} */
- const descriptor = parseJson(readFileSync(join(dirname, '../ext/data/language/japanese-transforms.json'), {encoding: 'utf8'}));
+describe.each(descriptors)('Cycles Test $language', (descriptor) => {
+ test('Check for cycles', ({expect}) => {
const languageTransformer = new LanguageTransformer();
languageTransformer.addDescriptor(descriptor);
diff --git a/test/language-transformer.test.js b/test/language-transformer.test.js
index 7c0da48b..b682f66b 100644
--- a/test/language-transformer.test.js
+++ b/test/language-transformer.test.js
@@ -1147,7 +1147,7 @@ describe('LanguageTransformer', () => {
/* eslint-enable @stylistic/no-multi-spaces */
/** @type {import('language-transformer').LanguageTransformDescriptor} */
- const descriptor = parseJson(fs.readFileSync(path.join(dirname, '..', 'ext', 'data/language/japanese-transforms.json'), {encoding: 'utf8'}));
+ const descriptor = parseJson(fs.readFileSync(path.join(dirname, '..', 'ext', 'js/language/ja/japanese-transforms.json'), {encoding: 'utf8'}));
const languageTransformer = new LanguageTransformer();
languageTransformer.addDescriptor(descriptor);
diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts
index ca457721..79668da4 100644
--- a/types/ext/language-descriptors.d.ts
+++ b/types/ext/language-descriptors.d.ts
@@ -32,6 +32,7 @@ type LanguageDescriptor<TIso extends string, TTextPreprocessorDescriptor extends
*/
isTextLookupWorthy?: IsTextLookupWorthyFunction;
textPreprocessors: TTextPreprocessorDescriptor;
+ languageTransformsFile?: string;
};
type TextPreprocessorDescriptor = {
diff --git a/types/ext/language-transformer-internal.d.ts b/types/ext/language-transformer-internal.d.ts
index c186eeca..fdabc0fb 100644
--- a/types/ext/language-transformer-internal.d.ts
+++ b/types/ext/language-transformer-internal.d.ts
@@ -40,3 +40,11 @@ export type TraceFrame = {
transform: string;
ruleIndex: number;
};
+
+export type ConditionTypeToConditionFlagsMap = Map<string, number>;
+
+export type LanguageTransformDescriptorInternal = {
+ transforms: Transform[];
+ conditionTypeToConditionFlagsMap: ConditionTypeToConditionFlagsMap;
+ partOfSpeechToConditionFlagsMap: ConditionTypeToConditionFlagsMap;
+};
diff --git a/types/ext/language.d.ts b/types/ext/language.d.ts
index 8e5a5c70..a50d3d4b 100644
--- a/types/ext/language.d.ts
+++ b/types/ext/language.d.ts
@@ -51,4 +51,5 @@ export type LanguageSummary = {
name: string;
iso: string;
exampleText: string;
+ languageTransformsFile?: string;
};
diff --git a/types/ext/offscreen.d.ts b/types/ext/offscreen.d.ts
index 443e0b64..777756b9 100644
--- a/types/ext/offscreen.d.ts
+++ b/types/ext/offscreen.d.ts
@@ -52,7 +52,7 @@ type ApiSurface = {
};
translatorPrepareOffscreen: {
params: {
- descriptor: LanguageTransformDescriptor;
+ descriptors: LanguageTransformDescriptor[];
};
return: void;
};