diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-02-27 13:16:21 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-27 12:16:21 +0000 |
commit | 0792954e345925b3e3b2ebb733367e138375978e (patch) | |
tree | a9f169cf7dc2de396d6e46c9d8c418f9870d9301 | |
parent | c4fea2251606fb0b3d0d761a544dac578f3d9048 (diff) |
make deinflections language-specific (#720)
* abstract deinflections
* undo redundant changes
* remove cast
* MultiLanguageTransformer
* comments
* comments
-rw-r--r-- | .eslintrc.json | 1 | ||||
-rw-r--r-- | benches/language-transformer.bench.js | 2 | ||||
-rw-r--r-- | ext/js/background/backend.js | 13 | ||||
-rw-r--r-- | ext/js/background/offscreen-proxy.js | 6 | ||||
-rw-r--r-- | ext/js/background/offscreen.js | 4 | ||||
-rw-r--r-- | ext/js/language/en/english-transforms.json | 39 | ||||
-rw-r--r-- | ext/js/language/ja/japanese-transforms.json (renamed from ext/data/language/japanese-transforms.json) | 0 | ||||
-rw-r--r-- | ext/js/language/language-descriptors.js | 6 | ||||
-rw-r--r-- | ext/js/language/language-transformer.js | 50 | ||||
-rwxr-xr-x | ext/js/language/languages.js | 4 | ||||
-rw-r--r-- | ext/js/language/multi-language-transformer.js | 77 | ||||
-rw-r--r-- | ext/js/language/translator.js | 34 | ||||
-rw-r--r-- | test/data/json.json | 7 | ||||
-rw-r--r-- | test/fixtures/translator-test.js | 4 | ||||
-rw-r--r-- | test/language-transformer-cycles.test.js | 17 | ||||
-rw-r--r-- | test/language-transformer.test.js | 2 | ||||
-rw-r--r-- | types/ext/language-descriptors.d.ts | 1 | ||||
-rw-r--r-- | types/ext/language-transformer-internal.d.ts | 8 | ||||
-rw-r--r-- | types/ext/language.d.ts | 1 | ||||
-rw-r--r-- | types/ext/offscreen.d.ts | 2 |
20 files changed, 215 insertions, 63 deletions
diff --git a/.eslintrc.json b/.eslintrc.json index 63cc6b7e..86fa06e0 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -648,6 +648,7 @@ "ext/js/language/language-descriptors.js", "ext/js/language/language-transformer.js", "ext/js/language/languages.js", + "ext/js/language/multi-language-transformer.js", "ext/js/language/ru/russian-text-preprocessors.js", "ext/js/language/text-preprocessors.js", "ext/js/language/translator.js", diff --git a/benches/language-transformer.bench.js b/benches/language-transformer.bench.js index 8212ae86..071d5627 100644 --- a/benches/language-transformer.bench.js +++ b/benches/language-transformer.bench.js @@ -25,7 +25,7 @@ import {LanguageTransformer} from '../ext/js/language/language-transformer.js'; const dirname = path.dirname(fileURLToPath(import.meta.url)); /** @type {import('language-transformer').LanguageTransformDescriptor} */ -const descriptor = parseJson(fs.readFileSync(path.join(dirname, '..', 'ext', 'data/language/japanese-transforms.json'), {encoding: 'utf8'})); +const descriptor = parseJson(fs.readFileSync(path.join(dirname, '..', 'ext', 'js/language/ja/japanese-transforms.json'), {encoding: 'utf8'})); const languageTransformer = new LanguageTransformer(); languageTransformer.addDescriptor(descriptor); diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index 6340d021..dbcbdd62 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -274,9 +274,16 @@ export class Backend { log.error(e); } - /** @type {import('language-transformer').LanguageTransformDescriptor} */ - const descriptor = await fetchJson('/data/language/japanese-transforms.json'); - void this._translator.prepare(descriptor); + /** @type {import('language-transformer').LanguageTransformDescriptor[]} */ + const descriptors = []; + const languageSummaries = getLanguageSummaries(); + for (const {languageTransformsFile} of languageSummaries) { + if (!languageTransformsFile) { continue; } + /** @type {import('language-transformer').LanguageTransformDescriptor} */ + const descriptor = await fetchJson(languageTransformsFile); + descriptors.push(descriptor); + } + void this._translator.prepare(descriptors); await this._optionsUtil.prepare(); this._defaultAnkiFieldTemplates = (await fetchText('/data/templates/default-anki-field-templates.handlebars')).trim(); diff --git a/ext/js/background/offscreen-proxy.js b/ext/js/background/offscreen-proxy.js index c888fe29..102a9eed 100644 --- a/ext/js/background/offscreen-proxy.js +++ b/ext/js/background/offscreen-proxy.js @@ -184,10 +184,10 @@ export class TranslatorProxy { } /** - * @param {import('language-transformer').LanguageTransformDescriptor} descriptor + * @param {import('language-transformer').LanguageTransformDescriptor[]} descriptors */ - async prepare(descriptor) { - await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptor}}); + async prepare(descriptors) { + await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptors}}); } /** diff --git a/ext/js/background/offscreen.js b/ext/js/background/offscreen.js index 3b8b6a3e..e38d4ba3 100644 --- a/ext/js/background/offscreen.js +++ b/ext/js/background/offscreen.js @@ -111,8 +111,8 @@ export class Offscreen { } /** @type {import('offscreen').ApiHandler<'translatorPrepareOffscreen'>} */ - _prepareTranslatorHandler({descriptor}) { - this._translator.prepare(descriptor); + _prepareTranslatorHandler({descriptors}) { + this._translator.prepare(descriptors); } /** @type {import('offscreen').ApiHandler<'findKanjiOffscreen'>} */ diff --git a/ext/js/language/en/english-transforms.json b/ext/js/language/en/english-transforms.json new file mode 100644 index 00000000..623a4ddf --- /dev/null +++ b/ext/js/language/en/english-transforms.json @@ -0,0 +1,39 @@ +{ + "language": "en", + "conditions": { + "v": { + "name": "Verb", + "isDictionaryForm": true + }, + "n": { + "name": "Noun", + "isDictionaryForm": true, + "subConditions": ["np", "ns"] + }, + "np": { + "name": "Noun plural", + "isDictionaryForm": true + }, + "ns": { + "name": "Noun singular", + "isDictionaryForm": true + }, + "adj": { + "name": "Adjective", + "isDictionaryForm": true + } + }, + "transforms": [ + { + "name": "plural", + "description": "Plural form of a noun", + "rules": [ + {"suffixIn": "s", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, + {"suffixIn": "es", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, + {"suffixIn": "ies", "suffixOut": "y", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, + {"suffixIn": "ves", "suffixOut": "fe", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, + {"suffixIn": "ves", "suffixOut": "f", "conditionsIn": ["np"], "conditionsOut": ["ns"]} + ] + } + ] +} diff --git a/ext/data/language/japanese-transforms.json b/ext/js/language/ja/japanese-transforms.json index 0812f01e..0812f01e 100644 --- a/ext/data/language/japanese-transforms.json +++ b/ext/js/language/ja/japanese-transforms.json diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index d78a96e5..6675f78e 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -57,7 +57,8 @@ const languageDescriptors = [ iso: 'en', name: 'English', exampleText: 'read', - textPreprocessors: capitalizationPreprocessors + textPreprocessors: capitalizationPreprocessors, + languageTransformsFile: '/js/language/en/english-transforms.json' }, { iso: 'es', @@ -122,7 +123,8 @@ const languageDescriptors = [ convertAlphabeticCharacters, convertHiraganaToKatakana, collapseEmphaticSequences - } + }, + languageTransformsFile: '/js/language/ja/japanese-transforms.json' }, { iso: 'km', diff --git a/ext/js/language/language-transformer.js b/ext/js/language/language-transformer.js index 08a2166f..8a82e4d2 100644 --- a/ext/js/language/language-transformer.js +++ b/ext/js/language/language-transformer.js @@ -77,11 +77,11 @@ export class LanguageTransformer { this._transforms.push(transform); } - for (const [type, condition] of conditionEntries) { + for (const [type, {isDictionaryForm}] of conditionEntries) { const flags = conditionFlagsMap.get(type); if (typeof flags === 'undefined') { continue; } // This case should never happen this._conditionTypeToConditionFlagsMap.set(type, flags); - if (condition.isDictionaryForm) { + if (isDictionaryForm) { this._partOfSpeechToConditionFlagsMap.set(type, flags); } } @@ -116,7 +116,7 @@ export class LanguageTransformer { * @returns {import('language-transformer-internal').TransformedText[]} */ transform(sourceText) { - const results = [this._createTransformedText(sourceText, 0, [])]; + const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])]; for (let i = 0; i < results.length; ++i) { const {text, conditions, trace} = results[i]; for (const transform of this._transforms) { @@ -128,7 +128,7 @@ export class LanguageTransformer { if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; } const {suffixIn, suffixOut} = rule; if (!text.endsWith(suffixIn) || (text.length - suffixIn.length + suffixOut.length) <= 0) { continue; } - results.push(this._createTransformedText( + results.push(LanguageTransformer.createTransformedText( text.substring(0, text.length - suffixIn.length) + suffixOut, rule.conditionsOut, this._extendTrace(trace, {transform: name, ruleIndex: j}) @@ -140,6 +140,27 @@ export class LanguageTransformer { } /** + * @param {string} text + * @param {number} conditions + * @param {import('language-transformer-internal').Trace} trace + * @returns {import('language-transformer-internal').TransformedText} + */ + static createTransformedText(text, conditions, trace) { + return {text, conditions, trace}; + } + + /** + * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned. + * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`. + * @param {number} currentConditions + * @param {number} nextConditions + * @returns {boolean} + */ + static conditionsMatch(currentConditions, nextConditions) { + return currentConditions === 0 || (currentConditions & nextConditions) !== 0; + } + + /** * @param {import('language-transformer').ConditionMapEntries} conditions * @param {number} nextFlagIndex * @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}} @@ -218,16 +239,6 @@ export class LanguageTransformer { } /** - * @param {string} text - * @param {number} conditions - * @param {import('language-transformer-internal').Trace} trace - * @returns {import('language-transformer-internal').TransformedText} - */ - _createTransformedText(text, conditions, trace) { - return {text, conditions, trace}; - } - - /** * @param {import('language-transformer-internal').Trace} trace * @param {import('language-transformer-internal').TraceFrame} newFrame * @returns {import('language-transformer-internal').Trace} @@ -239,15 +250,4 @@ export class LanguageTransformer { } return newTrace; } - - /** - * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned. - * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`. - * @param {number} currentConditions - * @param {number} nextConditions - * @returns {boolean} - */ - static conditionsMatch(currentConditions, nextConditions) { - return currentConditions === 0 || (currentConditions & nextConditions) !== 0; - } } diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js index fd58477d..63bbc791 100755 --- a/ext/js/language/languages.js +++ b/ext/js/language/languages.js @@ -22,8 +22,8 @@ import {languageDescriptorMap} from './language-descriptors.js'; */ export function getLanguageSummaries() { const results = []; - for (const {name, iso, exampleText} of languageDescriptorMap.values()) { - results.push({name, iso, exampleText}); + for (const {name, iso, exampleText, languageTransformsFile} of languageDescriptorMap.values()) { + results.push({name, iso, exampleText, languageTransformsFile}); } return results; } diff --git a/ext/js/language/multi-language-transformer.js b/ext/js/language/multi-language-transformer.js new file mode 100644 index 00000000..692e1718 --- /dev/null +++ b/ext/js/language/multi-language-transformer.js @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +import {LanguageTransformer} from './language-transformer.js'; + +export class MultiLanguageTransformer { + constructor() { + /** @type {Map<string, LanguageTransformer>} */ + this._languageTransformers = new Map(); + } + + /** + * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors + */ + prepare(languageTransformDescriptors) { + for (const descriptor of languageTransformDescriptors) { + const languageTransformer = new LanguageTransformer(); + languageTransformer.addDescriptor(descriptor); + this._languageTransformers.set(descriptor.language, languageTransformer); + } + } + + /** + * @param {string} language + * @param {string[]} partsOfSpeech + * @returns {number} + */ + getConditionFlagsFromPartsOfSpeech(language, partsOfSpeech) { + const languageTransformer = this._languageTransformers.get(language); + return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromPartsOfSpeech(partsOfSpeech) : 0; + } + + /** + * @param {string} language + * @param {string[]} conditionTypes + * @returns {number} + */ + getConditionFlagsFromConditionTypes(language, conditionTypes) { + const languageTransformer = this._languageTransformers.get(language); + return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionTypes(conditionTypes) : 0; + } + + /** + * @param {string} language + * @param {string} conditionType + * @returns {number} + */ + getConditionFlagsFromConditionType(language, conditionType) { + const languageTransformer = this._languageTransformers.get(language); + return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionType(conditionType) : 0; + } + + /** + * @param {string} language + * @param {string} sourceText + * @returns {import('language-transformer-internal').TransformedText[]} + */ + transform(language, sourceText) { + const languageTransformer = this._languageTransformers.get(language); + if (typeof languageTransformer === 'undefined') { return [LanguageTransformer.createTransformedText(sourceText, 0, [])]; } + return languageTransformer.transform(sourceText); + } +} diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index b10af226..04319e60 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -21,6 +21,7 @@ import {TextSourceMap} from '../general/text-source-map.js'; import {isCodePointJapanese} from './ja/japanese.js'; import {LanguageTransformer} from './language-transformer.js'; import {getAllLanguageTextPreprocessors} from './languages.js'; +import {MultiLanguageTransformer} from './multi-language-transformer.js'; /** * Class which finds term and kanji dictionary entries for text. @@ -32,8 +33,8 @@ export class Translator { constructor(database) { /** @type {import('../dictionary/dictionary-database.js').DictionaryDatabase} */ this._database = database; - /** @type {LanguageTransformer} */ - this._languageTransformer = new LanguageTransformer(); + /** @type {MultiLanguageTransformer} */ + this._multiLanguageTransformer = new MultiLanguageTransformer(); /** @type {import('translator').DictionaryTagCache} */ this._tagCache = new Map(); /** @type {Intl.Collator} */ @@ -46,10 +47,10 @@ export class Translator { /** * Initializes the instance for use. The public API should not be used until this function has been called. - * @param {import('language-transformer').LanguageTransformDescriptor} descriptor + * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors */ - prepare(descriptor) { - this._languageTransformer.addDescriptor(descriptor); + prepare(languageTransformDescriptors) { + this._multiLanguageTransformer.prepare(languageTransformDescriptors); for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) { /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */ const optionSpace = new Map(); @@ -316,11 +317,11 @@ export class Translator { ); if (deinflections.length === 0) { return []; } - const {matchType} = options; + const {matchType, language} = options; - await this._addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType); + await this._addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType); - const dictionaryDeinflections = await this._getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType); + const dictionaryDeinflections = await this._getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType); deinflections.push(...dictionaryDeinflections); for (const deinflection of deinflections) { @@ -335,12 +336,13 @@ export class Translator { } /** + * @param {string} language * @param {import('translation-internal').DatabaseDeinflection[]} deinflections * @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap * @param {import('dictionary').TermSourceMatchType} matchType * @returns {Promise<import('translation-internal').DatabaseDeinflection[]>} */ - async _getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType) { + async _getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType) { /** @type {import('translation-internal').DatabaseDeinflection[]} */ const dictionaryDeinflections = []; for (const deinflection of deinflections) { @@ -369,23 +371,24 @@ export class Translator { } } - await this._addEntriesToDeinflections(dictionaryDeinflections, enabledDictionaryMap, matchType); + await this._addEntriesToDeinflections(language, dictionaryDeinflections, enabledDictionaryMap, matchType); return dictionaryDeinflections; } /** + * @param {string} language * @param {import('translation-internal').DatabaseDeinflection[]} deinflections * @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap * @param {import('dictionary').TermSourceMatchType} matchType */ - async _addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType) { + async _addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType) { const uniqueDeinflectionsMap = this._groupDeinflectionsByTerm(deinflections); const uniqueDeinflectionArrays = [...uniqueDeinflectionsMap.values()]; const uniqueDeinflectionTerms = [...uniqueDeinflectionsMap.keys()]; const databaseEntries = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, matchType); - this._matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap); + this._matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap); } /** @@ -407,16 +410,17 @@ export class Translator { } /** + * @param {string} language * @param {import('dictionary-database').TermEntry[]} databaseEntries * @param {import('translation-internal').DatabaseDeinflection[][]} uniqueDeinflectionArrays * @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap */ - _matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) { + _matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) { for (const databaseEntry of databaseEntries) { const entryDictionary = /** @type {import('translation').FindTermDictionary} */ (enabledDictionaryMap.get(databaseEntry.dictionary)); const {partsOfSpeechFilter} = entryDictionary; - const definitionConditions = this._languageTransformer.getConditionFlagsFromPartsOfSpeech(databaseEntry.rules); + const definitionConditions = this._multiLanguageTransformer.getConditionFlagsFromPartsOfSpeech(language, databaseEntry.rules); for (const deinflection of uniqueDeinflectionArrays[databaseEntry.index]) { if (!partsOfSpeechFilter || LanguageTransformer.conditionsMatch(deinflection.conditions, definitionConditions)) { deinflection.databaseEntries.push(databaseEntry); @@ -475,7 +479,7 @@ export class Translator { if (used.has(source)) { break; } used.add(source); const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); - for (const {text: transformedText, conditions, trace} of this._languageTransformer.transform(source)) { + for (const {text: transformedText, conditions, trace} of this._multiLanguageTransformer.transform(language, source)) { /** @type {import('dictionary').InflectionRuleChainCandidate} */ const inflectionRuleChainCandidate = { source: 'algorithm', diff --git a/test/data/json.json b/test/data/json.json index c7c1d3aa..16ca6f01 100644 --- a/test/data/json.json +++ b/test/data/json.json @@ -93,7 +93,12 @@ "type": "AjvSchema" }, { - "path": "ext/data/language/japanese-transforms.json", + "path": "ext/js/language/ja/japanese-transforms.json", + "typeFile": "types/ext/language-transformer.d.ts", + "type": "LanguageTransformDescriptor" + }, + { + "path": "ext/js/language/en/english-transforms.json", "typeFile": "types/ext/language-transformer.d.ts", "type": "LanguageTransformDescriptor" }, diff --git a/test/fixtures/translator-test.js b/test/fixtures/translator-test.js index ff3782a7..6b6eaad8 100644 --- a/test/fixtures/translator-test.js +++ b/test/fixtures/translator-test.js @@ -31,7 +31,7 @@ import {DictionaryImporterMediaLoader} from '../mocks/dictionary-importer-media- import {createDomTest} from './dom-test.js'; const extDir = join(dirname(fileURLToPath(import.meta.url)), '../../ext'); -const languageTransformDescriptorPath = join(extDir, 'data/language/japanese-transforms.json'); +const languageTransformDescriptorPath = join(extDir, 'js/language/ja/japanese-transforms.json'); vi.stubGlobal('indexedDB', indexedDB); vi.stubGlobal('IDBKeyRange', IDBKeyRange); @@ -65,7 +65,7 @@ export async function createTranslatorContext(dictionaryDirectory, dictionaryNam const translator = new Translator(dictionaryDatabase); /** @type {import('language-transformer').LanguageTransformDescriptor} */ const deinflectionReasons = parseJson(readFileSync(languageTransformDescriptorPath, {encoding: 'utf8'})); - translator.prepare(deinflectionReasons); + translator.prepare([deinflectionReasons]); return translator; } diff --git a/test/language-transformer-cycles.test.js b/test/language-transformer-cycles.test.js index b778c188..751f65a6 100644 --- a/test/language-transformer-cycles.test.js +++ b/test/language-transformer-cycles.test.js @@ -21,6 +21,7 @@ import {fileURLToPath} from 'url'; import {describe, test} from 'vitest'; import {parseJson} from '../dev/json.js'; import {LanguageTransformer} from '../ext/js/language/language-transformer.js'; +import {getLanguageSummaries} from '../ext/js/language/languages.js'; class DeinflectionNode { /** @@ -103,12 +104,18 @@ function arraysAreEqual(rules1, rules2) { return true; } -describe('Deinflection data', () => { - test('Check for cycles', ({expect}) => { - const dirname = pathDirname(fileURLToPath(import.meta.url)); +const dirname = pathDirname(fileURLToPath(import.meta.url)); +const descriptors = []; +const languageSummaries = getLanguageSummaries(); +for (const {languageTransformsFile} of languageSummaries) { + if (!languageTransformsFile) { continue; } + /** @type {import('language-transformer').LanguageTransformDescriptor} */ + const descriptor = parseJson(readFileSync(join(dirname, `../ext/${languageTransformsFile}`), {encoding: 'utf8'})); + descriptors.push(descriptor); +} - /** @type {import('language-transformer').LanguageTransformDescriptor} */ - const descriptor = parseJson(readFileSync(join(dirname, '../ext/data/language/japanese-transforms.json'), {encoding: 'utf8'})); +describe.each(descriptors)('Cycles Test $language', (descriptor) => { + test('Check for cycles', ({expect}) => { const languageTransformer = new LanguageTransformer(); languageTransformer.addDescriptor(descriptor); diff --git a/test/language-transformer.test.js b/test/language-transformer.test.js index 7c0da48b..b682f66b 100644 --- a/test/language-transformer.test.js +++ b/test/language-transformer.test.js @@ -1147,7 +1147,7 @@ describe('LanguageTransformer', () => { /* eslint-enable @stylistic/no-multi-spaces */ /** @type {import('language-transformer').LanguageTransformDescriptor} */ - const descriptor = parseJson(fs.readFileSync(path.join(dirname, '..', 'ext', 'data/language/japanese-transforms.json'), {encoding: 'utf8'})); + const descriptor = parseJson(fs.readFileSync(path.join(dirname, '..', 'ext', 'js/language/ja/japanese-transforms.json'), {encoding: 'utf8'})); const languageTransformer = new LanguageTransformer(); languageTransformer.addDescriptor(descriptor); diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index ca457721..79668da4 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -32,6 +32,7 @@ type LanguageDescriptor<TIso extends string, TTextPreprocessorDescriptor extends */ isTextLookupWorthy?: IsTextLookupWorthyFunction; textPreprocessors: TTextPreprocessorDescriptor; + languageTransformsFile?: string; }; type TextPreprocessorDescriptor = { diff --git a/types/ext/language-transformer-internal.d.ts b/types/ext/language-transformer-internal.d.ts index c186eeca..fdabc0fb 100644 --- a/types/ext/language-transformer-internal.d.ts +++ b/types/ext/language-transformer-internal.d.ts @@ -40,3 +40,11 @@ export type TraceFrame = { transform: string; ruleIndex: number; }; + +export type ConditionTypeToConditionFlagsMap = Map<string, number>; + +export type LanguageTransformDescriptorInternal = { + transforms: Transform[]; + conditionTypeToConditionFlagsMap: ConditionTypeToConditionFlagsMap; + partOfSpeechToConditionFlagsMap: ConditionTypeToConditionFlagsMap; +}; diff --git a/types/ext/language.d.ts b/types/ext/language.d.ts index 8e5a5c70..a50d3d4b 100644 --- a/types/ext/language.d.ts +++ b/types/ext/language.d.ts @@ -51,4 +51,5 @@ export type LanguageSummary = { name: string; iso: string; exampleText: string; + languageTransformsFile?: string; }; diff --git a/types/ext/offscreen.d.ts b/types/ext/offscreen.d.ts index 443e0b64..777756b9 100644 --- a/types/ext/offscreen.d.ts +++ b/types/ext/offscreen.d.ts @@ -52,7 +52,7 @@ type ApiSurface = { }; translatorPrepareOffscreen: { params: { - descriptor: LanguageTransformDescriptor; + descriptors: LanguageTransformDescriptor[]; }; return: void; }; |