From d2e9841f96ebff61d4a5c26a322484f6268115f1 Mon Sep 17 00:00:00 2001 From: StefanVukovic99 Date: Fri, 22 Mar 2024 15:27:35 +0100 Subject: expand deinflection format (#745) * abstract deinflections * undo redundant changes * remove cast * switch to js * MultiLanguageTransformer * comments * comments * fix test * suffixInflection * fix bench * substring instead of replace * without heuristic * suffixMap * add other language deinflections * wip * catch cycles * fix tests * uninflect to deinflect * use less regex * add suru masu stem deinflection --- test/data/json.json | 10 ------ test/fixtures/translator-test.js | 11 +------ test/language-transformer-cycles.test.js | 53 +++++++++++++++----------------- test/language-transformer.test.js | 11 ++----- 4 files changed, 27 insertions(+), 58 deletions(-) (limited to 'test') diff --git a/test/data/json.json b/test/data/json.json index 16ca6f01..73709fd5 100644 --- a/test/data/json.json +++ b/test/data/json.json @@ -92,16 +92,6 @@ "typeFile": "types/test/json.d.ts", "type": "AjvSchema" }, - { - "path": "ext/js/language/ja/japanese-transforms.json", - "typeFile": "types/ext/language-transformer.d.ts", - "type": "LanguageTransformDescriptor" - }, - { - "path": "ext/js/language/en/english-transforms.json", - "typeFile": "types/ext/language-transformer.d.ts", - "type": "LanguageTransformDescriptor" - }, { "path": "test/data/translator-test-inputs.json", "typeFile": "types/test/translator.d.ts", diff --git a/test/fixtures/translator-test.js b/test/fixtures/translator-test.js index 6b6eaad8..9f1e87df 100644 --- a/test/fixtures/translator-test.js +++ b/test/fixtures/translator-test.js @@ -17,12 +17,8 @@ */ import {IDBKeyRange, indexedDB} from 'fake-indexeddb'; -import {readFileSync} from 'fs'; -import {fileURLToPath} from 'node:url'; -import {dirname, join} from 'path'; import {expect, vi} from 'vitest'; import {createDictionaryArchiveData} from '../../dev/dictionary-archive-util.js'; -import {parseJson} from '../../dev/json.js'; 
import {DictionaryDatabase} from '../../ext/js/dictionary/dictionary-database.js'; import {DictionaryImporter} from '../../ext/js/dictionary/dictionary-importer.js'; import {Translator} from '../../ext/js/language/translator.js'; @@ -30,9 +26,6 @@ import {chrome, fetch} from '../mocks/common.js'; import {DictionaryImporterMediaLoader} from '../mocks/dictionary-importer-media-loader.js'; import {createDomTest} from './dom-test.js'; -const extDir = join(dirname(fileURLToPath(import.meta.url)), '../../ext'); -const languageTransformDescriptorPath = join(extDir, 'js/language/ja/japanese-transforms.json'); - vi.stubGlobal('indexedDB', indexedDB); vi.stubGlobal('IDBKeyRange', IDBKeyRange); vi.stubGlobal('fetch', fetch); @@ -63,9 +56,7 @@ export async function createTranslatorContext(dictionaryDirectory, dictionaryNam // Setup translator const translator = new Translator(dictionaryDatabase); - /** @type {import('language-transformer').LanguageTransformDescriptor} */ - const deinflectionReasons = parseJson(readFileSync(languageTransformDescriptorPath, {encoding: 'utf8'})); - translator.prepare([deinflectionReasons]); + translator.prepare(); return translator; } diff --git a/test/language-transformer-cycles.test.js b/test/language-transformer-cycles.test.js index 751f65a6..ab14eaa4 100644 --- a/test/language-transformer-cycles.test.js +++ b/test/language-transformer-cycles.test.js @@ -15,13 +15,9 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. 
*/ -import {readFileSync} from 'fs'; -import {join, dirname as pathDirname} from 'path'; -import {fileURLToPath} from 'url'; import {describe, test} from 'vitest'; -import {parseJson} from '../dev/json.js'; import {LanguageTransformer} from '../ext/js/language/language-transformer.js'; -import {getLanguageSummaries} from '../ext/js/language/languages.js'; +import {getAllLanguageTransformDescriptors} from '../ext/js/language/languages.js'; class DeinflectionNode { /** @@ -80,12 +76,12 @@ class DeinflectionNode { class RuleNode { /** * @param {string} groupName - * @param {import('language-transformer').Rule} rule + * @param {import('language-transformer').SuffixRule} rule */ constructor(groupName, rule) { /** @type {string} */ this.groupName = groupName; - /** @type {import('language-transformer').Rule} */ + /** @type {import('language-transformer').SuffixRule} */ this.rule = rule; } } @@ -104,39 +100,36 @@ function arraysAreEqual(rules1, rules2) { return true; } -const dirname = pathDirname(fileURLToPath(import.meta.url)); -const descriptors = []; -const languageSummaries = getLanguageSummaries(); -for (const {languageTransformsFile} of languageSummaries) { - if (!languageTransformsFile) { continue; } - /** @type {import('language-transformer').LanguageTransformDescriptor} */ - const descriptor = parseJson(readFileSync(join(dirname, `../ext/${languageTransformsFile}`), {encoding: 'utf8'})); - descriptors.push(descriptor); -} +const languagesWithTransforms = getAllLanguageTransformDescriptors(); -describe.each(descriptors)('Cycles Test $language', (descriptor) => { +describe.each(languagesWithTransforms)('Cycles Test $iso', ({languageTransforms}) => { test('Check for cycles', ({expect}) => { const languageTransformer = new LanguageTransformer(); - languageTransformer.addDescriptor(descriptor); + languageTransformer.addDescriptor(languageTransforms); /** @type {RuleNode[]} */ const ruleNodes = []; - for (const [groupName, reasonInfo] of 
Object.entries(descriptor.transforms)) { + for (const [groupName, reasonInfo] of Object.entries(languageTransforms.transforms)) { for (const rule of reasonInfo.rules) { - ruleNodes.push(new RuleNode(groupName, rule)); + if (rule.type === 'suffix') { + ruleNodes.push(new RuleNode(groupName, /** @type {import('language-transformer').SuffixRule}*/ (rule))); + } } } /** @type {DeinflectionNode[]} */ const deinflectionNodes = []; - for (const ruleNode of ruleNodes) { - deinflectionNodes.push(new DeinflectionNode(`?${ruleNode.rule.suffixIn}`, [], null, null)); + for (const {rule: {isInflected}} of ruleNodes) { + const suffixIn = isInflected.source.substring(0, isInflected.source.length - 1); + deinflectionNodes.push(new DeinflectionNode(`?${suffixIn}`, [], null, null)); } + for (let i = 0; i < deinflectionNodes.length; ++i) { const deinflectionNode = deinflectionNodes[i]; const {text, ruleNames} = deinflectionNode; for (const ruleNode of ruleNodes) { - const {suffixIn, suffixOut, conditionsIn, conditionsOut} = ruleNode.rule; + const {isInflected, deinflected: suffixOut, conditionsIn, conditionsOut} = ruleNode.rule; + const suffixIn = isInflected.source.substring(0, isInflected.source.length - 1); if ( !LanguageTransformer.conditionsMatch( languageTransformer.getConditionFlagsFromConditionTypes(ruleNames), @@ -158,12 +151,14 @@ describe.each(descriptors)('Cycles Test $language', (descriptor) => { // Cycle check if (deinflectionNode.historyIncludes(newDeinflectionNode)) { const stack = []; - for (const item of newDeinflectionNode.getHistory()) { - stack.push( - item.ruleNode === null ? 
- `${item.text} (start)` : - `${item.text} (${item.ruleNode.groupName}, ${item.ruleNode.rule.conditionsIn.join(',')}=>${item.ruleNode.rule.conditionsOut.join(',')}, ${item.ruleNode.rule.suffixIn}=>${item.ruleNode.rule.suffixOut})` - ); + for (const {text: itemText, ruleNode: itemNode} of newDeinflectionNode.getHistory()) { + if (itemNode !== null) { + const itemSuffixIn = itemNode.rule.isInflected.source.substring(0, itemNode.rule.isInflected.source.length - 1); + const itemSuffixOut = itemNode.rule.deinflected; + stack.push(`${itemText} (${itemNode.groupName}, ${itemNode.rule.conditionsIn.join(',')}=>${itemNode.rule.conditionsOut.join(',')}, ${itemSuffixIn}=>${itemSuffixOut})`); + } else { + stack.push(`${itemText} (start)`); + } } const message = `Cycle detected:\n ${stack.join('\n ')}`; expect.soft(true, message).toEqual(false); diff --git a/test/language-transformer.test.js b/test/language-transformer.test.js index 16f044c3..6e6de0a0 100644 --- a/test/language-transformer.test.js +++ b/test/language-transformer.test.js @@ -16,15 +16,10 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. 
*/ -import fs from 'fs'; -import {fileURLToPath} from 'node:url'; -import path from 'path'; import {describe, expect, test} from 'vitest'; -import {parseJson} from '../dev/json.js'; +import {japaneseTransforms} from '../ext/js/language/ja/japanese-transforms.js'; import {LanguageTransformer} from '../ext/js/language/language-transformer.js'; -const dirname = path.dirname(fileURLToPath(import.meta.url)); - /** * @param {LanguageTransformer} languageTransformer * @param {string} source @@ -1146,10 +1141,8 @@ describe('LanguageTransformer', () => { ]; /* eslint-enable @stylistic/no-multi-spaces */ - /** @type {import('language-transformer').LanguageTransformDescriptor} */ - const descriptor = parseJson(fs.readFileSync(path.join(dirname, '..', 'ext', 'js/language/ja/japanese-transforms.json'), {encoding: 'utf8'})); const languageTransformer = new LanguageTransformer(); - languageTransformer.addDescriptor(descriptor); + languageTransformer.addDescriptor(japaneseTransforms); describe('deinflections', () => { describe.each(data)('$category', ({valid, tests}) => { -- cgit v1.2.3