From d68e93e9ca210a3653e3a464391a77b27cfd353a Mon Sep 17 00:00:00 2001 From: StefanVukovic99 Date: Sat, 11 May 2024 01:37:09 +0200 Subject: add a few deinflections for Latin (#901) * abstract deinflections * undo redundant changes * remove cast * switch to js * MultiLanguageTransformer * comments * comments * fix test * suffixInflection * fix bench * substring instead of replace * without heuristic * suffixMap * add other language deinflections * wip * catch cycles * fix tests * uninflect to deinflect * use less regex * start * organize language transform test by language * simpler folders * wip * wip * delete german test * cleanup --- .eslintrc.json | 1 + ext/js/language/la/latin-transforms.js | 162 ++++++++++++++++++++++++++++++++ ext/js/language/language-descriptors.js | 4 +- test/language/latin-transforms.test.js | 56 +++++++++++ 4 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 ext/js/language/la/latin-transforms.js create mode 100644 test/language/latin-transforms.test.js diff --git a/.eslintrc.json b/.eslintrc.json index 51bb2328..a5418154 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -647,6 +647,7 @@ "ext/js/language/ja/japanese-transforms.js", "ext/js/language/ja/japanese-wanakana.js", "ext/js/language/ja/japanese.js", + "ext/js/language/la/latin-transforms.js", "ext/js/language/language-descriptors.js", "ext/js/language/language-transformer.js", "ext/js/language/language-transforms.js", diff --git a/ext/js/language/la/latin-transforms.js b/ext/js/language/la/latin-transforms.js new file mode 100644 index 00000000..5616adce --- /dev/null +++ b/ext/js/language/la/latin-transforms.js @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {suffixInflection} from '../language-transforms.js'; + +// TODO: -ne suffix (estne, nonne)? + +/** + * Deinflection descriptor for Latin (language code 'la'). + * + * `conditions` names the word classes a form can belong to: verb, noun (split by number and by declension 1-5), + * adjective (1st-2nd vs. 3rd declension) and adverb. Parent conditions list their children under `subConditions`, + * and every condition is flagged `isDictionaryForm: true`. + * + * `transforms` lists the suffix rewrites that turn an inflected form back into a candidate dictionary form. + * Only the `n1s`, `n1p`, `n2s`, `n2p` and `adj12` conditions are referenced by the rules in this descriptor; + * the other conditions are declared but not used by any rule here. + * + * @type {import('language-transformer').LanguageTransformDescriptor} + */ +export const latinTransforms = { + language: 'la', + conditions: { + v: { + name: 'Verb', + isDictionaryForm: true + }, + n: { + name: 'Noun', + isDictionaryForm: true, + subConditions: ['ns', 'np'] + }, + ns: { + name: 'Noun, singular', + isDictionaryForm: true, + subConditions: ['n1s', 'n2s', 'n3s', 'n4s', 'n5s'] + }, + np: { + name: 'Noun, plural', + isDictionaryForm: true, + subConditions: ['n1p', 'n2p', 'n3p', 'n4p', 'n5p'] + }, + n1: { + name: 'Noun, 1st declension', + isDictionaryForm: true, + subConditions: ['n1s', 'n1p'] + }, + n1p: { + name: 'Noun, 1st declension, plural', + isDictionaryForm: true + }, + n1s: { + name: 'Noun, 1st declension, singular', + isDictionaryForm: true + }, + n2: { + name: 'Noun, 2nd declension', + isDictionaryForm: true, + subConditions: ['n2s', 'n2p'] + }, + n2p: { + name: 'Noun, 2nd declension, plural', + isDictionaryForm: true + }, + n2s: { + name: 'Noun, 2nd declension, singular', + isDictionaryForm: true + }, + n3: { + name: 'Noun, 3rd declension', + isDictionaryForm: true, + subConditions: ['n3s', 'n3p'] + }, + n3p: { + name: 'Noun, 3rd declension, plural', + isDictionaryForm: true + }, + n3s: { + name: 'Noun, 3rd declension, singular', + isDictionaryForm: true + }, + n4: { + name: 'Noun, 4th declension', + isDictionaryForm: true, + subConditions: ['n4s', 'n4p'] + }, + n4p: { + name: 'Noun, 4th declension, plural', + isDictionaryForm: true + }, + n4s: { + name: 'Noun, 4th 
declension, singular', + isDictionaryForm: true + }, + n5: { + name: 'Noun, 5th declension', + isDictionaryForm: true, + subConditions: ['n5s', 'n5p'] + }, + n5p: { + name: 'Noun, 5th declension, plural', + isDictionaryForm: true + }, + n5s: { + name: 'Noun, 5th declension, singular', + isDictionaryForm: true + }, + adj: { + name: 'Adjective', + isDictionaryForm: true, + subConditions: ['adj3', 'adj12'] + }, + adj12: { + name: 'Adjective, 1st-2nd declension', + isDictionaryForm: true + }, + adj3: { + name: 'Adjective, 3rd declension', + isDictionaryForm: true + }, + adv: { + name: 'Adverb', + isDictionaryForm: true + } + }, + /* Each rule is suffixInflection(inflectedSuffix, dictionarySuffix, conditionsA, conditionsB): for example 'i' to 'us' turns "fluvii" into "fluvius". The two arrays hold keys from `conditions`; presumably the first describes the inflected form and the second the resulting form (TODO confirm against language-transforms.js). */ transforms: [ + { + name: 'plural', + description: 'Plural declension', + rules: [ + suffixInflection('i', 'us', ['n2p'], ['n2s']), + suffixInflection('i', 'us', ['adj12'], ['adj12']), + /* NOTE(review): the inflected suffix here is a bare 'e' (as in "insulae" to "insula"), while the adjective rule on the next line uses the narrower 'ae' to 'a'. Presumably this rule is tried for any form ending in 'e'; confirm that breadth is intended. */ suffixInflection('e', '', ['n1p'], ['n1s']), + suffixInflection('ae', 'a', ['adj12'], ['adj12']), + /* Neuter plural adjective ending: 'a' back to singular 'um'. */ suffixInflection('a', 'um', ['adj12'], ['adj12']) + ] + }, + { + name: 'feminine', + description: 'Adjective form', + rules: [ + suffixInflection('a', 'us', ['adj12'], ['adj12']) + ] + }, + { + name: 'neuter', + description: 'Adjective form', + rules: [ + suffixInflection('um', 'us', ['adj12'], ['adj12']) + ] + }, + { + name: 'ablative', + description: 'Ablative case', + rules: [ + /* Covers only 'o' to 'um' (e.g. "vocabulo" to "vocabulum"); no 'o' to 'us' rule is defined here. */ suffixInflection('o', 'um', ['n2s'], ['n2s']) + ] + } + ] +}; diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 1c577039..e1d89054 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -22,6 +22,7 @@ import {englishTransforms} from './en/english-transforms.js'; import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; +import 
{latinTransforms} from './la/latin-transforms.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {oldIrishTransforms} from './sga/old-irish-transforms.js'; import {albanianTransforms} from './sq/albanian-transforms.js'; @@ -125,7 +126,8 @@ const languageDescriptors = [ textPreprocessors: { ...capitalizationPreprocessors, removeAlphabeticDiacritics - } + }, + languageTransforms: latinTransforms }, { iso: 'ja', diff --git a/test/language/latin-transforms.test.js b/test/language/latin-transforms.test.js new file mode 100644 index 00000000..238d8eaf --- /dev/null +++ b/test/language/latin-transforms.test.js @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2023-2024 Yomitan Authors + * Copyright (C) 2020-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ */ + +import {latinTransforms} from '../../ext/js/language/la/latin-transforms.js'; +import {LanguageTransformer} from '../../ext/js/language/language-transformer.js'; +import {testLanguageTransformer} from '../fixtures/language-transformer-test.js'; + +/* eslint-disable @stylistic/no-multi-spaces */ +/* Expected Latin deinflections, grouped by category. In each case `source` is the inflected input and `term` is the dictionary form it should reduce to (e.g. "fluvii" to "fluvius"); `reasons` holds names of transforms from latinTransforms. How `rule` and `valid` are interpreted is decided by testLanguageTransformer and is not visible here (TODO confirm in the fixture). */ const tests = [ + { + category: 'plural', + valid: true, + tests: [ + {term: 'fluvius', source: 'fluvii', rule: 'n', reasons: ['plural']}, + {term: 'magnus', source: 'magni', rule: 'adj', reasons: ['plural']}, + {term: 'insula', source: 'insulae', rule: 'n', reasons: ['plural']} + ] + }, + { + category: 'adjective', + valid: true, + tests: [ + {term: 'magnus', source: 'magna', rule: 'adj', reasons: ['feminine']}, + {term: 'Graecus', source: 'Graecum', rule: 'adj', reasons: ['neuter']}, + /* Two-step chain: "prima" to "primum" via the 'plural' rule (a to um), then "primum" to "primus" via the 'neuter' rule (um to us). */ {term: 'primus', source: 'prima', rule: 'adj', reasons: ['neuter', 'plural']} + ] + }, + { + category: 'ablative', + valid: true, + tests: [ + {term: 'vocabulum', source: 'vocabulo', rule: 'n', reasons: ['ablative']} + ] + } +]; +/* eslint-enable @stylistic/no-multi-spaces */ + +/* Build a transformer that holds only the Latin descriptor, then hand it and the case table to the shared fixture. */ const languageTransformer = new LanguageTransformer(); +languageTransformer.addDescriptor(latinTransforms); + +testLanguageTransformer(languageTransformer, tests); -- cgit v1.2.3