From d68e93e9ca210a3653e3a464391a77b27cfd353a Mon Sep 17 00:00:00 2001 From: StefanVukovic99 Date: Sat, 11 May 2024 01:37:09 +0200 Subject: add a few deinflections for Latin (#901) * abstract deinflections * undo redundant changes * remove cast * switch to js * MultiLanguageTransformer * comments * comments * fix test * suffixInflection * fix bench * substring instead of replace * without heuristic * suffixMap * add other language deinflections * wip * catch cycles * fix tests * uninflect to deinflect * use less regex * start * organize language transform test by language * simpler folders * wip * wip * delete german test * cleanup --- ext/js/language/la/latin-transforms.js | 162 ++++++++++++++++++++++++++++++++ ext/js/language/language-descriptors.js | 4 +- 2 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 ext/js/language/la/latin-transforms.js (limited to 'ext/js') diff --git a/ext/js/language/la/latin-transforms.js b/ext/js/language/la/latin-transforms.js new file mode 100644 index 00000000..5616adce --- /dev/null +++ b/ext/js/language/la/latin-transforms.js @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +import {suffixInflection} from '../language-transforms.js'; + +// TODO: -ne suffix (estne, nonne)? 
+ +/** Latin deinflection descriptor: a table of condition tags (each with a display name, an isDictionaryForm flag and optional subConditions) followed by a list of named transforms, each holding suffix rules built with suffixInflection. NOTE(review): suffixInflection is imported from ../language-transforms.js and is not visible in this file; its four arguments are presumably (inflected suffix, replacement suffix, conditionsIn, conditionsOut) - confirm against that module. @type {import('language-transformer').LanguageTransformDescriptor} */ +export const latinTransforms = { + language: 'la', + conditions: { + v: { + name: 'Verb', + isDictionaryForm: true + }, + n: { + name: 'Noun', + isDictionaryForm: true, + subConditions: ['ns', 'np'] + }, + ns: { + name: 'Noun, singular', + isDictionaryForm: true, + subConditions: ['n1s', 'n2s', 'n3s', 'n4s', 'n5s'] + }, + np: { + name: 'Noun, plural', + isDictionaryForm: true, + subConditions: ['n1p', 'n2p', 'n3p', 'n4p', 'n5p'] + }, + /* n1 through n5 group the singular and plural tag of one declension each; no subConditions list and no rule in this table refers to n1..n5 themselves. */ n1: { + name: 'Noun, 1st declension', + isDictionaryForm: true, + subConditions: ['n1s', 'n1p'] + }, + n1p: { + name: 'Noun, 1st declension, plural', + isDictionaryForm: true + }, + n1s: { + name: 'Noun, 1st declension, singular', + isDictionaryForm: true + }, + n2: { + name: 'Noun, 2nd declension', + isDictionaryForm: true, + subConditions: ['n2s', 'n2p'] + }, + n2p: { + name: 'Noun, 2nd declension, plural', + isDictionaryForm: true + }, + n2s: { + name: 'Noun, 2nd declension, singular', + isDictionaryForm: true + }, + n3: { + name: 'Noun, 3rd declension', + isDictionaryForm: true, + subConditions: ['n3s', 'n3p'] + }, + n3p: { + name: 'Noun, 3rd declension, plural', + isDictionaryForm: true + }, + n3s: { + name: 'Noun, 3rd declension, singular', + isDictionaryForm: true + }, + n4: { + name: 'Noun, 4th declension', + isDictionaryForm: true, + subConditions: ['n4s', 'n4p'] + }, + n4p: { + name: 'Noun, 4th declension, plural', + isDictionaryForm: true + }, + n4s: { + name: 'Noun, 4th declension, singular', + isDictionaryForm: true + }, + n5: { + name: 'Noun, 5th declension', + isDictionaryForm: true, + subConditions: ['n5s', 'n5p'] + }, + n5p: { + name: 'Noun, 5th declension, plural', + isDictionaryForm: true + }, + n5s: { + name: 'Noun, 5th declension, singular', + isDictionaryForm: true + }, + adj: { + name: 'Adjective', + isDictionaryForm: true, + subConditions: ['adj3', 'adj12'] + }, + adj12: { + name: 'Adjective, 1st-2nd declension', + isDictionaryForm: 
true + }, + adj3: { + name: 'Adjective, 3rd declension', + isDictionaryForm: true + }, + adv: { + name: 'Adverb', + isDictionaryForm: true + } + }, + /* Only the tags n1s, n1p, n2s, n2p and adj12 are used by the rules below; v, adv, adj3 and the 3rd to 5th declension tags have no rule in this table. */ transforms: [ + /* Plural endings: the two noun rules go from a plural tag to the matching singular tag (n2p to n2s, n1p to n1s); the adj12 rules keep the same tag on both sides. NOTE(review): the n1p rule is written as 'e' to '' rather than 'ae' to 'a', so it presumably fires on any word ending in 'e' - confirm this breadth is intended. */ { + name: 'plural', + description: 'Plural declension', + rules: [ + suffixInflection('i', 'us', ['n2p'], ['n2s']), + suffixInflection('i', 'us', ['adj12'], ['adj12']), + suffixInflection('e', '', ['n1p'], ['n1s']), + suffixInflection('ae', 'a', ['adj12'], ['adj12']), + suffixInflection('a', 'um', ['adj12'], ['adj12']) + ] + }, + { + name: 'feminine', + description: 'Adjective form', + rules: [ + suffixInflection('a', 'us', ['adj12'], ['adj12']) + ] + }, + { + name: 'neuter', + description: 'Adjective form', + rules: [ + suffixInflection('um', 'us', ['adj12'], ['adj12']) + ] + }, + /* NOTE(review): only an 'o' to 'um' rule is listed for the ablative; there is no 'o' to 'us' counterpart here - confirm whether that omission is deliberate. */ { + name: 'ablative', + description: 'Ablative case', + rules: [ + suffixInflection('o', 'um', ['n2s'], ['n2s']) + ] + } + ] +}; diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 1c577039..e1d89054 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -22,6 +22,7 @@ import {englishTransforms} from './en/english-transforms.js'; import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; +import {latinTransforms} from './la/latin-transforms.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {oldIrishTransforms} from './sga/old-irish-transforms.js'; import {albanianTransforms} from './sq/albanian-transforms.js'; @@ -125,7 +126,8 @@ const languageDescriptors = [ textPreprocessors: { ...capitalizationPreprocessors, removeAlphabeticDiacritics - } + }, + languageTransforms: latinTransforms }, { iso: 'ja', -- cgit v1.2.3