diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-05-11 01:37:09 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-10 23:37:09 +0000 |
commit | d68e93e9ca210a3653e3a464391a77b27cfd353a (patch) | |
tree | bb6bbb5fb1d7d2d59d66cb4e01fa5014db9afc0b /ext | |
parent | fdfbfc27c6b731cbc41b2dd1239e70a549bcc8f4 (diff) |
add a few deinflections for Latin (#901)
* abstract deinflections
* undo redundant changes
* remove cast
* switch to js
* MultiLanguageTransformer
* comments
* comments
* fix test
* suffixInflection
* fix bench
* substring instead of replace
* without heuristic
* suffixMap
* add other language deinflections
* wip
* catch cycles
* fix tests
* uninflect to deinflect
* use less regex
* start
* organize language transform test by language
* simpler folders
* wip
* wip
* delete german test
* cleanup
Diffstat (limited to 'ext')
-rw-r--r-- | ext/js/language/la/latin-transforms.js | 162 | ||||
-rw-r--r-- | ext/js/language/language-descriptors.js | 4 |
2 files changed, 165 insertions, 1 deletions
diff --git a/ext/js/language/la/latin-transforms.js b/ext/js/language/la/latin-transforms.js new file mode 100644 index 00000000..5616adce --- /dev/null +++ b/ext/js/language/la/latin-transforms.js @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +import {suffixInflection} from '../language-transforms.js'; + +// TODO: -ne suffix (estne, nonne)? 
+ +/** Latin ('la') language transform descriptor. `conditions` declares the word classes used by the rules: verb, noun (grouped by number via ns/np and by declension via n1..n5, each with singular/plural leaves), adjective (adj12 = 1st-2nd declension, adj3 = 3rd declension) and adverb; every condition is flagged isDictionaryForm. `transforms` lists four rule groups (plural, feminine, neuter, ablative), each built only from suffixInflection(...) calls. Presumably the suffixInflection arguments are (inflected suffix, dictionary-form suffix, conditions in, conditions out) - TODO confirm against language-transforms.js. NOTE(review): the 1st-declension noun plural rule is ('e', '') while the parallel adjective rule is ('ae', 'a'); if the first argument is matched as a plain suffix, the noun rule would apply to any word ending in "e" - confirm whether ('ae', 'a') was intended. NOTE(review): the ablative group only maps 'o' to 'um' and the noun plural rules only cover n1/n2 - looks intentionally partial ("a few deinflections"), confirm. @type {import('language-transformer').LanguageTransformDescriptor} */ +export const latinTransforms = { + language: 'la', + conditions: { + v: { + name: 'Verb', + isDictionaryForm: true + }, + n: { + name: 'Noun', + isDictionaryForm: true, + subConditions: ['ns', 'np'] + }, + ns: { + name: 'Noun, singular', + isDictionaryForm: true, + subConditions: ['n1s', 'n2s', 'n3s', 'n4s', 'n5s'] + }, + np: { + name: 'Noun, plural', + isDictionaryForm: true, + subConditions: ['n1p', 'n2p', 'n3p', 'n4p', 'n5p'] + }, + n1: { + name: 'Noun, 1st declension', + isDictionaryForm: true, + subConditions: ['n1s', 'n1p'] + }, + n1p: { + name: 'Noun, 1st declension, plural', + isDictionaryForm: true + }, + n1s: { + name: 'Noun, 1st declension, singular', + isDictionaryForm: true + }, + n2: { + name: 'Noun, 2nd declension', + isDictionaryForm: true, + subConditions: ['n2s', 'n2p'] + }, + n2p: { + name: 'Noun, 2nd declension, plural', + isDictionaryForm: true + }, + n2s: { + name: 'Noun, 2nd declension, singular', + isDictionaryForm: true + }, + n3: { + name: 'Noun, 3rd declension', + isDictionaryForm: true, + subConditions: ['n3s', 'n3p'] + }, + n3p: { + name: 'Noun, 3rd declension, plural', + isDictionaryForm: true + }, + n3s: { + name: 'Noun, 3rd declension, singular', + isDictionaryForm: true + }, + n4: { + name: 'Noun, 4th declension', + isDictionaryForm: true, + subConditions: ['n4s', 'n4p'] + }, + n4p: { + name: 'Noun, 4th declension, plural', + isDictionaryForm: true + }, + n4s: { + name: 'Noun, 4th declension, singular', + isDictionaryForm: true + }, + n5: { + name: 'Noun, 5th declension', + isDictionaryForm: true, + subConditions: ['n5s', 'n5p'] + }, + n5p: { + name: 'Noun, 5th declension, plural', + isDictionaryForm: true + }, + n5s: { + name: 'Noun, 5th declension, singular', + isDictionaryForm: true + }, + adj: { + name: 'Adjective', + isDictionaryForm: true, + subConditions: ['adj3', 'adj12'] + }, + adj12: { + name: 'Adjective, 1st-2nd declension', + isDictionaryForm: 
true + }, + adj3: { + name: 'Adjective, 3rd declension', + isDictionaryForm: true + }, + adv: { + name: 'Adverb', + isDictionaryForm: true + } + }, + transforms: [ + { + name: 'plural', + description: 'Plural declension', + rules: [ + suffixInflection('i', 'us', ['n2p'], ['n2s']), + suffixInflection('i', 'us', ['adj12'], ['adj12']), + suffixInflection('e', '', ['n1p'], ['n1s']), + suffixInflection('ae', 'a', ['adj12'], ['adj12']), + suffixInflection('a', 'um', ['adj12'], ['adj12']) + ] + }, + { + name: 'feminine', + description: 'Adjective form', + rules: [ + suffixInflection('a', 'us', ['adj12'], ['adj12']) + ] + }, + { + name: 'neuter', + description: 'Adjective form', + rules: [ + suffixInflection('um', 'us', ['adj12'], ['adj12']) + ] + }, + { + name: 'ablative', + description: 'Ablative case', + rules: [ + suffixInflection('o', 'um', ['n2s'], ['n2s']) + ] + } + ] +}; diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 1c577039..e1d89054 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -22,6 +22,7 @@ import {englishTransforms} from './en/english-transforms.js'; import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; +import {latinTransforms} from './la/latin-transforms.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {oldIrishTransforms} from './sga/old-irish-transforms.js'; import {albanianTransforms} from './sq/albanian-transforms.js'; @@ -125,7 +126,8 @@ const languageDescriptors = [ textPreprocessors: { ...capitalizationPreprocessors, removeAlphabeticDiacritics - } + }, + languageTransforms: latinTransforms }, { iso: 'ja', |