aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefanVukovic99 <stefanvukovic44@gmail.com>2024-05-11 01:37:09 +0200
committerGitHub <noreply@github.com>2024-05-10 23:37:09 +0000
commitd68e93e9ca210a3653e3a464391a77b27cfd353a (patch)
treebb6bbb5fb1d7d2d59d66cb4e01fa5014db9afc0b
parentfdfbfc27c6b731cbc41b2dd1239e70a549bcc8f4 (diff)
add a few deinflections for Latin (#901)
* abstract deinflections * undo redundant changes * remove cast * switch to js * MultiLanguageTransformer * comments * comments * fix test * suffixInflection * fix bench * substring instead of replace * without heuristic * suffixMap * add other language deinflections * wip * catch cycles * fix tests * uninflect to deinflect * use less regex * start * organize language transform test by language * simpler folders * wip * wip * delete german test * cleanup
-rw-r--r--.eslintrc.json1
-rw-r--r--ext/js/language/la/latin-transforms.js162
-rw-r--r--ext/js/language/language-descriptors.js4
-rw-r--r--test/language/latin-transforms.test.js56
4 files changed, 222 insertions, 1 deletions
diff --git a/.eslintrc.json b/.eslintrc.json
index 51bb2328..a5418154 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -647,6 +647,7 @@
"ext/js/language/ja/japanese-transforms.js",
"ext/js/language/ja/japanese-wanakana.js",
"ext/js/language/ja/japanese.js",
+ "ext/js/language/la/latin-transforms.js",
"ext/js/language/language-descriptors.js",
"ext/js/language/language-transformer.js",
"ext/js/language/language-transforms.js",
diff --git a/ext/js/language/la/latin-transforms.js b/ext/js/language/la/latin-transforms.js
new file mode 100644
index 00000000..5616adce
--- /dev/null
+++ b/ext/js/language/la/latin-transforms.js
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {suffixInflection} from '../language-transforms.js';
+
+// TODO: -ne suffix (estne, nonne)?
+
+/** @type {import('language-transformer').LanguageTransformDescriptor} */
+export const latinTransforms = { // Latin deinflection descriptor: condition tags plus a small set of suffix-rewrite rules
+ language: 'la',
+ conditions: { // Word-class tags; nouns are split by number (ns/np) and by declension (n1..n5)
+ v: {
+ name: 'Verb',
+ isDictionaryForm: true
+ },
+ n: {
+ name: 'Noun',
+ isDictionaryForm: true,
+ subConditions: ['ns', 'np']
+ },
+ ns: {
+ name: 'Noun, singular',
+ isDictionaryForm: true,
+ subConditions: ['n1s', 'n2s', 'n3s', 'n4s', 'n5s']
+ },
+ np: {
+ name: 'Noun, plural',
+ isDictionaryForm: true,
+ subConditions: ['n1p', 'n2p', 'n3p', 'n4p', 'n5p']
+ },
+ n1: {
+ name: 'Noun, 1st declension',
+ isDictionaryForm: true,
+ subConditions: ['n1s', 'n1p']
+ },
+ n1p: {
+ name: 'Noun, 1st declension, plural',
+ isDictionaryForm: true
+ },
+ n1s: {
+ name: 'Noun, 1st declension, singular',
+ isDictionaryForm: true
+ },
+ n2: {
+ name: 'Noun, 2nd declension',
+ isDictionaryForm: true,
+ subConditions: ['n2s', 'n2p']
+ },
+ n2p: {
+ name: 'Noun, 2nd declension, plural',
+ isDictionaryForm: true
+ },
+ n2s: {
+ name: 'Noun, 2nd declension, singular',
+ isDictionaryForm: true
+ },
+ n3: {
+ name: 'Noun, 3rd declension',
+ isDictionaryForm: true,
+ subConditions: ['n3s', 'n3p']
+ },
+ n3p: {
+ name: 'Noun, 3rd declension, plural',
+ isDictionaryForm: true
+ },
+ n3s: {
+ name: 'Noun, 3rd declension, singular',
+ isDictionaryForm: true
+ },
+ n4: {
+ name: 'Noun, 4th declension',
+ isDictionaryForm: true,
+ subConditions: ['n4s', 'n4p']
+ },
+ n4p: {
+ name: 'Noun, 4th declension, plural',
+ isDictionaryForm: true
+ },
+ n4s: {
+ name: 'Noun, 4th declension, singular',
+ isDictionaryForm: true
+ },
+ n5: {
+ name: 'Noun, 5th declension',
+ isDictionaryForm: true,
+ subConditions: ['n5s', 'n5p']
+ },
+ n5p: {
+ name: 'Noun, 5th declension, plural',
+ isDictionaryForm: true
+ },
+ n5s: {
+ name: 'Noun, 5th declension, singular',
+ isDictionaryForm: true
+ },
+ adj: {
+ name: 'Adjective',
+ isDictionaryForm: true,
+ subConditions: ['adj3', 'adj12']
+ },
+ adj12: {
+ name: 'Adjective, 1st-2nd declension',
+ isDictionaryForm: true
+ },
+ adj3: {
+ name: 'Adjective, 3rd declension',
+ isDictionaryForm: true
+ },
+ adv: {
+ name: 'Adverb',
+ isDictionaryForm: true
+ }
+ },
+ transforms: [ // Rule args: (inflected suffix, replacement suffix, tag list, tag list); the two tag lists are presumably conditions in/out - confirm in language-transforms.js
+ {
+ name: 'plural',
+ description: 'Plural declension',
+ rules: [
+ suffixInflection('i', 'us', ['n2p'], ['n2s']), // e.g. fluvii -> fluvius
+ suffixInflection('i', 'us', ['adj12'], ['adj12']), // e.g. magni -> magnus
+ suffixInflection('ae', 'a', ['n1p'], ['n1s']), // e.g. insulae -> insula; match the whole -ae ending so a bare -e (sole, amore) is not stripped and mislabelled
+ suffixInflection('ae', 'a', ['adj12'], ['adj12']), // e.g. magnae -> magna
+ suffixInflection('a', 'um', ['adj12'], ['adj12']) // e.g. prima -> primum (neuter plural back to neuter singular)
+ ]
+ },
+ {
+ name: 'feminine',
+ description: 'Adjective form',
+ rules: [
+ suffixInflection('a', 'us', ['adj12'], ['adj12']) // e.g. magna -> magnus
+ ]
+ },
+ {
+ name: 'neuter',
+ description: 'Adjective form',
+ rules: [
+ suffixInflection('um', 'us', ['adj12'], ['adj12']) // e.g. Graecum -> Graecus
+ ]
+ },
+ {
+ name: 'ablative',
+ description: 'Ablative case',
+ rules: [
+ suffixInflection('o', 'um', ['n2s'], ['n2s']) // e.g. vocabulo -> vocabulum
+ ]
+ }
+ ]
+};
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index 1c577039..e1d89054 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -22,6 +22,7 @@ import {englishTransforms} from './en/english-transforms.js';
import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
import {japaneseTransforms} from './ja/japanese-transforms.js';
import {isStringPartiallyJapanese} from './ja/japanese.js';
+import {latinTransforms} from './la/latin-transforms.js';
import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
import {oldIrishTransforms} from './sga/old-irish-transforms.js';
import {albanianTransforms} from './sq/albanian-transforms.js';
@@ -125,7 +126,8 @@ const languageDescriptors = [
textPreprocessors: {
...capitalizationPreprocessors,
removeAlphabeticDiacritics
- }
+ },
+ languageTransforms: latinTransforms
},
{
iso: 'ja',
diff --git a/test/language/latin-transforms.test.js b/test/language/latin-transforms.test.js
new file mode 100644
index 00000000..238d8eaf
--- /dev/null
+++ b/test/language/latin-transforms.test.js
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2023-2024 Yomitan Authors
+ * Copyright (C) 2020-2022 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {latinTransforms} from '../../ext/js/language/la/latin-transforms.js';
+import {LanguageTransformer} from '../../ext/js/language/language-transformer.js';
+import {testLanguageTransformer} from '../fixtures/language-transformer-test.js';
+
+/* eslint-disable @stylistic/no-multi-spaces */
+const tests = [ // Cases handed to testLanguageTransformer below; field semantics are defined by that fixture helper (not shown here)
+ {
+ category: 'plural',
+ valid: true,
+ tests: [
+ {term: 'fluvius', source: 'fluvii', rule: 'n', reasons: ['plural']}, // 2nd-declension noun: 'i' -> 'us'
+ {term: 'magnus', source: 'magni', rule: 'adj', reasons: ['plural']}, // 1st-2nd declension adjective: 'i' -> 'us'
+ {term: 'insula', source: 'insulae', rule: 'n', reasons: ['plural']} // 1st-declension noun plural back to singular
+ ]
+ },
+ {
+ category: 'adjective',
+ valid: true,
+ tests: [
+ {term: 'magnus', source: 'magna', rule: 'adj', reasons: ['feminine']}, // 'a' -> 'us'
+ {term: 'Graecus', source: 'Graecum', rule: 'adj', reasons: ['neuter']}, // 'um' -> 'us'; capitalised input
+ {term: 'primus', source: 'prima', rule: 'adj', reasons: ['neuter', 'plural']} // two-step chain: 'a' -> 'um' (plural), then 'um' -> 'us' (neuter)
+ ]
+ },
+ {
+ category: 'ablative',
+ valid: true,
+ tests: [
+ {term: 'vocabulum', source: 'vocabulo', rule: 'n', reasons: ['ablative']} // 2nd-declension neuter: 'o' -> 'um'
+ ]
+ }
+];
+/* eslint-enable @stylistic/no-multi-spaces */
+
+const languageTransformer = new LanguageTransformer(); // fresh transformer for this test file
+languageTransformer.addDescriptor(latinTransforms); // register only the Latin descriptor
+
+testLanguageTransformer(languageTransformer, tests); // run the shared fixture runner over the cases above