From d2e9841f96ebff61d4a5c26a322484f6268115f1 Mon Sep 17 00:00:00 2001 From: StefanVukovic99 Date: Fri, 22 Mar 2024 15:27:35 +0100 Subject: expand deinflection format (#745) * abstract deinflections * undo redundant changes * remove cast * switch to js * MultiLanguageTransformer * comments * comments * fix test * suffixInflection * fix bench * substring instead of replace * without heuristic * suffixMap * add other language deinflections * wip * catch cycles * fix tests * uninflect to deinflect * use less regex * add suru masu stem deinflection --- ext/js/language/de/german-transforms.js | 106 ++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 ext/js/language/de/german-transforms.js (limited to 'ext/js/language/de/german-transforms.js') diff --git a/ext/js/language/de/german-transforms.js b/ext/js/language/de/german-transforms.js new file mode 100644 index 00000000..d05ca874 --- /dev/null +++ b/ext/js/language/de/german-transforms.js @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see https://www.gnu.org/licenses/.
+ */ + +import {prefixInflection, suffixInflection} from '../language-transforms.js'; + +// https://www.dartmouth.edu/~deutsch/Grammatik/Wortbildung/Separables.html +const separablePrefixes = ['ab', 'an', 'auf', 'aus', 'auseinander', 'bei', 'da', 'dabei', 'dar', 'daran', 'dazwischen', 'durch', 'ein', 'empor', 'entgegen', 'entlang', 'entzwei', 'fehl', 'fern', 'fest', 'fort', 'frei', 'gegenüber', 'gleich', 'heim', 'her', 'herab', 'heran', 'herauf', 'heraus', 'herbei', 'herein', 'herüber', 'herum', 'herunter', 'hervor', 'hin', 'hinab', 'hinauf', 'hinaus', 'hinein', 'hinterher', 'hinunter', 'hinweg', 'hinzu', 'hoch', 'los', 'mit', 'nach', 'nebenher', 'nieder', 'statt', 'um', 'vor', 'voran', 'voraus', 'vorbei', 'vorüber', 'vorweg', 'weg', 'weiter', 'wieder', 'zu', 'zurecht', 'zurück', 'zusammen']; + +/** + * @param {string} prefix + * @param {string[]} conditionsIn + * @param {string[]} conditionsOut + * @returns {import('language-transformer').Rule} + */ +function separatedPrefix(prefix, conditionsIn, conditionsOut) { + const germanLetters = 'a-zA-ZäöüßÄÖÜẞ'; + const regex = new RegExp(`^([${germanLetters}]+) .+ ${prefix}$`); + return { + type: 'other', + isInflected: regex, + deinflect: (term) => { + return term.replace(regex, '$1 ' + prefix); + }, + conditionsIn, + conditionsOut + }; +} + +const separatedPrefixInflections = separablePrefixes.map((prefix) => { + return separatedPrefix(prefix, [], []); +}); + +const zuInfinitiveInflections = separablePrefixes.map((prefix) => { + return prefixInflection(prefix + 'zu', prefix, [], ['v']); +}); + +export const germanTransforms = { + language: 'de', + conditions: { + v: { + name: 'Verb', + isDictionaryForm: true + }, + n: { + name: 'Noun', + isDictionaryForm: true + }, + adj: { + name: 'Adjective', + isDictionaryForm: true + } + }, + transforms: [ + { + name: 'nominalization', + description: 'Noun formed from a verb', + rules: [ + suffixInflection('ung', 'en', [], []), + suffixInflection('lung', 'eln', [], []) + ] + }, + { 
+ name: '-bar', + description: '-able adjective from a verb', + rules: [ + suffixInflection('bar', 'en', [], ['v']), + suffixInflection('bar', 'n', [], ['v']) // Lieferbar + ] + }, + { + name: 'negative', + description: 'Negation', + rules: [ + prefixInflection('un', '', [], ['adj']) + ] + }, + { + name: 'separated prefix', + description: 'Separable prefix', + rules: [ + ...separatedPrefixInflections + ] + }, + { + name: 'zu-infinitive', + description: 'zu-infinitive', + rules: [ + ...zuInfinitiveInflections + ] + } + ] +}; -- cgit v1.2.3