summaryrefslogtreecommitdiff
path: root/ext/js/language/en
diff options
context:
space:
mode:
authorStefanVukovic99 <stefanvukovic44@gmail.com>2024-03-22 15:27:35 +0100
committerGitHub <noreply@github.com>2024-03-22 14:27:35 +0000
commitd2e9841f96ebff61d4a5c26a322484f6268115f1 (patch)
tree3c1351fa3b25994eab8456ea8457ee153bb98aa9 /ext/js/language/en
parent7681131782d958997663b1fb443a3e32e8eef550 (diff)
expand deinflection format (#745)
* abstract deinflections * undo redundant changes * remove cast * switch to js * MultiLanguageTransformer * comments * comments * fix test * suffixInflection * fix bench * substring instead of replace * without heuristic * suffixMap * add other language deinflections * wip * catch cycles * fix tests * uninflect to deinflect * use less regex * add suru masu stem deinflection
Diffstat (limited to 'ext/js/language/en')
-rw-r--r--ext/js/language/en/english-transforms.js282
-rw-r--r--ext/js/language/en/english-transforms.json39
2 files changed, 282 insertions, 39 deletions
diff --git a/ext/js/language/en/english-transforms.js b/ext/js/language/en/english-transforms.js
new file mode 100644
index 00000000..eab8b328
--- /dev/null
+++ b/ext/js/language/en/english-transforms.js
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {prefixInflection, suffixInflection} from '../language-transforms.js';
+
+/**
+ * Creates one suffix rule per consonant for endings in which that consonant is written twice.
+ * For every character `c` of `consonants`, `suffixInflection` is called with `c + c + suffix`
+ * as the inflected suffix and `c` as the deinflected suffix (e.g. 'pp' + 'ed' -> 'p').
+ * @param {string} consonants Characters to generate rules for, one rule per character.
+ * @param {string} suffix Ending that follows the doubled consonant.
+ * @param {string[]} conditionsIn Passed through unchanged to every generated rule.
+ * @param {string[]} conditionsOut Passed through unchanged to every generated rule.
+ * @returns {import('language-transformer').SuffixRule[]} The generated rules, in the order of `consonants`.
+ */
+function doubledConsonantInflection(consonants, suffix, conditionsIn, conditionsOut) {
+    return [...consonants].map((letter) => suffixInflection(letter + letter + suffix, letter, conditionsIn, conditionsOut));
+}
+
+// Consonants for which the '-ed' and '-ing' tables below generate doubled-consonant rules.
+const doubledVerbConsonants = 'bdgklmnprstz';
+
+/**
+ * Turns `[inflected suffix, deinflected suffix]` pairs into verb-to-verb suffix rules.
+ * @param {string[][]} suffixPairs
+ * @returns {import('language-transformer').SuffixRule[]} One rule per pair, in the given order.
+ */
+function createVerbSuffixInflections(suffixPairs) {
+    return suffixPairs.map(([inflected, deinflected]) => suffixInflection(inflected, deinflected, ['v'], ['v']));
+}
+
+// Suffix rules used by the 'past' transform.
+const pastSuffixInflections = [
+    ...createVerbSuffixInflections([
+        ['ed', ''], // 'walked'
+        ['ed', 'e'], // 'hoped'
+        ['ied', 'y'], // 'tried'
+        ['cked', 'c'] // 'frolicked'
+    ]),
+    ...doubledConsonantInflection(doubledVerbConsonants, 'ed', ['v'], ['v']),
+
+    // Verbs whose past form is spelled '-aid'
+    ...createVerbSuffixInflections([
+        ['laid', 'lay'],
+        ['paid', 'pay'],
+        ['said', 'say']
+    ])
+];
+
+// Suffix rules used by the 'ing' transform.
+const ingSuffixInflections = [
+    ...createVerbSuffixInflections([
+        ['ing', ''], // 'walking'
+        ['ing', 'e'], // 'driving'
+        ['ying', 'ie'], // 'lying'
+        ['cking', 'c'] // 'panicking'
+    ]),
+    ...doubledConsonantInflection(doubledVerbConsonants, 'ing', ['v'], ['v'])
+];
+
+// Suffix rules used by the '3rd pers. sing. pres' transform.
+const thirdPersonSgPresentSuffixInflections = createVerbSuffixInflections([
+    ['s', ''], // 'walks'
+    ['es', ''], // 'teaches'
+    ['ies', 'y'] // 'tries'
+]);
+
+// Words accepted as the particle of a phrasal verb.
+const phrasalVerbParticles = ['aboard', 'about', 'above', 'across', 'ahead', 'alongside', 'apart', 'around', 'aside', 'astray', 'away', 'back', 'before', 'behind', 'below', 'beneath', 'besides', 'between', 'beyond', 'by', 'close', 'down', 'east', 'west', 'north', 'south', 'eastward', 'westward', 'northward', 'southward', 'forward', 'backward', 'backwards', 'forwards', 'home', 'in', 'inside', 'instead', 'near', 'off', 'on', 'opposite', 'out', 'outside', 'over', 'overhead', 'past', 'round', 'since', 'through', 'throughout', 'together', 'under', 'underneath', 'up', 'within', 'without'];
+// Words accepted as the preposition of a phrasal verb.
+const phrasalVerbPrepositions = ['aback', 'about', 'above', 'across', 'after', 'against', 'ahead', 'along', 'among', 'apart', 'around', 'as', 'aside', 'at', 'away', 'back', 'before', 'behind', 'below', 'between', 'beyond', 'by', 'down', 'even', 'for', 'forth', 'forward', 'from', 'in', 'into', 'it', 'of', 'off', 'on', 'one', 'onto', 'open', 'out', 'over', 'past', 'round', 'through', 'to', 'together', 'toward', 'towards', 'under', 'up', 'upon', 'way', 'with', 'without'];
+
+// Regex alternations ('a|b|c'). The Set removes words that appear in both lists while keeping first-seen order.
+const particlesDisjunction = phrasalVerbParticles.join('|');
+const phrasalVerbWordSet = new Set([...phrasalVerbParticles, ...phrasalVerbPrepositions]);
+const phrasalVerbWordDisjunction = [...phrasalVerbWordSet].join('|');
+
+// Matches ' <object> ' between a word and a following particle, where the object contains no
+// phrasal verb word as a whole word. Compiled once here instead of on every deinflect() call;
+// the pattern is not global/sticky, so sharing it between calls carries no lastIndex state.
+const interposedObjectPattern = new RegExp(`(?<=\\w) (?:(?!\\b(${phrasalVerbWordDisjunction})\\b).)+ (?=(?:${particlesDisjunction}))`);
+
+/**
+ * Rule for a phrasal verb with an object between verb and particle:
+ * 'look the word up' -> 'look up'.
+ * @type {import('language-transformer').Rule}
+ */
+const phrasalVerbInterposedObjectRule = {
+    type: 'other',
+    isInflected: new RegExp(`^\\w* (?:(?!\\b(${phrasalVerbWordDisjunction})\\b).)+ (?:${particlesDisjunction})`),
+    // Collapses the interposed object (and its surrounding spaces) into a single space.
+    deinflect: (term) => term.replace(interposedObjectPattern, ' '),
+    conditionsIn: [],
+    conditionsOut: ['v']
+};
+
+/**
+ * Creates a rule that deinflects the verb part of a phrasal verb, e.g. with
+ * `inflected = 'ed'` and `deinflected = ''`: 'looked up' -> 'look up'.
+ * @param {string} inflected Regex source for the inflected verb ending; it is interpolated
+ *   into the patterns as-is, without escaping.
+ * @param {string} deinflected Replacement text for the matched ending.
+ * @returns {import('language-transformer').Rule}
+ */
+function createPhrasalVerbInflection(inflected, deinflected) {
+    // Compiled once per rule rather than on every deinflect() call. The pattern is not
+    // global/sticky, so reusing the instance carries no lastIndex state between calls.
+    const inflectedBeforePhrasalWord = new RegExp(`${inflected}(?= (?:${phrasalVerbWordDisjunction}))`);
+    return {
+        type: 'other',
+        isInflected: new RegExp(`^\\w*${inflected} (?:${phrasalVerbWordDisjunction})`),
+        // Only the first occurrence of the ending that is followed by ' <phrasal verb word>' is replaced.
+        deinflect: (term) => term.replace(inflectedBeforePhrasalWord, deinflected),
+        conditionsIn: [],
+        conditionsOut: ['v_phr']
+    };
+}
+
+/**
+ * Derives phrasal verb rules from plain suffix rules, one per source rule that
+ * defines `deinflected`; source rules without it are skipped.
+ * @param {import('language-transformer').SuffixRule[]} sourceRules
+ * @returns {import('language-transformer').Rule[]} Derived rules, in the order of `sourceRules`.
+ */
+function createPhrasalVerbInflectionsFromSuffixInflections(sourceRules) {
+    /** @type {import('language-transformer').Rule[]} */
+    const phrasalRules = [];
+    for (const {isInflected, deinflected} of sourceRules) {
+        if (typeof deinflected === 'undefined') { continue; }
+        // Removes the first '$' from the pattern source; presumably this is the end anchor
+        // added by suffixInflection, leaving just the suffix - verify against that helper.
+        const suffixSource = isInflected.source.replace('$', '');
+        phrasalRules.push(createPhrasalVerbInflection(suffixSource, deinflected));
+    }
+    return phrasalRules;
+}
+
+/**
+ * Deinflection descriptor for English: the word-class conditions and the list of
+ * transforms (each a named group of rules) that map inflected forms back to base forms.
+ * @type {import('language-transformer').LanguageTransformDescriptor}
+ */
+export const englishTransforms = {
+    language: 'en',
+    conditions: {
+        v_any: {
+            name: 'Verb',
+            isDictionaryForm: false,
+            subConditions: ['v', 'v_irr', 'v_phr']
+        },
+        v: {
+            name: 'Regular verb',
+            isDictionaryForm: true
+        },
+        v_irr: {
+            name: 'Irregular verb',
+            isDictionaryForm: true
+        },
+        v_phr: {
+            name: 'Phrasal verb',
+            isDictionaryForm: true
+        },
+        n: {
+            name: 'Noun',
+            isDictionaryForm: true,
+            subConditions: ['np', 'ns']
+        },
+        np: {
+            name: 'Noun plural',
+            isDictionaryForm: true
+        },
+        ns: {
+            name: 'Noun singular',
+            isDictionaryForm: true
+        },
+        adj: {
+            name: 'Adjective',
+            isDictionaryForm: true
+        },
+        adv: {
+            name: 'Adverb',
+            isDictionaryForm: true
+        }
+    },
+    transforms: [
+        {
+            name: 'plural',
+            description: 'Plural form of a noun',
+            rules: [
+                suffixInflection('s', '', ['np'], ['ns']),
+                suffixInflection('es', '', ['np'], ['ns']),
+                suffixInflection('ies', 'y', ['np'], ['ns']),
+                suffixInflection('ves', 'fe', ['np'], ['ns']),
+                suffixInflection('ves', 'f', ['np'], ['ns'])
+            ]
+        },
+        {
+            name: 'possessive',
+            description: 'Possessive form of a noun',
+            rules: [
+                suffixInflection('\'s', '', ['n'], ['n']),
+                suffixInflection('s\'', 's', ['n'], ['n'])
+            ]
+        },
+        {
+            name: 'past',
+            description: 'Simple past tense of a verb',
+            rules: [
+                ...pastSuffixInflections,
+                ...createPhrasalVerbInflectionsFromSuffixInflections(pastSuffixInflections)
+            ]
+        },
+        {
+            name: 'ing',
+            description: 'Present participle of a verb',
+            rules: [
+                ...ingSuffixInflections,
+                ...createPhrasalVerbInflectionsFromSuffixInflections(ingSuffixInflections)
+            ]
+        },
+        {
+            name: '3rd pers. sing. pres',
+            description: 'Third person singular present tense of a verb',
+            rules: [
+                ...thirdPersonSgPresentSuffixInflections,
+                ...createPhrasalVerbInflectionsFromSuffixInflections(thirdPersonSgPresentSuffixInflections)
+            ]
+        },
+        {
+            name: 'interposed object',
+            description: 'Phrasal verb with interposed object',
+            rules: [
+                phrasalVerbInterposedObjectRule
+            ]
+        },
+        {
+            name: 'archaic',
+            description: 'Archaic form of a word',
+            rules: [
+                suffixInflection('\'d', 'ed', ['v'], ['v'])
+            ]
+        },
+        {
+            name: 'adverb',
+            description: 'Adverb form of an adjective',
+            rules: [
+                suffixInflection('ly', '', ['adv'], ['adj'])
+            ]
+        },
+        {
+            name: 'comparative',
+            description: 'Comparative form of an adjective',
+            rules: [
+                suffixInflection('er', '', ['adj'], ['adj']), // 'faster'
+                suffixInflection('er', 'e', ['adj'], ['adj']), // 'nicer'
+                suffixInflection('ier', 'y', ['adj'], ['adj']), // 'happier'
+                ...doubledConsonantInflection('bdgmnt', 'er', ['adj'], ['adj'])
+            ]
+        },
+        {
+            name: 'superlative',
+            description: 'Superlative form of an adjective',
+            rules: [
+                suffixInflection('est', '', ['adj'], ['adj']), // 'fastest'
+                suffixInflection('est', 'e', ['adj'], ['adj']), // 'nicest'
+                suffixInflection('iest', 'y', ['adj'], ['adj']), // 'happiest'
+                ...doubledConsonantInflection('bdgmnt', 'est', ['adj'], ['adj'])
+            ]
+        },
+        {
+            name: 'dropped g',
+            description: 'Dropped g in -ing form of a verb',
+            rules: [
+                suffixInflection('in\'', 'ing', ['v'], ['v'])
+            ]
+        },
+        {
+            name: '-y',
+            description: 'Adjective formed from a verb or noun',
+            rules: [
+                suffixInflection('y', '', ['adj'], ['n', 'v']), // 'dirty', 'pushy'
+                suffixInflection('y', 'e', ['adj'], ['n', 'v']), // 'hazy'
+                // NOTE(review): conditionsIn is [] here but ['adj'] on the two rules above - confirm this is intended.
+                ...doubledConsonantInflection('glmnprst', 'y', [], ['n', 'v']) // 'baggy', 'saggy'
+            ]
+        },
+        {
+            name: 'un-',
+            description: 'Negative form of an adjective, adverb, or verb',
+            rules: [
+                // 'un' is a prefix ('unhappy' -> 'happy'), so it uses prefixInflection like the
+                // other prefix rules below rather than being treated as a word ending.
+                prefixInflection('un', '', ['adj', 'adv', 'v'], ['adj', 'adv', 'v'])
+            ]
+        },
+        {
+            name: 'going-to future',
+            description: 'Going-to future tense of a verb',
+            rules: [
+                prefixInflection('going to ', '', ['v'], ['v'])
+            ]
+        },
+        {
+            name: 'will future',
+            description: 'Will-future tense of a verb',
+            rules: [
+                prefixInflection('will ', '', ['v'], ['v'])
+            ]
+        },
+        {
+            name: 'imperative negative',
+            description: 'Negative imperative form of a verb',
+            rules: [
+                prefixInflection('don\'t ', '', ['v'], ['v']),
+                prefixInflection('do not ', '', ['v'], ['v'])
+            ]
+        }
+    ]
+};
diff --git a/ext/js/language/en/english-transforms.json b/ext/js/language/en/english-transforms.json
deleted file mode 100644
index 623a4ddf..00000000
--- a/ext/js/language/en/english-transforms.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
- "language": "en",
- "conditions": {
- "v": {
- "name": "Verb",
- "isDictionaryForm": true
- },
- "n": {
- "name": "Noun",
- "isDictionaryForm": true,
- "subConditions": ["np", "ns"]
- },
- "np": {
- "name": "Noun plural",
- "isDictionaryForm": true
- },
- "ns": {
- "name": "Noun singular",
- "isDictionaryForm": true
- },
- "adj": {
- "name": "Adjective",
- "isDictionaryForm": true
- }
- },
- "transforms": [
- {
- "name": "plural",
- "description": "Plural form of a noun",
- "rules": [
- {"suffixIn": "s", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
- {"suffixIn": "es", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
- {"suffixIn": "ies", "suffixOut": "y", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
- {"suffixIn": "ves", "suffixOut": "fe", "conditionsIn": ["np"], "conditionsOut": ["ns"]},
- {"suffixIn": "ves", "suffixOut": "f", "conditionsIn": ["np"], "conditionsOut": ["ns"]}
- ]
- }
- ]
-}