aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Maa <jmaa@berkeley.edu>2024-05-23 15:23:10 -0700
committerGitHub <noreply@github.com>2024-05-23 22:23:10 +0000
commitd6aa6737821f5db61e932714322f2401f86b5200 (patch)
treea8d95dab4c7f6ebe1140bb894a919bc666761ab6
parentbbb19669c27a4216ae11937650da173165e72978 (diff)
Basic Spanish Transforms (#908)
* Spanish transforms * Add more spanish transforms * Address comments * Fix types * Undo prefix change
-rw-r--r--.eslintrc.json1
-rw-r--r--ext/js/core/log.js2
-rw-r--r--ext/js/language/es/spanish-transforms.js171
-rw-r--r--ext/js/language/language-descriptors.js4
-rw-r--r--ext/js/language/language-transformer.js2
-rw-r--r--ext/js/language/language-transforms.js18
-rw-r--r--test/language/spanish-transforms.test.js100
-rw-r--r--types/ext/language-transformer-internal.d.ts2
-rw-r--r--types/ext/language-transformer.d.ts2
9 files changed, 297 insertions, 5 deletions
diff --git a/.eslintrc.json b/.eslintrc.json
index 65ae89a6..ea741e5a 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -643,6 +643,7 @@
"ext/js/language/de/german-text-preprocessors.js",
"ext/js/language/de/german-transforms.js",
"ext/js/language/en/english-transforms.js",
+ "ext/js/language/es/spanish-transforms.js",
"ext/js/language/ja/japanese-text-preprocessors.js",
"ext/js/language/ja/japanese-transforms.js",
"ext/js/language/ja/japanese-wanakana.js",
diff --git a/ext/js/core/log.js b/ext/js/core/log.js
index 8401cc2b..cb714e70 100644
--- a/ext/js/core/log.js
+++ b/ext/js/core/log.js
@@ -73,7 +73,7 @@ class Logger extends EventDispatcher {
*/
logGenericError(error, level, context) {
if (typeof context === 'undefined') {
- context = {url: location.href};
+ context = typeof location === 'undefined' ? {url: 'unknown'} : {url: location.href};
}
let errorString;
diff --git a/ext/js/language/es/spanish-transforms.js b/ext/js/language/es/spanish-transforms.js
new file mode 100644
index 00000000..cf145f6a
--- /dev/null
+++ b/ext/js/language/es/spanish-transforms.js
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {suffixInflection, wholeWordInflection} from '../language-transforms.js';
+
+const ACCENTS = new Map([
+ ['a', 'á'],
+ ['e', 'é'],
+ ['i', 'í'],
+ ['o', 'ó'],
+ ['u', 'ú']
+]);
+
+
+/**
+ * @param {string} char
+ * @returns {string}
+ */
+function addAccent(char) {
+ return ACCENTS.get(char) || char;
+}
+
+/** @type {import('language-transformer').LanguageTransformDescriptor} */
+export const spanishTransforms = {
+ language: 'es',
+ conditions: {
+ v: {
+ name: 'Verb',
+ isDictionaryForm: true,
+ subConditions: ['v_ar', 'v_er', 'v_ir']
+ },
+ v_ar: {
+ name: '-ar verb',
+ isDictionaryForm: true
+ },
+ v_er: {
+ name: '-er verb',
+ isDictionaryForm: true
+ },
+ v_ir: {
+ name: '-ir verb',
+ isDictionaryForm: true
+ },
+ n: {
+ name: 'Noun',
+ isDictionaryForm: true,
+ subConditions: ['ns', 'np']
+ },
+ np: {
+ name: 'Noun plural',
+ isDictionaryForm: true
+ },
+ ns: {
+ name: 'Noun singular',
+ isDictionaryForm: true
+ },
+ adj: {
+ name: 'Adjective',
+ isDictionaryForm: true
+ }
+ },
+ transforms: [
+ {
+ name: 'plural',
+ description: 'Plural form of a noun',
+ rules: [
+ suffixInflection('s', '', ['np'], ['ns']),
+ suffixInflection('es', '', ['np'], ['ns']),
+ suffixInflection('ces', 'z', ['np'], ['ns']), // 'lápices' -> lápiz
+ ...[...'aeiou'].map((v) => suffixInflection(`${v}ses`, `${addAccent(v)}s`, ['np'], ['ns'])), // 'autobuses' -> autobús
+ ...[...'aeiou'].map((v) => suffixInflection(`${v}nes`, `${addAccent(v)}n`, ['np'], ['ns'])) // 'canciones' -> canción
+ ]
+ },
+ {
+ name: 'feminine adjective',
+ description: 'feminine form of an adjective',
+ rules: [
+ suffixInflection('a', 'o', ['adj'], ['adj'])
+ ]
+ },
+ {
+ name: 'present indicative',
+ description: 'Present indicative form of a verb',
+ rules: [
+ // -ar verbs
+ suffixInflection('o', 'ar', ['v'], ['v']),
+ suffixInflection('as', 'ar', ['v'], ['v']),
+ suffixInflection('a', 'ar', ['v'], ['v']),
+ suffixInflection('amos', 'ar', ['v'], ['v']),
+ suffixInflection('áis', 'ar', ['v'], ['v']),
+ suffixInflection('an', 'ar', ['v'], ['v']),
+ // -er verbs
+ suffixInflection('o', 'er', ['v'], ['v']),
+ suffixInflection('es', 'er', ['v'], ['v']),
+ suffixInflection('e', 'er', ['v'], ['v']),
+ suffixInflection('emos', 'er', ['v'], ['v']),
+ suffixInflection('éis', 'er', ['v'], ['v']),
+ suffixInflection('en', 'er', ['v'], ['v']),
+ // -ir verbs
+ suffixInflection('o', 'ir', ['v'], ['v']),
+ suffixInflection('es', 'ir', ['v'], ['v']),
+ suffixInflection('e', 'ir', ['v'], ['v']),
+ suffixInflection('imos', 'ir', ['v'], ['v']),
+ suffixInflection('ís', 'ir', ['v'], ['v']),
+ suffixInflection('en', 'ir', ['v'], ['v']),
+ // -tener verbs
+ suffixInflection('tengo', 'tener', ['v'], ['v']),
+ suffixInflection('tienes', 'tener', ['v'], ['v']),
+ suffixInflection('tiene', 'tener', ['v'], ['v']),
+ suffixInflection('tenemos', 'tener', ['v'], ['v']),
+ suffixInflection('tenéis', 'tener', ['v'], ['v']),
+ suffixInflection('tienen', 'tener', ['v'], ['v']),
+ // Verbs with Irregular Yo Forms
+ // -guir, -ger, or -gir verbs
+ suffixInflection('go', 'guir', ['v'], ['v']),
+ suffixInflection('jo', 'ger', ['v'], ['v']),
+ suffixInflection('jo', 'gir', ['v'], ['v']),
+ suffixInflection('aigo', 'aer', ['v'], ['v']),
+ suffixInflection('zco', 'cer', ['v'], ['v']),
+ suffixInflection('zco', 'cir', ['v'], ['v']),
+ suffixInflection('hago', 'hacer', ['v'], ['v']),
+ suffixInflection('pongo', 'poner', ['v'], ['v']),
+ suffixInflection('lgo', 'lir', ['v'], ['v']),
+ suffixInflection('lgo', 'ler', ['v'], ['v']),
+ wholeWordInflection('quepo', 'caber', ['v'], ['v']),
+ wholeWordInflection('doy', 'dar', ['v'], ['v']),
+ wholeWordInflection('sé', 'saber', ['v'], ['v']),
+ wholeWordInflection('veo', 'ver', ['v'], ['v']),
+ // Ser, estar, ir, haber
+ wholeWordInflection('soy', 'ser', ['v'], ['v']),
+ wholeWordInflection('eres', 'ser', ['v'], ['v']),
+ wholeWordInflection('es', 'ser', ['v'], ['v']),
+ wholeWordInflection('somos', 'ser', ['v'], ['v']),
+ wholeWordInflection('sois', 'ser', ['v'], ['v']),
+ wholeWordInflection('son', 'ser', ['v'], ['v']),
+ wholeWordInflection('estoy', 'estar', ['v'], ['v']),
+ wholeWordInflection('estás', 'estar', ['v'], ['v']),
+ wholeWordInflection('está', 'estar', ['v'], ['v']),
+ wholeWordInflection('estamos', 'estar', ['v'], ['v']),
+ wholeWordInflection('estáis', 'estar', ['v'], ['v']),
+ wholeWordInflection('están', 'estar', ['v'], ['v']),
+ wholeWordInflection('voy', 'ir', ['v'], ['v']),
+ wholeWordInflection('vas', 'ir', ['v'], ['v']),
+ wholeWordInflection('va', 'ir', ['v'], ['v']),
+ wholeWordInflection('vamos', 'ir', ['v'], ['v']),
+ wholeWordInflection('vais', 'ir', ['v'], ['v']),
+ wholeWordInflection('van', 'ir', ['v'], ['v']),
+ wholeWordInflection('he', 'haber', ['v'], ['v']),
+ wholeWordInflection('has', 'haber', ['v'], ['v']),
+ wholeWordInflection('ha', 'haber', ['v'], ['v']),
+ wholeWordInflection('hemos', 'haber', ['v'], ['v']),
+ wholeWordInflection('habéis', 'haber', ['v'], ['v']),
+ wholeWordInflection('han', 'haber', ['v'], ['v'])
+ ]
+ }
+ ]
+};
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index baf53f81..3a78aff5 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -19,6 +19,7 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
import {eszettPreprocessor} from './de/german-text-preprocessors.js';
import {germanTransforms} from './de/german-transforms.js';
import {englishTransforms} from './en/english-transforms.js';
+import {spanishTransforms} from './es/spanish-transforms.js';
import {
alphabeticToHiragana,
alphanumericWidthVariants,
@@ -78,7 +79,8 @@ const languageDescriptors = [
iso: 'es',
name: 'Spanish',
exampleText: 'acabar de',
- textPreprocessors: capitalizationPreprocessors
+ textPreprocessors: capitalizationPreprocessors,
+ languageTransforms: spanishTransforms
},
{
iso: 'fa',
diff --git a/ext/js/language/language-transformer.js b/ext/js/language/language-transformer.js
index 47f31b5f..f859ebf2 100644
--- a/ext/js/language/language-transformer.js
+++ b/ext/js/language/language-transformer.js
@@ -132,7 +132,7 @@ export class LanguageTransformer {
const isCycle = trace.some((frame) => frame.transform === name && frame.ruleIndex === j && frame.text === text);
if (isCycle) {
- log.warn(new Error(`Cycle detected in transform[${name}] rule[${j}] for text: ${text}`));
+ log.warn(new Error(`Cycle detected in transform[${name}] rule[${j}] for text: ${text}\nTrace: ${JSON.stringify(trace)}`));
continue;
}
diff --git a/ext/js/language/language-transforms.js b/ext/js/language/language-transforms.js
index ee8af88b..f3e36560 100644
--- a/ext/js/language/language-transforms.js
+++ b/ext/js/language/language-transforms.js
@@ -52,3 +52,21 @@ export function prefixInflection(inflectedPrefix, deinflectedPrefix, conditionsI
conditionsOut
};
}
+
+/**
+ * @param {string} inflectedWord
+ * @param {string} deinflectedWord
+ * @param {string[]} conditionsIn
+ * @param {string[]} conditionsOut
+ * @returns {import('language-transformer').Rule}
+ */
+export function wholeWordInflection(inflectedWord, deinflectedWord, conditionsIn, conditionsOut) {
+ const regex = new RegExp('^' + inflectedWord + '$');
+ return {
+ type: 'wholeWord',
+ isInflected: regex,
+ deinflect: () => deinflectedWord,
+ conditionsIn,
+ conditionsOut
+ };
+}
diff --git a/test/language/spanish-transforms.test.js b/test/language/spanish-transforms.test.js
new file mode 100644
index 00000000..7a6ab729
--- /dev/null
+++ b/test/language/spanish-transforms.test.js
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {spanishTransforms} from '../../ext/js/language/es/spanish-transforms.js';
+import {LanguageTransformer} from '../../ext/js/language/language-transformer.js';
+import {testLanguageTransformer} from '../fixtures/language-transformer-test.js';
+
+const tests = [
+ {
+ category: 'nouns',
+ valid: true,
+ tests: [
+ {term: 'gato', source: 'gatos', rule: 'ns', reasons: ['plural']},
+ {term: 'sofá', source: 'sofás', rule: 'ns', reasons: ['plural']},
+ {term: 'tisú', source: 'tisús', rule: 'ns', reasons: ['plural']},
+ {term: 'tisú', source: 'tisúes', rule: 'ns', reasons: ['plural']},
+ {term: 'autobús', source: 'autobuses', rule: 'ns', reasons: ['plural']},
+ {term: 'ciudad', source: 'ciudades', rule: 'ns', reasons: ['plural']},
+ {term: 'clic', source: 'clics', rule: 'ns', reasons: ['plural']},
+ {term: 'sí', source: 'síes', rule: 'ns', reasons: ['plural']},
+ {term: 'zigzag', source: 'zigzags', rule: 'ns', reasons: ['plural']},
+ {term: 'luz', source: 'luces', rule: 'ns', reasons: ['plural']},
+ {term: 'canción', source: 'canciones', rule: 'ns', reasons: ['plural']}
+ ]
+ },
+ {
+ category: 'feminine adjectives',
+ valid: true,
+ tests: [
+ {term: 'rojo', source: 'roja', rule: 'adj', reasons: ['feminine adjective']}
+ ]
+ },
+ {
+ category: 'present indicative verbs',
+ valid: true,
+ tests: [
+ {term: 'hablar', source: 'hablo', rule: 'v', reasons: ['present indicative']},
+ {term: 'hablar', source: 'hablas', rule: 'v', reasons: ['present indicative']},
+ {term: 'hablar', source: 'habla', rule: 'v', reasons: ['present indicative']},
+ {term: 'hablar', source: 'hablamos', rule: 'v', reasons: ['present indicative']},
+ {term: 'hablar', source: 'habláis', rule: 'v', reasons: ['present indicative']},
+ {term: 'hablar', source: 'hablan', rule: 'v', reasons: ['present indicative']},
+ {term: 'comer', source: 'como', rule: 'v', reasons: ['present indicative']},
+ {term: 'comer', source: 'comes', rule: 'v', reasons: ['present indicative']},
+ {term: 'comer', source: 'come', rule: 'v', reasons: ['present indicative']},
+ {term: 'comer', source: 'comemos', rule: 'v', reasons: ['present indicative']},
+ {term: 'comer', source: 'coméis', rule: 'v', reasons: ['present indicative']},
+ {term: 'comer', source: 'comen', rule: 'v', reasons: ['present indicative']},
+ {term: 'vivir', source: 'vivo', rule: 'v', reasons: ['present indicative']},
+ {term: 'vivir', source: 'vives', rule: 'v', reasons: ['present indicative']},
+ {term: 'vivir', source: 'vive', rule: 'v', reasons: ['present indicative']},
+ {term: 'vivir', source: 'vivimos', rule: 'v', reasons: ['present indicative']},
+ {term: 'vivir', source: 'vivís', rule: 'v', reasons: ['present indicative']},
+ {term: 'vivir', source: 'viven', rule: 'v', reasons: ['present indicative']},
+ {term: 'tener', source: 'tengo', rule: 'v', reasons: ['present indicative']},
+ {term: 'tener', source: 'tienes', rule: 'v', reasons: ['present indicative']},
+ {term: 'tener', source: 'tiene', rule: 'v', reasons: ['present indicative']},
+ {term: 'tener', source: 'tenemos', rule: 'v', reasons: ['present indicative']},
+ {term: 'tener', source: 'tenéis', rule: 'v', reasons: ['present indicative']},
+ {term: 'tener', source: 'tienen', rule: 'v', reasons: ['present indicative']},
+ {term: 'exigir', source: 'exijo', rule: 'v', reasons: ['present indicative']},
+ {term: 'extinguir', source: 'extingo', rule: 'v', reasons: ['present indicative']},
+ {term: 'escoger', source: 'escojo', rule: 'v', reasons: ['present indicative']},
+ {term: 'caber', source: 'quepo', rule: 'v', reasons: ['present indicative']},
+ {term: 'caer', source: 'caigo', rule: 'v', reasons: ['present indicative']},
+ {term: 'conocer', source: 'conozco', rule: 'v', reasons: ['present indicative']},
+ {term: 'dar', source: 'doy', rule: 'v', reasons: ['present indicative']},
+ {term: 'hacer', source: 'hago', rule: 'v', reasons: ['present indicative']},
+ {term: 'poner', source: 'pongo', rule: 'v', reasons: ['present indicative']},
+ {term: 'saber', source: 'sé', rule: 'v', reasons: ['present indicative']},
+ {term: 'salir', source: 'salgo', rule: 'v', reasons: ['present indicative']},
+ {term: 'traducir', source: 'traduzco', rule: 'v', reasons: ['present indicative']},
+ {term: 'traer', source: 'traigo', rule: 'v', reasons: ['present indicative']},
+ {term: 'valer', source: 'valgo', rule: 'v', reasons: ['present indicative']},
+ {term: 'ver', source: 'veo', rule: 'v', reasons: ['present indicative']},
+ {term: 'ser', source: 'soy', rule: 'v', reasons: ['present indicative']},
+ {term: 'estar', source: 'estoy', rule: 'v', reasons: ['present indicative']},
+ {term: 'ir', source: 'voy', rule: 'v', reasons: ['present indicative']},
+ {term: 'haber', source: 'he', rule: 'v', reasons: ['present indicative']}
+ ]
+ }
+];
+
+const languageTransformer = new LanguageTransformer();
+languageTransformer.addDescriptor(spanishTransforms);
+testLanguageTransformer(languageTransformer, tests);
diff --git a/types/ext/language-transformer-internal.d.ts b/types/ext/language-transformer-internal.d.ts
index 9ae412d3..a2b18b35 100644
--- a/types/ext/language-transformer-internal.d.ts
+++ b/types/ext/language-transformer-internal.d.ts
@@ -22,7 +22,7 @@ export type Transform = {
};
export type Rule = {
- type: 'suffix' | 'prefix' | 'other';
+ type: 'suffix' | 'prefix' | 'wholeWord' | 'other';
isInflected: RegExp;
deinflect: (inflectedWord: string) => string;
conditionsIn: number;
diff --git a/types/ext/language-transformer.d.ts b/types/ext/language-transformer.d.ts
index 95da602d..02457523 100644
--- a/types/ext/language-transformer.d.ts
+++ b/types/ext/language-transformer.d.ts
@@ -55,7 +55,7 @@ export type TransformI18n = {
};
export type Rule = {
- type: 'suffix' | 'prefix' | 'other';
+ type: 'suffix' | 'prefix' | 'wholeWord' | 'other';
isInflected: RegExp;
deinflect: (inflectedWord: string) => string;
conditionsIn: string[];