summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorStefanVukovic99 <stefanvukovic44@gmail.com>2024-02-23 02:57:33 +0100
committerGitHub <noreply@github.com>2024-02-23 01:57:33 +0000
commit62ac615450ef8b96fa9dd90b8b4e7fe486cc77a6 (patch)
treeef478b38fdb59caa078f1883ad50cff0e38a582a /ext
parent752a07b97c6d68a075a925c124ed420d621db02c (diff)
add more languages (#684)
* Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * part1 * frotend done? * fix tests * offscreen and type complications * add tests * start fixing tests * keep fixing tests * fix tests * Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * delete pt * renames * add tests * kebab-case filenames * lint * minor fixes * merge * fixes * fix part of comments * fix more comments * delete unused types * comment * comment * do backend * other files * move fetch utils to own file * remove extra line * add extra line * remove unnecessary export * simplify folder structure * remove redundant async * fix param type in api * fix language index * undo changes to cssStyleApplier * undo changes to utilities.js * undo changes to utilities.js * simplify language util * lint * undo phantom changes to anki integration * require textTransformations options * explicit locale in localeCompare * punctuate notes * prefer early exit * rename LanguageOptionsObjectMap * rename to textPreprocessor * tuple with names instead of boolean array * safe data setting * optional chaining * simplify LanguageOptions * encapsulate languages * delete language util * nullable language in text preprocessors controller * rename transform to process * remove settings * make translation advanced again * remove unused getTextTransformations api call * comments * change language types * RIP flags * comments * fix tests * lint * Text preprocessor type changes (#10) * Add types * Update types * Simplify type check * Refactor typing and structuring of language definitions * lint * update translator benchmark * undo markdown changes * undo markdown changes * undo markdown changes * more merge * add more languages * wip * refactoring * fixes * add comment, delete settings text * remove language from jsconfig --------- Co-authored-by: toasted-nutbread <toasted-nutbread@users.noreply.github.com> Co-authored-by: Darius Jahandarie <djahandarie@gmail.com>
Diffstat (limited to 'ext')
-rw-r--r--ext/js/language/ar/arabic-text-preprocessors.js28
-rw-r--r--ext/js/language/de/german-text-preprocessors.js34
-rw-r--r--ext/js/language/ja/japanese-text-preprocessors.js31
-rw-r--r--ext/js/language/la/latin-text-preprocessors.js56
-rw-r--r--ext/js/language/language-descriptors.js158
-rw-r--r--ext/js/language/ru/russian-text-preprocessors.js38
-rw-r--r--ext/settings.html29
7 files changed, 323 insertions, 51 deletions
diff --git a/ext/js/language/ar/arabic-text-preprocessors.js b/ext/js/language/ar/arabic-text-preprocessors.js
new file mode 100644
index 00000000..f0118564
--- /dev/null
+++ b/ext/js/language/ar/arabic-text-preprocessors.js
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+
+/** @type {import('language').TextPreprocessor<boolean>} */
+export const removeArabicScriptDiacritics = {
+ name: 'Remove diacritics',
+ description: 'وَلَدَ ⬅️ ولد',
+ options: basicTextPreprocessorOptions,
+ process: (text, setting) => {
+ return setting ? text.replace(/[\u064E-\u0650]/g, '') : text;
+ }
+};
diff --git a/ext/js/language/de/german-text-preprocessors.js b/ext/js/language/de/german-text-preprocessors.js
new file mode 100644
index 00000000..e829bf81
--- /dev/null
+++ b/ext/js/language/de/german-text-preprocessors.js
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+
+/** @type {import('language').BidirectionalConversionPreprocessor} */
+export const eszettPreprocessor = {
+ name: 'Convert "ß" to "ss"',
+ description: 'ß → ss, ẞ → SS and vice versa',
+ options: ['off', 'direct', 'inverse'],
+ process: (str, setting) => {
+ switch (setting) {
+ case 'off':
+ return str;
+ case 'direct':
+ return str.replace(/ẞ/g, 'SS').replace(/ß/g, 'ss');
+ case 'inverse':
+ return str.replace(/SS/g, 'ẞ').replace(/ss/g, 'ß');
+ }
+ }
+};
diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js
index ab4138c3..06f944c1 100644
--- a/ext/js/language/ja/japanese-text-preprocessors.js
+++ b/ext/js/language/ja/japanese-text-preprocessors.js
@@ -30,7 +30,6 @@ export const convertHalfWidthCharacters = {
name: 'Convert half width characters to full width',
description: 'ヨミチャン → ヨミチャン',
options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
process: (str, setting, sourceMap) => (setting ? convertHalfWidthKanaToFullWidth(str, sourceMap) : str)
};
@@ -39,7 +38,6 @@ export const convertNumericCharacters = {
name: 'Convert numeric characters to full width',
description: '1234 → 1234',
options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str)
};
@@ -48,26 +46,24 @@ export const convertAlphabeticCharacters = {
name: 'Convert alphabetic characters to hiragana',
description: 'yomichan → よみちゃん',
options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
process: (str, setting, sourceMap) => (setting ? convertAlphabeticToKana(str, sourceMap) : str)
};
-/** @type {import('language').TextPreprocessor<boolean>} */
+/** @type {import('language').BidirectionalConversionPreprocessor} */
export const convertHiraganaToKatakana = {
name: 'Convert hiragana to katakana',
- description: 'よみちゃん → ヨミチャン',
- options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
- process: (str, setting) => (setting ? convertHiraganaToKatakanaFunction(str) : str)
-};
-
-/** @type {import('language').TextPreprocessor<boolean>} */
-export const convertKatakanaToHiragana = {
- name: 'Convert katakana to hiragana',
- description: 'ヨミチャン → よみちゃん',
- options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
- process: (str, setting) => (setting ? convertKatakanaToHiraganaFunction(str) : str)
+ description: 'よみちゃん → ヨミチャン and vice versa',
+ options: ['off', 'direct', 'inverse'],
+ process: (str, setting) => {
+ switch (setting) {
+ case 'off':
+ return str;
+ case 'direct':
+ return convertHiraganaToKatakanaFunction(str);
+ case 'inverse':
+ return convertKatakanaToHiraganaFunction(str);
+ }
+ }
};
/** @type {import('language').TextPreprocessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */
@@ -75,7 +71,6 @@ export const collapseEmphaticSequences = {
name: 'Collapse emphatic character sequences',
description: 'すっっごーーい → すっごーい / すごい',
options: [[false, false], [true, false], [true, true]],
- /** @type {import('language').TextPreprocessorFunction<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */
process: (str, setting, sourceMap) => {
const [collapseEmphatic, collapseEmphaticFull] = setting;
if (collapseEmphatic) {
diff --git a/ext/js/language/la/latin-text-preprocessors.js b/ext/js/language/la/latin-text-preprocessors.js
new file mode 100644
index 00000000..ea6aae82
--- /dev/null
+++ b/ext/js/language/la/latin-text-preprocessors.js
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+
+/** @type {Record<string, string>} */
+const diacriticMap = {
+ ā: 'a',
+ ē: 'e',
+ ī: 'i',
+ ō: 'o',
+ ū: 'u',
+ ȳ: 'y',
+ Ā: 'A',
+ Ē: 'E',
+ Ī: 'I',
+ Ō: 'O',
+ Ū: 'U',
+ Ȳ: 'Y',
+ á: 'a',
+ é: 'e',
+ í: 'i',
+ ó: 'o',
+ ú: 'u',
+ ý: 'y',
+ Á: 'A',
+ É: 'E',
+ Í: 'I',
+ Ó: 'O',
+ Ú: 'U',
+ Ý: 'Y'
+};
+
+/** @type {import('language').TextPreprocessor<boolean>} */
+export const removeLatinDiacritics = {
+ name: 'Remove diacritics',
+ description: 'āēīōūȳ → aeiouy, áéíóúý → aeiouy',
+ options: basicTextPreprocessorOptions,
+ process: (str, setting) => {
+ return setting ? str.replace(/[āēīōūȳáéíóúýĀĒĪŌŪȲÁÉÍÓÚÝ]/g, (match) => diacriticMap[match] || match) : str;
+ }
+};
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index ee65a011..beb1417e 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -15,18 +15,99 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
+import {eszettPreprocessor} from './de/german-text-preprocessors.js';
+import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {removeLatinDiacritics} from './la/latin-text-preprocessors.js';
+import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js';
+const capitalizationPreprocessors = {
+ decapitalize,
+ capitalizeFirstLetter
+};
+
/** @type {import('language-descriptors').LanguageDescriptorAny[]} */
const languageDescriptors = [
{
+ iso: 'ar',
+ name: 'Arabic',
+ exampleText: 'قَرَأَ',
+ textPreprocessors: {
+ removeArabicScriptDiacritics
+ }
+ },
+ {
+ iso: 'de',
+ name: 'German',
+ exampleText: 'gelesen',
+ textPreprocessors: {
+ ...capitalizationPreprocessors,
+ eszettPreprocessor
+ }
+ },
+ {
+ iso: 'el',
+ name: 'Greek',
+ exampleText: 'διαβάζω',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
iso: 'en',
name: 'English',
exampleText: 'read',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'es',
+ name: 'Spanish',
+ exampleText: 'acabar de',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'fa',
+ name: 'Persian',
+ exampleText: 'خواندن',
textPreprocessors: {
- capitalizeFirstLetter,
- decapitalize
+ removeArabicScriptDiacritics
+ }
+ },
+ {
+ iso: 'fr',
+ name: 'French',
+ exampleText: 'lire',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'grc',
+ name: 'Ancient Greek',
+ exampleText: 'γράφω',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'hu',
+ name: 'Hungarian',
+ exampleText: 'olvasni',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'id',
+ name: 'Indonesian',
+ exampleText: 'membaca',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'it',
+ name: 'Italian',
+ exampleText: 'leggere',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'la',
+ name: 'Latin',
+ exampleText: 'legere',
+ textPreprocessors: {
+ removeLatinDiacritics
}
},
{
@@ -38,9 +119,78 @@ const languageDescriptors = [
convertNumericCharacters,
convertAlphabeticCharacters,
convertHiraganaToKatakana,
- convertKatakanaToHiragana,
collapseEmphaticSequences
}
+ },
+ {
+ iso: 'km',
+ name: 'Khmer',
+ exampleText: 'អាន',
+ textPreprocessors: {}
+ },
+ {
+ iso: 'pl',
+ name: 'Polish',
+ exampleText: 'czytacie',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'pt',
+ name: 'Portuguese',
+ exampleText: 'ler',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'ro',
+ name: 'Romanian',
+ exampleText: 'citit',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'ru',
+ name: 'Russian',
+ exampleText: 'читать',
+ textPreprocessors: {
+ ...capitalizationPreprocessors,
+ yoToE,
+ removeRussianDiacritics
+ }
+ },
+ {
+ iso: 'sh',
+ name: 'Serbo-Croatian',
+ exampleText: 'čitaše',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'sq',
+ name: 'Albanian',
+ exampleText: 'ndihmojme',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'sv',
+ name: 'Swedish',
+ exampleText: 'läsa',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'th',
+ name: 'Thai',
+ exampleText: 'อ่าน',
+ textPreprocessors: {}
+ },
+ {
+ iso: 'vi',
+ name: 'Vietnamese',
+ exampleText: 'đọc',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'zh',
+ name: 'Chinese',
+ exampleText: '读',
+ textPreprocessors: {}
}
];
diff --git a/ext/js/language/ru/russian-text-preprocessors.js b/ext/js/language/ru/russian-text-preprocessors.js
new file mode 100644
index 00000000..fc4472e9
--- /dev/null
+++ b/ext/js/language/ru/russian-text-preprocessors.js
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+
+/** @type {import('language').TextPreprocessor<boolean>} */
+export const removeRussianDiacritics = {
+ name: 'Remove diacritics',
+ description: 'A\u0301 → A, a\u0301 → a',
+ options: basicTextPreprocessorOptions,
+ process: (str, setting) => {
+ return setting ? str.replace(/\u0301/g, '') : str;
+ }
+};
+
+/** @type {import('language').TextPreprocessor<boolean>} */
+export const yoToE = {
+ name: 'Yo to E',
+ description: 'ё → е, Ё → Е',
+ options: basicTextPreprocessorOptions,
+ process: (str, setting) => {
+ return setting ? str.replace(/ё/g, 'е').replace(/Ё/g, 'Е') : str;
+ }
+};
diff --git a/ext/settings.html b/ext/settings.html
index 999ecc37..441e26df 100644
--- a/ext/settings.html
+++ b/ext/settings.html
@@ -1493,35 +1493,6 @@
<div class="heading-container">
<div class="heading-container-icon"><span class="icon" data-icon="translation"></span></div>
<div class="heading-container-left"><h2 id="translation"><a href="#!translation">Translation</a></h2></div>
- <div class="heading-container-right"><a tabindex="0" class="more-toggle more-only heading-link-light" data-parent-distance="3">Info&hellip;</a></div>
- </div>
- <div class="heading-description more" hidden>
- <p>
- The following options are used during the translation process to create alternate versions of the input text to search for.
- This can be helpful when the input text doesn't exactly match the term or expression found in the database.
- </p>
- <p>
- The conversion options below are listed in the order that the conversions are applied to the input text.
- Most of the conversions have three possible values:
- </p>
- <ul>
- <li>
- <strong>Disabled</strong> -
- This conversion will never be applied to the input text.
- </li>
- <li>
- <strong>Enabled</strong> -
- This conversion will always be applied to the input text.
- </li>
- <li>
- <strong>Use both variants</strong> -
- The translator will check the database for two variations: the raw input text and the converted input text.
- When multiple options use variants, the translator will search for combinations of the converted text.
- </li>
- </ul>
- <p>
- <a tabindex="0" class="more-toggle" data-parent-distance="3">Less&hellip;</a>
- </p>
</div>
</div>
<div class="settings-group advanced-only">