summary | refs | log | tree | commit | diff
path: root/ext/js
diff options
context:
space:
mode:
Diffstat (limited to 'ext/js')
-rw-r--r-- ext/js/language/ar/arabic-text-preprocessors.js 28
-rw-r--r-- ext/js/language/de/german-text-preprocessors.js 34
-rw-r--r-- ext/js/language/ja/japanese-text-preprocessors.js 31
-rw-r--r-- ext/js/language/la/latin-text-preprocessors.js 56
-rw-r--r-- ext/js/language/language-descriptors.js 158
-rw-r--r-- ext/js/language/ru/russian-text-preprocessors.js 38
6 files changed, 323 insertions, 22 deletions
diff --git a/ext/js/language/ar/arabic-text-preprocessors.js b/ext/js/language/ar/arabic-text-preprocessors.js
new file mode 100644
index 00000000..f0118564
--- /dev/null
+++ b/ext/js/language/ar/arabic-text-preprocessors.js
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+
+/**
+ * Text preprocessor that deletes Arabic short-vowel marks from the text.
+ * When `setting` is truthy, every code point in U+064E-U+0650 (fatha, damma, kasra)
+ * is removed; otherwise the text is returned unchanged.
+ * NOTE(review): tanwin (U+064B-U+064D), shadda (U+0651) and sukun (U+0652) fall outside
+ * this range and are left in place - confirm whether that is intentional.
+ * @type {import('language').TextPreprocessor<boolean>}
+ */
+export const removeArabicScriptDiacritics = {
+ name: 'Remove diacritics',
+ description: 'وَلَدَ ⬅️ ولد',
+ options: basicTextPreprocessorOptions,
+ process: (text, setting) => {
+ return setting ? text.replace(/[\u064E-\u0650]/g, '') : text;
+ }
+};
diff --git a/ext/js/language/de/german-text-preprocessors.js b/ext/js/language/de/german-text-preprocessors.js
new file mode 100644
index 00000000..e829bf81
--- /dev/null
+++ b/ext/js/language/de/german-text-preprocessors.js
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * Text preprocessor that converts between eszett and its double-s spelling.
+ * - 'off': returns the text unchanged.
+ * - 'direct': replaces every "ẞ" with "SS", then every "ß" with "ss".
+ * - 'inverse': replaces every "SS" with "ẞ", then every "ss" with "ß".
+ * NOTE(review): the switch has no default branch, so any other setting value
+ * makes `process` return `undefined`.
+ * @type {import('language').BidirectionalConversionPreprocessor}
+ */
+export const eszettPreprocessor = {
+ name: 'Convert "ß" to "ss"',
+ description: 'ß → ss, ẞ → SS and vice versa',
+ options: ['off', 'direct', 'inverse'],
+ process: (str, setting) => {
+ switch (setting) {
+ case 'off':
+ return str;
+ case 'direct':
+ return str.replace(/ẞ/g, 'SS').replace(/ß/g, 'ss');
+ case 'inverse':
+ return str.replace(/SS/g, 'ẞ').replace(/ss/g, 'ß');
+ }
+ }
+};
diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js
index ab4138c3..06f944c1 100644
--- a/ext/js/language/ja/japanese-text-preprocessors.js
+++ b/ext/js/language/ja/japanese-text-preprocessors.js
@@ -30,7 +30,6 @@ export const convertHalfWidthCharacters = {
name: 'Convert half width characters to full width',
description: 'ヨミチャン → ヨミチャン',
options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
process: (str, setting, sourceMap) => (setting ? convertHalfWidthKanaToFullWidth(str, sourceMap) : str)
};
@@ -39,7 +38,6 @@ export const convertNumericCharacters = {
name: 'Convert numeric characters to full width',
description: '1234 → 1234',
options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str)
};
@@ -48,26 +46,24 @@ export const convertAlphabeticCharacters = {
name: 'Convert alphabetic characters to hiragana',
description: 'yomichan → よみちゃん',
options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
process: (str, setting, sourceMap) => (setting ? convertAlphabeticToKana(str, sourceMap) : str)
};
-/** @type {import('language').TextPreprocessor<boolean>} */
+/**
+ * Text preprocessor that converts between hiragana and katakana.
+ * 'off' returns the text unchanged, 'direct' applies convertHiraganaToKatakanaFunction,
+ * and 'inverse' applies convertKatakanaToHiraganaFunction.
+ * @type {import('language').BidirectionalConversionPreprocessor}
+ */
export const convertHiraganaToKatakana = {
name: 'Convert hiragana to katakana',
- description: 'よみちゃん → ヨミチャン',
- options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
- process: (str, setting) => (setting ? convertHiraganaToKatakanaFunction(str) : str)
-};
-
-/** @type {import('language').TextPreprocessor<boolean>} */
-export const convertKatakanaToHiragana = {
- name: 'Convert katakana to hiragana',
- description: 'ヨミチャン → よみちゃん',
- options: basicTextPreprocessorOptions,
- /** @type {import('language').TextPreprocessorFunction<boolean>} */
- process: (str, setting) => (setting ? convertKatakanaToHiraganaFunction(str) : str)
+ description: 'よみちゃん → ヨミチャン and vice versa',
+ options: ['off', 'direct', 'inverse'],
+ process: (str, setting) => {
+ switch (setting) {
+ case 'off':
+ return str;
+ case 'direct':
+ return convertHiraganaToKatakanaFunction(str);
+ case 'inverse':
+ return convertKatakanaToHiraganaFunction(str);
+ }
+ }
};
/** @type {import('language').TextPreprocessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */
@@ -75,7 +71,6 @@ export const collapseEmphaticSequences = {
name: 'Collapse emphatic character sequences',
description: 'すっっごーーい → すっごーい / すごい',
options: [[false, false], [true, false], [true, true]],
- /** @type {import('language').TextPreprocessorFunction<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */
process: (str, setting, sourceMap) => {
const [collapseEmphatic, collapseEmphaticFull] = setting;
if (collapseEmphatic) {
diff --git a/ext/js/language/la/latin-text-preprocessors.js b/ext/js/language/la/latin-text-preprocessors.js
new file mode 100644
index 00000000..ea6aae82
--- /dev/null
+++ b/ext/js/language/la/latin-text-preprocessors.js
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+
+/**
+ * Lookup table from a vowel carrying a macron or an acute accent (lower and upper case)
+ * to the corresponding unaccented letter of the same case.
+ * @type {Record<string, string>}
+ */
+const diacriticMap = {
+ ā: 'a',
+ ē: 'e',
+ ī: 'i',
+ ō: 'o',
+ ū: 'u',
+ ȳ: 'y',
+ Ā: 'A',
+ Ē: 'E',
+ Ī: 'I',
+ Ō: 'O',
+ Ū: 'U',
+ Ȳ: 'Y',
+ á: 'a',
+ é: 'e',
+ í: 'i',
+ ó: 'o',
+ ú: 'u',
+ ý: 'y',
+ Á: 'A',
+ É: 'E',
+ Í: 'I',
+ Ó: 'O',
+ Ú: 'U',
+ Ý: 'Y'
+};
+
+/**
+ * Text preprocessor that strips macrons and acute accents from vowels.
+ * When `setting` is truthy, each character matched by the character class is replaced
+ * with its `diacriticMap` entry, falling back to the matched character itself when the
+ * lookup yields a falsy value; otherwise the text is returned unchanged.
+ * NOTE(review): presumably the class lists precomposed characters only, so letters
+ * written with separate combining marks would pass through untouched - confirm.
+ * @type {import('language').TextPreprocessor<boolean>}
+ */
+export const removeLatinDiacritics = {
+ name: 'Remove diacritics',
+ description: 'āēīōūȳ → aeiouy, áéíóúý → aeiouy',
+ options: basicTextPreprocessorOptions,
+ process: (str, setting) => {
+ return setting ? str.replace(/[āēīōūȳáéíóúýĀĒĪŌŪȲÁÉÍÓÚÝ]/g, (match) => diacriticMap[match] || match) : str;
+ }
+};
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index ee65a011..beb1417e 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -15,18 +15,99 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
+import {eszettPreprocessor} from './de/german-text-preprocessors.js';
+import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {removeLatinDiacritics} from './la/latin-text-preprocessors.js';
+import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js';
+const capitalizationPreprocessors = { // Shared preprocessor pair reused by the language descriptors in this file
+ decapitalize,
+ capitalizeFirstLetter
+};
+
/** @type {import('language-descriptors').LanguageDescriptorAny[]} */
const languageDescriptors = [
{
+ iso: 'ar',
+ name: 'Arabic',
+ exampleText: 'قَرَأَ',
+ textPreprocessors: {
+ removeArabicScriptDiacritics
+ }
+ },
+ {
+ iso: 'de',
+ name: 'German',
+ exampleText: 'gelesen',
+ textPreprocessors: {
+ ...capitalizationPreprocessors,
+ eszettPreprocessor
+ }
+ },
+ {
+ iso: 'el',
+ name: 'Greek',
+ exampleText: 'διαβάζω',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
iso: 'en',
name: 'English',
exampleText: 'read',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'es',
+ name: 'Spanish',
+ exampleText: 'acabar de',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'fa',
+ name: 'Persian',
+ exampleText: 'خواندن',
textPreprocessors: {
- capitalizeFirstLetter,
- decapitalize
+ removeArabicScriptDiacritics
+ }
+ },
+ {
+ iso: 'fr',
+ name: 'French',
+ exampleText: 'lire',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'grc',
+ name: 'Ancient Greek',
+ exampleText: 'γράφω',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'hu',
+ name: 'Hungarian',
+ exampleText: 'olvasni',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'id',
+ name: 'Indonesian',
+ exampleText: 'membaca',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'it',
+ name: 'Italian',
+ exampleText: 'leggere',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'la',
+ name: 'Latin',
+ exampleText: 'legere',
+ textPreprocessors: {
+ removeLatinDiacritics
}
},
{
@@ -38,9 +119,78 @@ const languageDescriptors = [
convertNumericCharacters,
convertAlphabeticCharacters,
convertHiraganaToKatakana,
- convertKatakanaToHiragana,
collapseEmphaticSequences
}
+ },
+ {
+ iso: 'km',
+ name: 'Khmer',
+ exampleText: 'អាន',
+ textPreprocessors: {}
+ },
+ {
+ iso: 'pl',
+ name: 'Polish',
+ exampleText: 'czytacie',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'pt',
+ name: 'Portuguese',
+ exampleText: 'ler',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'ro',
+ name: 'Romanian',
+ exampleText: 'citit',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'ru',
+ name: 'Russian',
+ exampleText: 'читать',
+ textPreprocessors: {
+ ...capitalizationPreprocessors,
+ yoToE,
+ removeRussianDiacritics
+ }
+ },
+ {
+ iso: 'sh',
+ name: 'Serbo-Croatian',
+ exampleText: 'čitaše',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'sq',
+ name: 'Albanian',
+ exampleText: 'ndihmojme',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'sv',
+ name: 'Swedish',
+ exampleText: 'läsa',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'th',
+ name: 'Thai',
+ exampleText: 'อ่าน',
+ textPreprocessors: {}
+ },
+ {
+ iso: 'vi',
+ name: 'Vietnamese',
+ exampleText: 'đọc',
+ textPreprocessors: capitalizationPreprocessors
+ },
+ {
+ iso: 'zh',
+ name: 'Chinese',
+ exampleText: '读',
+ textPreprocessors: {}
}
];
diff --git a/ext/js/language/ru/russian-text-preprocessors.js b/ext/js/language/ru/russian-text-preprocessors.js
new file mode 100644
index 00000000..fc4472e9
--- /dev/null
+++ b/ext/js/language/ru/russian-text-preprocessors.js
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+
+/**
+ * Text preprocessor that removes the combining acute accent (U+0301), presumably
+ * used here as a stress mark.
+ * When `setting` is truthy, every U+0301 is deleted from the text; otherwise the
+ * text is returned unchanged.
+ * @type {import('language').TextPreprocessor<boolean>}
+ */
+export const removeRussianDiacritics = {
+ name: 'Remove diacritics',
+ description: 'A\u0301 → A, a\u0301 → a',
+ options: basicTextPreprocessorOptions,
+ process: (str, setting) => {
+ return setting ? str.replace(/\u0301/g, '') : str;
+ }
+};
+
+/**
+ * Text preprocessor that replaces every "ё" with "е" and every "Ё" with "Е"
+ * when `setting` is truthy; otherwise the text is returned unchanged.
+ * @type {import('language').TextPreprocessor<boolean>}
+ */
+export const yoToE = {
+ name: 'Yo to E',
+ description: 'ё → е, Ё → Е',
+ options: basicTextPreprocessorOptions,
+ process: (str, setting) => {
+ return setting ? str.replace(/ё/g, 'е').replace(/Ё/g, 'Е') : str;
+ }
+};