diff options
Diffstat (limited to 'ext/js')
| -rw-r--r-- | ext/js/language/ar/arabic-text-preprocessors.js | 28 | ||||
| -rw-r--r-- | ext/js/language/de/german-text-preprocessors.js | 34 | ||||
| -rw-r--r-- | ext/js/language/ja/japanese-text-preprocessors.js | 31 | ||||
| -rw-r--r-- | ext/js/language/la/latin-text-preprocessors.js | 56 | ||||
| -rw-r--r-- | ext/js/language/language-descriptors.js | 158 | ||||
| -rw-r--r-- | ext/js/language/ru/russian-text-preprocessors.js | 38 | 
6 files changed, 323 insertions, 22 deletions
| diff --git a/ext/js/language/ar/arabic-text-preprocessors.js b/ext/js/language/ar/arabic-text-preprocessors.js new file mode 100644 index 00000000..f0118564 --- /dev/null +++ b/ext/js/language/ar/arabic-text-preprocessors.js @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {basicTextPreprocessorOptions} from '../text-preprocessors.js'; + +/** @type {import('language').TextPreprocessor<boolean>} */ +export const removeArabicScriptDiacritics = { +    name: 'Remove diacritics', +    description: 'وَلَدَ ⬅️ ولد', +    options: basicTextPreprocessorOptions, +    process: (text, setting) => { +        return setting ? text.replace(/[\u064E-\u0650]/g, '') : text; +    } +}; diff --git a/ext/js/language/de/german-text-preprocessors.js b/ext/js/language/de/german-text-preprocessors.js new file mode 100644 index 00000000..e829bf81 --- /dev/null +++ b/ext/js/language/de/german-text-preprocessors.js @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + + +/** @type {import('language').BidirectionalConversionPreprocessor} */ +export const eszettPreprocessor = { +    name: 'Convert "ß" to "ss"', +    description: 'ß → ss, ẞ → SS and vice versa', +    options: ['off', 'direct', 'inverse'], +    process: (str, setting) => { +        switch (setting) { +            case 'off': +                return str; +            case 'direct': +                return str.replace(/ẞ/g, 'SS').replace(/ß/g, 'ss'); +            case 'inverse': +                return str.replace(/SS/g, 'ẞ').replace(/ss/g, 'ß'); +        } +    } +}; diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js index ab4138c3..06f944c1 100644 --- a/ext/js/language/ja/japanese-text-preprocessors.js +++ b/ext/js/language/ja/japanese-text-preprocessors.js @@ -30,7 +30,6 @@ export const convertHalfWidthCharacters = {      name: 'Convert half width characters to full width',      description: 'ヨミチャン → ヨミチャン',      options: basicTextPreprocessorOptions, -    /** @type {import('language').TextPreprocessorFunction<boolean>} */      process: (str, setting, sourceMap) => (setting ? convertHalfWidthKanaToFullWidth(str, sourceMap) : str)  }; @@ -39,7 +38,6 @@ export const convertNumericCharacters = {      name: 'Convert numeric characters to full width',      description: '1234 → 1234',      options: basicTextPreprocessorOptions, -    /** @type {import('language').TextPreprocessorFunction<boolean>} */      process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str)  }; @@ -48,26 +46,24 @@ export const convertAlphabeticCharacters = {      name: 'Convert alphabetic characters to hiragana',      description: 'yomichan → よみちゃん',      options: basicTextPreprocessorOptions, -    /** @type {import('language').TextPreprocessorFunction<boolean>} */      process: (str, setting, sourceMap) => (setting ? convertAlphabeticToKana(str, sourceMap) : str)  }; -/** @type {import('language').TextPreprocessor<boolean>} */ +/** @type {import('language').BidirectionalConversionPreprocessor} */  export const convertHiraganaToKatakana = {      name: 'Convert hiragana to katakana', -    description: 'よみちゃん → ヨミチャン', -    options: basicTextPreprocessorOptions, -    /** @type {import('language').TextPreprocessorFunction<boolean>} */ -    process: (str, setting) => (setting ? convertHiraganaToKatakanaFunction(str) : str) -}; - -/** @type {import('language').TextPreprocessor<boolean>} */ -export const convertKatakanaToHiragana = { -    name: 'Convert katakana to hiragana', -    description: 'ヨミチャン → よみちゃん', -    options: basicTextPreprocessorOptions, -    /** @type {import('language').TextPreprocessorFunction<boolean>} */ -    process: (str, setting) => (setting ? convertKatakanaToHiraganaFunction(str) : str) +    description: 'よみちゃん → ヨミチャン and vice versa', +    options: ['off', 'direct', 'inverse'], +    process: (str, setting) => { +        switch (setting) { +            case 'off': +                return str; +            case 'direct': +                return convertHiraganaToKatakanaFunction(str); +            case 'inverse': +                return convertKatakanaToHiraganaFunction(str); +        } +    }  };  /** @type {import('language').TextPreprocessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */ @@ -75,7 +71,6 @@ export const collapseEmphaticSequences = {      name: 'Collapse emphatic character sequences',      description: 'すっっごーーい → すっごーい / すごい',      options: [[false, false], [true, false], [true, true]], -    /** @type {import('language').TextPreprocessorFunction<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */      process: (str, setting, sourceMap) => {          const [collapseEmphatic, collapseEmphaticFull] = setting;          if (collapseEmphatic) { diff --git a/ext/js/language/la/latin-text-preprocessors.js b/ext/js/language/la/latin-text-preprocessors.js new file mode 100644 index 00000000..ea6aae82 --- /dev/null +++ b/ext/js/language/la/latin-text-preprocessors.js @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {basicTextPreprocessorOptions} from '../text-preprocessors.js'; + +/** @type {Record<string, string>} */ +const diacriticMap = { +    ā: 'a', +    ē: 'e', +    ī: 'i', +    ō: 'o', +    ū: 'u', +    ȳ: 'y', +    Ā: 'A', +    Ē: 'E', +    Ī: 'I', +    Ō: 'O', +    Ū: 'U', +    Ȳ: 'Y', +    á: 'a', +    é: 'e', +    í: 'i', +    ó: 'o', +    ú: 'u', +    ý: 'y', +    Á: 'A', +    É: 'E', +    Í: 'I', +    Ó: 'O', +    Ú: 'U', +    Ý: 'Y' +}; + +/** @type {import('language').TextPreprocessor<boolean>} */ +export const removeLatinDiacritics = { +    name: 'Remove diacritics', +    description: 'āēīōūȳ → aeiouy, áéíóúý → aeiouy', +    options: basicTextPreprocessorOptions, +    process: (str, setting) => { +        return setting ? str.replace(/[āēīōūȳáéíóúýĀĒĪŌŪȲÁÉÍÓÚÝ]/g, (match) => diacriticMap[match] || match) : str; +    } +}; diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index ee65a011..beb1417e 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -15,18 +15,99 @@   * along with this program.  If not, see <https://www.gnu.org/licenses/>.   */ -import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js'; +import {eszettPreprocessor} from './de/german-text-preprocessors.js'; +import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import {removeLatinDiacritics} from './la/latin-text-preprocessors.js'; +import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';  import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js'; +const capitalizationPreprocessors = { +    decapitalize, +    capitalizeFirstLetter +}; +  /** @type {import('language-descriptors').LanguageDescriptorAny[]} */  const languageDescriptors = [      { +        iso: 'ar', +        name: 'Arabic', +        exampleText: 'قَرَأَ', +        textPreprocessors: { +            removeArabicScriptDiacritics +        } +    }, +    { +        iso: 'de', +        name: 'German', +        exampleText: 'gelesen', +        textPreprocessors: { +            ...capitalizationPreprocessors, +            eszettPreprocessor +        } +    }, +    { +        iso: 'el', +        name: 'Greek', +        exampleText: 'διαβάζω', +        textPreprocessors: capitalizationPreprocessors +    }, +    {          iso: 'en',          name: 'English',          exampleText: 'read', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'es', +        name: 'Spanish', +        exampleText: 'acabar de', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'fa', +        name: 'Persian', +        exampleText: 'خواندن',          textPreprocessors: { -            capitalizeFirstLetter, -            decapitalize +            removeArabicScriptDiacritics +        } +    }, +    { +        iso: 'fr', +        name: 'French', +        exampleText: 'lire', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'grc', +        name: 'Ancient Greek', +        exampleText: 'γράφω', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'hu', +        name: 'Hungarian', +        exampleText: 'olvasni', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'id', +        name: 'Indonesian', +        exampleText: 'membaca', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'it', +        name: 'Italian', +        exampleText: 'leggere', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'la', +        name: 'Latin', +        exampleText: 'legere', +        textPreprocessors: { +            removeLatinDiacritics          }      },      { @@ -38,9 +119,78 @@ const languageDescriptors = [              convertNumericCharacters,              convertAlphabeticCharacters,              convertHiraganaToKatakana, -            convertKatakanaToHiragana,              collapseEmphaticSequences          } +    }, +    { +        iso: 'km', +        name: 'Khmer', +        exampleText: 'អាន', +        textPreprocessors: {} +    }, +    { +        iso: 'pl', +        name: 'Polish', +        exampleText: 'czytacie', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'pt', +        name: 'Portuguese', +        exampleText: 'ler', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'ro', +        name: 'Romanian', +        exampleText: 'citit', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'ru', +        name: 'Russian', +        exampleText: 'читать', +        textPreprocessors: { +            ...capitalizationPreprocessors, +            yoToE, +            removeRussianDiacritics +        } +    }, +    { +        iso: 'sh', +        name: 'Serbo-Croatian', +        exampleText: 'čitaše', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'sq', +        name: 'Albanian', +        exampleText: 'ndihmojme', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'sv', +        name: 'Swedish', +        exampleText: 'läsa', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'th', +        name: 'Thai', +        exampleText: 'อ่าน', +        textPreprocessors: {} +    }, +    { +        iso: 'vi', +        name: 'Vietnamese', +        exampleText: 'đọc', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'zh', +        name: 'Chinese', +        exampleText: '读', +        textPreprocessors: {}      }  ]; diff --git a/ext/js/language/ru/russian-text-preprocessors.js b/ext/js/language/ru/russian-text-preprocessors.js new file mode 100644 index 00000000..fc4472e9 --- /dev/null +++ b/ext/js/language/ru/russian-text-preprocessors.js @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {basicTextPreprocessorOptions} from '../text-preprocessors.js'; + +/** @type {import('language').TextPreprocessor<boolean>} */ +export const removeRussianDiacritics = { +    name: 'Remove diacritics', +    description: 'A\u0301 → A, a\u0301 → a', +    options: basicTextPreprocessorOptions, +    process: (str, setting) => { +        return setting ? str.replace(/\u0301/g, '') : str; +    } +}; + +/** @type {import('language').TextPreprocessor<boolean>} */ +export const yoToE = { +    name: 'Yo to E', +    description: 'ё → е, Ё → Е', +    options: basicTextPreprocessorOptions, +    process: (str, setting) => { +        return setting ? str.replace(/ё/g, 'е').replace(/Ё/g, 'Е') : str; +    } +}; |