add more languages (#684)

* Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * part1 * frontend done? * fix tests * offscreen and type complications * add tests * start fixing tests * keep fixing tests * fix tests * Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * delete pt * renames * add tests * kebab-case filenames * lint * minor fixes * merge * fixes * fix part of comments * fix more comments * delete unused types * comment * comment * do backend * other files * move fetch utils to own file * remove extra line * add extra line * remove unnecessary export * simplify folder structure * remove redundant async * fix param type in api * fix language index * undo changes to cssStyleApplier * undo changes to utilities.js * undo changes to utilities.js * simplify language util * lint * undo phantom changes to anki integration * require textTransformations options * explicit locale in localeCompare * punctuate notes * prefer early exit * rename LanguageOptionsObjectMap * rename to textPreprocessor * tuple with names instead of boolean array * safe data setting * optional chaining * simplify LanguageOptions * encapsulate languages * delete language util * nullable language in text preprocessors controller * rename transform to process * remove settings * make translation advanced again * remove unused getTextTransformations api call * comments * change language types * RIP flags * comments * fix tests * lint * Text preprocessor type changes (#10) * Add types * Update types * Simplify type check * Refactor typing and structuring of language definitions * lint * update translator benchmark * undo markdown changes * undo markdown changes * undo markdown changes * more merge * add more languages * wip * refactoring * fixes * add comment, delete settings text * remove language from jsconfig --------- Co-authored-by: toasted-nutbread <toasted-nutbread@users.noreply.github.com> Co-authored-by: Darius Jahandarie <djahandarie@gmail.com>
author: StefanVukovic99 <stefanvukovic44@gmail.com> 2024-02-23 02:57:33 +0100
committer: GitHub <noreply@github.com> 2024-02-23 01:57:33 +0000
commit: 62ac615450ef8b96fa9dd90b8b4e7fe486cc77a6 (patch)
tree: ef478b38fdb59caa078f1883ad50cff0e38a582a /ext/js/language/language-descriptors.js
parent: 752a07b97c6d68a075a925c124ed420d621db02c (diff)
1 files changed, 154 insertions, 4 deletions
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index ee65a011..beb1417e 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -15,18 +15,99 @@
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 
-import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
+import {eszettPreprocessor} from './de/german-text-preprocessors.js';
+import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {removeLatinDiacritics} from './la/latin-text-preprocessors.js';
+import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
 import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js';
 
+const capitalizationPreprocessors = {
+    decapitalize,
+    capitalizeFirstLetter
+};
+
 /** @type {import('language-descriptors').LanguageDescriptorAny[]} */
 const languageDescriptors = [
     {
+        iso: 'ar',
+        name: 'Arabic',
+        exampleText: 'قَرَأَ',
+        textPreprocessors: {
+            removeArabicScriptDiacritics
+        }
+    },
+    {
+        iso: 'de',
+        name: 'German',
+        exampleText: 'gelesen',
+        textPreprocessors: {
+            ...capitalizationPreprocessors,
+            eszettPreprocessor
+        }
+    },
+    {
+        iso: 'el',
+        name: 'Greek',
+        exampleText: 'διαβάζω',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
         iso: 'en',
         name: 'English',
         exampleText: 'read',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'es',
+        name: 'Spanish',
+        exampleText: 'acabar de',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'fa',
+        name: 'Persian',
+        exampleText: 'خواندن',
         textPreprocessors: {
-            capitalizeFirstLetter,
-            decapitalize
+            removeArabicScriptDiacritics
+        }
+    },
+    {
+        iso: 'fr',
+        name: 'French',
+        exampleText: 'lire',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'grc',
+        name: 'Ancient Greek',
+        exampleText: 'γράφω',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'hu',
+        name: 'Hungarian',
+        exampleText: 'olvasni',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'id',
+        name: 'Indonesian',
+        exampleText: 'membaca',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'it',
+        name: 'Italian',
+        exampleText: 'leggere',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'la',
+        name: 'Latin',
+        exampleText: 'legere',
+        textPreprocessors: {
+            removeLatinDiacritics
         }
     },
     {
@@ -38,9 +119,78 @@ const languageDescriptors = [
             convertNumericCharacters,
             convertAlphabeticCharacters,
             convertHiraganaToKatakana,
-            convertKatakanaToHiragana,
             collapseEmphaticSequences
         }
+    },
+    {
+        iso: 'km',
+        name: 'Khmer',
+        exampleText: 'អាន',
+        textPreprocessors: {}
+    },
+    {
+        iso: 'pl',
+        name: 'Polish',
+        exampleText: 'czytacie',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'pt',
+        name: 'Portuguese',
+        exampleText: 'ler',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'ro',
+        name: 'Romanian',
+        exampleText: 'citit',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'ru',
+        name: 'Russian',
+        exampleText: 'читать',
+        textPreprocessors: {
+            ...capitalizationPreprocessors,
+            yoToE,
+            removeRussianDiacritics
+        }
+    },
+    {
+        iso: 'sh',
+        name: 'Serbo-Croatian',
+        exampleText: 'čitaše',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'sq',
+        name: 'Albanian',
+        exampleText: 'ndihmojme',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'sv',
+        name: 'Swedish',
+        exampleText: 'läsa',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'th',
+        name: 'Thai',
+        exampleText: 'อ่าน',
+        textPreprocessors: {}
+    },
+    {
+        iso: 'vi',
+        name: 'Vietnamese',
+        exampleText: 'đọc',
+        textPreprocessors: capitalizationPreprocessors
+    },
+    {
+        iso: 'zh',
+        name: 'Chinese',
+        exampleText: '读',
+        textPreprocessors: {}
     }
 ];
author	StefanVukovic99 <stefanvukovic44@gmail.com>	2024-02-23 02:57:33 +0100
committer	GitHub <noreply@github.com>	2024-02-23 01:57:33 +0000
commit	62ac615450ef8b96fa9dd90b8b4e7fe486cc77a6 (patch)
tree	ef478b38fdb59caa078f1883ad50cff0e38a582a /ext/js/language/language-descriptors.js
parent	752a07b97c6d68a075a925c124ed420d621db02c (diff)