diff options
| author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-02-23 02:57:33 +0100 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-23 01:57:33 +0000 | 
| commit | 62ac615450ef8b96fa9dd90b8b4e7fe486cc77a6 (patch) | |
| tree | ef478b38fdb59caa078f1883ad50cff0e38a582a /ext/js/language/language-descriptors.js | |
| parent | 752a07b97c6d68a075a925c124ed420d621db02c (diff) | |
add more languages (#684)
* Copy functions from JapaneseUtil
* Remove JapaneseUtil
* Update usages of JapaneseUtil functions
* part1
* frontend done?
* fix tests
* offscreen and type complications
* add tests
* start fixing tests
* keep fixing tests
* fix tests
* Copy functions from JapaneseUtil
* Remove JapaneseUtil
* Update usages of JapaneseUtil functions
* delete pt
* renames
* add tests
* kebab-case filenames
* lint
* minor fixes
* merge
* fixes
* fix part of comments
* fix more comments
* delete unused types
* comment
* comment
* do backend
* other files
* move fetch utils to own file
* remove extra line
* add extra line
* remove unnecessary export
* simplify folder structure
* remove redundant async
* fix param type in api
* fix language index
* undo changes to cssStyleApplier
* undo changes to utilities.js
* undo changes to utilities.js
* simplify language util
* lint
* undo phantom changes to anki integration
* require textTransformations options
* explicit locale in localeCompare
* punctuate notes
* prefer early exit
* rename LanguageOptionsObjectMap
* rename to textPreprocessor
* tuple with names instead of boolean array
* safe data setting
* optional chaining
* simplify LanguageOptions
* encapsulate languages
* delete language util
* nullable language in text preprocessors controller
* rename transform to process
* remove settings
* make translation advanced again
* remove unused getTextTransformations api call
* comments
* change language types
* RIP flags
* comments
* fix tests
* lint
* Text preprocessor type changes (#10)
* Add types
* Update types
* Simplify type check
* Refactor typing and structuring of language definitions
* lint
* update translator benchmark
* undo markdown changes
* undo markdown changes
* undo markdown changes
* more merge
* add more languages
* wip
* refactoring
* fixes
* add comment, delete settings text
* remove language from jsconfig
---------
Co-authored-by: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Co-authored-by: Darius Jahandarie <djahandarie@gmail.com>
Diffstat (limited to 'ext/js/language/language-descriptors.js')
| -rw-r--r-- | ext/js/language/language-descriptors.js | 158 | 
1 files changed, 154 insertions, 4 deletions
| diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index ee65a011..beb1417e 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -15,18 +15,99 @@   * along with this program.  If not, see <https://www.gnu.org/licenses/>.   */ -import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js'; +import {eszettPreprocessor} from './de/german-text-preprocessors.js'; +import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import {removeLatinDiacritics} from './la/latin-text-preprocessors.js'; +import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';  import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js'; +const capitalizationPreprocessors = { +    decapitalize, +    capitalizeFirstLetter +}; +  /** @type {import('language-descriptors').LanguageDescriptorAny[]} */  const languageDescriptors = [      { +        iso: 'ar', +        name: 'Arabic', +        exampleText: 'قَرَأَ', +        textPreprocessors: { +            removeArabicScriptDiacritics +        } +    }, +    { +        iso: 'de', +        name: 'German', +        exampleText: 'gelesen', +        textPreprocessors: { +            ...capitalizationPreprocessors, +            eszettPreprocessor +        } +    }, +    { +        iso: 'el', +        name: 'Greek', +        exampleText: 'διαβάζω', +        textPreprocessors: capitalizationPreprocessors +    }, +    {          iso: 'en',          name: 'English',          exampleText: 'read', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +   
     iso: 'es', +        name: 'Spanish', +        exampleText: 'acabar de', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'fa', +        name: 'Persian', +        exampleText: 'خواندن',          textPreprocessors: { -            capitalizeFirstLetter, -            decapitalize +            removeArabicScriptDiacritics +        } +    }, +    { +        iso: 'fr', +        name: 'French', +        exampleText: 'lire', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'grc', +        name: 'Ancient Greek', +        exampleText: 'γράφω', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'hu', +        name: 'Hungarian', +        exampleText: 'olvasni', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'id', +        name: 'Indonesian', +        exampleText: 'membaca', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'it', +        name: 'Italian', +        exampleText: 'leggere', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'la', +        name: 'Latin', +        exampleText: 'legere', +        textPreprocessors: { +            removeLatinDiacritics          }      },      { @@ -38,9 +119,78 @@ const languageDescriptors = [              convertNumericCharacters,              convertAlphabeticCharacters,              convertHiraganaToKatakana, -            convertKatakanaToHiragana,              collapseEmphaticSequences          } +    }, +    { +        iso: 'km', +        name: 'Khmer', +        exampleText: 'អាន', +        textPreprocessors: {} +    }, +    { +        iso: 'pl', +        name: 'Polish', +        exampleText: 'czytacie', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'pt', +        name: 'Portuguese', +        exampleText: 'ler', +        textPreprocessors: capitalizationPreprocessors +    }, +   
 { +        iso: 'ro', +        name: 'Romanian', +        exampleText: 'citit', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'ru', +        name: 'Russian', +        exampleText: 'читать', +        textPreprocessors: { +            ...capitalizationPreprocessors, +            yoToE, +            removeRussianDiacritics +        } +    }, +    { +        iso: 'sh', +        name: 'Serbo-Croatian', +        exampleText: 'čitaše', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'sq', +        name: 'Albanian', +        exampleText: 'ndihmojme', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'sv', +        name: 'Swedish', +        exampleText: 'läsa', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'th', +        name: 'Thai', +        exampleText: 'อ่าน', +        textPreprocessors: {} +    }, +    { +        iso: 'vi', +        name: 'Vietnamese', +        exampleText: 'đọc', +        textPreprocessors: capitalizationPreprocessors +    }, +    { +        iso: 'zh', +        name: 'Chinese', +        exampleText: '读', +        textPreprocessors: {}      }  ]; |