From 62ac615450ef8b96fa9dd90b8b4e7fe486cc77a6 Mon Sep 17 00:00:00 2001 From: StefanVukovic99 Date: Fri, 23 Feb 2024 02:57:33 +0100 Subject: add more languages (#684) * Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * part1 * frotend done? * fix tests * offscreen and type complications * add tests * start fixing tests * keep fixing tests * fix tests * Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * delete pt * renames * add tests * kebab-case filenames * lint * minor fixes * merge * fixes * fix part of comments * fix more comments * delete unused types * comment * comment * do backend * other files * move fetch utils to own file * remove extra line * add extra line * remove unnecessary export * simplify folder structure * remove redundant async * fix param type in api * fix language index * undo changes to cssStyleApplier * undo changes to utilities.js * undo changes to utilities.js * simplify language util * lint * undo phantom changes to anki integration * require textTransformations options * explicit locale in localeCompare * punctuate notes * prefer early exit * rename LanguageOptionsObjectMap * rename to textPreprocessor * tuple with names instead of boolean array * safe data setting * optional chaining * simplify LanguageOptions * encapsulate languages * delete language util * nullable language in text preprocessors controller * rename transform to process * remove settings * make translation advanced again * remove unused getTextTransformations api call * comments * change language types * RIP flags * comments * fix tests * lint * Text preprocessor type changes (#10) * Add types * Update types * Simplify type check * Refactor typing and structuring of language definitions * lint * update translator benchmark * undo markdown changes * undo markdown changes * undo markdown changes * more merge * add more languages * wip * refactoring * fixes * add comment, 
delete settings text * remove language from jsconfig --------- Co-authored-by: toasted-nutbread Co-authored-by: Darius Jahandarie --- types/ext/language-descriptors.d.ts | 46 ++++++++++++++++++++++++++++++++----- types/ext/language.d.ts | 9 ++++++++ 2 files changed, 49 insertions(+), 6 deletions(-) (limited to 'types') diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index 00a95883..319a3ca5 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -15,7 +15,7 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -import type {TextPreprocessor} from './language'; +import type {TextPreprocessor, BidirectionalConversionPreprocessor} from './language'; import type {SafeAny} from './core'; type LanguageDescriptor = { @@ -35,21 +35,55 @@ type LanguageDescriptorObjectMap = { export type LanguageDescriptorAny = LanguageDescriptorObjectMap[keyof LanguageDescriptorObjectMap]; +type CapitalizationPreprocessors = { + capitalizeFirstLetter: TextPreprocessor<boolean>; + decapitalize: TextPreprocessor<boolean>; +}; + /** * This is a mapping of the iso tag to all of the preprocessors for that language. * Any new language should be added to this object. 
*/ type AllTextPreprocessors = { - en: { - capitalizeFirstLetter: TextPreprocessor<boolean>; - decapitalize: TextPreprocessor<boolean>; + ar: { + removeArabicScriptDiacritics: TextPreprocessor<boolean>; + }; + de: CapitalizationPreprocessors & { + eszettPreprocessor: BidirectionalConversionPreprocessor; + }; + el: CapitalizationPreprocessors; + en: CapitalizationPreprocessors; + es: CapitalizationPreprocessors; + fa: { + removeArabicScriptDiacritics: TextPreprocessor<boolean>; + }; + fr: CapitalizationPreprocessors; + grc: CapitalizationPreprocessors; + hu: CapitalizationPreprocessors; + id: CapitalizationPreprocessors; + it: CapitalizationPreprocessors; + la: { + removeLatinDiacritics: TextPreprocessor<boolean>; }; ja: { convertHalfWidthCharacters: TextPreprocessor<boolean>; convertNumericCharacters: TextPreprocessor<boolean>; convertAlphabeticCharacters: TextPreprocessor<boolean>; - convertHiraganaToKatakana: TextPreprocessor<boolean>; - convertKatakanaToHiragana: TextPreprocessor<boolean>; + convertHiraganaToKatakana: BidirectionalConversionPreprocessor; collapseEmphaticSequences: TextPreprocessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>; }; + km: Record<string, never>; + pl: CapitalizationPreprocessors; + pt: CapitalizationPreprocessors; + ro: CapitalizationPreprocessors; + ru: CapitalizationPreprocessors & { + yoToE: TextPreprocessor<boolean>; + removeRussianDiacritics: TextPreprocessor<boolean>; + }; + sh: CapitalizationPreprocessors; + sq: CapitalizationPreprocessors; + sv: CapitalizationPreprocessors; + th: Record<string, never>; + vi: CapitalizationPreprocessors; + zh: Record<string, never>; }; diff --git a/types/ext/language.d.ts b/types/ext/language.d.ts index efbb16c6..8e5a5c70 100644 --- a/types/ext/language.d.ts +++ b/types/ext/language.d.ts @@ -21,6 +21,11 @@ export type TextPreprocessorOptions<T = unknown> = T[]; export type TextPreprocessorFunction<T = unknown> = (str: string, setting: T, sourceMap: TextSourceMap) => string; +/** + * Text preprocessors are used during the translation process to create alternate versions of the input text to search for. 
+ * This is helpful when the input text doesn't exactly match the term or expression found in the database. + * When a language has multiple preprocessors, the translator will generate variants of the text by applying all combinations of the preprocessors. + */ export type TextPreprocessor<T = unknown> = { name: string; description: string; @@ -28,6 +33,10 @@ export type TextPreprocessor<T = unknown> = { process: TextPreprocessorFunction<T>; }; +export type BidirectionalPreprocessorOptions = 'off' | 'direct' | 'inverse'; + +export type BidirectionalConversionPreprocessor = TextPreprocessor<BidirectionalPreprocessorOptions>; + export type LanguageAndPreprocessors = { iso: string; textPreprocessors: TextPreprocessorWithId<unknown>[]; -- cgit v1.2.3