From 82049f3439acae346767351827e0963a2586b702 Mon Sep 17 00:00:00 2001 From: StefanVukovic99 Date: Tue, 14 May 2024 13:32:57 +0200 Subject: add korean (#787) * prepare preprocessors * add postprocessors * add hangul functions (#13) * move hangul to lib, add test (#14) * add hangul functions * hangul lib * fix tests * test inputs * fix hangul disassemble, fix postprocessing * rename test * reset transformedText for multiple postprocessors * add credits * fix conflict --------- Co-authored-by: Stefan Vukovic * Vetting done (#16) * add hangul functions * hangul lib * fix tests * test inputs * fix hangul disassemble, fix postprocessing * rename test * reset transformedText for multiple postprocessors * add credits * fix conflict * add transforms * move substring * textprocessing function * mostly fixed * delete comment * mostly fixed (cherry picked from commit 268c8968ec64bf9d1fbd7d448a96d590399265c4) * prepare preprocessors (cherry picked from commit d139258398de9285396db31c6d46795ee95eb01a) * (reset) remove textSourceMap * rawSource before textreplacements * update tests * fix transforms * fix transforms * 100% vetted * add link to dict --------- Co-authored-by: Stefan Vukovic * update unit tests * update test --------- Co-authored-by: Lyroxide <33834537+Lyroxide@users.noreply.github.com> --- ext/js/language/language-descriptors.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'ext/js/language/language-descriptors.js') diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 313a5c31..54659683 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -22,6 +22,8 @@ import {englishTransforms} from './en/english-transforms.js'; import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; +import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js'; +import {koreanTransforms} from './ko/korean-transforms.js'; import {latinTransforms} from './la/latin-transforms.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {oldIrishTransforms} from './sga/old-irish-transforms.js'; @@ -148,6 +150,18 @@ const languageDescriptors = [ name: 'Khmer', exampleText: 'អាន' }, + { + iso: 'ko', + name: 'Korean', + exampleText: '읽어', + textPreprocessors: { + disassembleHangul + }, + textPostprocessors: { + reassembleHangul + }, + languageTransforms: koreanTransforms + }, { iso: 'nl', name: 'Dutch', -- cgit v1.2.3