summary | refs | log | tree | commit | diff
path: root/ext/js/language/language-descriptors.js
diff options
context:
space:
mode:
author StefanVukovic99 <stefanvukovic44@gmail.com> 2024-05-22 22:45:39 +0200
committer GitHub <noreply@github.com> 2024-05-22 20:45:39 +0000
commit d19b898792bffed8ab2d5724472e5b65a5f5b146 (patch)
tree b3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f /ext/js/language/language-descriptors.js
parent 125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff)
[ja] add preprocessor for width of alphabetic characters (#964)
* add japanese text preprocessor for variants in width of alphabetic characters
* try combining with numeric to improve performance
* Update ext/js/language/ja/japanese.js
  Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
  Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* Update ext/js/language/ja/japanese.js
  Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
  Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* fix tests
---------
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Diffstat (limited to 'ext/js/language/language-descriptors.js')
-rw-r--r-- ext/js/language/language-descriptors.js 12
1 files changed, 9 insertions, 3 deletions
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index 726842f1..baf53f81 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -19,7 +19,13 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
import {eszettPreprocessor} from './de/german-text-preprocessors.js';
import {germanTransforms} from './de/german-transforms.js';
import {englishTransforms} from './en/english-transforms.js';
-import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {
+ alphabeticToHiragana,
+ alphanumericWidthVariants,
+ collapseEmphaticSequences,
+ convertHalfWidthCharacters,
+ convertHiraganaToKatakana
+} from './ja/japanese-text-preprocessors.js';
import {japaneseTransforms} from './ja/japanese-transforms.js';
import {isStringPartiallyJapanese} from './ja/japanese.js';
import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js';
@@ -143,8 +149,8 @@ const languageDescriptors = [
isTextLookupWorthy: isStringPartiallyJapanese,
textPreprocessors: {
convertHalfWidthCharacters,
- convertNumericCharacters,
- convertAlphabeticCharacters,
+ alphabeticToHiragana,
+ alphanumericWidthVariants,
convertHiraganaToKatakana,
collapseEmphaticSequences
},