diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-05-22 22:45:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-22 20:45:39 +0000 |
commit | d19b898792bffed8ab2d5724472e5b65a5f5b146 (patch) | |
tree | b3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f /ext/js/language/language-descriptors.js | |
parent | 125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff) |
[ja] add preprocessor for width of alphabetic characters (#964)
* add Japanese text preprocessor for width variants of alphabetic characters
* try combining with numeric to improve performance
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* fix tests
---------
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Diffstat (limited to 'ext/js/language/language-descriptors.js')
-rw-r--r-- | ext/js/language/language-descriptors.js | 12 |
1 file changed, 9 insertions, 3 deletions
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 726842f1..baf53f81 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -19,7 +19,13 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js'; import {eszettPreprocessor} from './de/german-text-preprocessors.js'; import {germanTransforms} from './de/german-transforms.js'; import {englishTransforms} from './en/english-transforms.js'; -import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import { + alphabeticToHiragana, + alphanumericWidthVariants, + collapseEmphaticSequences, + convertHalfWidthCharacters, + convertHiraganaToKatakana +} from './ja/japanese-text-preprocessors.js'; import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js'; @@ -143,8 +149,8 @@ const languageDescriptors = [ isTextLookupWorthy: isStringPartiallyJapanese, textPreprocessors: { convertHalfWidthCharacters, - convertNumericCharacters, - convertAlphabeticCharacters, + alphabeticToHiragana, + alphanumericWidthVariants, convertHiraganaToKatakana, collapseEmphaticSequences }, |