diff options
| author | Kuuuube <61125188+Kuuuube@users.noreply.github.com> | 2024-06-26 13:05:23 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-06-26 17:05:23 +0000 | 
| commit | dfc3710108fe2617a98fca0f57f5a2f6bb7d1830 (patch) | |
| tree | ad402a4028c1b4f678262776c5a8b84d25e7dcf9 /ext/js/language/language-descriptors.js | |
| parent | 2a92a0b98c2bb08c2adaca24ff8af3322874ef59 (diff) | |
Add normalization of combining dakuten and handakuten to ja preprocessors (#1136)
* Add normalization of combining dakuten and handakuten to ja preprocessors
* Fix typo
* Remove redundant variable assignment
* Fix first character processed incorrectly when it is a character that gets combined
* Add test for combining dakuten and handakuten
Diffstat (limited to 'ext/js/language/language-descriptors.js')
| -rw-r--r-- | ext/js/language/language-descriptors.js | 6 | 
1 file changed, 4 insertions, 2 deletions
```diff
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index 7965ff30..f9fb4f09 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -26,6 +26,7 @@ import {
     collapseEmphaticSequences,
     convertHalfWidthCharacters,
     convertHiraganaToKatakana,
+    normalizeCombiningCharacters,
 } from './ja/japanese-text-preprocessors.js';
 import {japaneseTransforms} from './ja/japanese-transforms.js';
 import {isStringPartiallyJapanese} from './ja/japanese.js';
@@ -36,9 +37,9 @@ import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js
 import {oldIrishTransforms} from './sga/old-irish-transforms.js';
 import {removeSerboCroatianAccentMarks} from './sh/serbo-croatian-text-preprocessors.js';
 import {albanianTransforms} from './sq/albanian-transforms.js';
-import {normalizeDiacritics} from './vi/viet-text-preprocessors.js';
 import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-processors.js';
-import {normalizePinyin, isStringPartiallyChinese} from './zh/chinese.js';
+import {normalizeDiacritics} from './vi/viet-text-preprocessors.js';
+import {isStringPartiallyChinese, normalizePinyin} from './zh/chinese.js';
 
 const capitalizationPreprocessors = {
     decapitalize,
@@ -155,6 +156,7 @@ const languageDescriptors = [
         textPreprocessors: {
             convertHalfWidthCharacters,
             alphabeticToHiragana,
+            normalizeCombiningCharacters,
             alphanumericWidthVariants,
             convertHiraganaToKatakana,
             collapseEmphaticSequences,
```