diff options
author | Kuuuube <61125188+Kuuuube@users.noreply.github.com> | 2024-06-26 13:05:23 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-26 17:05:23 +0000 |
commit | dfc3710108fe2617a98fca0f57f5a2f6bb7d1830 (patch) | |
tree | ad402a4028c1b4f678262776c5a8b84d25e7dcf9 /ext/js/language/ja/japanese-text-preprocessors.js | |
parent | 2a92a0b98c2bb08c2adaca24ff8af3322874ef59 (diff) |
Add normalization of combining dakuten and handakuten to ja preprocessors (#1136)
* Add normalization of combining dakuten and handakuten to ja preprocessors
* Fix typo
* Remove redundant variable assignment
* Fix first character processed incorrectly when it is a character that gets combined
* Add test for combining dakuten and handakuten
Diffstat (limited to 'ext/js/language/ja/japanese-text-preprocessors.js')
-rw-r--r-- | ext/js/language/ja/japanese-text-preprocessors.js | 9 |
1 files changed, 9 insertions, 0 deletions
```diff
diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js
index 2d0d23b3..cdd8ce9a 100644
--- a/ext/js/language/ja/japanese-text-preprocessors.js
+++ b/ext/js/language/ja/japanese-text-preprocessors.js
@@ -24,6 +24,7 @@ import {
     convertHalfWidthKanaToFullWidth,
     convertHiraganaToKatakana as convertHiraganaToKatakanaFunction,
     convertKatakanaToHiragana as convertKatakanaToHiraganaFunction,
+    normalizeCombiningCharacters as normalizeCombiningCharactersFunction,
 } from './japanese.js';
 
 /** @type {import('language').TextProcessor<boolean>} */
@@ -90,3 +91,11 @@ export const collapseEmphaticSequences = {
         return str;
     },
 };
+
+/** @type {import('language').TextProcessor<boolean>} */
+export const normalizeCombiningCharacters = {
+    name: 'Normalize combining characters',
+    description: 'ド → ド (U+30C8 U+3099 → U+30C9)',
+    options: basicTextProcessorOptions,
+    process: (str, setting) => (setting ? normalizeCombiningCharactersFunction(str) : str),
+};
```