summary | refs | log | tree | commit | diff
path: root/ext/js/language/ja/japanese-text-preprocessors.js
diff options
context:
space:
mode:
author: Kuuuube <61125188+Kuuuube@users.noreply.github.com> 2024-06-26 13:05:23 -0400
committer: GitHub <noreply@github.com> 2024-06-26 17:05:23 +0000
commit: dfc3710108fe2617a98fca0f57f5a2f6bb7d1830 (patch)
tree: ad402a4028c1b4f678262776c5a8b84d25e7dcf9 /ext/js/language/ja/japanese-text-preprocessors.js
parent: 2a92a0b98c2bb08c2adaca24ff8af3322874ef59 (diff)
Add normalization of combining dakuten and handakuten to ja preprocessors (#1136)
* Add normalization of combining dakuten and handakuten to ja preprocessors
* Fix typo
* Remove redundant variable assignment
* Fix first character processed incorrectly when it is a character that gets combined
* Add test for combining dakuten and handakuten
Diffstat (limited to 'ext/js/language/ja/japanese-text-preprocessors.js')
-rw-r--r-- ext/js/language/ja/japanese-text-preprocessors.js | 9
1 files changed, 9 insertions, 0 deletions
diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js
index 2d0d23b3..cdd8ce9a 100644
--- a/ext/js/language/ja/japanese-text-preprocessors.js
+++ b/ext/js/language/ja/japanese-text-preprocessors.js
@@ -24,6 +24,7 @@ import {
convertHalfWidthKanaToFullWidth,
convertHiraganaToKatakana as convertHiraganaToKatakanaFunction,
convertKatakanaToHiragana as convertKatakanaToHiraganaFunction,
+ normalizeCombiningCharacters as normalizeCombiningCharactersFunction,
} from './japanese.js';
/** @type {import('language').TextProcessor<boolean>} */
@@ -90,3 +91,11 @@ export const collapseEmphaticSequences = {
return str;
},
};
+
+/** Text processor with a boolean setting: when the setting is truthy, `process` returns normalizeCombiningCharactersFunction(str); otherwise it returns `str` unchanged. @type {import('language').TextProcessor<boolean>} */
+export const normalizeCombiningCharacters = {
+ name: 'Normalize combining characters',
+ description: 'ド → ド (U+30C8 U+3099 → U+30C9)',
+ options: basicTextProcessorOptions, // defined outside this hunk; presumably the on/off option list shared by the other processors — confirm
+ process: (str, setting) => (setting ? normalizeCombiningCharactersFunction(str) : str), // delegates to the helper imported from './japanese.js'; pass-through when disabled
+};