From 42a2917bf7aa3ab424ada2fc3acf224b74020a7f Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 10 Apr 2020 11:56:18 -0400 Subject: Add support for collapsing emphatic character sequences --- ext/bg/js/japanese.js | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'ext/bg/js/japanese.js') diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 2a2b39fd..e8b258cb 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -83,6 +83,8 @@ const ITERATION_MARK_CODE_POINT = 0x3005; + const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; + const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; // Existing functions @@ -373,6 +375,39 @@ } + // Miscellaneous + + function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) { + let result = ''; + let collapseCodePoint = -1; + const hasSourceMap = (sourceMap !== null); + for (const char of sourceText) { + const c = char.codePointAt(0); + if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) { + if (collapseCodePoint !== c) { + collapseCodePoint = c; + if (!fullCollapse) { + result += char; + continue; + } + } + } else { + collapseCodePoint = -1; + result += char; + continue; + } + + if (hasSourceMap) { + const index = result.length; + if (index > 0) { + sourceMap.combine(index - 1, 1); + } + } + } + return result; + } + + // Exports Object.assign(jp, { @@ -384,6 +419,7 @@ convertHalfWidthKanaToFullWidth, convertAlphabeticToKana, distributeFurigana, - distributeFuriganaInflected + distributeFuriganaInflected, + collapseEmphaticSequences }); })(); -- cgit v1.2.3