diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-04-10 11:56:18 -0400 |
---|---|---|
committer | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-04-10 11:56:18 -0400 |
commit | 42a2917bf7aa3ab424ada2fc3acf224b74020a7f (patch) | |
tree | 647438015b7b526ddc2969d2c00e9e43b8bb792f /ext/bg/js/japanese.js | |
parent | f177e3699ae85bafa3a648f5c03fbb8e303a6be3 (diff) |
Add support for collapsing emphatic character sequences
Diffstat (limited to 'ext/bg/js/japanese.js')
-rw-r--r-- | ext/bg/js/japanese.js | 38 |
1 file changed, 37 insertions, 1 deletion
// Reconstructed from the unified diff of ext/bg/js/japanese.js
// (index 2a2b39fd..e8b258cb). The constants come from hunk @@ -83,6 +83,8 @@
// and the function from hunk @@ -373,6 +375,39 @@.

const ITERATION_MARK_CODE_POINT = 0x3005;
// Code points of the small tsu characters ("っ" and "ッ") whose repetitions
// are collapsed by collapseEmphaticSequences.
const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;


// Miscellaneous

/**
 * Collapses runs of a repeated small tsu ("っ" or "ッ") in a string.
 *
 * A run is a sequence of the *same* small tsu code point; a hiragana small tsu
 * directly followed by a katakana one starts a new run.
 *
 * @param {string} sourceText Text to process.
 * @param {boolean} fullCollapse When false, each run is reduced to a single
 *   character. When true, every small tsu is removed entirely.
 * @param {?{combine: function(number, number): void}} [sourceMap=null]
 *   Optional object that is notified once for every dropped character via
 *   `combine(index, 1)`, where `index` is the UTF-16 offset of the last
 *   character emitted so far (0 if nothing has been emitted yet).
 *   NOTE(review): presumably `combine(index, count)` merges the `count`
 *   entries following `index` into `index` - confirm against the source map
 *   implementation used by callers.
 * @returns {string} The text with emphatic sequences collapsed.
 */
function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) {
    let result = '';
    // Code point of the run currently being collapsed, or -1 outside a run.
    let collapseCodePoint = -1;
    const hasSourceMap = (sourceMap !== null);
    for (const char of sourceText) {
        const c = char.codePointAt(0);
        const isSmallTsu = (
            c === HIRAGANA_SMALL_TSU_CODE_POINT ||
            c === KATAKANA_SMALL_TSU_CODE_POINT
        );

        if (!isSmallTsu) {
            collapseCodePoint = -1;
            result += char;
            continue;
        }

        if (collapseCodePoint !== c) {
            // First character of a new run: kept unless fully collapsing.
            collapseCodePoint = c;
            if (!fullCollapse) {
                result += char;
                continue;
            }
        }

        // The character is dropped. Always report it to the source map: when
        // nothing has been emitted yet the index is clamped to 0 so that a
        // dropped leading character is still accounted for. (Skipping the call
        // in that case left the map one entry longer than the output.)
        if (hasSourceMap) {
            sourceMap.combine(Math.max(0, result.length - 1), 1);
        }
    }
    return result;
}


// Exports
// Hunk @@ -384,6 +419,7 @@ appends `collapseEmphaticSequences` to the
// `Object.assign(jp, {...})` export list, directly after
// `distributeFuriganaInflected`.