From 42a2917bf7aa3ab424ada2fc3acf224b74020a7f Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 10 Apr 2020 11:56:18 -0400 Subject: Add support for collapsing emphatic character sequences --- ext/bg/js/japanese.js | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'ext/bg/js/japanese.js') diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 2a2b39fd..e8b258cb 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -83,6 +83,8 @@ const ITERATION_MARK_CODE_POINT = 0x3005; + const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; + const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; // Existing functions @@ -373,6 +375,39 @@ } + // Miscellaneous + + function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) { + let result = ''; + let collapseCodePoint = -1; + const hasSourceMap = (sourceMap !== null); + for (const char of sourceText) { + const c = char.codePointAt(0); + if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) { + if (collapseCodePoint !== c) { + collapseCodePoint = c; + if (!fullCollapse) { + result += char; + continue; + } + } + } else { + collapseCodePoint = -1; + result += char; + continue; + } + + if (hasSourceMap) { + const index = result.length; + if (index > 0) { + sourceMap.combine(index - 1, 1); + } + } + } + return result; + } + + // Exports Object.assign(jp, { @@ -384,6 +419,7 @@ convertHalfWidthKanaToFullWidth, convertAlphabeticToKana, distributeFurigana, - distributeFuriganaInflected + distributeFuriganaInflected, + collapseEmphaticSequences }); })(); -- cgit v1.2.3 From 0b7791c103508e4b23d57717a97644993edf76d5 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 10 Apr 2020 12:25:24 -0400 Subject: Fix source map for characters collapsed at the start of a string --- ext/bg/js/japanese.js | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'ext/bg/js/japanese.js') diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index e8b258cb..71fbebb5 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -377,11 +377,11 @@ // Miscellaneous - function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) { + function collapseEmphaticSequences(text, fullCollapse, sourceMap=null) { let result = ''; let collapseCodePoint = -1; const hasSourceMap = (sourceMap !== null); - for (const char of sourceText) { + for (const char of text) { const c = char.codePointAt(0); if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) { if (collapseCodePoint !== c) { @@ -398,10 +398,7 @@ } if (hasSourceMap) { - const index = result.length; - if (index > 0) { - sourceMap.combine(index - 1, 1); - } + sourceMap.combine(Math.max(0, result.length - 1), 1); } } return result; -- cgit v1.2.3 From 90392ac9d6d3b54f811e3d056043a1ffe26fa963 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 11 Apr 2020 15:43:12 -0400 Subject: Add support for collapsing the Katakana-Hiragana Prolonged Sound Mark --- ext/bg/js/japanese.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'ext/bg/js/japanese.js') diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 71fbebb5..78f5b48f 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -85,6 +85,7 @@ const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; + const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc; // Existing functions @@ -383,7 +384,11 @@ const hasSourceMap = (sourceMap !== null); for (const char of text) { const c = char.codePointAt(0); - if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) { + if ( + c === HIRAGANA_SMALL_TSU_CODE_POINT || + c === KATAKANA_SMALL_TSU_CODE_POINT || + c === KANA_PROLONGED_SOUND_MARK_CODE_POINT + ) { if (collapseCodePoint !== c) { collapseCodePoint = c; if (!fullCollapse) { -- cgit v1.2.3