From 42a2917bf7aa3ab424ada2fc3acf224b74020a7f Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 10 Apr 2020 11:56:18 -0400 Subject: Add support for collapsing emphatic character sequences --- ext/bg/js/japanese.js | 38 +++++++++++++++++++++++++++++++++++++- ext/bg/js/options.js | 3 ++- ext/bg/js/settings/main.js | 2 ++ ext/bg/js/translator.js | 17 +++++++++++++++-- 4 files changed, 56 insertions(+), 4 deletions(-) (limited to 'ext/bg/js') diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 2a2b39fd..e8b258cb 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -83,6 +83,8 @@ const ITERATION_MARK_CODE_POINT = 0x3005; + const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; + const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; // Existing functions @@ -373,6 +375,39 @@ } + // Miscellaneous + + function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) { + let result = ''; + let collapseCodePoint = -1; + const hasSourceMap = (sourceMap !== null); + for (const char of sourceText) { + const c = char.codePointAt(0); + if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) { + if (collapseCodePoint !== c) { + collapseCodePoint = c; + if (!fullCollapse) { + result += char; + continue; + } + } + } else { + collapseCodePoint = -1; + result += char; + continue; + } + + if (hasSourceMap) { + const index = result.length; + if (index > 0) { + sourceMap.combine(index - 1, 1); + } + } + } + return result; + } + + // Exports Object.assign(jp, { @@ -384,6 +419,7 @@ convertHalfWidthKanaToFullWidth, convertAlphabeticToKana, distributeFurigana, - distributeFuriganaInflected + distributeFuriganaInflected, + collapseEmphaticSequences }); })(); diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index abb054d4..fa96c96c 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -171,7 +171,8 @@ function profileOptionsCreateDefaults() { convertNumericCharacters: 'false', convertAlphabeticCharacters: 'false', convertHiraganaToKatakana: 'false', - convertKatakanaToHiragana: 'variant' + convertKatakanaToHiragana: 'variant', + collapseEmphaticSequences: 'false' }, dictionaries: {}, diff --git a/ext/bg/js/settings/main.js b/ext/bg/js/settings/main.js index 1653ee35..18c2da73 100644 --- a/ext/bg/js/settings/main.js +++ b/ext/bg/js/settings/main.js @@ -119,6 +119,7 @@ async function formRead(options) { options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val(); options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val(); options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val(); + options.translation.collapseEmphaticSequences = $('#translation-collapse-emphatic-sequences').val(); options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked'); options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked'); @@ -200,6 +201,7 @@ async function formWrite(options) { $('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters); $('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana); $('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana); + $('#translation-collapse-emphatic-sequences').val(options.translation.collapseEmphaticSequences); $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser); $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser); diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 27f91c05..402ac6bd 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -348,17 +348,27 @@ class Translator { getAllDeinflections(text, options) { const translationOptions = options.translation; + const collapseEmphaticOptions = [[false, false]]; + switch (translationOptions.collapseEmphaticSequences) { + case 'true': + collapseEmphaticOptions.push([true, false]); + break; + case 'full': + collapseEmphaticOptions.push([true, true]); + break; + } const textOptionVariantArray = [ Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), - Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana) + Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana), + collapseEmphaticOptions ]; const deinflections = []; const used = new Set(); - for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { + for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) { let text2 = text; const sourceMap = new TextSourceMap(text2); if (halfWidth) { @@ -376,6 +386,9 @@ class Translator { if (hiragana) { text2 = jp.convertKatakanaToHiragana(text2); } + if (collapseEmphatic) { + text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap); + } for (let i = text2.length; i > 0; --i) { const text2Substring = text2.substring(0, i); -- cgit v1.2.3 From 0b7791c103508e4b23d57717a97644993edf76d5 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 10 Apr 2020 12:25:24 -0400 Subject: Fix source map for characters collapsed at the start of a string --- ext/bg/js/japanese.js | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'ext/bg/js') diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index e8b258cb..71fbebb5 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -377,11 +377,11 @@ // Miscellaneous - function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) { + function collapseEmphaticSequences(text, fullCollapse, sourceMap=null) { let result = ''; let collapseCodePoint = -1; const hasSourceMap = (sourceMap !== null); - for (const char of sourceText) { + for (const char of text) { const c = char.codePointAt(0); if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) { if (collapseCodePoint !== c) { @@ -398,10 +398,7 @@ } if (hasSourceMap) { - const index = result.length; - if (index > 0) { - sourceMap.combine(index - 1, 1); - } + sourceMap.combine(Math.max(0, result.length - 1), 1); } } return result; -- cgit v1.2.3 From 90392ac9d6d3b54f811e3d056043a1ffe26fa963 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 11 Apr 2020 15:43:12 -0400 Subject: Add support for collapsing the Katakana-Hiragana Prolonged Sound Mark --- ext/bg/js/japanese.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'ext/bg/js') diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 71fbebb5..78f5b48f 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -85,6 +85,7 @@ const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; + const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc; // Existing functions @@ -383,7 +384,11 @@ const hasSourceMap = (sourceMap !== null); for (const char of text) { const c = char.codePointAt(0); - if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) { + if ( + c === HIRAGANA_SMALL_TSU_CODE_POINT || + c === KATAKANA_SMALL_TSU_CODE_POINT || + c === KANA_PROLONGED_SOUND_MARK_CODE_POINT + ) { if (collapseCodePoint !== c) { collapseCodePoint = c; if (!fullCollapse) { -- cgit v1.2.3 From 70f0b8b0cd7c85bd8af230cf6a74a0d0e1d0bbc2 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 11 Apr 2020 18:58:14 -0400 Subject: Fix 'full' mode not being a superset of 'true' mode --- ext/bg/js/translator.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ext/bg/js') diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 402ac6bd..fd14b72d 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -354,7 +354,7 @@ class Translator { collapseEmphaticOptions.push([true, false]); break; case 'full': - collapseEmphaticOptions.push([true, true]); + collapseEmphaticOptions.push([true, false], [true, true]); break; } const textOptionVariantArray = [ -- cgit v1.2.3