From 42a2917bf7aa3ab424ada2fc3acf224b74020a7f Mon Sep 17 00:00:00 2001
From: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Date: Fri, 10 Apr 2020 11:56:18 -0400
Subject: Add support for collapsing emphatic character sequences

---
 ext/bg/data/options-schema.json |  8 +++++++-
 ext/bg/js/japanese.js           | 38 +++++++++++++++++++++++++++++++++++++-
 ext/bg/js/options.js            |  3 ++-
 ext/bg/js/settings/main.js      |  2 ++
 ext/bg/js/translator.js         | 17 +++++++++++++++--
 ext/bg/settings.html            | 11 ++++++++++-
 6 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json
index da1f1ce0..4f9e694d 100644
--- a/ext/bg/data/options-schema.json
+++ b/ext/bg/data/options-schema.json
@@ -388,7 +388,8 @@
                                     "convertNumericCharacters",
                                     "convertAlphabeticCharacters",
                                     "convertHiraganaToKatakana",
-                                    "convertKatakanaToHiragana"
+                                    "convertKatakanaToHiragana",
+                                    "collapseEmphaticSequences"
                                 ],
                                 "properties": {
                                     "convertHalfWidthCharacters": {
@@ -415,6 +416,11 @@
                                         "type": "string",
                                         "enum": ["false", "true", "variant"],
                                         "default": "variant"
+                                    },
+                                    "collapseEmphaticSequences": {
+                                        "type": "string",
+                                        "enum": ["false", "true", "full"],
+                                        "default": "false"
                                     }
                                 }
                             },
diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js
index 2a2b39fd..e8b258cb 100644
--- a/ext/bg/js/japanese.js
+++ b/ext/bg/js/japanese.js
@@ -83,6 +83,8 @@
 
     const ITERATION_MARK_CODE_POINT = 0x3005;
 
+    const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
+    const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
 
     // Existing functions
 
@@ -373,6 +375,39 @@
     }
 
 
+    // Miscellaneous
+
+    function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) {
+        let result = '';
+        let collapseCodePoint = -1;
+        const hasSourceMap = (sourceMap !== null);
+        for (const char of sourceText) {
+            const c = char.codePointAt(0);
+            if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) {
+                if (collapseCodePoint !== c) {
+                    collapseCodePoint = c;
+                    if (!fullCollapse) {
+                        result += char;
+                        continue;
+                    }
+                }
+            } else {
+                collapseCodePoint = -1;
+                result += char;
+                continue;
+            }
+
+            if (hasSourceMap) {
+                const index = result.length;
+                if (index > 0) {
+                    sourceMap.combine(index - 1, 1);
+                }
+            }
+        }
+        return result;
+    }
+
+
     // Exports
 
     Object.assign(jp, {
@@ -384,6 +419,7 @@
         convertHalfWidthKanaToFullWidth,
         convertAlphabeticToKana,
         distributeFurigana,
-        distributeFuriganaInflected
+        distributeFuriganaInflected,
+        collapseEmphaticSequences
     });
 })();
diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js
index abb054d4..fa96c96c 100644
--- a/ext/bg/js/options.js
+++ b/ext/bg/js/options.js
@@ -171,7 +171,8 @@ function profileOptionsCreateDefaults() {
             convertNumericCharacters: 'false',
             convertAlphabeticCharacters: 'false',
             convertHiraganaToKatakana: 'false',
-            convertKatakanaToHiragana: 'variant'
+            convertKatakanaToHiragana: 'variant',
+            collapseEmphaticSequences: 'false'
         },
 
         dictionaries: {},
diff --git a/ext/bg/js/settings/main.js b/ext/bg/js/settings/main.js
index 1653ee35..18c2da73 100644
--- a/ext/bg/js/settings/main.js
+++ b/ext/bg/js/settings/main.js
@@ -119,6 +119,7 @@ async function formRead(options) {
     options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val();
     options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val();
     options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val();
+    options.translation.collapseEmphaticSequences = $('#translation-collapse-emphatic-sequences').val();
 
     options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked');
     options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked');
@@ -200,6 +201,7 @@ async function formWrite(options) {
     $('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters);
     $('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana);
     $('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana);
+    $('#translation-collapse-emphatic-sequences').val(options.translation.collapseEmphaticSequences);
 
     $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser);
     $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser);
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 27f91c05..402ac6bd 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -348,17 +348,27 @@ class Translator {
 
     getAllDeinflections(text, options) {
         const translationOptions = options.translation;
+        const collapseEmphaticOptions = [[false, false]];
+        switch (translationOptions.collapseEmphaticSequences) {
+            case 'true':
+                collapseEmphaticOptions.push([true, false]);
+                break;
+            case 'full':
+                collapseEmphaticOptions.push([true, true]);
+                break;
+        }
         const textOptionVariantArray = [
             Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
             Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
             Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
             Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
-            Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana)
+            Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
+            collapseEmphaticOptions
         ];
 
         const deinflections = [];
         const used = new Set();
-        for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) {
+        for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) {
             let text2 = text;
             const sourceMap = new TextSourceMap(text2);
             if (halfWidth) {
@@ -376,6 +386,9 @@ class Translator {
             if (hiragana) {
                 text2 = jp.convertKatakanaToHiragana(text2);
             }
+            if (collapseEmphatic) {
+                text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
+            }
 
             for (let i = text2.length; i > 0; --i) {
                 const text2Substring = text2.substring(0, i);
diff --git a/ext/bg/settings.html b/ext/bg/settings.html
index 1297a9cc..91051f3e 100644
--- a/ext/bg/settings.html
+++ b/ext/bg/settings.html
@@ -427,7 +427,7 @@
 
                 <p class="help-block">
                     The conversion options below are listed in the order that the conversions are applied to the input text.
-                    Each conversion has three possible values:
+                    Conversions commonly have three possible values:
                 </p>
 
                 <ul class="help-block">
@@ -490,6 +490,15 @@
                         <option value="variant">Use both variants</option>
                     </select>
                 </div>
+
+                <div class="form-group">
+                    <label for="translation-collapse-emphatic-sequences">Collapse emphatic character sequences <span class="label-light">(かっっっこいい &rarr; かっこいい)</span></label>
+                    <select class="form-control" id="translation-collapse-emphatic-sequences">
+                        <option value="false">Disabled</option>
+                        <option value="true">Collapse into single character</option>
+                        <option value="full">Remove all characters</option>
+                    </select>
+                </div>
             </div>
 
             <div id="popup-content-scanning">
-- 
cgit v1.2.3


From 0b7791c103508e4b23d57717a97644993edf76d5 Mon Sep 17 00:00:00 2001
From: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Date: Fri, 10 Apr 2020 12:25:24 -0400
Subject: Fix source map for characters collapsed at the start of a string

---
 ext/bg/js/japanese.js | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js
index e8b258cb..71fbebb5 100644
--- a/ext/bg/js/japanese.js
+++ b/ext/bg/js/japanese.js
@@ -377,11 +377,11 @@
 
     // Miscellaneous
 
-    function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) {
+    function collapseEmphaticSequences(text, fullCollapse, sourceMap=null) {
         let result = '';
         let collapseCodePoint = -1;
         const hasSourceMap = (sourceMap !== null);
-        for (const char of sourceText) {
+        for (const char of text) {
             const c = char.codePointAt(0);
             if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) {
                 if (collapseCodePoint !== c) {
@@ -398,10 +398,7 @@
             }
 
             if (hasSourceMap) {
-                const index = result.length;
-                if (index > 0) {
-                    sourceMap.combine(index - 1, 1);
-                }
+                sourceMap.combine(Math.max(0, result.length - 1), 1);
             }
         }
         return result;
-- 
cgit v1.2.3


From fb87b1ad69b37e75d1e2e46c91235aa6b44c2980 Mon Sep 17 00:00:00 2001
From: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Date: Fri, 10 Apr 2020 12:31:06 -0400
Subject: Add tests

---
 test/test-japanese.js | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/test/test-japanese.js b/test/test-japanese.js
index ca65dde2..ac28a579 100644
--- a/test/test-japanese.js
+++ b/test/test-japanese.js
@@ -394,6 +394,50 @@ function testDistributeFuriganaInflected() {
     }
 }
 
+function testCollapseEmphaticSequences() {
+    const data = [
+        [['かこい', false], ['かこい', [1, 1, 1]]],
+        [['かこい', true], ['かこい', [1, 1, 1]]],
+        [['かっこい', false], ['かっこい', [1, 1, 1, 1]]],
+        [['かっこい', true], ['かこい', [2, 1, 1]]],
+        [['かっっこい', false], ['かっこい', [1, 2, 1, 1]]],
+        [['かっっこい', true], ['かこい', [3, 1, 1]]],
+        [['かっっっこい', false], ['かっこい', [1, 3, 1, 1]]],
+        [['かっっっこい', true], ['かこい', [4, 1, 1]]],
+
+        [['こい', false], ['こい', [1, 1]]],
+        [['こい', true], ['こい', [1, 1]]],
+        [['っこい', false], ['っこい', [1, 1, 1]]],
+        [['っこい', true], ['こい', [2, 1]]],
+        [['っっこい', false], ['っこい', [2, 1, 1]]],
+        [['っっこい', true], ['こい', [3, 1]]],
+        [['っっっこい', false], ['っこい', [3, 1, 1]]],
+        [['っっっこい', true], ['こい', [4, 1]]],
+
+        [['', false], ['', []]],
+        [['', true], ['', []]],
+        [['っ', false], ['っ', [1]]],
+        [['っ', true], ['', [1]]],
+        [['っっ', false], ['っ', [2]]],
+        [['っっ', true], ['', [2]]],
+        [['っっっ', false], ['っ', [3]]],
+        [['っっっ', true], ['', [3]]]
+    ];
+
+    for (const [[text, fullCollapse], [expected, expectedSourceMapping]] of data) {
+        const sourceMap = new TextSourceMap(text);
+        const actual1 = jp.collapseEmphaticSequences(text, fullCollapse, null);
+        const actual2 = jp.collapseEmphaticSequences(text, fullCollapse, sourceMap);
+        assert.strictEqual(actual1, expected);
+        assert.strictEqual(actual2, expected);
+        if (typeof expectedSourceMapping !== 'undefined') {
+            console.log('actual', JSON.stringify(actual1), sourceMap);
+            console.log('expected', JSON.stringify(expected), new TextSourceMap(text, expectedSourceMapping));
+            assert.ok(sourceMap.equals(new TextSourceMap(text, expectedSourceMapping)));
+        }
+    }
+}
+
 function testIsMoraPitchHigh() {
     const data = [
         [[0, 0], false],
@@ -463,6 +507,7 @@ function main() {
     testConvertAlphabeticToKana();
     testDistributeFurigana();
     testDistributeFuriganaInflected();
+    testCollapseEmphaticSequences();
     testIsMoraPitchHigh();
     testGetKanaMorae();
 }
-- 
cgit v1.2.3


From 90392ac9d6d3b54f811e3d056043a1ffe26fa963 Mon Sep 17 00:00:00 2001
From: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Date: Sat, 11 Apr 2020 15:43:12 -0400
Subject: Add support for collapsing the Katakana-Hiragana Prolonged Sound Mark

---
 ext/bg/js/japanese.js | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js
index 71fbebb5..78f5b48f 100644
--- a/ext/bg/js/japanese.js
+++ b/ext/bg/js/japanese.js
@@ -85,6 +85,7 @@
 
     const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
     const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
+    const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc;
 
     // Existing functions
 
@@ -383,7 +384,11 @@
         const hasSourceMap = (sourceMap !== null);
         for (const char of text) {
             const c = char.codePointAt(0);
-            if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) {
+            if (
+                c === HIRAGANA_SMALL_TSU_CODE_POINT ||
+                c === KATAKANA_SMALL_TSU_CODE_POINT ||
+                c === KANA_PROLONGED_SOUND_MARK_CODE_POINT
+            ) {
                 if (collapseCodePoint !== c) {
                     collapseCodePoint = c;
                     if (!fullCollapse) {
-- 
cgit v1.2.3


From 92f2466cfff40d47fb5e6350dae5d7ff82770973 Mon Sep 17 00:00:00 2001
From: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Date: Sat, 11 Apr 2020 16:53:29 -0400
Subject: Add tests, remove logs

---
 test/test-japanese.js | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/test/test-japanese.js b/test/test-japanese.js
index ac28a579..b1bba9a7 100644
--- a/test/test-japanese.js
+++ b/test/test-japanese.js
@@ -414,6 +414,17 @@ function testCollapseEmphaticSequences() {
         [['っっっこい', false], ['っこい', [3, 1, 1]]],
         [['っっっこい', true], ['こい', [4, 1]]],
 
+        [['すごい', false], ['すごい', [1, 1, 1]]],
+        [['すごい', true], ['すごい', [1, 1, 1]]],
+        [['すごーい', false], ['すごーい', [1, 1, 1, 1]]],
+        [['すごーい', true], ['すごい', [1, 2, 1]]],
+        [['すごーーい', false], ['すごーい', [1, 1, 2, 1]]],
+        [['すごーーい', true], ['すごい', [1, 3, 1]]],
+        [['すっごーい', false], ['すっごーい', [1, 1, 1, 1, 1]]],
+        [['すっごーい', true], ['すごい', [2, 2, 1]]],
+        [['すっっごーーい', false], ['すっごーい', [1, 2, 1, 2, 1]]],
+        [['すっっごーーい', true], ['すごい', [3, 3, 1]]],
+
         [['', false], ['', []]],
         [['', true], ['', []]],
         [['っ', false], ['っ', [1]]],
@@ -431,8 +442,6 @@ function testCollapseEmphaticSequences() {
         assert.strictEqual(actual1, expected);
         assert.strictEqual(actual2, expected);
         if (typeof expectedSourceMapping !== 'undefined') {
-            console.log('actual', JSON.stringify(actual1), sourceMap);
-            console.log('expected', JSON.stringify(expected), new TextSourceMap(text, expectedSourceMapping));
             assert.ok(sourceMap.equals(new TextSourceMap(text, expectedSourceMapping)));
         }
     }
-- 
cgit v1.2.3


From 70f0b8b0cd7c85bd8af230cf6a74a0d0e1d0bbc2 Mon Sep 17 00:00:00 2001
From: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Date: Sat, 11 Apr 2020 18:58:14 -0400
Subject: Fix 'full' mode not being a superset of 'true' mode

---
 ext/bg/js/translator.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 402ac6bd..fd14b72d 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -354,7 +354,7 @@ class Translator {
                 collapseEmphaticOptions.push([true, false]);
                 break;
             case 'full':
-                collapseEmphaticOptions.push([true, true]);
+                collapseEmphaticOptions.push([true, false], [true, true]);
                 break;
         }
         const textOptionVariantArray = [
-- 
cgit v1.2.3


From c2bf474d1f71c29b848e12a4af4b0860d8adb4ab Mon Sep 17 00:00:00 2001
From: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Date: Sat, 11 Apr 2020 19:00:01 -0400
Subject: Update example

---
 ext/bg/settings.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/bg/settings.html b/ext/bg/settings.html
index 91051f3e..96c1db82 100644
--- a/ext/bg/settings.html
+++ b/ext/bg/settings.html
@@ -492,7 +492,7 @@
                 </div>
 
                 <div class="form-group">
-                    <label for="translation-collapse-emphatic-sequences">Collapse emphatic character sequences <span class="label-light">(かっっっこいい &rarr; かっこいい)</span></label>
+                    <label for="translation-collapse-emphatic-sequences">Collapse emphatic character sequences <span class="label-light">(すっっごーーい &rarr; すっごーい / すごい)</span></label>
                     <select class="form-control" id="translation-collapse-emphatic-sequences">
                         <option value="false">Disabled</option>
                         <option value="true">Collapse into single character</option>
-- 
cgit v1.2.3