diff options
| -rw-r--r-- | ext/bg/data/options-schema.json | 8 | ||||
| -rw-r--r-- | ext/bg/js/japanese.js | 40 | ||||
| -rw-r--r-- | ext/bg/js/options.js | 3 | ||||
| -rw-r--r-- | ext/bg/js/settings/main.js | 2 | ||||
| -rw-r--r-- | ext/bg/js/translator.js | 17 | ||||
| -rw-r--r-- | ext/bg/settings.html | 11 | ||||
| -rw-r--r-- | test/test-japanese.js | 54 | 
7 files changed, 129 insertions, 6 deletions
| diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json index da1f1ce0..4f9e694d 100644 --- a/ext/bg/data/options-schema.json +++ b/ext/bg/data/options-schema.json @@ -388,7 +388,8 @@                                      "convertNumericCharacters",                                      "convertAlphabeticCharacters",                                      "convertHiraganaToKatakana", -                                    "convertKatakanaToHiragana" +                                    "convertKatakanaToHiragana", +                                    "collapseEmphaticSequences"                                  ],                                  "properties": {                                      "convertHalfWidthCharacters": { @@ -415,6 +416,11 @@                                          "type": "string",                                          "enum": ["false", "true", "variant"],                                          "default": "variant" +                                    }, +                                    "collapseEmphaticSequences": { +                                        "type": "string", +                                        "enum": ["false", "true", "full"], +                                        "default": "false"                                      }                                  }                              }, diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 5c49cca7..5fef27a7 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -82,6 +82,9 @@      const ITERATION_MARK_CODE_POINT = 0x3005; +    const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; +    const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; +    const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc;      // Existing functions @@ -372,6 +375,40 @@      } +    // Miscellaneous + +    function collapseEmphaticSequences(text, fullCollapse, sourceMap=null) { +        let result = ''; +        let collapseCodePoint = -1; +        const hasSourceMap = (sourceMap !== null); +        for (const char of text) { +            const c = char.codePointAt(0); +            if ( +                c === HIRAGANA_SMALL_TSU_CODE_POINT || +                c === KATAKANA_SMALL_TSU_CODE_POINT || +                c === KANA_PROLONGED_SOUND_MARK_CODE_POINT +            ) { +                if (collapseCodePoint !== c) { +                    collapseCodePoint = c; +                    if (!fullCollapse) { +                        result += char; +                        continue; +                    } +                } +            } else { +                collapseCodePoint = -1; +                result += char; +                continue; +            } + +            if (hasSourceMap) { +                sourceMap.combine(Math.max(0, result.length - 1), 1); +            } +        } +        return result; +    } + +      // Exports      Object.assign(jp, { @@ -383,6 +420,7 @@          convertHalfWidthKanaToFullWidth,          convertAlphabeticToKana,          distributeFurigana, -        distributeFuriganaInflected +        distributeFuriganaInflected, +        collapseEmphaticSequences      });  })(); diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index 20df2a68..f3e5f60d 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -170,7 +170,8 @@ function profileOptionsCreateDefaults() {              convertNumericCharacters: 'false',              convertAlphabeticCharacters: 'false',              convertHiraganaToKatakana: 'false', -            convertKatakanaToHiragana: 'variant' +            convertKatakanaToHiragana: 'variant', +            collapseEmphaticSequences: 'false'          },          dictionaries: {}, diff --git a/ext/bg/js/settings/main.js b/ext/bg/js/settings/main.js index 8fd94562..308e92eb 100644 --- a/ext/bg/js/settings/main.js +++ b/ext/bg/js/settings/main.js @@ -118,6 +118,7 @@ async function formRead(options) {      options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val();      options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val();      options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val(); +    options.translation.collapseEmphaticSequences = $('#translation-collapse-emphatic-sequences').val();      options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked');      options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked'); @@ -199,6 +200,7 @@ async function formWrite(options) {      $('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters);      $('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana);      $('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana); +    $('#translation-collapse-emphatic-sequences').val(options.translation.collapseEmphaticSequences);      $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser);      $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser); diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index e4441384..aaa1a0ec 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -347,17 +347,27 @@ class Translator {      getAllDeinflections(text, options) {          const translationOptions = options.translation; +        const collapseEmphaticOptions = [[false, false]]; +        switch (translationOptions.collapseEmphaticSequences) { +            case 'true': +                collapseEmphaticOptions.push([true, false]); +                break; +            case 'full': +                collapseEmphaticOptions.push([true, false], [true, true]); +                break; +        }          const textOptionVariantArray = [              Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),              Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters),              Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),              Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), -            Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana) +            Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana), +            collapseEmphaticOptions          ];          const deinflections = [];          const used = new Set(); -        for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { +        for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) {              let text2 = text;              const sourceMap = new TextSourceMap(text2);              if (halfWidth) { @@ -375,6 +385,9 @@ class Translator {              if (hiragana) {                  text2 = jp.convertKatakanaToHiragana(text2);              } +            if (collapseEmphatic) { +                text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap); +            }              for (let i = text2.length; i > 0; --i) {                  const text2Substring = text2.substring(0, i); diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 1297a9cc..96c1db82 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -427,7 +427,7 @@                  <p class="help-block">                      The conversion options below are listed in the order that the conversions are applied to the input text. -                    Each conversion has three possible values: +                    Conversions commonly have three possible values:                  </p>                  <ul class="help-block"> @@ -490,6 +490,15 @@                          <option value="variant">Use both variants</option>                      </select>                  </div> + +                <div class="form-group"> +                    <label for="translation-collapse-emphatic-sequences">Collapse emphatic character sequences <span class="label-light">(すっっごーーい → すっごーい / すごい)</span></label> +                    <select class="form-control" id="translation-collapse-emphatic-sequences"> +                        <option value="false">Disabled</option> +                        <option value="true">Collapse into single character</option> +                        <option value="full">Remove all characters</option> +                    </select> +                </div>              </div>              <div id="popup-content-scanning"> diff --git a/test/test-japanese.js b/test/test-japanese.js index f4b084ac..89e41c36 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -393,6 +393,59 @@ function testDistributeFuriganaInflected() {      }  } +function testCollapseEmphaticSequences() { +    const data = [ +        [['かこい', false], ['かこい', [1, 1, 1]]], +        [['かこい', true], ['かこい', [1, 1, 1]]], +        [['かっこい', false], ['かっこい', [1, 1, 1, 1]]], +        [['かっこい', true], ['かこい', [2, 1, 1]]], +        [['かっっこい', false], ['かっこい', [1, 2, 1, 1]]], +        [['かっっこい', true], ['かこい', [3, 1, 1]]], +        [['かっっっこい', false], ['かっこい', [1, 3, 1, 1]]], +        [['かっっっこい', true], ['かこい', [4, 1, 1]]], + +        [['こい', false], ['こい', [1, 1]]], +        [['こい', true], ['こい', [1, 1]]], +        [['っこい', false], ['っこい', [1, 1, 1]]], +        [['っこい', true], ['こい', [2, 1]]], +        [['っっこい', false], ['っこい', [2, 1, 1]]], +        [['っっこい', true], ['こい', [3, 1]]], +        [['っっっこい', false], ['っこい', [3, 1, 1]]], +        [['っっっこい', true], ['こい', [4, 1]]], + +        [['すごい', false], ['すごい', [1, 1, 1]]], +        [['すごい', true], ['すごい', [1, 1, 1]]], +        [['すごーい', false], ['すごーい', [1, 1, 1, 1]]], +        [['すごーい', true], ['すごい', [1, 2, 1]]], +        [['すごーーい', false], ['すごーい', [1, 1, 2, 1]]], +        [['すごーーい', true], ['すごい', [1, 3, 1]]], +        [['すっごーい', false], ['すっごーい', [1, 1, 1, 1, 1]]], +        [['すっごーい', true], ['すごい', [2, 2, 1]]], +        [['すっっごーーい', false], ['すっごーい', [1, 2, 1, 2, 1]]], +        [['すっっごーーい', true], ['すごい', [3, 3, 1]]], + +        [['', false], ['', []]], +        [['', true], ['', []]], +        [['っ', false], ['っ', [1]]], +        [['っ', true], ['', [1]]], +        [['っっ', false], ['っ', [2]]], +        [['っっ', true], ['', [2]]], +        [['っっっ', false], ['っ', [3]]], +        [['っっっ', true], ['', [3]]] +    ]; + +    for (const [[text, fullCollapse], [expected, expectedSourceMapping]] of data) { +        const sourceMap = new TextSourceMap(text); +        const actual1 = jp.collapseEmphaticSequences(text, fullCollapse, null); +        const actual2 = jp.collapseEmphaticSequences(text, fullCollapse, sourceMap); +        assert.strictEqual(actual1, expected); +        assert.strictEqual(actual2, expected); +        if (typeof expectedSourceMapping !== 'undefined') { +            assert.ok(sourceMap.equals(new TextSourceMap(text, expectedSourceMapping))); +        } +    } +} +  function testIsMoraPitchHigh() {      const data = [          [[0, 0], false], @@ -462,6 +515,7 @@ function main() {      testConvertAlphabeticToKana();      testDistributeFurigana();      testDistributeFuriganaInflected(); +    testCollapseEmphaticSequences();      testIsMoraPitchHigh();      testGetKanaMorae();  } |