diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-04-10 11:56:18 -0400 | 
|---|---|---|
| committer | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-04-10 11:56:18 -0400 | 
| commit | 42a2917bf7aa3ab424ada2fc3acf224b74020a7f (patch) | |
| tree | 647438015b7b526ddc2969d2c00e9e43b8bb792f | |
| parent | f177e3699ae85bafa3a648f5c03fbb8e303a6be3 (diff) | |
Add support for collapsing emphatic character sequences
| -rw-r--r-- | ext/bg/data/options-schema.json | 8 | ||||
| -rw-r--r-- | ext/bg/js/japanese.js | 38 | ||||
| -rw-r--r-- | ext/bg/js/options.js | 3 | ||||
| -rw-r--r-- | ext/bg/js/settings/main.js | 2 | ||||
| -rw-r--r-- | ext/bg/js/translator.js | 17 | ||||
| -rw-r--r-- | ext/bg/settings.html | 11 | 
6 files changed, 73 insertions, 6 deletions
| diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json index da1f1ce0..4f9e694d 100644 --- a/ext/bg/data/options-schema.json +++ b/ext/bg/data/options-schema.json @@ -388,7 +388,8 @@                                      "convertNumericCharacters",                                      "convertAlphabeticCharacters",                                      "convertHiraganaToKatakana", -                                    "convertKatakanaToHiragana" +                                    "convertKatakanaToHiragana", +                                    "collapseEmphaticSequences"                                  ],                                  "properties": {                                      "convertHalfWidthCharacters": { @@ -415,6 +416,11 @@                                          "type": "string",                                          "enum": ["false", "true", "variant"],                                          "default": "variant" +                                    }, +                                    "collapseEmphaticSequences": { +                                        "type": "string", +                                        "enum": ["false", "true", "full"], +                                        "default": "false"                                      }                                  }                              }, diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 2a2b39fd..e8b258cb 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -83,6 +83,8 @@      const ITERATION_MARK_CODE_POINT = 0x3005; +    const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; +    const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;      // Existing functions @@ -373,6 +375,39 @@      } +    // Miscellaneous + +    function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) { +        let result = ''; +        let collapseCodePoint = -1; +        const hasSourceMap = (sourceMap !== null); +        for (const char of sourceText) { +            const c = char.codePointAt(0); +            if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) { +                if (collapseCodePoint !== c) { +                    collapseCodePoint = c; +                    if (!fullCollapse) { +                        result += char; +                        continue; +                    } +                } +            } else { +                collapseCodePoint = -1; +                result += char; +                continue; +            } + +            if (hasSourceMap) { +                const index = result.length; +                if (index > 0) { +                    sourceMap.combine(index - 1, 1); +                } +            } +        } +        return result; +    } + +      // Exports      Object.assign(jp, { @@ -384,6 +419,7 @@          convertHalfWidthKanaToFullWidth,          convertAlphabeticToKana,          distributeFurigana, -        distributeFuriganaInflected +        distributeFuriganaInflected, +        collapseEmphaticSequences      });  })(); diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index abb054d4..fa96c96c 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -171,7 +171,8 @@ function profileOptionsCreateDefaults() {              convertNumericCharacters: 'false',              convertAlphabeticCharacters: 'false',              convertHiraganaToKatakana: 'false', -            convertKatakanaToHiragana: 'variant' +            convertKatakanaToHiragana: 'variant', +            collapseEmphaticSequences: 'false'          },          dictionaries: {}, diff --git a/ext/bg/js/settings/main.js b/ext/bg/js/settings/main.js index 1653ee35..18c2da73 100644 --- a/ext/bg/js/settings/main.js +++ b/ext/bg/js/settings/main.js @@ -119,6 +119,7 @@ async function formRead(options) {      options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val();      options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val();      options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val(); +    options.translation.collapseEmphaticSequences = $('#translation-collapse-emphatic-sequences').val();      options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked');      options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked'); @@ -200,6 +201,7 @@ async function formWrite(options) {      $('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters);      $('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana);      $('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana); +    $('#translation-collapse-emphatic-sequences').val(options.translation.collapseEmphaticSequences);      $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser);      $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser); diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 27f91c05..402ac6bd 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -348,17 +348,27 @@ class Translator {      getAllDeinflections(text, options) {          const translationOptions = options.translation; +        const collapseEmphaticOptions = [[false, false]]; +        switch (translationOptions.collapseEmphaticSequences) { +            case 'true': +                collapseEmphaticOptions.push([true, false]); +                break; +            case 'full': +                collapseEmphaticOptions.push([true, true]); +                break; +        }          const textOptionVariantArray = [              Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),              Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters),              Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),              Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), -            Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana) +            Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana), +            collapseEmphaticOptions          ];          const deinflections = [];          const used = new Set(); -        for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { +        for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) {              let text2 = text;              const sourceMap = new TextSourceMap(text2);              if (halfWidth) { @@ -376,6 +386,9 @@ class Translator {              if (hiragana) {                  text2 = jp.convertKatakanaToHiragana(text2);              } +            if (collapseEmphatic) { +                text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap); +            }              for (let i = text2.length; i > 0; --i) {                  const text2Substring = text2.substring(0, i); diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 1297a9cc..91051f3e 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -427,7 +427,7 @@                  <p class="help-block">                      The conversion options below are listed in the order that the conversions are applied to the input text. -                    Each conversion has three possible values: +                    Conversions commonly have three possible values:                  </p>                  <ul class="help-block"> @@ -490,6 +490,15 @@                          <option value="variant">Use both variants</option>                      </select>                  </div> + +                <div class="form-group"> +                    <label for="translation-collapse-emphatic-sequences">Collapse emphatic character sequences <span class="label-light">(かっっっこいい → かっこいい)</span></label> +                    <select class="form-control" id="translation-collapse-emphatic-sequences"> +                        <option value="false">Disabled</option> +                        <option value="true">Collapse into single character</option> +                        <option value="full">Remove all characters</option> +                    </select> +                </div>              </div>              <div id="popup-content-scanning"> |