diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-04-12 11:42:46 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-12 11:42:46 -0400 |
commit | 649adb13d8e1a775bb54e28b12401f7f95c5ab8d (patch) | |
tree | fe48c094ab45c00729742962e05da407b42af73a /ext/bg | |
parent | 82f83970001682018f1f5b595ffdcd13123fed91 (diff) | |
parent | c2bf474d1f71c29b848e12a4af4b0860d8adb4ab (diff) |
Merge pull request #440 from toasted-nutbread/collapse-emphatic-sequences
Add support for collapsing emphatic character sequences
Diffstat (limited to 'ext/bg')
-rw-r--r-- | ext/bg/data/options-schema.json | 8 | ||||
-rw-r--r-- | ext/bg/js/japanese.js | 40 | ||||
-rw-r--r-- | ext/bg/js/options.js | 3 | ||||
-rw-r--r-- | ext/bg/js/settings/main.js | 2 | ||||
-rw-r--r-- | ext/bg/js/translator.js | 17 | ||||
-rw-r--r-- | ext/bg/settings.html | 11 |
6 files changed, 75 insertions, 6 deletions
diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json index da1f1ce0..4f9e694d 100644 --- a/ext/bg/data/options-schema.json +++ b/ext/bg/data/options-schema.json @@ -388,7 +388,8 @@ "convertNumericCharacters", "convertAlphabeticCharacters", "convertHiraganaToKatakana", - "convertKatakanaToHiragana" + "convertKatakanaToHiragana", + "collapseEmphaticSequences" ], "properties": { "convertHalfWidthCharacters": { @@ -415,6 +416,11 @@ "type": "string", "enum": ["false", "true", "variant"], "default": "variant" + }, + "collapseEmphaticSequences": { + "type": "string", + "enum": ["false", "true", "full"], + "default": "false" } } }, diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 5c49cca7..5fef27a7 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -82,6 +82,9 @@ const ITERATION_MARK_CODE_POINT = 0x3005; + const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; + const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; + const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc; // Existing functions @@ -372,6 +375,40 @@ } + // Miscellaneous + + function collapseEmphaticSequences(text, fullCollapse, sourceMap=null) { + let result = ''; + let collapseCodePoint = -1; + const hasSourceMap = (sourceMap !== null); + for (const char of text) { + const c = char.codePointAt(0); + if ( + c === HIRAGANA_SMALL_TSU_CODE_POINT || + c === KATAKANA_SMALL_TSU_CODE_POINT || + c === KANA_PROLONGED_SOUND_MARK_CODE_POINT + ) { + if (collapseCodePoint !== c) { + collapseCodePoint = c; + if (!fullCollapse) { + result += char; + continue; + } + } + } else { + collapseCodePoint = -1; + result += char; + continue; + } + + if (hasSourceMap) { + sourceMap.combine(Math.max(0, result.length - 1), 1); + } + } + return result; + } + + // Exports Object.assign(jp, { @@ -383,6 +420,7 @@ convertHalfWidthKanaToFullWidth, convertAlphabeticToKana, distributeFurigana, - distributeFuriganaInflected + distributeFuriganaInflected, + collapseEmphaticSequences }); })(); diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index 20df2a68..f3e5f60d 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -170,7 +170,8 @@ function profileOptionsCreateDefaults() { convertNumericCharacters: 'false', convertAlphabeticCharacters: 'false', convertHiraganaToKatakana: 'false', - convertKatakanaToHiragana: 'variant' + convertKatakanaToHiragana: 'variant', + collapseEmphaticSequences: 'false' }, dictionaries: {}, diff --git a/ext/bg/js/settings/main.js b/ext/bg/js/settings/main.js index 8fd94562..308e92eb 100644 --- a/ext/bg/js/settings/main.js +++ b/ext/bg/js/settings/main.js @@ -118,6 +118,7 @@ async function formRead(options) { options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val(); options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val(); options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val(); + options.translation.collapseEmphaticSequences = $('#translation-collapse-emphatic-sequences').val(); options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked'); options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked'); @@ -199,6 +200,7 @@ async function formWrite(options) { $('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters); $('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana); $('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana); + $('#translation-collapse-emphatic-sequences').val(options.translation.collapseEmphaticSequences); $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser); $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser); diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index e4441384..aaa1a0ec 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -347,17 +347,27 @@ class Translator { getAllDeinflections(text, options) { const translationOptions = options.translation; + const collapseEmphaticOptions = [[false, false]]; + switch (translationOptions.collapseEmphaticSequences) { + case 'true': + collapseEmphaticOptions.push([true, false]); + break; + case 'full': + collapseEmphaticOptions.push([true, false], [true, true]); + break; + } const textOptionVariantArray = [ Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), - Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana) + Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana), + collapseEmphaticOptions ]; const deinflections = []; const used = new Set(); - for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { + for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) { let text2 = text; const sourceMap = new TextSourceMap(text2); if (halfWidth) { @@ -375,6 +385,9 @@ class Translator { if (hiragana) { text2 = jp.convertKatakanaToHiragana(text2); } + if (collapseEmphatic) { + text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap); + } for (let i = text2.length; i > 0; --i) { const text2Substring = text2.substring(0, i); diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 1297a9cc..96c1db82 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -427,7 +427,7 @@ <p class="help-block"> The conversion options below are listed in the order that the conversions are applied to the input text. - Each conversion has three possible values: + Conversions commonly have three possible values: </p> <ul class="help-block"> @@ -490,6 +490,15 @@ <option value="variant">Use both variants</option> </select> </div> + + <div class="form-group"> + <label for="translation-collapse-emphatic-sequences">Collapse emphatic character sequences <span class="label-light">(すっっごーーい → すっごーい / すごい)</span></label> + <select class="form-control" id="translation-collapse-emphatic-sequences"> + <option value="false">Disabled</option> + <option value="true">Collapse into single character</option> + <option value="full">Remove all characters</option> + </select> + </div> </div> <div id="popup-content-scanning"> |