diff options
author | Alex Yatskov <alex@foosoft.net> | 2020-04-18 11:05:35 -0700 |
---|---|---|
committer | Alex Yatskov <alex@foosoft.net> | 2020-04-18 11:05:35 -0700 |
commit | f2186c51e4ef219d158735d30a32bbf3e49c4e1a (patch) | |
tree | 61de35513b6182e83b626c1571bc2aa97234e3b0 /ext/bg/js/japanese.js | |
parent | b3e79d0e396353de2cbefb945412bfda4bd6ca99 (diff) | |
parent | 9e7750125c40d1c4121e8d015957606721bede7c (diff) |
Merge branch 'master' into testing
Diffstat (limited to 'ext/bg/js/japanese.js')
-rw-r--r-- | ext/bg/js/japanese.js | 64 |
1 files changed, 51 insertions, 13 deletions
diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 5c49cca7..ac81acb5 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -82,6 +82,9 @@ const ITERATION_MARK_CODE_POINT = 0x3005; + const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; + const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; + const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc; // Existing functions @@ -121,25 +124,25 @@ return wanakana.toRomaji(text); } - function convertReading(expressionFragment, readingFragment, readingMode) { + function convertReading(expression, reading, readingMode) { switch (readingMode) { case 'hiragana': - return convertKatakanaToHiragana(readingFragment || ''); + return convertKatakanaToHiragana(reading); case 'katakana': - return convertHiraganaToKatakana(readingFragment || ''); + return convertHiraganaToKatakana(reading); case 'romaji': - if (readingFragment) { - return convertToRomaji(readingFragment); + if (reading) { + return convertToRomaji(reading); } else { - if (isStringEntirelyKana(expressionFragment)) { - return convertToRomaji(expressionFragment); + if (isStringEntirelyKana(expression)) { + return convertToRomaji(expression); } } - return readingFragment; + return reading; case 'none': - return null; + return ''; default: - return readingFragment; + return reading; } } @@ -297,7 +300,7 @@ const readingLeft = reading2.substring(group.text.length); const segs = segmentize(readingLeft, groups.splice(1)); if (segs) { - return [{text: group.text}].concat(segs); + return [{text: group.text, furigana: ''}].concat(segs); } } } else { @@ -365,13 +368,47 @@ } if (stemLength !== source.length) { - output.push({text: source.substring(stemLength)}); + output.push({text: source.substring(stemLength), furigana: ''}); } return output; } + // Miscellaneous + + function collapseEmphaticSequences(text, fullCollapse, sourceMap=null) { + let result = ''; + let collapseCodePoint = -1; + const hasSourceMap = (sourceMap !== null); + for (const char of text) { + const c = char.codePointAt(0); + if ( + c === HIRAGANA_SMALL_TSU_CODE_POINT || + c === KATAKANA_SMALL_TSU_CODE_POINT || + c === KANA_PROLONGED_SOUND_MARK_CODE_POINT + ) { + if (collapseCodePoint !== c) { + collapseCodePoint = c; + if (!fullCollapse) { + result += char; + continue; + } + } + } else { + collapseCodePoint = -1; + result += char; + continue; + } + + if (hasSourceMap) { + sourceMap.combine(Math.max(0, result.length - 1), 1); + } + } + return result; + } + + // Exports Object.assign(jp, { @@ -383,6 +420,7 @@ convertHalfWidthKanaToFullWidth, convertAlphabeticToKana, distributeFurigana, - distributeFuriganaInflected + distributeFuriganaInflected, + collapseEmphaticSequences }); })(); |