diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-02-26 23:23:16 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-26 23:23:16 -0500 |
commit | 0bf0620c3579a5fe94c529673db105a83d6c3755 (patch) | |
tree | 8a49078a858c4e8b41959f93bb0a8aea162e97cc /ext | |
parent | b994414b14b224c02359b5e31f6994653a3d4458 (diff) |
Improve kana segmentation (#1446)
* Improve edge case furigana distribution for mixed hiragana/katakana
* Update/add tests
Diffstat (limited to 'ext')
-rw-r--r-- | ext/js/language/japanese-util.js | 23 |
1 files changed, 21 insertions, 2 deletions
```diff
diff --git a/ext/js/language/japanese-util.js b/ext/js/language/japanese-util.js
index c2ce9627..e47cdf55 100644
--- a/ext/js/language/japanese-util.js
+++ b/ext/js/language/japanese-util.js
@@ -521,8 +521,11 @@ const JapaneseUtil = (() => {
                         groupsStart + 1
                     );
                     if (segments !== null) {
-                        const furigana = reading.startsWith(text) ? '' : reading.substring(0, textLength);
-                        segments.unshift(this._createFuriganaSegment(text, furigana));
+                        if (reading.startsWith(text)) {
+                            segments.unshift(this._createFuriganaSegment(text, ''));
+                        } else {
+                            segments.unshift(...this._getFuriganaKanaSegments(text, reading));
+                        }
                         return segments;
                     }
                 }
@@ -554,6 +557,22 @@ const JapaneseUtil = (() => {
             }
         }
 
+        _getFuriganaKanaSegments(text, reading) {
+            const textLength = text.length;
+            const newSegments = [];
+            let start = 0;
+            let state = (reading[0] === text[0]);
+            for (let i = 1; i < textLength; ++i) {
+                const newState = (reading[i] === text[i]);
+                if (state === newState) { continue; }
+                newSegments.push(this._createFuriganaSegment(text.substring(start, i), state ? '' : reading.substring(start, i)));
+                state = newState;
+                start = i;
+            }
+            newSegments.push(this._createFuriganaSegment(text.substring(start, textLength), state ? '' : reading.substring(start, textLength)));
+            return newSegments;
+        }
+
         _getWanakana() {
             const wanakana = this._wanakana;
             if (wanakana === null) { throw new Error('Functions which use WanaKana are not supported in this context'); }
```