diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-02-26 23:23:16 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-02-26 23:23:16 -0500 | 
| commit | 0bf0620c3579a5fe94c529673db105a83d6c3755 (patch) | |
| tree | 8a49078a858c4e8b41959f93bb0a8aea162e97cc /ext/js | |
| parent | b994414b14b224c02359b5e31f6994653a3d4458 (diff) | |
Improve kana segmentation (#1446)
* Improve edge case furigana distribution for mixed hiragana/katakana
* Update/add tests
Diffstat (limited to 'ext/js')
| -rw-r--r-- | ext/js/language/japanese-util.js | 23 | 
1 file changed, 21 insertions, 2 deletions
| diff --git a/ext/js/language/japanese-util.js b/ext/js/language/japanese-util.js index c2ce9627..e47cdf55 100644 --- a/ext/js/language/japanese-util.js +++ b/ext/js/language/japanese-util.js @@ -521,8 +521,11 @@ const JapaneseUtil = (() => {                          groupsStart + 1                      );                      if (segments !== null) { -                        const furigana = reading.startsWith(text) ? '' : reading.substring(0, textLength); -                        segments.unshift(this._createFuriganaSegment(text, furigana)); +                        if (reading.startsWith(text)) { +                            segments.unshift(this._createFuriganaSegment(text, '')); +                        } else { +                            segments.unshift(...this._getFuriganaKanaSegments(text, reading)); +                        }                          return segments;                      }                  } @@ -554,6 +557,22 @@ const JapaneseUtil = (() => {              }          } +        _getFuriganaKanaSegments(text, reading) { +            const textLength = text.length; +            const newSegments = []; +            let start = 0; +            let state = (reading[0] === text[0]); +            for (let i = 1; i < textLength; ++i) { +                const newState = (reading[i] === text[i]); +                if (state === newState) { continue; } +                newSegments.push(this._createFuriganaSegment(text.substring(start, i), state ? '' : reading.substring(start, i))); +                state = newState; +                start = i; +            } +            newSegments.push(this._createFuriganaSegment(text.substring(start, textLength), state ? '' : reading.substring(start, textLength))); +            return newSegments; +        } +          _getWanakana() {              const wanakana = this._wanakana;              if (wanakana === null) { throw new Error('Functions which use WanaKana are not supported in this context'); } |