diff options
author | siikamiika <siikamiika@users.noreply.github.com> | 2019-11-03 05:08:57 +0200 |
---|---|---|
committer | siikamiika <siikamiika@users.noreply.github.com> | 2019-11-23 17:45:44 +0200 |
commit | 41020289ab68ef22a0691a9f268a79d6a706df6b (patch) | |
tree | 0cd10c38b37cc475dc306c5cf95e8a2e4247a98a /ext/mixed/js/japanese.js | |
parent | 3881457e4ed3f9c7833ac21a5e7fc44c2ba00b0f (diff) |
add mecab support
Diffstat (limited to 'ext/mixed/js/japanese.js')
-rw-r--r-- | ext/mixed/js/japanese.js | 35 |
1 files changed, 32 insertions, 3 deletions
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js index d24f56a6..78c419b2 100644 --- a/ext/mixed/js/japanese.js +++ b/ext/mixed/js/japanese.js @@ -61,12 +61,11 @@ function jpDistributeFurigana(expression, reading) { const group = groups[0]; if (group.mode === 'kana') { - if (reading.startsWith(group.text)) { - const readingUsed = reading.substring(0, group.text.length); + if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) { const readingLeft = reading.substring(group.text.length); const segs = segmentize(readingLeft, groups.splice(1)); if (segs) { - return [{text: readingUsed}].concat(segs); + return [{text: group.text}].concat(segs); } } } else { @@ -95,3 +94,33 @@ function jpDistributeFurigana(expression, reading) { return segmentize(reading, groups) || fallback; } + +function jpDistributeFuriganaInflected(expression, reading, source) { + const output = []; + + let stemLength = 0; + const shortest = Math.min(source.length, expression.length); + const sourceHiragana = jpKatakanaToHiragana(source); + const expressionHiragana = jpKatakanaToHiragana(expression); + while ( + stemLength < shortest && + // sometimes an expression can use a kanji that's different from the source + (!jpIsKana(source[stemLength]) || (sourceHiragana[stemLength] === expressionHiragana[stemLength])) + ) { + ++stemLength; + } + const offset = source.length - stemLength; + + for (const segment of jpDistributeFurigana( + source.slice(0, offset === 0 ? source.length : source.length - offset), + reading.slice(0, offset === 0 ? reading.length : reading.length - expression.length + stemLength) + )) { + output.push(segment); + } + + if (stemLength !== source.length) { + output.push({text: source.slice(stemLength)}); + } + + return output; +} |