add mecab support

author: siikamiika <siikamiika@users.noreply.github.com> 2019-11-03 05:08:57 +0200
committer: siikamiika <siikamiika@users.noreply.github.com> 2019-11-23 17:45:44 +0200
commit: 41020289ab68ef22a0691a9f268a79d6a706df6b (patch)
tree: 0cd10c38b37cc475dc306c5cf95e8a2e4247a98a /ext/mixed/js/japanese.js
parent: 3881457e4ed3f9c7833ac21a5e7fc44c2ba00b0f (diff)
1 files changed, 32 insertions, 3 deletions
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js
index d24f56a6..78c419b2 100644
--- a/ext/mixed/js/japanese.js
+++ b/ext/mixed/js/japanese.js
@@ -61,12 +61,11 @@ function jpDistributeFurigana(expression, reading) {
 
         const group = groups[0];
         if (group.mode === 'kana') {
-            if (reading.startsWith(group.text)) {
-                const readingUsed = reading.substring(0, group.text.length);
+            if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) {
                 const readingLeft = reading.substring(group.text.length);
                 const segs = segmentize(readingLeft, groups.splice(1));
                 if (segs) {
-                    return [{text: readingUsed}].concat(segs);
+                    return [{text: group.text}].concat(segs);
                 }
             }
         } else {
@@ -95,3 +94,33 @@ function jpDistributeFurigana(expression, reading) {
 
     return segmentize(reading, groups) || fallback;
 }
+
+function jpDistributeFuriganaInflected(expression, reading, source) {
+    // Splits `source` into a leading stem shared with `expression` and a trailing
+    // remainder; the stem is annotated via jpDistributeFurigana using a trimmed
+    // `reading`, and the remainder (if any) is appended as a text-only segment.
+    const limit = Math.min(source.length, expression.length);
+    const sourceKana = jpKatakanaToHiragana(source);
+    const expressionKana = jpKatakanaToHiragana(expression);
+
+    // A position extends the stem when the source character is not kana (the
+    // expression may be written with a different kanji) or when both characters
+    // are equal after katakana-to-hiragana conversion.
+    let stemLength = 0;
+    for (; stemLength < limit; ++stemLength) {
+        const isKana = jpIsKana(source[stemLength]);
+        if (isKana && sourceKana[stemLength] !== expressionKana[stemLength]) { break; }
+    }
+
+    // If the whole source matched, the complete reading is used; otherwise the
+    // reading is cut by the number of expression characters outside the stem.
+    const isFullMatch = stemLength === source.length;
+    const stem = source.slice(0, stemLength);
+    const readingEnd = isFullMatch ? reading.length : reading.length - expression.length + stemLength;
+    const output = [...jpDistributeFurigana(stem, reading.slice(0, readingEnd))];
+
+    if (!isFullMatch) {
+        output.push({text: source.slice(stemLength)});
+    }
+    return output;
+}
author	siikamiika <siikamiika@users.noreply.github.com>	2019-11-03 05:08:57 +0200
committer	siikamiika <siikamiika@users.noreply.github.com>	2019-11-23 17:45:44 +0200
commit	41020289ab68ef22a0691a9f268a79d6a706df6b (patch)
tree	0cd10c38b37cc475dc306c5cf95e8a2e4247a98a /ext/mixed/js/japanese.js
parent	3881457e4ed3f9c7833ac21a5e7fc44c2ba00b0f (diff)