diff options
author | Yoitsumi <yoitsumi@gmail.com> | 2017-08-20 16:07:55 +0200 |
---|---|---|
committer | Yoitsumi <yoitsumi@gmail.com> | 2017-08-20 16:07:55 +0200 |
commit | c89678f5dbcb892225ce3781c350fc630d776373 (patch) | |
tree | 384f08c5084e4e0c9f31755b616cc61574d5a913 /ext/mixed/js/japanese.js | |
parent | c4525027daee6e3fdbe206ae1aab0a9c5405d753 (diff) |
Improve {furigana} marker for anki export
Diffstat (limited to 'ext/mixed/js/japanese.js')
-rw-r--r-- | ext/mixed/js/japanese.js | 78 |
1 files changed, 78 insertions, 0 deletions
// diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js
// index c11e955b..a8f72059 100644
// @@ -38,3 +38,81 @@ function jpKatakanaToHiragana(text) { ... return result; }
// The function below is the content added by this hunk, shown as plain source.

/**
 * Splits a word into alternating kana / kanji runs and assigns to every kanji
 * run the slice of the reading that belongs to it.
 *
 * Kana runs are matched against the reading after conversion with
 * wanakana.toHiragana; kanji runs receive whatever lies between the
 * surrounding kana anchors. When a later kana run cannot be placed, the
 * previous kanji run is re-opened and extended to the next candidate anchor.
 *
 * @param {string} word - Expression to annotate.
 * @param {string} [reading] - Reading of the whole word. When falsy,
 *     wanakana.toHiragana(word) is used instead.
 * @returns {Array<{text: string, furigana?: string}>} One entry per run, in
 *     order. Kana runs carry only `text`; kanji runs also carry `furigana`.
 *     When the word contains a character that is neither kana nor kanji, or
 *     the reading cannot be distributed, a single entry
 *     `{text: word, furigana: reading}` is returned.
 */
function distributeFurigana(word, reading) {
    reading = reading || wanakana.toHiragana(word);

    const isKanji = c => jpIsKanji(c) || c === '\u3005'; /* kurikaeshi */
    const isKana = c => jpIsKana(c) || c === '\u30fc'; /* chouonpu */
    const fallback = () => [{ text: word, furigana: reading }];

    // Splits str into [longest prefix whose characters satisfy pred, remainder].
    function span(str, pred) {
        let i = 0;
        while (i < str.length && pred(str[i])) {
            i++;
        }
        return [str.substring(0, i), str.substring(i)];
    }

    // Tokenizes text into {type: 'kana' | 'kanji', text} runs.
    // Returns null as soon as a character of neither class is found.
    function parse(text) {
        const runs = [];
        let remaining = text;
        while (remaining.length > 0) {
            const c = remaining.charAt(0);
            let type;
            let pred;
            if (isKana(c)) {
                type = 'kana';
                pred = isKana;
            } else if (isKanji(c)) {
                type = 'kanji';
                pred = isKanji;
            } else {
                return null;
            }
            const [run, rest] = span(remaining, pred);
            runs.push({ type, text: run });
            remaining = rest;
        }
        return runs;
    }

    const parts = parse(word);
    if (parts === null) {
        return fallback();
    }

    const res = [];
    let parti = 0; // index of the run being placed
    let readingi = 0; // number of reading characters consumed so far
    let current = null; // furigana carried over from a re-opened kanji run

    // Re-opens the previously emitted run so that it can absorb more of the
    // reading. Returns false when nothing has been emitted yet, i.e. the
    // mismatch cannot be repaired and the caller has to give up.
    function backtrack() {
        if (res.length === 0) {
            return false;
        }
        parti--;
        current = res.pop().furigana;
        return true;
    }

    while (parti < parts.length) {
        const part = parts[parti];
        const isLast = parti === parts.length - 1;

        if (part.type === 'kana') {
            const kana = wanakana.toHiragana(part.text);
            const matches = reading.startsWith(kana, readingi);
            // A trailing kana run must end exactly at the end of the reading.
            const leavesRemainder = isLast && readingi !== reading.length - kana.length;
            if (matches && !leavesRemainder) {
                readingi += kana.length;
                res.push({ text: part.text });
                parti++;
            } else if (!backtrack()) {
                return fallback();
            }
        } else {
            current = current || '';
            if (isLast) {
                // last part, consume all
                current += reading.substring(readingi);
            } else {
                // Look for the next kana run in the same normalized form the
                // kana branch compares with, so katakana runs can anchor too.
                const nextKana = wanakana.toHiragana(parts[parti + 1].text);
                const end = reading.indexOf(nextKana, readingi + 1); // consume at least one character
                if (end === -1) {
                    return fallback();
                }
                current += reading.substring(readingi, end);
                readingi = end;
            }
            res.push({ text: part.text, furigana: current });
            current = null;
            parti++;
        }
    }

    return res;
}