summary | refs | log | tree | commit | diff
path: root/ext/mixed
diff options
context:
space:
mode:
author: Alex Yatskov <alex@foosoft.net> 2017-08-20 09:34:20 -0700
committer: Alex Yatskov <alex@foosoft.net> 2017-08-20 09:34:20 -0700
commit: b38450b6ce8b747685965153b266d9f4e49d9b7c (patch)
tree: 384f08c5084e4e0c9f31755b616cc61574d5a913 /ext/mixed
parent: c4525027daee6e3fdbe206ae1aab0a9c5405d753 (diff)
parent: c89678f5dbcb892225ce3781c350fc630d776373 (diff)
Merge branch 'kmltml-topic/furigana' into dev
Diffstat (limited to 'ext/mixed')
-rw-r--r-- ext/mixed/js/japanese.js 78
1 files changed, 78 insertions, 0 deletions
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js
index c11e955b..a8f72059 100644
--- a/ext/mixed/js/japanese.js
+++ b/ext/mixed/js/japanese.js
@@ -38,3 +38,81 @@ function jpKatakanaToHiragana(text) {
return result;
}
+
/**
 * Splits a word into consecutive kana / kanji segments and assigns to every
 * kanji segment the slice of the reading that belongs to it, so furigana can
 * be attached to the kanji only.
 *
 * Relies on `jpIsKanji`, `jpIsKana` and `wanakana.toHiragana` being in scope.
 *
 * @param {string} word - Expression to annotate.
 * @param {string} [reading] - Reading of `word`. When empty or omitted,
 *     `wanakana.toHiragana(word)` is used instead.
 * @returns {Array<{text: string, furigana?: string}>} Segments in word order.
 *     Kana segments carry only `text`; kanji segments also carry `furigana`.
 *     When the word contains a character that is neither kana nor kanji, or
 *     the reading cannot be aligned with the word, a single segment holding
 *     the whole word and the whole reading is returned.
 */
function distributeFurigana(word, reading) {
    // An empty or missing reading falls back to the word's own kana rendering.
    const fullReading = reading || wanakana.toHiragana(word);

    const isKanji = c => jpIsKanji(c) || c === '\u3005'; /* kurikaeshi */
    const isKana = c => jpIsKana(c) || c === '\u30fc'; /* chouonpu */

    // Whole word annotated with the whole reading: used whenever alignment fails.
    const fallback = () => [{ text: word, furigana: fullReading }];

    // Splits `str` into the longest prefix whose characters all satisfy `pred`
    // and the remainder.
    function span(str, pred) {
        let i = 0;
        while (i < str.length && pred(str[i])) {
            i++;
        }
        return [str.substring(0, i), str.substring(i)];
    }

    // Breaks `text` into maximal runs of kana and kanji. Because every run is
    // maximal, the two types strictly alternate. Returns null when a character
    // is neither kana nor kanji.
    function parse(text) {
        const runs = [];
        let remaining = text;
        while (remaining.length > 0) {
            const c = remaining.charAt(0);
            let type;
            if (isKana(c)) {
                type = 'kana';
            } else if (isKanji(c)) {
                type = 'kanji';
            } else {
                return null;
            }
            const [run, rest] = span(remaining, type === 'kana' ? isKana : isKanji);
            runs.push({ type, text: run });
            remaining = rest;
        }
        return runs;
    }

    const parts = parse(word);
    if (parts === null) {
        return fallback();
    }

    const segments = [];
    let parti = 0; // index of the part being aligned
    let readingi = 0; // index of the first unconsumed reading character
    let pending = null; // furigana carried over from a kanji segment being re-extended

    while (parti < parts.length) {
        const part = parts[parti];
        const isLast = parti === parts.length - 1;

        if (part.type === 'kana') {
            const kana = wanakana.toHiragana(part.text);
            const matches = fullReading.startsWith(kana, readingi);
            // Trailing kana must coincide with the very end of the reading.
            const aligned = !isLast || readingi === fullReading.length - kana.length;
            if (matches && aligned) {
                readingi += kana.length;
                segments.push({ text: part.text });
                parti++;
                continue;
            }

            // Backtrack: reopen the preceding kanji segment so that it can
            // absorb more of the reading. With nothing to reopen (the word
            // starts with this kana run) no alignment exists.
            if (segments.length === 0) {
                return fallback();
            }
            pending = segments.pop().furigana;
            parti--;
            continue;
        }

        // Kanji run.
        let furigana = pending || '';
        if (isLast) {
            // Last part: consume everything that is left.
            furigana += fullReading.substring(readingi);
        } else {
            // The following part is always kana; look for it in the same
            // hiragana form the kana branch matches against, consuming at
            // least one reading character for this kanji run.
            const nextKana = wanakana.toHiragana(parts[parti + 1].text);
            const end = fullReading.indexOf(nextKana, readingi + 1);
            if (end === -1) {
                return fallback();
            }
            furigana += fullReading.substring(readingi, end);
            readingi = end;
        }
        segments.push({ text: part.text, furigana });
        pending = null;
        parti++;
    }

    return segments;
}