summaryrefslogtreecommitdiff
path: root/ext/mixed/js
diff options
context:
space:
mode:
authorAlex Yatskov <alex@foosoft.net>2017-08-26 11:57:34 -0700
committerAlex Yatskov <alex@foosoft.net>2017-08-26 11:57:34 -0700
commit190c749527c5c5f8afec7ead6956a5f3d7c1a422 (patch)
tree2945b2c95657e71b1e99212d9646e6bd30961ab3 /ext/mixed/js
parent49f0243527e504aa3fe196bc6dc759b6948f8a0b (diff)
improved furigana support
Diffstat (limited to 'ext/mixed/js')
-rw-r--r--ext/mixed/js/japanese.js112
1 files changed, 41 insertions, 71 deletions
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js
index f87ceeb0..341be1d9 100644
--- a/ext/mixed/js/japanese.js
+++ b/ext/mixed/js/japanese.js
@@ -9,7 +9,7 @@
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
@@ -39,80 +39,50 @@ function jpKatakanaToHiragana(text) {
return result;
}
-function distributeFurigana(word, reading) {
- reading = reading || wanakana.toHiragana(word);
- function span(str, pred) {
- let i = 0;
- while (i < str.length && pred(str[i])) {
- i++;
- }
- return [str.substring(0, i), str.substring(i)];
+/**
+ * Splits an expression into consecutive text segments and attaches to each
+ * kanji segment the part of the reading that belongs to it.
+ *
+ * @param {string} expression - The written form to segment.
+ * @param {string} reading - The reading to distribute over the expression.
+ * @returns {Array<{text: string, furigana?: string}>} One entry per segment;
+ * only kanji segments carry a `furigana` property. When `reading` is falsy,
+ * or no split of the reading fits the expression, a single segment pairing
+ * the whole expression with the whole reading is returned instead.
+ */
+function jpDistributeFurigana(expression, reading) {
+ const fallback = [{furigana: reading, text: expression}];
+ if (!reading) {
+ return fallback;
}
- const isKanji = c => jpIsKanji(c) ||
- c === '\u3005'; /* kurikaeshi */
- const isKana = c => jpIsKana(c) ||
- c === '\u30fc'; /* chouonpu */
- function parse(word) {
- const res = [];
- while (word.length > 0) {
- const c = word.charAt(0);
- if (isKana(c)) {
- const [text, rest] = span(word, isKana);
- res.push({ type: 'kana', text });
- word = rest;
- } else if (isKanji(c)) {
- const [text, rest] = span(word, isKanji);
- res.push({ type: 'kanji', text });
- word = rest;
- } else return null;
+
+ // Recursively matches `reading` against `groups`. Returns the list of
+ // segments on success and undefined when no split of the reading fits.
+ const segmentize = (reading, groups) => {
+ if (groups.length === 0) {
+ return [];
}
- return res;
- }
- const fallback = () => [{ text: word, furigana: reading }];
- const parts = parse(word);
- if (!parts) return fallback();
- let parti = 0;
- let readingi = 0;
- const res = [];
- let current = null;
- function backtrack() {
- parti--;
- const prev = res.pop();
- current = prev.furigana;
- }
- while (parti < parts.length) {
- const part = parts[parti];
- switch (part.type) {
- case 'kana':
- if (reading.startsWith(wanakana.toHiragana(part.text), readingi)) {
- if (parti === parts.length - 1 && readingi !== reading.length - part.text.length) {
- backtrack();
- } else {
- readingi += part.text.length;
- res.push({ text: part.text });
- parti++;
- }
- } else backtrack();
- break;
- case 'kanji':
- current = current || '';
- if (parti === parts.length - 1) {
- // last part, consume all
- current += reading.substring(readingi);
- } else {
- const nextText = parts[parti + 1].text;
- const end = reading.indexOf(nextText, readingi + 1); // consume at least one character
- if (end === -1) {
- return fallback();
- }
- current += reading.substring(readingi, end);
- readingi = end;
+ const group = groups[0];
+ if (group.mode === 'kana') {
+ // A non-kanji group has to appear verbatim at the start of the reading.
+ if (reading.startsWith(group.text)) {
+ const readingUsed = reading.substring(0, group.text.length);
+ const readingLeft = reading.substring(group.text.length);
+ // slice() copies the tail; splice() would also truncate the array
+ // owned by the caller, unlike the kanji branch below.
+ const segs = segmentize(readingLeft, groups.slice(1));
+ if (segs) {
+ return [{text: readingUsed}].concat(segs);
+ }
+ }
+ } else {
+ // Try the longest candidate reading first, never giving a kanji group
+ // fewer reading characters than it has characters of its own.
+ for (let i = reading.length; i >= group.text.length; --i) {
+ const readingUsed = reading.substring(0, i);
+ const readingLeft = reading.substring(i);
+ const segs = segmentize(readingLeft, groups.slice(1));
+ if (segs) {
+ return [{text: group.text, furigana: readingUsed}].concat(segs);
}
- res.push({ text: part.text, furigana: current });
- current = null;
- parti++;
+ }
+ }
+ };
+
+ // Partition the expression into alternating runs: characters accepted by
+ // jpIsKanji (plus U+3005) form 'kanji' groups, everything else 'kana' groups.
+ const groups = [];
+ let modePrev = null;
+ for (const c of expression) {
+ const modeCurr = jpIsKanji(c) || c.charCodeAt(0) === 0x3005 /* noma */ ? 'kanji' : 'kana';
+ if (modeCurr === modePrev) {
+ groups[groups.length - 1].text += c;
+ } else {
+ groups.push({mode: modeCurr, text: c});
+ modePrev = modeCurr;
}
}
- return res;
+
+ return segmentize(reading, groups) || fallback;
}