summaryrefslogtreecommitdiff
path: root/ext/mixed/js/japanese.js
diff options
context:
space:
mode:
authorAlex Yatskov <alex@foosoft.net>2019-12-03 18:30:22 -0800
committerAlex Yatskov <alex@foosoft.net>2019-12-03 18:30:22 -0800
commitf9ea6206550ceee625ea93215a6e08d45a750086 (patch)
tree803fe11a788a631076b3fb11a98e50bb8b454396 /ext/mixed/js/japanese.js
parent08ad2779678cd447bd747c2b155ef9b5135fdf5d (diff)
parent3975aabf4dc283d49ec46d0ed7ead982b9fa7441 (diff)
Merge branch 'master' into testing
Diffstat (limited to 'ext/mixed/js/japanese.js')
-rw-r--r--ext/mixed/js/japanese.js100
1 files changed, 94 insertions, 6 deletions
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js
index 9f401da7..8b841b2e 100644
--- a/ext/mixed/js/japanese.js
+++ b/ext/mixed/js/japanese.js
@@ -26,6 +26,15 @@ function jpIsKana(c) {
return wanakana.isKana(c);
}
+function jpIsJapaneseText(text) {
+ for (const c of text) {
+ if (jpIsKanji(c) || jpIsKana(c)) {
+ return true;
+ }
+ }
+ return false;
+}
+
function jpKatakanaToHiragana(text) {
let result = '';
for (const c of text) {
@@ -39,36 +48,84 @@ function jpKatakanaToHiragana(text) {
return result;
}
+function jpHiraganaToKatakana(text) {
+ let result = '';
+ for (const c of text) {
+ if (wanakana.isHiragana(c)) {
+ result += wanakana.toKatakana(c);
+ } else {
+ result += c;
+ }
+ }
+
+ return result;
+}
+
+function jpToRomaji(text) {
+ return wanakana.toRomaji(text);
+}
+
+function jpConvertReading(expressionFragment, readingFragment, readingMode) {
+ switch (readingMode) {
+ case 'hiragana':
+ return jpKatakanaToHiragana(readingFragment || '');
+ case 'katakana':
+ return jpHiraganaToKatakana(readingFragment || '');
+ case 'romaji':
+ if (readingFragment) {
+ return jpToRomaji(readingFragment);
+ } else {
+ if (jpIsKana(expressionFragment)) {
+ return jpToRomaji(expressionFragment);
+ }
+ }
+ return readingFragment;
+ default:
+ return readingFragment;
+ }
+}
+
function jpDistributeFurigana(expression, reading) {
const fallback = [{furigana: reading, text: expression}];
if (!reading) {
return fallback;
}
+ let isAmbiguous = false;
const segmentize = (reading, groups) => {
- if (groups.length === 0) {
+ if (groups.length === 0 || isAmbiguous) {
return [];
}
const group = groups[0];
if (group.mode === 'kana') {
- if (reading.startsWith(group.text)) {
- const readingUsed = reading.substring(0, group.text.length);
+ if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) {
const readingLeft = reading.substring(group.text.length);
const segs = segmentize(readingLeft, groups.splice(1));
if (segs) {
- return [{text: readingUsed}].concat(segs);
+ return [{text: group.text}].concat(segs);
}
}
} else {
+ let foundSegments = null;
for (let i = reading.length; i >= group.text.length; --i) {
const readingUsed = reading.substring(0, i);
const readingLeft = reading.substring(i);
const segs = segmentize(readingLeft, groups.slice(1));
if (segs) {
- return [{text: group.text, furigana: readingUsed}].concat(segs);
+ if (foundSegments !== null) {
+ // more than one way to segmentize the tail, mark as ambiguous
+ isAmbiguous = true;
+ return null;
+ }
+ foundSegments = [{text: group.text, furigana: readingUsed}].concat(segs);
+ }
+ // there is only one way to segmentize the last non-kana group
+ if (groups.length === 1) {
+ break;
}
}
+ return foundSegments;
}
};
@@ -84,5 +141,36 @@ function jpDistributeFurigana(expression, reading) {
}
}
- return segmentize(reading, groups) || fallback;
+ const segments = segmentize(reading, groups);
+ if (segments && !isAmbiguous) {
+ return segments;
+ }
+ return fallback;
+}
+
+function jpDistributeFuriganaInflected(expression, reading, source) {
+ const output = [];
+
+ let stemLength = 0;
+ const shortest = Math.min(source.length, expression.length);
+ const sourceHiragana = jpKatakanaToHiragana(source);
+ const expressionHiragana = jpKatakanaToHiragana(expression);
+ while (stemLength < shortest && sourceHiragana[stemLength] === expressionHiragana[stemLength]) {
+ ++stemLength;
+ }
+ const offset = source.length - stemLength;
+
+ const stemExpression = source.slice(0, source.length - offset);
+ const stemReading = reading.slice(
+ 0, offset === 0 ? reading.length : reading.length - expression.length + stemLength
+ );
+ for (const segment of jpDistributeFurigana(stemExpression, stemReading)) {
+ output.push(segment);
+ }
+
+ if (stemLength !== source.length) {
+ output.push({text: source.slice(stemLength)});
+ }
+
+ return output;
}