diff options
author | Alex Yatskov <alex@foosoft.net> | 2019-12-03 18:30:22 -0800 |
---|---|---|
committer | Alex Yatskov <alex@foosoft.net> | 2019-12-03 18:30:22 -0800 |
commit | f9ea6206550ceee625ea93215a6e08d45a750086 (patch) | |
tree | 803fe11a788a631076b3fb11a98e50bb8b454396 /ext/mixed/js/japanese.js | |
parent | 08ad2779678cd447bd747c2b155ef9b5135fdf5d (diff) | |
parent | 3975aabf4dc283d49ec46d0ed7ead982b9fa7441 (diff) |
Merge branch 'master' into testing
Diffstat (limited to 'ext/mixed/js/japanese.js')
-rw-r--r-- | ext/mixed/js/japanese.js | 100 |
1 files changed, 94 insertions, 6 deletions
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js index 9f401da7..8b841b2e 100644 --- a/ext/mixed/js/japanese.js +++ b/ext/mixed/js/japanese.js @@ -26,6 +26,15 @@ function jpIsKana(c) { return wanakana.isKana(c); } +function jpIsJapaneseText(text) { + for (const c of text) { + if (jpIsKanji(c) || jpIsKana(c)) { + return true; + } + } + return false; +} + function jpKatakanaToHiragana(text) { let result = ''; for (const c of text) { @@ -39,36 +48,84 @@ function jpKatakanaToHiragana(text) { return result; } +function jpHiraganaToKatakana(text) { + let result = ''; + for (const c of text) { + if (wanakana.isHiragana(c)) { + result += wanakana.toKatakana(c); + } else { + result += c; + } + } + + return result; +} + +function jpToRomaji(text) { + return wanakana.toRomaji(text); +} + +function jpConvertReading(expressionFragment, readingFragment, readingMode) { + switch (readingMode) { + case 'hiragana': + return jpKatakanaToHiragana(readingFragment || ''); + case 'katakana': + return jpHiraganaToKatakana(readingFragment || ''); + case 'romaji': + if (readingFragment) { + return jpToRomaji(readingFragment); + } else { + if (jpIsKana(expressionFragment)) { + return jpToRomaji(expressionFragment); + } + } + return readingFragment; + default: + return readingFragment; + } +} + function jpDistributeFurigana(expression, reading) { const fallback = [{furigana: reading, text: expression}]; if (!reading) { return fallback; } + let isAmbiguous = false; const segmentize = (reading, groups) => { - if (groups.length === 0) { + if (groups.length === 0 || isAmbiguous) { return []; } const group = groups[0]; if (group.mode === 'kana') { - if (reading.startsWith(group.text)) { - const readingUsed = reading.substring(0, group.text.length); + if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) { const readingLeft = reading.substring(group.text.length); const segs = segmentize(readingLeft, groups.splice(1)); if (segs) { - return [{text: readingUsed}].concat(segs); + return [{text: group.text}].concat(segs); } } } else { + let foundSegments = null; for (let i = reading.length; i >= group.text.length; --i) { const readingUsed = reading.substring(0, i); const readingLeft = reading.substring(i); const segs = segmentize(readingLeft, groups.slice(1)); if (segs) { - return [{text: group.text, furigana: readingUsed}].concat(segs); + if (foundSegments !== null) { + // more than one way to segmentize the tail, mark as ambiguous + isAmbiguous = true; + return null; + } + foundSegments = [{text: group.text, furigana: readingUsed}].concat(segs); + } + // there is only one way to segmentize the last non-kana group + if (groups.length === 1) { + break; } } + return foundSegments; } }; @@ -84,5 +141,36 @@ function jpDistributeFurigana(expression, reading) { } } - return segmentize(reading, groups) || fallback; + const segments = segmentize(reading, groups); + if (segments && !isAmbiguous) { + return segments; + } + return fallback; +} + +function jpDistributeFuriganaInflected(expression, reading, source) { + const output = []; + + let stemLength = 0; + const shortest = Math.min(source.length, expression.length); + const sourceHiragana = jpKatakanaToHiragana(source); + const expressionHiragana = jpKatakanaToHiragana(expression); + while (stemLength < shortest && sourceHiragana[stemLength] === expressionHiragana[stemLength]) { + ++stemLength; + } + const offset = source.length - stemLength; + + const stemExpression = source.slice(0, source.length - offset); + const stemReading = reading.slice( + 0, offset === 0 ? reading.length : reading.length - expression.length + stemLength + ); + for (const segment of jpDistributeFurigana(stemExpression, stemReading)) { + output.push(segment); + } + + if (stemLength !== source.length) { + output.push({text: source.slice(stemLength)}); + } + + return output; } |