1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
/*
* Copyright (C) 2016 Alex Yatskov <alex@foosoft.net>
* Author: Alex Yatskov <alex@foosoft.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Tests whether the first UTF-16 code unit of `c` falls in one of two
 * hard-coded ranges: [0x4e00, 0x9fb0) or [0x3400, 0x4dc0).
 * NOTE(review): these look like the CJK Unified Ideographs block (up to
 * U+9FAF) and CJK Extension A - confirm against the Unicode charts.
 *
 * @param {string} c - Character to test; only its first code unit is examined.
 * @returns {boolean} True when the code unit is inside either range. An empty
 *   string yields false because charCodeAt(0) is NaN.
 */
function jpIsKanji(c) {
    const unit = c.charCodeAt(0);
    const inMainRange = unit >= 0x4e00 && unit < 0x9fb0;
    if (inMainRange) {
        return true;
    }
    return unit >= 0x3400 && unit < 0x4dc0;
}
/**
 * Reports whether `c` is kana by delegating to the WanaKana library.
 *
 * @param {string} c - Text to classify (callers in this file pass one character).
 * @returns {*} Whatever `wanakana.isKana(c)` returns (presumably a boolean).
 */
function jpIsKana(c) {
    const verdict = wanakana.isKana(c);
    return verdict;
}
/**
 * Checks whether `text` contains at least one character that jpIsKanji or
 * jpIsKana accepts.
 *
 * @param {string} text - Text to scan, one code point at a time.
 * @returns {boolean} True as soon as one kanji or kana character is found;
 *   false for text without any (including the empty string).
 */
function jpIsJapaneseText(text) {
    const isJapaneseChar = (ch) => Boolean(jpIsKanji(ch) || jpIsKana(ch));
    return Array.from(text).some(isJapaneseChar);
}
/**
 * Builds a copy of `text` in which every character that WanaKana classifies as
 * katakana is replaced by the result of `wanakana.toHiragana` for that
 * character. All other characters are copied through unchanged.
 *
 * @param {string} text - Input text; processed one code point at a time.
 * @returns {string} The converted text.
 */
function jpKatakanaToHiragana(text) {
    const convert = (ch) => (wanakana.isKatakana(ch) ? wanakana.toHiragana(ch) : ch);
    return [...text].reduce((converted, ch) => converted + convert(ch), '');
}
/**
 * Distributes a reading over an expression so that each run of kanji is paired
 * with the part of the reading that belongs to it.
 *
 * The expression is split into alternating runs of "kanji" (characters accepted
 * by jpIsKanji, plus the iteration mark U+3005) and "kana" (everything else).
 * A kana run must appear literally in the reading, compared after
 * katakana-to-hiragana conversion; the reading text between kana runs is
 * assigned to the kanji runs, trying the longest candidate first.
 *
 * @param {string} expression - Text as written.
 * @param {string} reading - Reading of the whole expression.
 * @returns {Array<{text: string, furigana?: string}>} Segments in expression
 *   order; kana segments carry no `furigana` property. If `reading` is empty,
 *   or it cannot be aligned with the expression so that it is consumed
 *   completely, a single segment pairing the whole expression with the whole
 *   reading is returned. An empty expression with a non-empty reading yields
 *   an empty array.
 */
function jpDistributeFurigana(expression, reading) {
    const fallback = [{furigana: reading, text: expression}];
    if (!reading) {
        return fallback;
    }

    // Partition the expression into maximal runs of the same mode.
    const groups = [];
    let modePrev = null;
    for (const c of expression) {
        const modeCurr = jpIsKanji(c) || c.charCodeAt(0) === 0x3005 /* noma */ ? 'kanji' : 'kana';
        if (modeCurr === modePrev) {
            groups[groups.length - 1].text += c;
        } else {
            groups.push({mode: modeCurr, text: c});
            modePrev = modeCurr;
        }
    }

    if (groups.length === 0) {
        // Nothing to annotate; callers that pass an empty stem expect no segments.
        return [];
    }

    // Aligns groups[index..] with readingLeft. Returns the segment list, or null
    // when no alignment exists. The group list is never mutated; the recursion
    // only advances an index.
    const segmentize = (readingLeft, index) => {
        if (index === groups.length) {
            // Leftover reading means the alignment silently dropped characters.
            return readingLeft.length === 0 ? [] : null;
        }

        const group = groups[index];
        if (group.mode === 'kana') {
            if (!jpKatakanaToHiragana(readingLeft).startsWith(jpKatakanaToHiragana(group.text))) {
                return null;
            }
            const rest = segmentize(readingLeft.substring(group.text.length), index + 1);
            return rest ? [{text: group.text}].concat(rest) : null;
        }

        // Kanji run: each character needs at least one reading character, so the
        // candidate length never drops below the run length. Longest first.
        for (let i = readingLeft.length; i >= group.text.length; --i) {
            const rest = segmentize(readingLeft.substring(i), index + 1);
            if (rest) {
                return [{text: group.text, furigana: readingLeft.substring(0, i)}].concat(rest);
            }
        }
        return null;
    };

    return segmentize(reading, 0) || fallback;
}
/**
 * Produces furigana segments for `source`, using `expression` and its
 * `reading` as the reference.
 * NOTE(review): `source` is presumably the inflected text as found and
 * `expression` its dictionary form - confirm against callers.
 *
 * The leading part of `source` that lines up with `expression` (the "stem") is
 * annotated through jpDistributeFurigana; the remaining tail of `source` is
 * appended as one plain segment without furigana.
 *
 * @param {string} expression - Reference form of the term.
 * @param {string} reading - Reading of `expression`.
 * @param {string} source - Text to annotate.
 * @returns {Array<{text: string, furigana?: string}>} Stem segments followed,
 *   when the stem is shorter than `source`, by a `{text}` segment for the tail.
 */
function jpDistributeFuriganaInflected(expression, reading, source) {
    const limit = Math.min(source.length, expression.length);
    const sourceHiragana = jpKatakanaToHiragana(source);
    const expressionHiragana = jpKatakanaToHiragana(expression);

    // Extend the stem while positions agree. A non-kana character in the source
    // always counts as agreeing, because the expression may be written with a
    // different kanji than the source.
    let stemLength = 0;
    for (; stemLength < limit; ++stemLength) {
        const kanaMismatch =
            jpIsKana(source[stemLength]) &&
            sourceHiragana[stemLength] !== expressionHiragana[stemLength];
        if (kanaMismatch) {
            break;
        }
    }

    const hasTail = stemLength !== source.length;

    // When a tail exists, drop as many trailing reading characters as the
    // expression has characters beyond the stem.
    let readingEnd = reading.length;
    if (hasTail) {
        readingEnd = reading.length - expression.length + stemLength;
    }

    const stemText = source.slice(0, stemLength);
    const stemReading = reading.slice(0, readingEnd);
    const output = [...jpDistributeFurigana(stemText, stemReading)];

    if (hasTail) {
        output.push({text: source.slice(stemLength)});
    }
    return output;
}
|