summary | refs | log | tree | commit | diff
path: root/ext/bg/js/japanese.js
diff options
context:
space:
mode:
author: Alex Yatskov <alex@foosoft.net> 2020-04-18 11:05:35 -0700
committer: Alex Yatskov <alex@foosoft.net> 2020-04-18 11:05:35 -0700
commit: f2186c51e4ef219d158735d30a32bbf3e49c4e1a (patch)
tree: 61de35513b6182e83b626c1571bc2aa97234e3b0 /ext/bg/js/japanese.js
parent: b3e79d0e396353de2cbefb945412bfda4bd6ca99 (diff)
parent: 9e7750125c40d1c4121e8d015957606721bede7c (diff)
Merge branch 'master' into testing
Diffstat (limited to 'ext/bg/js/japanese.js')
-rw-r--r-- ext/bg/js/japanese.js 64
1 files changed, 51 insertions, 13 deletions
diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js
index 5c49cca7..ac81acb5 100644
--- a/ext/bg/js/japanese.js
+++ b/ext/bg/js/japanese.js
@@ -82,6 +82,9 @@
const ITERATION_MARK_CODE_POINT = 0x3005; // U+3005 々 (ideographic iteration mark)
+ const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; // U+3063 っ (hiragana small tsu)
+ const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; // U+30C3 ッ (katakana small tsu)
+ const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc; // U+30FC ー (prolonged sound mark)
// Existing functions
@@ -121,25 +124,25 @@
return wanakana.toRomaji(text);
}
- function convertReading(expressionFragment, readingFragment, readingMode) {
+ function convertReading(expression, reading, readingMode) { // Converts `reading` into the script selected by `readingMode`; parameters were renamed from expressionFragment/readingFragment.
switch (readingMode) {
case 'hiragana':
- return convertKatakanaToHiragana(readingFragment || '');
+ return convertKatakanaToHiragana(reading); // The `|| ''` fallback of the removed line is gone; `reading` is passed through as-is.
case 'katakana':
- return convertHiraganaToKatakana(readingFragment || '');
+ return convertHiraganaToKatakana(reading); // Same change as the 'hiragana' case: no empty-string fallback any more.
case 'romaji':
- if (readingFragment) {
- return convertToRomaji(readingFragment);
+ if (reading) {
+ return convertToRomaji(reading);
} else {
- if (isStringEntirelyKana(expressionFragment)) {
- return convertToRomaji(expressionFragment);
+ if (isStringEntirelyKana(expression)) { // No reading available: romanize the expression itself when it is all kana.
+ return convertToRomaji(expression);
}
}
- return readingFragment;
+ return reading; // Reached only when `reading` is falsy and `expression` is not entirely kana.
case 'none':
- return null;
+ return ''; // Previously returned null; now an empty string.
default:
- return readingFragment;
+ return reading; // Unrecognized modes return the reading unchanged.
}
}
@@ -297,7 +300,7 @@
const readingLeft = reading2.substring(group.text.length);
const segs = segmentize(readingLeft, groups.splice(1));
if (segs) {
- return [{text: group.text}].concat(segs);
+ return [{text: group.text, furigana: ''}].concat(segs);
}
}
} else {
@@ -365,13 +368,47 @@
}
if (stemLength !== source.length) {
- output.push({text: source.substring(stemLength)});
+ output.push({text: source.substring(stemLength), furigana: ''});
}
return output;
}
+ // Miscellaneous
+
+ function collapseEmphaticSequences(text, fullCollapse, sourceMap=null) { // Returns `text` with runs of a repeated small tsu or prolonged sound mark shortened (or removed when `fullCollapse` is truthy).
+ let result = '';
+ let collapseCodePoint = -1; // Code point of the run currently being collapsed; -1 when not inside a run.
+ const hasSourceMap = (sourceMap !== null);
+ for (const char of text) { // for...of walks the string by code point rather than by UTF-16 unit.
+ const c = char.codePointAt(0);
+ if (
+ c === HIRAGANA_SMALL_TSU_CODE_POINT ||
+ c === KATAKANA_SMALL_TSU_CODE_POINT ||
+ c === KANA_PROLONGED_SOUND_MARK_CODE_POINT
+ ) {
+ if (collapseCodePoint !== c) { // First character of a new run (a different emphatic character also starts a new run).
+ collapseCodePoint = c;
+ if (!fullCollapse) { // Partial collapse keeps the first character of each run; full collapse drops it as well.
+ result += char;
+ continue;
+ }
+ }
+ } else {
+ collapseCodePoint = -1; // Any other character ends the current run and is copied through unchanged.
+ result += char;
+ continue;
+ }
+
+ if (hasSourceMap) { // Only reached for characters that were omitted from `result`.
+ sourceMap.combine(Math.max(0, result.length - 1), 1); // NOTE(review): `combine` is defined outside this view; presumably merges the dropped character into the last emitted position (index clamped to 0) - confirm.
+ }
+ }
+ return result;
+ }
+
+
// Exports
Object.assign(jp, {
@@ -383,6 +420,7 @@
convertHalfWidthKanaToFullWidth,
convertAlphabeticToKana,
distributeFurigana,
- distributeFuriganaInflected
+ distributeFuriganaInflected,
+ collapseEmphaticSequences
});
})();