about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2020-10-13 21:48:21 -0400
committer: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2020-10-14 19:42:59 -0400
commit: 8058e491fe7e5f2d345d7a60dbfa906da72176c8 (patch)
tree: feaf662b8d0737dcb05e87a552e2cec62a6c279a
parent: da1b764272854086e3952d8b6fbc4c68cb5bbd77 (diff)
Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)
-rw-r--r-- ext/mixed/js/japanese.js 73
1 files changed, 59 insertions, 14 deletions
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js
index 801dec84..ee0ac777 100644
--- a/ext/mixed/js/japanese.js
+++ b/ext/mixed/js/japanese.js
@@ -19,10 +19,16 @@ const jp = (() => {
const ITERATION_MARK_CODE_POINT = 0x3005;
const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
+ const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5; // 'ヵ'
+ const KATAKANA_SMALL_KE_CODE_POINT = 0x30f6; // 'ヶ'
const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc;
const HIRAGANA_RANGE = [0x3040, 0x309f];
const KATAKANA_RANGE = [0x30a0, 0x30ff];
+
+ const HIRAGANA_CONVERSION_RANGE = [0x3041, 0x3096]; // 'ぁ' through 'ゖ'; same length as KATAKANA_CONVERSION_RANGE so a fixed offset maps one onto the other.
+ const KATAKANA_CONVERSION_RANGE = [0x30a1, 0x30f6]; // 'ァ' through 'ヶ'.
+
const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
@@ -129,6 +135,29 @@ const jp = (() => {
['ン', 'ン--']
]);
+ const VOWEL_TO_KANA_MAPPING = new Map([ // Vowel -> string of every kana (hiragana, then katakana) filed under that vowel; inverted into KANA_TO_VOWEL_MAPPING.
+ ['a', 'ぁあかがさざただなはばぱまゃやらゎわヵァアカガサザタダナハバパマャヤラヮワヵヷ'], // NOTE(review): 'ヵ' (U+30F5) appears twice; the first one (between 'わ' and 'ァ') was presumably meant to be 'ゕ' (U+3095) - confirm.
+ ['i', 'ぃいきぎしじちぢにひびぴみりゐィイキギシジチヂニヒビピミリヰヸ'],
+ ['u', 'ぅうくぐすずっつづぬふぶぷむゅゆるゥウクグスズッツヅヌフブプムュユルヴ'],
+ ['e', 'ぇえけげせぜてでねへべぺめれゑヶェエケゲセゼテデネヘベペメレヱヶヹ'], // NOTE(review): 'ヶ' (U+30F6) appears twice; the first one (between 'ゑ' and 'ェ') was presumably meant to be 'ゖ' (U+3096) - confirm.
+ ['o', 'ぉおこごそぞとどのほぼぽもょよろをォオコゴソゾトドノホボポモョヨロヲヺ'],
+ ['', 'のノ'] // Kana with no vowel. NOTE(review): 'の' and 'ノ' are also listed under 'o'; this entry comes last, so it wins when the map is inverted and a prolonged sound mark after 'の'/'ノ' is never expanded. Presumably 'んン' was intended - confirm.
+ ]);
+
+ const KANA_TO_VOWEL_MAPPING = (() => { // Built once: inverse of VOWEL_TO_KANA_MAPPING, keyed by a single kana character with its vowel string as the value.
+ const map = new Map();
+ for (const [vowel, characters] of VOWEL_TO_KANA_MAPPING) {
+ for (const character of characters) { // Iterating a string yields one code point per step.
+ map.set(character, vowel); // A character listed under more than one vowel keeps whichever entry is visited last.
+ }
+ }
+ return map;
+ })();
+
+
+ function isCodePointInRange(codePoint, [min, max]) { // Returns true when codePoint lies within the [min, max] pair; both bounds are inclusive.
+ return (codePoint >= min && codePoint <= max);
+ }
function isCodePointInRanges(codePoint, ranges) {
for (const [min, max] of ranges) {
@@ -139,6 +168,17 @@ const jp = (() => {
return false;
}
+ function getProlongedHiragana(previousCharacter) { // Picks the hiragana that replaces a prolonged sound mark, based on the vowel KANA_TO_VOWEL_MAPPING records for previousCharacter; returns null when there is none.
+ switch (KANA_TO_VOWEL_MAPPING.get(previousCharacter)) {
+ case 'a': return 'あ';
+ case 'i': return 'い';
+ case 'u': return 'う';
+ case 'e': return 'え';
+ case 'o': return 'う'; // 'o' yields 'う' rather than 'お' - presumably following the usual おう spelling of a long o; confirm this is intended.
+ default: return null; // Reached for characters absent from the map (undefined) and for those filed under the empty vowel ''.
+ }
+ }
+
function getWanakana() {
try {
if (typeof wanakana !== 'undefined') {
@@ -219,30 +259,35 @@ const jp = (() => {
// Conversion functions
convertKatakanaToHiragana(text) {
- const wanakana = this._getWanakana();
let result = '';
- for (const c of text) {
- if (wanakana.isKatakana(c)) {
- result += wanakana.toHiragana(c);
- } else {
- result += c;
+ const offset = (HIRAGANA_CONVERSION_RANGE[0] - KATAKANA_CONVERSION_RANGE[0]); // Negative code point distance from a katakana to its hiragana counterpart.
+ for (let char of text) { // Walks the input one code point at a time; char is reassigned when a replacement applies.
+ const codePoint = char.codePointAt(0);
+ if (codePoint === KATAKANA_SMALL_KA_CODE_POINT || codePoint === KATAKANA_SMALL_KE_CODE_POINT) { // Checked first because both values also fall inside KATAKANA_CONVERSION_RANGE.
+ // No change
+ } else if (codePoint === KANA_PROLONGED_SOUND_MARK_CODE_POINT) { // The prolonged sound mark is replaced by a vowel derived from the output so far.
+ if (result.length > 0) { // A mark at the very start has no preceding character and is kept as-is.
+ const char2 = getProlongedHiragana(result[result.length - 1]); // Looks at the last UTF-16 code unit already appended (i.e. the already-converted character).
+ if (char2 !== null) { char = char2; } // With no known vowel the mark itself is emitted unchanged.
+ }
+ } else if (isCodePointInRange(codePoint, KATAKANA_CONVERSION_RANGE)) {
+ char = String.fromCodePoint(codePoint + offset); // Shift into the hiragana block.
}
+ result += char; // Everything not handled above is copied through untouched.
}
-
return result;
}
convertHiraganaToKatakana(text) {
- const wanakana = this._getWanakana();
let result = '';
- for (const c of text) {
- if (wanakana.isHiragana(c)) {
- result += wanakana.toKatakana(c);
- } else {
- result += c;
+ const offset = (KATAKANA_CONVERSION_RANGE[0] - HIRAGANA_CONVERSION_RANGE[0]); // Positive code point distance from a hiragana to its katakana counterpart.
+ for (let char of text) { // Walks the input one code point at a time; char is reassigned when a replacement applies.
+ const codePoint = char.codePointAt(0);
+ if (isCodePointInRange(codePoint, HIRAGANA_CONVERSION_RANGE)) { // The range includes U+3095/U+3096, so 'ゕ'/'ゖ' are converted here even though the reverse conversion leaves 'ヵ'/'ヶ' alone.
+ char = String.fromCodePoint(codePoint + offset); // Shift into the katakana block.
}
+ result += char; // Characters outside the range are copied through untouched.
}
-
return result;
}