JapaneseUtil refactor (#555)

* Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2024-01-28 07:22:47 -0500
committer: GitHub <noreply@github.com> 2024-01-28 12:22:47 +0000
commit: acc013a1a8051d34322f0f5f91d7bdecc0a18843 (patch)
tree: 601344b2f047f395548ddfb16a83319af10464f9 /ext/js/language/japanese-wanakana.js
parent: a51f1ab2dc675a49bfeeb08cc24b97eb8d888e4a (diff)
1 files changed, 122 insertions, 0 deletions
diff --git a/ext/js/language/japanese-wanakana.js b/ext/js/language/japanese-wanakana.js
new file mode 100644
index 00000000..b48ab6d6
--- /dev/null
+++ b/ext/js/language/japanese-wanakana.js
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2024  Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import * as wanakana from '../../lib/wanakana.js';
+
+/**
+ * Converts a run of romaji to hiragana with `wanakana.toHiragana` and, when a source map is given,
+ * edits that source map in place (through its `combine` and `insert` methods) to follow the converted text.
+ *
+ * NOTE(review): the comments below assume `combine(index, count)` merges the `count` entries after `index`
+ * into the entry at `index`, and `insert(index, value)` adds a new entry at `index` - confirm in TextSourceMap.
+ * @param {string} text The text to convert. The caller visible in this file only passes lowercase ASCII letters and '-'.
+ * @param {?import('../general/text-source-map.js').TextSourceMap} sourceMap The source map to update, or `null` to skip source mapping.
+ * @param {number} sourceMapStart Index handed to `sourceMap.combine`/`sourceMap.insert` for the first character of `text`;
+ *   the caller visible in this file passes the length of the output built so far.
+ * @returns {string} The value of `wanakana.toHiragana(text)`.
+ */
+function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
+    const result = wanakana.toHiragana(text);
+
+    // Generate source mapping (skipped entirely when no source map was supplied)
+    if (sourceMap !== null) {
+        let i = 0; // Number of characters of `text` consumed so far
+        let resultPos = 0; // Number of characters of `result` accounted for so far
+        const ii = text.length;
+        while (i < ii) {
+            // Find smallest matching substring: the shortest prefix of `text` ending after position i whose own conversion is a prefix of `result`
+            let iNext = i + 1;
+            let resultPosNext = result.length; // Fallback when no shorter prefix matches: the rest of `text` pairs with the rest of `result`
+            while (iNext < ii) {
+                const t = wanakana.toHiragana(text.substring(0, iNext));
+                if (t === result.substring(0, t.length)) {
+                    resultPosNext = t.length;
+                    break;
+                }
+                ++iNext;
+            }
+
+            // Merge characters: this step consumed (iNext - i) input characters; the ones beyond the first are combined at sourceMapStart
+            const removals = iNext - i - 1;
+            if (removals > 0) {
+                sourceMap.combine(sourceMapStart, removals);
+            }
+            ++sourceMapStart; // Step past the entry for this chunk
+
+            // Empty elements: this step produced (resultPosNext - resultPos) output characters; a 0 entry is inserted for each one beyond the first
+            const additions = resultPosNext - resultPos - 1;
+            for (let j = 0; j < additions; ++j) {
+                sourceMap.insert(sourceMapStart, 0);
+                ++sourceMapStart;
+            }
+
+            i = iNext;
+            resultPos = resultPosNext;
+        }
+    }
+
+    return result;
+}
+
+/**
+ * @param {string} text
+ * @returns {string}
+ */
+export function convertToKana(text) {
+    return wanakana.toKana(text);
+}
+
+/**
+ * @param {string} text
+ * @returns {string}
+ */
+export function convertToRomaji(text) {
+    return wanakana.toRomaji(text);
+}
+
+/**
+ * @param {string} text
+ * @param {?import('../general/text-source-map.js').TextSourceMap} sourceMap
+ * @returns {string}
+ */
+export function convertAlphabeticToKana(text, sourceMap = null) {
+    let part = '';
+    let result = '';
+
+    for (const char of text) {
+        // Note: 0x61 is the character code for 'a'
+        let c = /** @type {number} */ (char.codePointAt(0));
+        if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
+            c += (0x61 - 0x41);
+        } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
+            // NOP; c += (0x61 - 0x61);
+        } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth
+            c += (0x61 - 0xff21);
+        } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth
+            c += (0x61 - 0xff41);
+        } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash
+            c = 0x2d; // '-'
+        } else {
+            if (part.length > 0) {
+                result += convertAlphabeticPartToKana(part, sourceMap, result.length);
+                part = '';
+            }
+            result += char;
+            continue;
+        }
+        part += String.fromCodePoint(c);
+    }
+
+    if (part.length > 0) {
+        result += convertAlphabeticPartToKana(part, sourceMap, result.length);
+    }
+    return result;
+}
author	toasted-nutbread <toasted-nutbread@users.noreply.github.com>	2024-01-28 07:22:47 -0500
committer	GitHub <noreply@github.com>	2024-01-28 12:22:47 +0000
commit	acc013a1a8051d34322f0f5f91d7bdecc0a18843 (patch)
tree	601344b2f047f395548ddfb16a83319af10464f9 /ext/js/language/japanese-wanakana.js
parent	a51f1ab2dc675a49bfeeb08cc24b97eb8d888e4a (diff)