summaryrefslogtreecommitdiff
path: root/ext/js/language/japanese-wanakana.js
diff options
context:
space:
mode:
authortoasted-nutbread <toasted-nutbread@users.noreply.github.com>2024-01-28 07:22:47 -0500
committerGitHub <noreply@github.com>2024-01-28 12:22:47 +0000
commitacc013a1a8051d34322f0f5f91d7bdecc0a18843 (patch)
tree601344b2f047f395548ddfb16a83319af10464f9 /ext/js/language/japanese-wanakana.js
parenta51f1ab2dc675a49bfeeb08cc24b97eb8d888e4a (diff)
JapaneseUtil refactor (#555)
* Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions
Diffstat (limited to 'ext/js/language/japanese-wanakana.js')
-rw-r--r--ext/js/language/japanese-wanakana.js122
1 files changed, 122 insertions, 0 deletions
diff --git a/ext/js/language/japanese-wanakana.js b/ext/js/language/japanese-wanakana.js
new file mode 100644
index 00000000..b48ab6d6
--- /dev/null
+++ b/ext/js/language/japanese-wanakana.js
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import * as wanakana from '../../lib/wanakana.js';
+
+/**
+ * @param {string} text
+ * @param {?import('../general/text-source-map.js').TextSourceMap} sourceMap
+ * @param {number} sourceMapStart
+ * @returns {string}
+ */
+function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
+ const result = wanakana.toHiragana(text);
+
+ // Generate source mapping
+ if (sourceMap !== null) {
+ let i = 0;
+ let resultPos = 0;
+ const ii = text.length;
+ while (i < ii) {
+ // Find smallest matching substring
+ let iNext = i + 1;
+ let resultPosNext = result.length;
+ while (iNext < ii) {
+ const t = wanakana.toHiragana(text.substring(0, iNext));
+ if (t === result.substring(0, t.length)) {
+ resultPosNext = t.length;
+ break;
+ }
+ ++iNext;
+ }
+
+ // Merge characters
+ const removals = iNext - i - 1;
+ if (removals > 0) {
+ sourceMap.combine(sourceMapStart, removals);
+ }
+ ++sourceMapStart;
+
+ // Empty elements
+ const additions = resultPosNext - resultPos - 1;
+ for (let j = 0; j < additions; ++j) {
+ sourceMap.insert(sourceMapStart, 0);
+ ++sourceMapStart;
+ }
+
+ i = iNext;
+ resultPos = resultPosNext;
+ }
+ }
+
+ return result;
+}
+
+/**
+ * @param {string} text
+ * @returns {string}
+ */
+export function convertToKana(text) {
+ return wanakana.toKana(text);
+}
+
+/**
+ * @param {string} text
+ * @returns {string}
+ */
+export function convertToRomaji(text) {
+ return wanakana.toRomaji(text);
+}
+
+/**
+ * @param {string} text
+ * @param {?import('../general/text-source-map.js').TextSourceMap} sourceMap
+ * @returns {string}
+ */
+export function convertAlphabeticToKana(text, sourceMap = null) {
+ let part = '';
+ let result = '';
+
+ for (const char of text) {
+ // Note: 0x61 is the character code for 'a'
+ let c = /** @type {number} */ (char.codePointAt(0));
+ if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
+ c += (0x61 - 0x41);
+ } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
+ // NOP; c += (0x61 - 0x61);
+ } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth
+ c += (0x61 - 0xff21);
+ } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth
+ c += (0x61 - 0xff41);
+ } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash
+ c = 0x2d; // '-'
+ } else {
+ if (part.length > 0) {
+ result += convertAlphabeticPartToKana(part, sourceMap, result.length);
+ part = '';
+ }
+ result += char;
+ continue;
+ }
+ part += String.fromCodePoint(c);
+ }
+
+ if (part.length > 0) {
+ result += convertAlphabeticPartToKana(part, sourceMap, result.length);
+ }
+ return result;
+}