about summary refs log tree commit diff
diff options
context:
space:
mode:
author toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2020-03-21 13:18:34 -0400
committer toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2020-03-21 13:18:34 -0400
commit 77a2cc60e9a4a89da354cadb1bf060204ee3b951 (patch)
tree 32c35aaee08a247e95e65e8459c779c4837d0f62
parent 487d4b239b88fd57fefb0ec3a9d1bd0d25984660 (diff)
Move basic string/character testing functions into mixed/js/japanese.js
-rw-r--r-- ext/bg/background.html 1
-rw-r--r-- ext/bg/js/japanese.js 106
-rw-r--r-- ext/bg/search.html 1
-rw-r--r-- ext/bg/settings.html 1
-rw-r--r-- ext/mixed/js/japanese.js 124
-rw-r--r-- test/test-japanese.js 1
6 files changed, 135 insertions, 99 deletions
diff --git a/ext/bg/background.html b/ext/bg/background.html
index 44abe8fd..f7cf6e55 100644
--- a/ext/bg/background.html
+++ b/ext/bg/background.html
@@ -20,6 +20,7 @@
<script src="/mixed/js/core.js"></script>
<script src="/mixed/js/dom.js"></script>
+ <script src="/mixed/js/japanese.js"></script>
<script src="/bg/js/anki.js"></script>
<script src="/bg/js/anki-note-builder.js"></script>
diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js
index d2a577e6..c5873cf1 100644
--- a/ext/bg/js/japanese.js
+++ b/ext/bg/js/japanese.js
@@ -17,10 +17,11 @@
*/
/* global
+ * jp
* wanakana
*/
-const jp = (() => {
+(() => {
const HALFWIDTH_KATAKANA_MAPPING = new Map([
['ヲ', 'ヲヺ-'],
['ァ', 'ァ--'],
@@ -80,101 +81,13 @@ const jp = (() => {
['ン', 'ン--']
]);
- const HIRAGANA_RANGE = [0x3040, 0x309f];
- const KATAKANA_RANGE = [0x30a0, 0x30ff];
- const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
-
- const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
- const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
- const CJK_UNIFIED_IDEOGRAPHS_RANGES = [
- CJK_UNIFIED_IDEOGRAPHS_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
- CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
- ];
-
const ITERATION_MARK_CODE_POINT = 0x3005;
- // Japanese character ranges, roughly ordered in order of expected frequency
- const JAPANESE_RANGES = [
- HIRAGANA_RANGE,
- KATAKANA_RANGE,
-
- ...CJK_UNIFIED_IDEOGRAPHS_RANGES,
-
- [0xff66, 0xff9f], // Halfwidth katakana
-
- [0x30fb, 0x30fc], // Katakana punctuation
- [0xff61, 0xff65], // Kana punctuation
- [0x3000, 0x303f], // CJK punctuation
-
- [0xff10, 0xff19], // Fullwidth numbers
- [0xff21, 0xff3a], // Fullwidth upper case Latin letters
- [0xff41, 0xff5a], // Fullwidth lower case Latin letters
-
- [0xff01, 0xff0f], // Fullwidth punctuation 1
- [0xff1a, 0xff1f], // Fullwidth punctuation 2
- [0xff3b, 0xff3f], // Fullwidth punctuation 3
- [0xff5b, 0xff60], // Fullwidth punctuation 4
- [0xffe0, 0xffee] // Currency markers
- ];
-
-
- // Character code testing functions
-
- function isCodePointKanji(codePoint) {
- return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES);
- }
-
- function isCodePointKana(codePoint) {
- return isCodePointInRanges(codePoint, KANA_RANGES);
- }
-
- function isCodePointJapanese(codePoint) {
- return isCodePointInRanges(codePoint, JAPANESE_RANGES);
- }
- function isCodePointInRanges(codePoint, ranges) {
- for (const [min, max] of ranges) {
- if (codePoint >= min && codePoint <= max) {
- return true;
- }
- }
- return false;
- }
+ // Existing functions
-
- // String testing functions
-
- function isStringEntirelyKana(str) {
- if (str.length === 0) { return false; }
- for (const c of str) {
- if (!isCodePointKana(c.codePointAt(0))) {
- return false;
- }
- }
- return true;
- }
-
- function isStringPartiallyJapanese(str) {
- if (str.length === 0) { return false; }
- for (const c of str) {
- if (isCodePointJapanese(c.codePointAt(0))) {
- return true;
- }
- }
- return false;
- }
+ const isCodePointKanji = jp.isCodePointKanji;
+ const isStringEntirelyKana = jp.isStringEntirelyKana;
// Conversion functions
@@ -469,12 +382,7 @@ const jp = (() => {
// Exports
- return {
- isCodePointKanji,
- isCodePointKana,
- isCodePointJapanese,
- isStringEntirelyKana,
- isStringPartiallyJapanese,
+ Object.assign(jp, {
convertKatakanaToHiragana,
convertHiraganaToKatakana,
convertToRomaji,
@@ -484,5 +392,5 @@ const jp = (() => {
convertAlphabeticToKana,
distributeFurigana,
distributeFuriganaInflected
- };
+ });
})();
diff --git a/ext/bg/search.html b/ext/bg/search.html
index f4c1a737..eacc1893 100644
--- a/ext/bg/search.html
+++ b/ext/bg/search.html
@@ -74,6 +74,7 @@
<script src="/mixed/js/core.js"></script>
<script src="/mixed/js/dom.js"></script>
<script src="/mixed/js/api.js"></script>
+ <script src="/mixed/js/japanese.js"></script>
<script src="/bg/js/dictionary.js"></script>
<script src="/bg/js/handlebars.js"></script>
diff --git a/ext/bg/settings.html b/ext/bg/settings.html
index 0db76d71..cfe20be4 100644
--- a/ext/bg/settings.html
+++ b/ext/bg/settings.html
@@ -1088,6 +1088,7 @@
<script src="/mixed/js/core.js"></script>
<script src="/mixed/js/dom.js"></script>
<script src="/mixed/js/api.js"></script>
+ <script src="/mixed/js/japanese.js"></script>
<script src="/bg/js/anki.js"></script>
<script src="/bg/js/anki-note-builder.js"></script>
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js
new file mode 100644
index 00000000..61a247b2
--- /dev/null
+++ b/ext/mixed/js/japanese.js
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2020 Alex Yatskov <alex@foosoft.net>
+ * Author: Alex Yatskov <alex@foosoft.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+const jp = (() => {
+ const HIRAGANA_RANGE = [0x3040, 0x309f];
+ const KATAKANA_RANGE = [0x30a0, 0x30ff];
+ const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
+
+ const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
+ const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
+ const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
+ const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
+ const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
+ const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
+ const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
+ const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
+ const CJK_UNIFIED_IDEOGRAPHS_RANGES = [
+ CJK_UNIFIED_IDEOGRAPHS_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
+ CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
+ ];
+
+ // Japanese character ranges, roughly ordered in order of expected frequency
+ const JAPANESE_RANGES = [
+ HIRAGANA_RANGE,
+ KATAKANA_RANGE,
+
+ ...CJK_UNIFIED_IDEOGRAPHS_RANGES,
+
+ [0xff66, 0xff9f], // Halfwidth katakana
+
+ [0x30fb, 0x30fc], // Katakana punctuation
+ [0xff61, 0xff65], // Kana punctuation
+ [0x3000, 0x303f], // CJK punctuation
+
+ [0xff10, 0xff19], // Fullwidth numbers
+ [0xff21, 0xff3a], // Fullwidth upper case Latin letters
+ [0xff41, 0xff5a], // Fullwidth lower case Latin letters
+
+ [0xff01, 0xff0f], // Fullwidth punctuation 1
+ [0xff1a, 0xff1f], // Fullwidth punctuation 2
+ [0xff3b, 0xff3f], // Fullwidth punctuation 3
+ [0xff5b, 0xff60], // Fullwidth punctuation 4
+ [0xffe0, 0xffee] // Currency markers
+ ];
+
+
+ // Character code testing functions
+
+ function isCodePointKanji(codePoint) {
+ return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES);
+ }
+
+ function isCodePointKana(codePoint) {
+ return isCodePointInRanges(codePoint, KANA_RANGES);
+ }
+
+ function isCodePointJapanese(codePoint) {
+ return isCodePointInRanges(codePoint, JAPANESE_RANGES);
+ }
+
+ function isCodePointInRanges(codePoint, ranges) {
+ for (const [min, max] of ranges) {
+ if (codePoint >= min && codePoint <= max) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+
+ // String testing functions
+
+ function isStringEntirelyKana(str) {
+ if (str.length === 0) { return false; }
+ for (const c of str) {
+ if (!isCodePointKana(c.codePointAt(0))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ function isStringPartiallyJapanese(str) {
+ if (str.length === 0) { return false; }
+ for (const c of str) {
+ if (isCodePointJapanese(c.codePointAt(0))) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+
+ // Exports
+
+ return {
+ isCodePointKanji,
+ isCodePointKana,
+ isCodePointJapanese,
+ isStringEntirelyKana,
+ isStringPartiallyJapanese
+ };
+})();
diff --git a/test/test-japanese.js b/test/test-japanese.js
index 78f63c0b..32e4d176 100644
--- a/test/test-japanese.js
+++ b/test/test-japanese.js
@@ -22,6 +22,7 @@ const {VM} = require('./yomichan-vm');
const vm = new VM();
vm.execute([
'mixed/lib/wanakana.min.js',
+ 'mixed/js/japanese.js',
'bg/js/japanese.js'
]);
const jp = vm.get('jp');