diff options
-rw-r--r-- | ext/bg/background.html | 1 | ||||
-rw-r--r-- | ext/bg/js/japanese.js | 106 | ||||
-rw-r--r-- | ext/bg/search.html | 1 | ||||
-rw-r--r-- | ext/bg/settings.html | 1 | ||||
-rw-r--r-- | ext/mixed/js/japanese.js | 124 | ||||
-rw-r--r-- | test/test-japanese.js | 1 |
6 files changed, 135 insertions, 99 deletions
diff --git a/ext/bg/background.html b/ext/bg/background.html index 44abe8fd..f7cf6e55 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -20,6 +20,7 @@ <script src="/mixed/js/core.js"></script> <script src="/mixed/js/dom.js"></script> + <script src="/mixed/js/japanese.js"></script> <script src="/bg/js/anki.js"></script> <script src="/bg/js/anki-note-builder.js"></script> diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index d2a577e6..c5873cf1 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -17,10 +17,11 @@ */ /* global + * jp * wanakana */ -const jp = (() => { +(() => { const HALFWIDTH_KATAKANA_MAPPING = new Map([ ['ヲ', 'ヲヺ-'], ['ァ', 'ァ--'], @@ -80,101 +81,13 @@ const jp = (() => { ['ン', 'ン--'] ]); - const HIRAGANA_RANGE = [0x3040, 0x309f]; - const KATAKANA_RANGE = [0x30a0, 0x30ff]; - const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; - - const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; - const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; - const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ - CJK_UNIFIED_IDEOGRAPHS_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, - CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE - ]; - const ITERATION_MARK_CODE_POINT = 0x3005; - // Japanese character ranges, roughly ordered in order of expected frequency - const JAPANESE_RANGES = [ - HIRAGANA_RANGE, - KATAKANA_RANGE, - - ...CJK_UNIFIED_IDEOGRAPHS_RANGES, - - [0xff66, 0xff9f], // Halfwidth katakana - - [0x30fb, 0x30fc], // Katakana punctuation - [0xff61, 0xff65], // Kana punctuation - [0x3000, 0x303f], // CJK punctuation - - [0xff10, 0xff19], // Fullwidth numbers - [0xff21, 0xff3a], // Fullwidth upper case Latin letters - [0xff41, 0xff5a], // Fullwidth lower case Latin letters - - [0xff01, 0xff0f], // Fullwidth punctuation 1 - [0xff1a, 0xff1f], // Fullwidth punctuation 2 - [0xff3b, 0xff3f], // Fullwidth punctuation 3 - [0xff5b, 0xff60], // Fullwidth punctuation 4 - [0xffe0, 0xffee] // Currency markers - ]; - - - // Character code testing functions - - function isCodePointKanji(codePoint) { - return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); - } - - function isCodePointKana(codePoint) { - return isCodePointInRanges(codePoint, KANA_RANGES); - } - - function isCodePointJapanese(codePoint) { - return isCodePointInRanges(codePoint, JAPANESE_RANGES); - } - function isCodePointInRanges(codePoint, ranges) { - for (const [min, max] of ranges) { - if (codePoint >= min && codePoint <= max) { - return true; - } - } - return false; - } + // Existing functions - - // String testing functions - - function isStringEntirelyKana(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (!isCodePointKana(c.codePointAt(0))) { - return false; - } - } - return true; - } - - function isStringPartiallyJapanese(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (isCodePointJapanese(c.codePointAt(0))) { - return true; - } - } - return false; - } + const isCodePointKanji = jp.isCodePointKanji; + const isStringEntirelyKana = jp.isStringEntirelyKana; // Conversion functions @@ -469,12 +382,7 @@ const jp = (() => { // Exports - return { - isCodePointKanji, - isCodePointKana, - isCodePointJapanese, - isStringEntirelyKana, - isStringPartiallyJapanese, + Object.assign(jp, { convertKatakanaToHiragana, convertHiraganaToKatakana, convertToRomaji, @@ -484,5 +392,5 @@ const jp = (() => { convertAlphabeticToKana, distributeFurigana, distributeFuriganaInflected - }; + }); })(); diff --git a/ext/bg/search.html b/ext/bg/search.html index f4c1a737..eacc1893 100644 --- a/ext/bg/search.html +++ b/ext/bg/search.html @@ -74,6 +74,7 @@ <script src="/mixed/js/core.js"></script> <script src="/mixed/js/dom.js"></script> <script src="/mixed/js/api.js"></script> + <script src="/mixed/js/japanese.js"></script> <script src="/bg/js/dictionary.js"></script> <script src="/bg/js/handlebars.js"></script> diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 0db76d71..cfe20be4 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -1088,6 +1088,7 @@ <script src="/mixed/js/core.js"></script> <script src="/mixed/js/dom.js"></script> <script src="/mixed/js/api.js"></script> + <script src="/mixed/js/japanese.js"></script> <script src="/bg/js/anki.js"></script> <script src="/bg/js/anki-note-builder.js"></script> diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js new file mode 100644 index 00000000..61a247b2 --- /dev/null +++ b/ext/mixed/js/japanese.js @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2020 Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +const jp = (() => { + const HIRAGANA_RANGE = [0x3040, 0x309f]; + const KATAKANA_RANGE = [0x30a0, 0x30ff]; + const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; + + const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; + const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; + const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ + CJK_UNIFIED_IDEOGRAPHS_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, + CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE + ]; + + // Japanese character ranges, roughly ordered in order of expected frequency + const JAPANESE_RANGES = [ + HIRAGANA_RANGE, + KATAKANA_RANGE, + + ...CJK_UNIFIED_IDEOGRAPHS_RANGES, + + [0xff66, 0xff9f], // Halfwidth katakana + + [0x30fb, 0x30fc], // Katakana punctuation + [0xff61, 0xff65], // Kana punctuation + [0x3000, 0x303f], // CJK punctuation + + [0xff10, 0xff19], // Fullwidth numbers + [0xff21, 0xff3a], // Fullwidth upper case Latin letters + [0xff41, 0xff5a], // Fullwidth lower case Latin letters + + [0xff01, 0xff0f], // Fullwidth punctuation 1 + [0xff1a, 0xff1f], // Fullwidth punctuation 2 + [0xff3b, 0xff3f], // Fullwidth punctuation 3 + [0xff5b, 0xff60], // Fullwidth punctuation 4 + [0xffe0, 0xffee] // Currency markers + ]; + + + // Character code testing functions + + function isCodePointKanji(codePoint) { + return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); + } + + function isCodePointKana(codePoint) { + return isCodePointInRanges(codePoint, KANA_RANGES); + } + + function isCodePointJapanese(codePoint) { + return isCodePointInRanges(codePoint, JAPANESE_RANGES); + } + + function isCodePointInRanges(codePoint, ranges) { + for (const [min, max] of ranges) { + if (codePoint >= min && codePoint <= max) { + return true; + } + } + return false; + } + + + // String testing functions + + function isStringEntirelyKana(str) { + if (str.length === 0) { return false; } + for (const c of str) { + if (!isCodePointKana(c.codePointAt(0))) { + return false; + } + } + return true; + } + + function isStringPartiallyJapanese(str) { + if (str.length === 0) { return false; } + for (const c of str) { + if (isCodePointJapanese(c.codePointAt(0))) { + return true; + } + } + return false; + } + + + // Exports + + return { + isCodePointKanji, + isCodePointKana, + isCodePointJapanese, + isStringEntirelyKana, + isStringPartiallyJapanese + }; +})(); diff --git a/test/test-japanese.js b/test/test-japanese.js index 78f63c0b..32e4d176 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -22,6 +22,7 @@ const {VM} = require('./yomichan-vm'); const vm = new VM(); vm.execute([ 'mixed/lib/wanakana.min.js', + 'mixed/js/japanese.js', 'bg/js/japanese.js' ]); const jp = vm.get('jp'); |