summary | refs | log | tree | commit | diff
path: root/ext/bg/js
diff options
context:
space:
mode:
Diffstat (limited to 'ext/bg/js')
-rw-r--r-- ext/bg/js/japanese.js 106
1 files changed, 7 insertions, 99 deletions
diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js
index d2a577e6..c5873cf1 100644
--- a/ext/bg/js/japanese.js
+++ b/ext/bg/js/japanese.js
@@ -17,10 +17,11 @@
*/
/* global
+ * jp
* wanakana
*/
-const jp = (() => {
+(() => {
const HALFWIDTH_KATAKANA_MAPPING = new Map([
['ヲ', 'ヲヺ-'],
['ァ', 'ァ--'],
@@ -80,101 +81,13 @@ const jp = (() => {
['ン', 'ン--']
]);
- const HIRAGANA_RANGE = [0x3040, 0x309f];
- const KATAKANA_RANGE = [0x30a0, 0x30ff];
- const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
-
- const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
- const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
- const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
- const CJK_UNIFIED_IDEOGRAPHS_RANGES = [
- CJK_UNIFIED_IDEOGRAPHS_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
- CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
- ];
-
const ITERATION_MARK_CODE_POINT = 0x3005;
- // Japanese character ranges, roughly ordered in order of expected frequency
- const JAPANESE_RANGES = [
- HIRAGANA_RANGE,
- KATAKANA_RANGE,
-
- ...CJK_UNIFIED_IDEOGRAPHS_RANGES,
-
- [0xff66, 0xff9f], // Halfwidth katakana
-
- [0x30fb, 0x30fc], // Katakana punctuation
- [0xff61, 0xff65], // Kana punctuation
- [0x3000, 0x303f], // CJK punctuation
-
- [0xff10, 0xff19], // Fullwidth numbers
- [0xff21, 0xff3a], // Fullwidth upper case Latin letters
- [0xff41, 0xff5a], // Fullwidth lower case Latin letters
-
- [0xff01, 0xff0f], // Fullwidth punctuation 1
- [0xff1a, 0xff1f], // Fullwidth punctuation 2
- [0xff3b, 0xff3f], // Fullwidth punctuation 3
- [0xff5b, 0xff60], // Fullwidth punctuation 4
- [0xffe0, 0xffee] // Currency markers
- ];
-
-
- // Character code testing functions
-
- function isCodePointKanji(codePoint) {
- return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES);
- }
-
- function isCodePointKana(codePoint) {
- return isCodePointInRanges(codePoint, KANA_RANGES);
- }
-
- function isCodePointJapanese(codePoint) {
- return isCodePointInRanges(codePoint, JAPANESE_RANGES);
- }
- function isCodePointInRanges(codePoint, ranges) {
- for (const [min, max] of ranges) {
- if (codePoint >= min && codePoint <= max) {
- return true;
- }
- }
- return false;
- }
+ // Existing functions
-
- // String testing functions
-
- function isStringEntirelyKana(str) {
- if (str.length === 0) { return false; }
- for (const c of str) {
- if (!isCodePointKana(c.codePointAt(0))) {
- return false;
- }
- }
- return true;
- }
-
- function isStringPartiallyJapanese(str) {
- if (str.length === 0) { return false; }
- for (const c of str) {
- if (isCodePointJapanese(c.codePointAt(0))) {
- return true;
- }
- }
- return false;
- }
+ const isCodePointKanji = jp.isCodePointKanji;
+ const isStringEntirelyKana = jp.isStringEntirelyKana;
// Conversion functions
@@ -469,12 +382,7 @@ const jp = (() => {
// Exports
- return {
- isCodePointKanji,
- isCodePointKana,
- isCodePointJapanese,
- isStringEntirelyKana,
- isStringPartiallyJapanese,
+ Object.assign(jp, {
convertKatakanaToHiragana,
convertHiraganaToKatakana,
convertToRomaji,
@@ -484,5 +392,5 @@ const jp = (() => {
convertAlphabeticToKana,
distributeFurigana,
distributeFuriganaInflected
- };
+ });
})();