diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-01-22 21:41:32 -0500 |
---|---|---|
committer | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-01-24 20:15:25 -0500 |
commit | 9b509d50a94110f92ac52db2ff9566d1104e33c6 (patch) | |
tree | 6ea6d8ce607e6c0f1b0982a397dd0f5534ae904a /ext/mixed | |
parent | 3c17388ff85daf477a1cae21103f705bce96bfcd (diff) |
Add character range definitions
Diffstat (limited to 'ext/mixed')
-rw-r--r-- | ext/mixed/js/japanese.js | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js index 8dd5651c..956d246a 100644 --- a/ext/mixed/js/japanese.js +++ b/ext/mixed/js/japanese.js @@ -76,6 +76,41 @@ const JP_HALFWIDTH_KATAKANA_MAPPING = new Map([ ['ン', 'ン--'] ]); +const JP_HIRAGANA_RANGE = [0x3040, 0x309f]; +const JP_KATAKANA_RANGE = [0x30a0, 0x30ff]; +const JP_KANA_RANGES = [JP_HIRAGANA_RANGE, JP_KATAKANA_RANGE]; + +const JP_CJK_COMMON_RANGE = [0x4e00, 0x9fff]; +const JP_CJK_RARE_RANGE = [0x3400, 0x4dbf]; +const JP_CJK_RANGES = [JP_CJK_COMMON_RANGE, JP_CJK_RARE_RANGE]; + +const JP_ITERATION_MARK_CHAR_CODE = 0x3005; + +// Japanese character ranges, roughly ordered in order of expected frequency +const JP_JAPANESE_RANGES = [ + JP_HIRAGANA_RANGE, + JP_KATAKANA_RANGE, + + JP_CJK_COMMON_RANGE, + JP_CJK_RARE_RANGE, + + [0xff66, 0xff9f], // Halfwidth katakana + + [0x30fb, 0x30fc], // Katakana punctuation + [0xff61, 0xff65], // Kana punctuation + [0x3000, 0x303f], // CJK punctuation + + [0xff10, 0xff19], // Fullwidth numbers + [0xff21, 0xff3a], // Fullwidth upper case Latin letters + [0xff41, 0xff5a], // Fullwidth lower case Latin letters + + [0xff01, 0xff0f], // Fullwidth punctuation 1 + [0xff1a, 0xff1f], // Fullwidth punctuation 2 + [0xff3b, 0xff3f], // Fullwidth punctuation 3 + [0xff5b, 0xff60], // Fullwidth punctuation 4 + [0xffe0, 0xffee], // Currency markers +]; + function jpIsKanji(c) { const code = c.charCodeAt(0); |