summary | refs | log | tree | commit | diff
path: root/ext/js/language
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-09-26 13:29:55 -0400
committer: GitHub <noreply@github.com> 2021-09-26 13:29:55 -0400
commit: d739ccd63f0554f0f880e7463355dd5c4ff166e4 (patch)
tree: 41f06a949495e98adfac825419b82f4ab34a56d3 /ext/js/language
parent: 25fe3ba5149cda36e173ee7427324fa74e6784be (diff)
Fix japanese codepoint range issues (#1960)
* Add CJK_COMPATIBILITY_IDEOGRAPHS_RANGE
* Rename CJK_UNIFIED_IDEOGRAPHS_RANGES => CJK_IDEOGRAPH_RANGES
* Simplify isKana check
* Update tests
Diffstat (limited to 'ext/js/language')
-rw-r--r-- ext/js/language/sandbox/japanese-util.js 11
1 files changed, 6 insertions, 5 deletions
diff --git a/ext/js/language/sandbox/japanese-util.js b/ext/js/language/sandbox/japanese-util.js
index 9257e1e5..9b58d255 100644
--- a/ext/js/language/sandbox/japanese-util.js
+++ b/ext/js/language/sandbox/japanese-util.js
@@ -16,7 +16,6 @@
*/
const JapaneseUtil = (() => {
- const ITERATION_MARK_CODE_POINT = 0x3005;
const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5;
@@ -38,8 +37,9 @@ const JapaneseUtil = (() => {
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
+ const CJK_COMPATIBILITY_IDEOGRAPHS_RANGE = [0xf900, 0xfaff];
const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
- const CJK_UNIFIED_IDEOGRAPHS_RANGES = [
+ const CJK_IDEOGRAPH_RANGES = [
CJK_UNIFIED_IDEOGRAPHS_RANGE,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
@@ -47,6 +47,7 @@ const JapaneseUtil = (() => {
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
+ CJK_COMPATIBILITY_IDEOGRAPHS_RANGE,
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
];
@@ -55,7 +56,7 @@ const JapaneseUtil = (() => {
HIRAGANA_RANGE,
KATAKANA_RANGE,
- ...CJK_UNIFIED_IDEOGRAPHS_RANGES,
+ ...CJK_IDEOGRAPH_RANGES,
[0xff66, 0xff9f], // Halfwidth katakana
@@ -204,7 +205,7 @@ const JapaneseUtil = (() => {
// Character code testing functions
isCodePointKanji(codePoint) {
- return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES);
+ return isCodePointInRanges(codePoint, CJK_IDEOGRAPH_RANGES);
}
isCodePointKana(codePoint) {
@@ -450,7 +451,7 @@ const JapaneseUtil = (() => {
let isKanaPre = null;
for (const c of term) {
const codePoint = c.codePointAt(0);
- const isKana = !(this.isCodePointKanji(codePoint) || codePoint === ITERATION_MARK_CODE_POINT);
+ const isKana = this.isCodePointKana(codePoint);
if (isKana === isKanaPre) {
groupPre.text += c;
} else {