diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-09-26 13:29:55 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-09-26 13:29:55 -0400 | 
| commit | d739ccd63f0554f0f880e7463355dd5c4ff166e4 (patch) | |
| tree | 41f06a949495e98adfac825419b82f4ab34a56d3 | |
| parent | 25fe3ba5149cda36e173ee7427324fa74e6784be (diff) | |
Fix Japanese codepoint range issues (#1960)
* Add CJK_COMPATIBILITY_IDEOGRAPHS_RANGE
* Rename CJK_UNIFIED_IDEOGRAPHS_RANGES => CJK_IDEOGRAPH_RANGES
* Simplify isKana check
* Update tests
| -rw-r--r-- | ext/js/language/sandbox/japanese-util.js | 11 | ||||
| -rw-r--r-- | test/test-japanese-util.js | 24 | 
2 files changed, 27 insertions, 8 deletions
| diff --git a/ext/js/language/sandbox/japanese-util.js b/ext/js/language/sandbox/japanese-util.js index 9257e1e5..9b58d255 100644 --- a/ext/js/language/sandbox/japanese-util.js +++ b/ext/js/language/sandbox/japanese-util.js @@ -16,7 +16,6 @@   */  const JapaneseUtil = (() => { -    const ITERATION_MARK_CODE_POINT = 0x3005;      const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;      const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;      const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5; @@ -38,8 +37,9 @@ const JapaneseUtil = (() => {      const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];      const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];      const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; +    const CJK_COMPATIBILITY_IDEOGRAPHS_RANGE = [0xf900, 0xfaff];      const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; -    const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ +    const CJK_IDEOGRAPH_RANGES = [          CJK_UNIFIED_IDEOGRAPHS_RANGE,          CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,          CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, @@ -47,6 +47,7 @@ const JapaneseUtil = (() => {          CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,          CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,          CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, +        CJK_COMPATIBILITY_IDEOGRAPHS_RANGE,          CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE      ]; @@ -55,7 +56,7 @@ const JapaneseUtil = (() => {          HIRAGANA_RANGE,          KATAKANA_RANGE, -        ...CJK_UNIFIED_IDEOGRAPHS_RANGES, +        ...CJK_IDEOGRAPH_RANGES,          [0xff66, 0xff9f], // Halfwidth katakana @@ -204,7 +205,7 @@ const JapaneseUtil = (() => {          // Character code testing functions          isCodePointKanji(codePoint) { -            return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); +            return isCodePointInRanges(codePoint, CJK_IDEOGRAPH_RANGES);          }          isCodePointKana(codePoint) { @@ -450,7 
+451,7 @@ const JapaneseUtil = (() => {              let isKanaPre = null;              for (const c of term) {                  const codePoint = c.codePointAt(0); -                const isKana = !(this.isCodePointKanji(codePoint) || codePoint === ITERATION_MARK_CODE_POINT); +                const isKana = this.isCodePointKana(codePoint);                  if (isKana === isKanaPre) {                      groupPre.text += c;                  } else { diff --git a/test/test-japanese-util.js b/test/test-japanese-util.js index 48dc9af7..f3b53844 100644 --- a/test/test-japanese-util.js +++ b/test/test-japanese-util.js @@ -33,7 +33,8 @@ function testIsCodePointKanji() {      const data = [          ['力方', true],          ['\u53f1\u{20b9f}', true], -        ['かたカタ々kata、。?,.?', false] +        ['かたカタ々kata、。?,.?', false], +        ['逸逸', true]      ];      for (const [characters, expected] of data) { @@ -65,7 +66,8 @@ function testIsCodePointJapanese() {      const data = [          ['かたカタ力方々、。?', true],          ['\u53f1\u{20b9f}', true], -        ['kata,.?', false] +        ['kata,.?', false], +        ['逸逸', true]      ];      for (const [characters, expected] of data) { @@ -109,7 +111,8 @@ function testIsStringPartiallyJapanese() {          ['kata,.?', false],          ['かたカタ力方々、。?invalid', true],          ['\u53f1\u{20b9f}invalid', true], -        ['kata,.?かた', true] +        ['kata,.?かた', true], +        ['逸逸', true]      ];      for (const [string, expected] of data) { @@ -672,6 +675,21 @@ function testDistributeFurigana() {              [                  {text: 'シック', reading: 'シック・ビルしょうこうぐん'}              ] +        ], +        // Kanji distribution tests +        [ +            ['逸らす', 'そらす'], +            [ +                {text: '逸', reading: 'そ'}, +                {text: 'らす', reading: ''} +            ] +        ], +        [ +            ['逸らす', 'そらす'], +            [ +                {text: '逸', reading: 'そ'}, +                {text: 'らす', reading: ''} +   
         ]          ]      ]; |