aboutsummaryrefslogtreecommitdiff
path: root/ext/js/language/zh/chinese.js
blob: 3072d200f9af56b9253eae8034356aa93a550e3f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
/*
 * Copyright (C) 2024  Yomitan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

import {CJK_IDEOGRAPH_RANGES, CJK_PUNCTUATION_RANGE, FULLWIDTH_CHARACTER_RANGES, isCodePointInRanges} from '../CJK-util.js';

/** @type {import('CJK-util').CodepointRange} */
const BOPOMOFO_RANGE = [0x3100, 0x312f];
/** @type {import('CJK-util').CodepointRange} */
const BOPOMOFO_EXTENDED_RANGE = [0x31a0, 0x31bf];
/** @type {import('CJK-util').CodepointRange} */
const IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_RANGE = [0x16fe0, 0x16fff];
/** @type {import('CJK-util').CodepointRange} */
const SMALL_FORM_RANGE = [0xfe50, 0xfe6f];
/** @type {import('CJK-util').CodepointRange} */
const VERTICAL_FORM_RANGE = [0xfe10, 0xfe1f];


/**
 * Chinese character ranges, roughly ordered in order of expected frequency.
 * @type {import('CJK-util').CodepointRange[]}
 */
const CHINESE_RANGES = [
    ...CJK_IDEOGRAPH_RANGES,
    CJK_PUNCTUATION_RANGE,

    ...FULLWIDTH_CHARACTER_RANGES,

    BOPOMOFO_RANGE,
    BOPOMOFO_EXTENDED_RANGE,
    IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_RANGE,
    SMALL_FORM_RANGE,
    VERTICAL_FORM_RANGE,
];


/**
 * @param {string} str
 * @returns {boolean}
 */
export function isStringPartiallyChinese(str) {
    if (str.length === 0) { return false; }
    for (const c of str) {
        if (isCodePointInRanges(/** @type {number} */ (c.codePointAt(0)), CHINESE_RANGES)) {
            return true;
        }
    }
    return false;
}

/** @type {import('language').ReadingNormalizer} */
export function normalizePinyin(str) {
    return str.normalize('NFC').toLowerCase().replace(/[\s・:]/g, '');
}