summaryrefslogtreecommitdiff
path: root/ext/js/language/japanese-wanakana.js
blob: b48ab6d6a0edd122557fff35027ab1cb75aaee81 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
 * Copyright (C) 2024  Yomitan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

import * as wanakana from '../../lib/wanakana.js';

/**
 * Converts one alphabetic run to hiragana with `wanakana.toHiragana` and, when
 * a source map is given, adjusts the map through `combine`/`insert` calls so
 * that it follows the converted text instead of the input text.
 * @param {string} text The run to convert.
 * @param {?import('../general/text-source-map.js').TextSourceMap} sourceMap
 *   Map to update in place, or `null` to skip the mapping work entirely.
 * @param {number} sourceMapStart Index passed to the first `combine`/`insert`
 *   call; presumably the map position of the first character of `text` (verify
 *   against `TextSourceMap`).
 * @returns {string} The value of `wanakana.toHiragana(text)`.
 */
function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
    const kana = wanakana.toHiragana(text);
    if (sourceMap === null) {
        return kana;
    }

    const textLength = text.length;
    let mapIndex = sourceMapStart;
    let textOffset = 0;
    let kanaOffset = 0;
    while (textOffset < textLength) {
        // Extend the input prefix one character at a time until its conversion
        // is a prefix of the full conversion. If no shorter prefix qualifies,
        // the rest of the input and the rest of the output are taken together.
        let textEnd = textOffset + 1;
        let kanaEnd = kana.length;
        for (; textEnd < textLength; ++textEnd) {
            const prefixKana = wanakana.toHiragana(text.substring(0, textEnd));
            if (kana.startsWith(prefixKana)) {
                kanaEnd = prefixKana.length;
                break;
            }
        }

        // More than one input character was consumed: ask the map to combine
        // the surplus ones at the current position.
        const mergedCount = textEnd - textOffset - 1;
        if (mergedCount > 0) {
            sourceMap.combine(mapIndex, mergedCount);
        }
        ++mapIndex;

        // More than one output character was produced: insert a zero entry
        // for each additional one.
        const extraCount = kanaEnd - kanaOffset - 1;
        for (let k = 0; k < extraCount; ++k) {
            sourceMap.insert(mapIndex, 0);
            ++mapIndex;
        }

        textOffset = textEnd;
        kanaOffset = kanaEnd;
    }

    return kana;
}

/**
 * Passes `text` through `wanakana.toKana` and returns the result unchanged.
 * @param {string} text The text to convert.
 * @returns {string} Whatever `wanakana.toKana` produced for `text`.
 */
export function convertToKana(text) {
    const kana = wanakana.toKana(text);
    return kana;
}

/**
 * Passes `text` through `wanakana.toRomaji` and returns the result unchanged.
 * @param {string} text The text to convert.
 * @returns {string} Whatever `wanakana.toRomaji` produced for `text`.
 */
export function convertToRomaji(text) {
    const romaji = wanakana.toRomaji(text);
    return romaji;
}

/**
 * Converts the alphabetic runs of `text` to kana and copies everything else
 * through unchanged.
 *
 * A run is a maximal sequence of ASCII or fullwidth Latin letters and hyphens
 * ('-' or fullwidth '－'). Each character of a run is normalized to its
 * lowercase ASCII letter or to '-', and the whole run is then handed to
 * `convertAlphabeticPartToKana` together with `sourceMap`.
 * @param {string} text The text to convert.
 * @param {?import('../general/text-source-map.js').TextSourceMap} sourceMap
 *   Optional map forwarded to `convertAlphabeticPartToKana` for every run.
 * @returns {string} The text with every alphabetic run replaced by its conversion.
 */
export function convertAlphabeticToKana(text, sourceMap = null) {
    const lowerA = 0x61; // 'a'
    const hyphen = 0x2d; // '-'

    /**
     * Maps a code point belonging to an alphabetic run onto its lowercase
     * ASCII letter (or '-'); returns `null` for any other code point.
     * @param {number} codePoint
     * @returns {?number}
     */
    const normalize = (codePoint) => {
        if (codePoint >= 0x41 && codePoint <= 0x5a) { // ['A', 'Z']
            return codePoint - 0x41 + lowerA;
        }
        if (codePoint >= 0x61 && codePoint <= 0x7a) { // ['a', 'z']
            return codePoint;
        }
        if (codePoint >= 0xff21 && codePoint <= 0xff3a) { // ['A', 'Z'] fullwidth
            return codePoint - 0xff21 + lowerA;
        }
        if (codePoint >= 0xff41 && codePoint <= 0xff5a) { // ['a', 'z'] fullwidth
            return codePoint - 0xff41 + lowerA;
        }
        if (codePoint === hyphen || codePoint === 0xff0d) { // '-' or fullwidth dash
            return hyphen;
        }
        return null;
    };

    let output = '';
    let pending = '';

    // Converts the buffered run, if any, and appends it to the output. The
    // output length before appending is the start index given to the converter.
    const flushPending = () => {
        if (pending.length === 0) {
            return;
        }
        output += convertAlphabeticPartToKana(pending, sourceMap, output.length);
        pending = '';
    };

    for (const char of text) {
        const normalized = normalize(/** @type {number} */ (char.codePointAt(0)));
        if (normalized === null) {
            // A non-alphabetic character ends the current run and is copied as is.
            flushPending();
            output += char;
        } else {
            pending += String.fromCodePoint(normalized);
        }
    }
    flushPending();

    return output;
}