diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-05-22 22:45:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-22 20:45:39 +0000 |
commit | d19b898792bffed8ab2d5724472e5b65a5f5b146 (patch) | |
tree | b3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f /ext/js/language/ja/japanese.js | |
parent | 125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff) |
[ja] add preprocessor for width of alphabetic characters (#964)
* add japanese text preprocessor for variants in width of alphabetic characters
* try combining with numeric to improve performance
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* fix tests
---------
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Diffstat (limited to 'ext/js/language/ja/japanese.js')
-rw-r--r-- | ext/js/language/ja/japanese.js | 32 |
1 files changed, 28 insertions, 4 deletions
```diff
diff --git a/ext/js/language/ja/japanese.js b/ext/js/language/ja/japanese.js
index 3507e5df..2200e077 100644
--- a/ext/js/language/ja/japanese.js
+++ b/ext/js/language/ja/japanese.js
@@ -15,6 +15,7 @@
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
  */
 
+
 const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
 const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
 const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5;
@@ -523,16 +524,39 @@ export function convertHiraganaToKatakana(text) {
  * @param {string} text
  * @returns {string}
  */
-export function convertNumericToFullWidth(text) {
+export function convertAlphanumericToFullWidth(text) {
     let result = '';
     for (const char of text) {
         let c = /** @type {number} */ (char.codePointAt(0));
         if (c >= 0x30 && c <= 0x39) { // ['0', '9']
             c += 0xff10 - 0x30; // 0xff10 = '0' full width
-            result += String.fromCodePoint(c);
-        } else {
-            result += char;
+        } else if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
+            c += 0xff21 - 0x41; // 0xff21 = 'A' full width
+        } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
+            c += 0xff41 - 0x61; // 0xff41 = 'a' full width
+        }
+        result += String.fromCodePoint(c);
+    }
+    return result;
+}
+
+/**
+ * @param {string} text
+ * @returns {string}
+ */
+export function convertFullWidthAlphanumericToNormal(text) {
+    let result = '';
+    const length = text.length;
+    for (let i = 0; i < length; i++) {
+        let c = /** @type {number} */ (text[i].codePointAt(0));
+        if (c >= 0xff10 && c <= 0xff19) { // ['0', '9']
+            c -= 0xff10 - 0x30; // 0x30 = '0'
+        } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z']
+            c -= 0xff21 - 0x41; // 0x41 = 'A'
+        } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z']
+            c -= 0xff41 - 0x61; // 0x61 = 'a'
         }
+        result += String.fromCodePoint(c);
     }
     return result;
 }
```