summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- test/japanese-util.test.js 182
-rw-r--r-- test/language/japanese-preprocessors.test.js 202
2 files changed, 182 insertions, 202 deletions
diff --git a/test/japanese-util.test.js b/test/japanese-util.test.js
index 22fad2f2..746c0e4d 100644
--- a/test/japanese-util.test.js
+++ b/test/japanese-util.test.js
@@ -863,3 +863,185 @@ describe('Japanese utility functions', () => {
});
});
});
+
+describe('combining dakuten/handakuten normalization', () => {
+ const testCasesDakuten = [
+ ['か\u3099', 'が'],
+ ['き\u3099', 'ぎ'],
+ ['く\u3099', 'ぐ'],
+ ['け\u3099', 'げ'],
+ ['こ\u3099', 'ご'],
+ ['さ\u3099', 'ざ'],
+ ['し\u3099', 'じ'],
+ ['す\u3099', 'ず'],
+ ['せ\u3099', 'ぜ'],
+ ['そ\u3099', 'ぞ'],
+ ['た\u3099', 'だ'],
+ ['ち\u3099', 'ぢ'],
+ ['つ\u3099', 'づ'],
+ ['て\u3099', 'で'],
+ ['と\u3099', 'ど'],
+ ['は\u3099', 'ば'],
+ ['ひ\u3099', 'び'],
+ ['ふ\u3099', 'ぶ'],
+ ['へ\u3099', 'べ'],
+ ['ほ\u3099', 'ぼ'],
+ ['カ\u3099', 'ガ'],
+ ['キ\u3099', 'ギ'],
+ ['ク\u3099', 'グ'],
+ ['ケ\u3099', 'ゲ'],
+ ['コ\u3099', 'ゴ'],
+ ['サ\u3099', 'ザ'],
+ ['シ\u3099', 'ジ'],
+ ['ス\u3099', 'ズ'],
+ ['セ\u3099', 'ゼ'],
+ ['ソ\u3099', 'ゾ'],
+ ['タ\u3099', 'ダ'],
+ ['チ\u3099', 'ヂ'],
+ ['ツ\u3099', 'ヅ'],
+ ['テ\u3099', 'デ'],
+ ['ト\u3099', 'ド'],
+ ['ハ\u3099', 'バ'],
+ ['ヒ\u3099', 'ビ'],
+ ['フ\u3099', 'ブ'],
+ ['ヘ\u3099', 'ベ'],
+ ['ホ\u3099', 'ボ'],
+ ];
+
+ const testCasesHandakuten = [
+ ['は\u309A', 'ぱ'],
+ ['ひ\u309A', 'ぴ'],
+ ['ふ\u309A', 'ぷ'],
+ ['へ\u309A', 'ぺ'],
+ ['ほ\u309A', 'ぽ'],
+ ['ハ\u309A', 'パ'],
+ ['ヒ\u309A', 'ピ'],
+ ['フ\u309A', 'プ'],
+ ['ヘ\u309A', 'ペ'],
+ ['ホ\u309A', 'ポ'],
+ ];
+
+ const testCasesIgnored = [
+ ['な\u3099', 'な\u3099'],
+ ['な\u309A', 'な\u309A'],
+ ['に\u3099', 'に\u3099'],
+ ['に\u309A', 'に\u309A'],
+ ['ぬ\u3099', 'ぬ\u3099'],
+ ['ぬ\u309A', 'ぬ\u309A'],
+ ['ね\u3099', 'ね\u3099'],
+ ['ね\u309A', 'ね\u309A'],
+ ['の\u3099', 'の\u3099'],
+ ['の\u309A', 'の\u309A'],
+ ['ま\u3099', 'ま\u3099'],
+ ['ま\u309A', 'ま\u309A'],
+ ['み\u3099', 'み\u3099'],
+ ['み\u309A', 'み\u309A'],
+ ['む\u3099', 'む\u3099'],
+ ['む\u309A', 'む\u309A'],
+ ['め\u3099', 'め\u3099'],
+ ['め\u309A', 'め\u309A'],
+ ['も\u3099', 'も\u3099'],
+ ['も\u309A', 'も\u309A'],
+ ['ゃ\u3099', 'ゃ\u3099'],
+ ['ゃ\u309A', 'ゃ\u309A'],
+ ['や\u3099', 'や\u3099'],
+ ['や\u309A', 'や\u309A'],
+ ['ゅ\u3099', 'ゅ\u3099'],
+ ['ゅ\u309A', 'ゅ\u309A'],
+ ['ゆ\u3099', 'ゆ\u3099'],
+ ['ゆ\u309A', 'ゆ\u309A'],
+ ['ょ\u3099', 'ょ\u3099'],
+ ['ょ\u309A', 'ょ\u309A'],
+ ['よ\u3099', 'よ\u3099'],
+ ['よ\u309A', 'よ\u309A'],
+ ['ら\u3099', 'ら\u3099'],
+ ['ら\u309A', 'ら\u309A'],
+ ['り\u3099', 'り\u3099'],
+ ['り\u309A', 'り\u309A'],
+ ['る\u3099', 'る\u3099'],
+ ['る\u309A', 'る\u309A'],
+ ['れ\u3099', 'れ\u3099'],
+ ['れ\u309A', 'れ\u309A'],
+ ['ろ\u3099', 'ろ\u3099'],
+ ['ろ\u309A', 'ろ\u309A'],
+ ['ゎ\u3099', 'ゎ\u3099'],
+ ['ゎ\u309A', 'ゎ\u309A'],
+ ['わ\u3099', 'わ\u3099'],
+ ['わ\u309A', 'わ\u309A'],
+ ['ゐ\u3099', 'ゐ\u3099'],
+ ['ゐ\u309A', 'ゐ\u309A'],
+ ['ゑ\u3099', 'ゑ\u3099'],
+ ['ゑ\u309A', 'ゑ\u309A'],
+ ['を\u3099', 'を\u3099'],
+ ['を\u309A', 'を\u309A'],
+ ['ん\u3099', 'ん\u3099'],
+ ['ん\u309A', 'ん\u309A'],
+ ['ナ\u3099', 'ナ\u3099'],
+ ['ナ\u309A', 'ナ\u309A'],
+ ['ニ\u3099', 'ニ\u3099'],
+ ['ニ\u309A', 'ニ\u309A'],
+ ['ヌ\u3099', 'ヌ\u3099'],
+ ['ヌ\u309A', 'ヌ\u309A'],
+ ['ネ\u3099', 'ネ\u3099'],
+ ['ネ\u309A', 'ネ\u309A'],
+ ['ノ\u3099', 'ノ\u3099'],
+ ['ノ\u309A', 'ノ\u309A'],
+ ['マ\u3099', 'マ\u3099'],
+ ['マ\u309A', 'マ\u309A'],
+ ['ミ\u3099', 'ミ\u3099'],
+ ['ミ\u309A', 'ミ\u309A'],
+ ['ム\u3099', 'ム\u3099'],
+ ['ム\u309A', 'ム\u309A'],
+ ['メ\u3099', 'メ\u3099'],
+ ['メ\u309A', 'メ\u309A'],
+ ['モ\u3099', 'モ\u3099'],
+ ['モ\u309A', 'モ\u309A'],
+ ['ャ\u3099', 'ャ\u3099'],
+ ['ャ\u309A', 'ャ\u309A'],
+ ['ヤ\u3099', 'ヤ\u3099'],
+ ['ヤ\u309A', 'ヤ\u309A'],
+ ['ュ\u3099', 'ュ\u3099'],
+ ['ュ\u309A', 'ュ\u309A'],
+ ['ユ\u3099', 'ユ\u3099'],
+ ['ユ\u309A', 'ユ\u309A'],
+ ['ョ\u3099', 'ョ\u3099'],
+ ['ョ\u309A', 'ョ\u309A'],
+ ['ヨ\u3099', 'ヨ\u3099'],
+ ['ヨ\u309A', 'ヨ\u309A'],
+ ['ラ\u3099', 'ラ\u3099'],
+ ['ラ\u309A', 'ラ\u309A'],
+ ['リ\u3099', 'リ\u3099'],
+ ['リ\u309A', 'リ\u309A'],
+ ['ル\u3099', 'ル\u3099'],
+ ['ル\u309A', 'ル\u309A'],
+ ['レ\u3099', 'レ\u3099'],
+ ['レ\u309A', 'レ\u309A'],
+ ['ロ\u3099', 'ロ\u3099'],
+ ['ロ\u309A', 'ロ\u309A'],
+ ['ヮ\u3099', 'ヮ\u3099'],
+ ['ヮ\u309A', 'ヮ\u309A'],
+ ['ワ\u3099', 'ワ\u3099'],
+ ['ワ\u309A', 'ワ\u309A'],
+ ['ヰ\u3099', 'ヰ\u3099'],
+ ['ヰ\u309A', 'ヰ\u309A'],
+ ['ヱ\u3099', 'ヱ\u3099'],
+ ['ヱ\u309A', 'ヱ\u309A'],
+ ['ヲ\u3099', 'ヲ\u3099'],
+ ['ヲ\u309A', 'ヲ\u309A'],
+ ['ン\u3099', 'ン\u3099'],
+ ['ン\u309A', 'ン\u309A'],
+ ];
+
+ const textCasesMisc = [
+ ['', ''],
+ ['\u3099ハ', '\u3099ハ'],
+ ['\u309Aハ', '\u309Aハ'],
+ ['さくらし\u3099また\u3099いこん', 'さくらじまだいこん'],
+ ['いっほ\u309Aん', 'いっぽん'],
+ ];
+
+ const testCases = [...testCasesDakuten, ...testCasesHandakuten, ...testCasesIgnored, ...textCasesMisc];
+ test.each(testCases)('%s normalizes to %s', (input, expected) => {
+ expect(jp.normalizeCombiningCharacters(input)).toStrictEqual(expected);
+ });
+});
diff --git a/test/language/japanese-preprocessors.test.js b/test/language/japanese-preprocessors.test.js
deleted file mode 100644
index 90313abd..00000000
--- a/test/language/japanese-preprocessors.test.js
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (C) 2024 Yomitan Authors
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-import {describe, expect, test} from 'vitest';
-import {normalizeCombiningCharacters} from '../../ext/js/language/ja/japanese-text-preprocessors.js';
-
-const testCasesDakuten = [
- ['か\u3099', 'が'],
- ['き\u3099', 'ぎ'],
- ['く\u3099', 'ぐ'],
- ['け\u3099', 'げ'],
- ['こ\u3099', 'ご'],
- ['さ\u3099', 'ざ'],
- ['し\u3099', 'じ'],
- ['す\u3099', 'ず'],
- ['せ\u3099', 'ぜ'],
- ['そ\u3099', 'ぞ'],
- ['た\u3099', 'だ'],
- ['ち\u3099', 'ぢ'],
- ['つ\u3099', 'づ'],
- ['て\u3099', 'で'],
- ['と\u3099', 'ど'],
- ['は\u3099', 'ば'],
- ['ひ\u3099', 'び'],
- ['ふ\u3099', 'ぶ'],
- ['へ\u3099', 'べ'],
- ['ほ\u3099', 'ぼ'],
- ['カ\u3099', 'ガ'],
- ['キ\u3099', 'ギ'],
- ['ク\u3099', 'グ'],
- ['ケ\u3099', 'ゲ'],
- ['コ\u3099', 'ゴ'],
- ['サ\u3099', 'ザ'],
- ['シ\u3099', 'ジ'],
- ['ス\u3099', 'ズ'],
- ['セ\u3099', 'ゼ'],
- ['ソ\u3099', 'ゾ'],
- ['タ\u3099', 'ダ'],
- ['チ\u3099', 'ヂ'],
- ['ツ\u3099', 'ヅ'],
- ['テ\u3099', 'デ'],
- ['ト\u3099', 'ド'],
- ['ハ\u3099', 'バ'],
- ['ヒ\u3099', 'ビ'],
- ['フ\u3099', 'ブ'],
- ['ヘ\u3099', 'ベ'],
- ['ホ\u3099', 'ボ'],
-];
-
-const testCasesHandakuten = [
- ['は\u309A', 'ぱ'],
- ['ひ\u309A', 'ぴ'],
- ['ふ\u309A', 'ぷ'],
- ['へ\u309A', 'ぺ'],
- ['ほ\u309A', 'ぽ'],
- ['ハ\u309A', 'パ'],
- ['ヒ\u309A', 'ピ'],
- ['フ\u309A', 'プ'],
- ['ヘ\u309A', 'ペ'],
- ['ホ\u309A', 'ポ'],
-];
-
-const testCasesIgnored = [
- ['な\u3099', 'な\u3099'],
- ['な\u309A', 'な\u309A'],
- ['に\u3099', 'に\u3099'],
- ['に\u309A', 'に\u309A'],
- ['ぬ\u3099', 'ぬ\u3099'],
- ['ぬ\u309A', 'ぬ\u309A'],
- ['ね\u3099', 'ね\u3099'],
- ['ね\u309A', 'ね\u309A'],
- ['の\u3099', 'の\u3099'],
- ['の\u309A', 'の\u309A'],
- ['ま\u3099', 'ま\u3099'],
- ['ま\u309A', 'ま\u309A'],
- ['み\u3099', 'み\u3099'],
- ['み\u309A', 'み\u309A'],
- ['む\u3099', 'む\u3099'],
- ['む\u309A', 'む\u309A'],
- ['め\u3099', 'め\u3099'],
- ['め\u309A', 'め\u309A'],
- ['も\u3099', 'も\u3099'],
- ['も\u309A', 'も\u309A'],
- ['ゃ\u3099', 'ゃ\u3099'],
- ['ゃ\u309A', 'ゃ\u309A'],
- ['や\u3099', 'や\u3099'],
- ['や\u309A', 'や\u309A'],
- ['ゅ\u3099', 'ゅ\u3099'],
- ['ゅ\u309A', 'ゅ\u309A'],
- ['ゆ\u3099', 'ゆ\u3099'],
- ['ゆ\u309A', 'ゆ\u309A'],
- ['ょ\u3099', 'ょ\u3099'],
- ['ょ\u309A', 'ょ\u309A'],
- ['よ\u3099', 'よ\u3099'],
- ['よ\u309A', 'よ\u309A'],
- ['ら\u3099', 'ら\u3099'],
- ['ら\u309A', 'ら\u309A'],
- ['り\u3099', 'り\u3099'],
- ['り\u309A', 'り\u309A'],
- ['る\u3099', 'る\u3099'],
- ['る\u309A', 'る\u309A'],
- ['れ\u3099', 'れ\u3099'],
- ['れ\u309A', 'れ\u309A'],
- ['ろ\u3099', 'ろ\u3099'],
- ['ろ\u309A', 'ろ\u309A'],
- ['ゎ\u3099', 'ゎ\u3099'],
- ['ゎ\u309A', 'ゎ\u309A'],
- ['わ\u3099', 'わ\u3099'],
- ['わ\u309A', 'わ\u309A'],
- ['ゐ\u3099', 'ゐ\u3099'],
- ['ゐ\u309A', 'ゐ\u309A'],
- ['ゑ\u3099', 'ゑ\u3099'],
- ['ゑ\u309A', 'ゑ\u309A'],
- ['を\u3099', 'を\u3099'],
- ['を\u309A', 'を\u309A'],
- ['ん\u3099', 'ん\u3099'],
- ['ん\u309A', 'ん\u309A'],
- ['ナ\u3099', 'ナ\u3099'],
- ['ナ\u309A', 'ナ\u309A'],
- ['ニ\u3099', 'ニ\u3099'],
- ['ニ\u309A', 'ニ\u309A'],
- ['ヌ\u3099', 'ヌ\u3099'],
- ['ヌ\u309A', 'ヌ\u309A'],
- ['ネ\u3099', 'ネ\u3099'],
- ['ネ\u309A', 'ネ\u309A'],
- ['ノ\u3099', 'ノ\u3099'],
- ['ノ\u309A', 'ノ\u309A'],
- ['マ\u3099', 'マ\u3099'],
- ['マ\u309A', 'マ\u309A'],
- ['ミ\u3099', 'ミ\u3099'],
- ['ミ\u309A', 'ミ\u309A'],
- ['ム\u3099', 'ム\u3099'],
- ['ム\u309A', 'ム\u309A'],
- ['メ\u3099', 'メ\u3099'],
- ['メ\u309A', 'メ\u309A'],
- ['モ\u3099', 'モ\u3099'],
- ['モ\u309A', 'モ\u309A'],
- ['ャ\u3099', 'ャ\u3099'],
- ['ャ\u309A', 'ャ\u309A'],
- ['ヤ\u3099', 'ヤ\u3099'],
- ['ヤ\u309A', 'ヤ\u309A'],
- ['ュ\u3099', 'ュ\u3099'],
- ['ュ\u309A', 'ュ\u309A'],
- ['ユ\u3099', 'ユ\u3099'],
- ['ユ\u309A', 'ユ\u309A'],
- ['ョ\u3099', 'ョ\u3099'],
- ['ョ\u309A', 'ョ\u309A'],
- ['ヨ\u3099', 'ヨ\u3099'],
- ['ヨ\u309A', 'ヨ\u309A'],
- ['ラ\u3099', 'ラ\u3099'],
- ['ラ\u309A', 'ラ\u309A'],
- ['リ\u3099', 'リ\u3099'],
- ['リ\u309A', 'リ\u309A'],
- ['ル\u3099', 'ル\u3099'],
- ['ル\u309A', 'ル\u309A'],
- ['レ\u3099', 'レ\u3099'],
- ['レ\u309A', 'レ\u309A'],
- ['ロ\u3099', 'ロ\u3099'],
- ['ロ\u309A', 'ロ\u309A'],
- ['ヮ\u3099', 'ヮ\u3099'],
- ['ヮ\u309A', 'ヮ\u309A'],
- ['ワ\u3099', 'ワ\u3099'],
- ['ワ\u309A', 'ワ\u309A'],
- ['ヰ\u3099', 'ヰ\u3099'],
- ['ヰ\u309A', 'ヰ\u309A'],
- ['ヱ\u3099', 'ヱ\u3099'],
- ['ヱ\u309A', 'ヱ\u309A'],
- ['ヲ\u3099', 'ヲ\u3099'],
- ['ヲ\u309A', 'ヲ\u309A'],
- ['ン\u3099', 'ン\u3099'],
- ['ン\u309A', 'ン\u309A'],
-];
-
-const textCasesMisc = [
- ['', ''],
- ['\u3099ハ', '\u3099ハ'],
- ['\u309Aハ', '\u309Aハ'],
- ['さくらし\u3099また\u3099いこん', 'さくらじまだいこん'],
- ['いっほ\u309Aん', 'いっぽん'],
-];
-
-describe('combining dakuten/handakuten normalization', () => {
- const {process} = normalizeCombiningCharacters;
- const testCases = [...testCasesDakuten, ...testCasesHandakuten, ...testCasesIgnored, ...textCasesMisc];
- test.each(testCases)('%s normalizes to %s', (input, expected) => {
- expect(process(input, true)).toStrictEqual(expected);
- });
-});