aboutsummaryrefslogtreecommitdiff
path: root/test/language/viet-text-preprocessors.test.js
diff options
context:
space:
mode:
authorCashew <52880648+cashewnuttynuts@users.noreply.github.com>2024-06-22 03:24:21 +0700
committerGitHub <noreply@github.com>2024-06-21 20:24:21 +0000
commitd724b403f94b7fd1ecec3f6d2e4f5a1ed805c6ec (patch)
tree1f696742b75b4f8377fe3b952863af141ff6494e /test/language/viet-text-preprocessors.test.js
parent1c609d972ae76f8779190d7a3621f77a664a6dec (diff)
Add diacritics normalization preprocessors for Vietnamese (#1107)
* add viet diacritics normalization * move regexp construction outside of function * fix eslint * add 'off' option * fix lint * fix type
Diffstat (limited to 'test/language/viet-text-preprocessors.test.js')
-rw-r--r--test/language/viet-text-preprocessors.test.js60
1 files changed, 60 insertions, 0 deletions
diff --git a/test/language/viet-text-preprocessors.test.js b/test/language/viet-text-preprocessors.test.js
new file mode 100644
index 00000000..56593c63
--- /dev/null
+++ b/test/language/viet-text-preprocessors.test.js
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2023-2024 Yomitan Authors
+ * Copyright (C) 2020-2022 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {normalizeDiacritics} from '../../ext/js/language/vi/viet-text-preprocessors.js';
+import {describe, expect, test} from 'vitest';
+
+const testCasesOldStyle = [ // [input, expected] pairs run against every enabled style other than 'new'
+ ['hoạ', 'họa'],
+ ['choàng', 'choàng'],
+ ['thuỷ', 'thủy'],
+ ['oà', 'òa'],
+ ['toà', 'tòa'],
+ ['toàn', 'toàn'],
+ ['tòan', 'toàn'],
+];
+
+const testCasesNewStyle = [ // [input, expected] pairs run against the 'new' style
+ ['ngòăng', 'ngoằng'],
+ ['họa', 'hoạ'],
+ ['chòang', 'choàng'],
+ ['giừơng', 'giường'],
+ ['baỷ', 'bảy'],
+ ['cuả', 'của'],
+ ['òa', 'oà'],
+ ['toàn', 'toàn'],
+];
+
+describe('diacritics normalization', () => {
+ const {options, process} = normalizeDiacritics;
+ for (const option of options) {
+ if (option === 'off') { return; }
+
+ describe(`${option} style`, () => {
+ if (option === 'new') {
+ test.each(testCasesNewStyle)('%s normalizes to %s', (input, expected) => {
+ expect(process(input, option)).toStrictEqual(expected);
+ });
+ } else {
+ test.each(testCasesOldStyle)('%s normalizes to %s', (input, expected) => {
+ expect(process(input, option)).toStrictEqual(expected);
+ });
+ }
+ });
+ }
+});