From d724b403f94b7fd1ecec3f6d2e4f5a1ed805c6ec Mon Sep 17 00:00:00 2001 From: Cashew <52880648+cashewnuttynuts@users.noreply.github.com> Date: Sat, 22 Jun 2024 03:24:21 +0700 Subject: Add diacritics normalization preprocessors for Vietnamese (#1107) * add viet diacritics normalization * move regexp construction outside of function * fix eslint * add 'off' option * fix lint * fix type --- test/language/viet-text-preprocessors.test.js | 60 +++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 test/language/viet-text-preprocessors.test.js (limited to 'test') diff --git a/test/language/viet-text-preprocessors.test.js b/test/language/viet-text-preprocessors.test.js new file mode 100644 index 00000000..56593c63 --- /dev/null +++ b/test/language/viet-text-preprocessors.test.js @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2023-2024 Yomitan Authors + * Copyright (C) 2020-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {normalizeDiacritics} from '../../ext/js/language/vi/viet-text-preprocessors.js';
+import {describe, expect, test} from 'vitest';
+
+/** [input, expected] pairs for every enabled option other than 'new' (e.g. 'hoạ' -> 'họa'). */
+const testCasesOldStyle = [
+    ['hoạ', 'họa'],
+    ['choàng', 'choàng'],
+    ['thuỷ', 'thủy'],
+    ['oà', 'òa'],
+    ['toà', 'tòa'],
+    ['toàn', 'toàn'],
+    ['tòan', 'toàn'],
+];
+
+/** [input, expected] pairs for the 'new' option (e.g. 'họa' -> 'hoạ'). */
+const testCasesNewStyle = [
+    ['ngòăng', 'ngoằng'],
+    ['họa', 'hoạ'],
+    ['chòang', 'choàng'],
+    ['giừơng', 'giường'],
+    ['baỷ', 'bảy'],
+    ['cuả', 'của'],
+    ['òa', 'oà'],
+    ['toàn', 'toàn'],
+];
+
+// Registers one `<option> style` suite for each entry of `normalizeDiacritics.options` except 'off'.
+describe('diacritics normalization', () => {
+    const {options, process} = normalizeDiacritics;
+    for (const option of options) {
+        // `continue`, not `return`: returning here would leave the callback and drop every option after 'off'.
+        if (option === 'off') { continue; }
+
+        // Only 'new' has a dedicated table; any other enabled option is checked against the old-style one.
+        const testCases = option === 'new' ? testCasesNewStyle : testCasesOldStyle;
+        describe(`${option} style`, () => {
+            test.each(testCases)('%s normalizes to %s', (input, expected) => {
+                expect(process(input, option)).toStrictEqual(expected);
+            });
+        });
+    }
+});
-- 
cgit v1.2.3