diff options
| author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-06-03 23:11:34 +0200 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-06-03 21:11:34 +0000 | 
| commit | 3a2a740b6517d18de726a44b75b34155fe8f1259 (patch) | |
| tree | 9ff8a0a7709abffe4a4cbea1e1f9da0445b3177d /ext/js | |
| parent | 7955fc85ac089d856b44bdea78eccd26ffbd690c (diff) | |
[sh] preprocess accent marks (#1024)
Diffstat (limited to 'ext/js')
| -rw-r--r-- | ext/js/language/language-descriptors.js | 6 | ||||
| -rw-r--r-- | ext/js/language/sh/serbo-croatian-text-preprocessors.js | 31 | 
2 files changed, 36 insertions, 1 deletions
/*
 * Copyright (C) 2024  Yomitan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/*
 * File: ext/js/language/sh/serbo-croatian-text-preprocessors.js
 *
 * Registration (same commit, ext/js/language/language-descriptors.js):
 *   import {removeSerboCroatianAccentMarks} from './sh/serbo-croatian-text-preprocessors.js';
 * and the descriptor with iso 'sh' ("Serbo-Croatian") changes from
 *   textPreprocessors: capitalizationPreprocessors,
 * to
 *   textPreprocessors: {
 *       ...capitalizationPreprocessors,
 *       removeSerboCroatianAccentMarks,
 *   },
 */

import {basicTextProcessorOptions} from '../text-processors.js';

/**
 * Matches a vowel or syllabic "r" followed by one or more combining marks
 * (U+0300–U+036F). The `+` matters: a letter may carry several stacked marks
 * (for example a length mark plus a tone mark), and all of them must go.
 * The base letter is captured so the replacement can keep it.
 */
const accentedLetterPattern = /([aeiourAEIOUR])[\u0300-\u036f]+/g;

/**
 * Text preprocessor that strips accent marks from a/e/i/o/u/r (either case).
 *
 * When `setting` is false the input is returned untouched. When it is true the
 * text is decomposed (NFD) so that accents become separate combining marks,
 * every mark attached to one of the listed letters is dropped, and the result
 * is recomposed (NFC) so that letters which are not affected — such as č, ć,
 * š, ž — are returned in precomposed form rather than left decomposed.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeSerboCroatianAccentMarks = {
    name: 'Remove diacritics',
    description: 'A\u0301 → A, a\u0301 → a',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) {
            return str;
        }
        return str
            .normalize('NFD')
            .replace(accentedLetterPattern, '$1')
            // Recompose what is left; without this, untouched letters with
            // diacritics would stay split into base letter + combining mark.
            .normalize('NFC');
    },
};