diff options
| -rw-r--r-- | .eslintrc.json | 1 | ||||
| -rw-r--r-- | ext/js/language/language-descriptors.js | 6 | ||||
| -rw-r--r-- | ext/js/language/sh/serbo-croatian-text-preprocessors.js | 31 | ||||
| -rw-r--r-- | types/ext/language-descriptors.d.ts | 4 | 
4 files changed, 40 insertions, 2 deletions
| diff --git a/.eslintrc.json b/.eslintrc.json index 5a81064d..3f6b467b 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -660,6 +660,7 @@                  "ext/js/language/multi-language-transformer.js",                  "ext/js/language/ru/russian-text-preprocessors.js",                  "ext/js/language/sga/old-irish-transforms.js", +                "ext/js/language/sh/serbo-croatian-text-preprocessors.js",                  "ext/js/language/sq/albanian-transforms.js",                  "ext/js/language/text-processors.js",                  "ext/js/language/translator.js", diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index defd73a8..98d3f6c8 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -34,6 +34,7 @@ import {koreanTransforms} from './ko/korean-transforms.js';  import {latinTransforms} from './la/latin-transforms.js';  import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';  import {oldIrishTransforms} from './sga/old-irish-transforms.js'; +import {removeSerboCroatianAccentMarks} from './sh/serbo-croatian-text-preprocessors.js';  import {albanianTransforms} from './sq/albanian-transforms.js';  import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-processors.js';  import {isStringPartiallyChinese} from './zh/chinese.js'; @@ -224,7 +225,10 @@ const languageDescriptors = [          iso: 'sh',          name: 'Serbo-Croatian',          exampleText: 'čitaše', -        textPreprocessors: capitalizationPreprocessors, +        textPreprocessors: { +            ...capitalizationPreprocessors, +            removeSerboCroatianAccentMarks, +        },      },      {          iso: 'sq', diff --git a/ext/js/language/sh/serbo-croatian-text-preprocessors.js b/ext/js/language/sh/serbo-croatian-text-preprocessors.js new file mode 100644 index 00000000..7b1b69a1 --- /dev/null +++ b/ext/js/language/sh/serbo-croatian-text-preprocessors.js @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {basicTextProcessorOptions} from '../text-processors.js'; + +/** @type {import('language').TextProcessor<boolean>} */ +export const removeSerboCroatianAccentMarks = { +    name: 'Remove diacritics', +    description: 'A\u0301 → A, a\u0301 → a', +    options: basicTextProcessorOptions, +    process: (str, setting) => ( +        setting ? +            str.normalize('NFD').replace(/[aeiourAEIOUR][\u0300-\u036f]/g, (match) => match[0]) : +            str +    ), + +}; diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index 69ccec44..270d753b 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -157,7 +157,9 @@ type AllTextProcessors = {          };      };      sh: { -        pre: CapitalizationPreprocessors; +        pre: CapitalizationPreprocessors & { +            removeSerboCroatianAccentMarks: TextProcessor<boolean>; +        };      };      sq: {          pre: CapitalizationPreprocessors; |