From 0663774b02faeb108d4b18d8f8a7e6e93e277313 Mon Sep 17 00:00:00 2001 From: Matttttt <18152455+martholomew@users.noreply.github.com> Date: Mon, 8 Apr 2024 19:54:04 +0100 Subject: Simplify diacritic removal; modify Latin & Greek preprocessors (#724) * Simplified diacritic removal and added preprocessors to LA and GRC * linted * Clarified the name of removeAlphabeticDiacritics * Add comment to removeAlphabeticDiacritics Signed-off-by: Darius Jahandarie * Change to NFD Signed-off-by: Matttttt <18152455+martholomew@users.noreply.github.com> * Remove trailing spaces in comment Signed-off-by: Darius Jahandarie * Remove latin preprocessors .eslintrc.json Signed-off-by: Matttttt <18152455+martholomew@users.noreply.github.com> * fix tests --------- Signed-off-by: Darius Jahandarie Signed-off-by: Matttttt <18152455+martholomew@users.noreply.github.com> Co-authored-by: martholomew Co-authored-by: Darius Jahandarie Co-authored-by: Stefan Vukovic --- ext/js/language/language-descriptors.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'ext/js/language/language-descriptors.js') diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index b4af2f8a..b5d7573b 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -21,10 +21,9 @@ import {englishTransforms} from './en/english-transforms.js'; import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; -import {removeLatinDiacritics} from './la/latin-text-preprocessors.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {albanianTransforms} from './sq/albanian-transforms.js'; -import {capitalizeFirstLetter, decapitalize} from 
'./text-preprocessors.js'; +import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-preprocessors.js'; const capitalizationPreprocessors = { decapitalize, @@ -87,7 +86,10 @@ const languageDescriptors = [ iso: 'grc', name: 'Ancient Greek', exampleText: 'γράφω', - textPreprocessors: capitalizationPreprocessors + textPreprocessors: { + ...capitalizationPreprocessors, + removeAlphabeticDiacritics + } }, { iso: 'hu', @@ -113,7 +115,7 @@ const languageDescriptors = [ exampleText: 'legere', textPreprocessors: { ...capitalizationPreprocessors, - removeLatinDiacritics + removeAlphabeticDiacritics } }, { -- cgit v1.2.3