diff options
author | Matttttt <18152455+martholomew@users.noreply.github.com> | 2024-04-08 19:54:04 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-08 18:54:04 +0000 |
commit | 0663774b02faeb108d4b18d8f8a7e6e93e277313 (patch) | |
tree | 8b9cddef6a6987e273c58bca79959c20d943dcfb /ext/js/language/language-descriptors.js | |
parent | 2c5af215ee533a18b4da39bad6b696701dd07978 (diff) |
Simplify diacritic removal; modify Latin & Greek preprocessors (#724)
* Simplified diacritic removal and added preprocessors to LA and GRC
* linted
* Clarified the name of removeAlphabeticDiacritics
* Add comment to removeAlphabeticDiacritics
Signed-off-by: Darius Jahandarie <djahandarie@gmail.com>
* Change to NFD
Signed-off-by: Matttttt <18152455+martholomew@users.noreply.github.com>
* Remove trailing spaces in comment
Signed-off-by: Darius Jahandarie <djahandarie@gmail.com>
* Remove Latin preprocessors from .eslintrc.json
Signed-off-by: Matttttt <18152455+martholomew@users.noreply.github.com>
* fix tests
---------
Signed-off-by: Darius Jahandarie <djahandarie@gmail.com>
Signed-off-by: Matttttt <18152455+martholomew@users.noreply.github.com>
Co-authored-by: martholomew <martholomew@users.noreply.github.com>
Co-authored-by: Darius Jahandarie <djahandarie@gmail.com>
Co-authored-by: Stefan Vukovic <stefanvukovic44@gmail.com>
Diffstat (limited to 'ext/js/language/language-descriptors.js')
-rw-r--r-- | ext/js/language/language-descriptors.js | 10 |
1 files changed, 6 insertions, 4 deletions
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index b4af2f8a..b5d7573b 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -21,10 +21,9 @@ import {englishTransforms} from './en/english-transforms.js'; import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; -import {removeLatinDiacritics} from './la/latin-text-preprocessors.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {albanianTransforms} from './sq/albanian-transforms.js'; -import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js'; +import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-preprocessors.js'; const capitalizationPreprocessors = { decapitalize, @@ -87,7 +86,10 @@ const languageDescriptors = [ iso: 'grc', name: 'Ancient Greek', exampleText: 'γράφω', - textPreprocessors: capitalizationPreprocessors + textPreprocessors: { + ...capitalizationPreprocessors, + removeAlphabeticDiacritics + } }, { iso: 'hu', @@ -113,7 +115,7 @@ const languageDescriptors = [ exampleText: 'legere', textPreprocessors: { ...capitalizationPreprocessors, - removeLatinDiacritics + removeAlphabeticDiacritics } }, { |