diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-03-22 15:27:35 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-22 14:27:35 +0000 |
commit | d2e9841f96ebff61d4a5c26a322484f6268115f1 (patch) | |
tree | 3c1351fa3b25994eab8456ea8457ee153bb98aa9 /ext/js/language/language-descriptors.js | |
parent | 7681131782d958997663b1fb443a3e32e8eef550 (diff) |
expand deinflection format (#745)
* abstract deinflections
* undo redundant changes
* remove cast
* switch to js
* MultiLanguageTransformer
* comments
* comments
* fix test
* suffixInflection
* fix bench
* substring instead of replace
* without heuristic
* suffixMap
* add other language deinflections
* wip
* catch cycles
* fix tests
* uninflect to deinflect
* use less regex
* add suru masu stem deinflection
Diffstat (limited to 'ext/js/language/language-descriptors.js')
-rw-r--r-- | ext/js/language/language-descriptors.js | 11 |
1 file changed, 8 insertions, 3 deletions
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index 6675f78e..b947649d 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -17,10 +17,13 @@
 
 import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
 import {eszettPreprocessor} from './de/german-text-preprocessors.js';
+import {englishTransforms} from './en/english-transforms.js';
 import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {japaneseTransforms} from './ja/japanese-transforms.js';
 import {isStringPartiallyJapanese} from './ja/japanese.js';
 import {removeLatinDiacritics} from './la/latin-text-preprocessors.js';
 import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
+import {albanianTransforms} from './sq/albanian-transforms.js';
 import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js';
 
 const capitalizationPreprocessors = {
@@ -58,7 +61,7 @@ const languageDescriptors = [
         name: 'English',
         exampleText: 'read',
         textPreprocessors: capitalizationPreprocessors,
-        languageTransformsFile: '/js/language/en/english-transforms.json'
+        languageTransforms: englishTransforms
     },
     {
         iso: 'es',
@@ -109,6 +112,7 @@ const languageDescriptors = [
         name: 'Latin',
         exampleText: 'legere',
         textPreprocessors: {
+            ...capitalizationPreprocessors,
             removeLatinDiacritics
         }
     },
@@ -124,7 +128,7 @@ const languageDescriptors = [
             convertHiraganaToKatakana,
             collapseEmphaticSequences
         },
-        languageTransformsFile: '/js/language/ja/japanese-transforms.json'
+        languageTransforms: japaneseTransforms
     },
     {
         iso: 'km',
@@ -170,7 +174,8 @@ const languageDescriptors = [
         iso: 'sq',
         name: 'Albanian',
         exampleText: 'ndihmojme',
-        textPreprocessors: capitalizationPreprocessors
+        textPreprocessors: capitalizationPreprocessors,
+        languageTransforms: albanianTransforms
     },
     {
         iso: 'sv',