diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-03-22 15:27:35 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-22 14:27:35 +0000 |
commit | d2e9841f96ebff61d4a5c26a322484f6268115f1 (patch) | |
tree | 3c1351fa3b25994eab8456ea8457ee153bb98aa9 /types | |
parent | 7681131782d958997663b1fb443a3e32e8eef550 (diff) |
expand deinflection format (#745)
* abstract deinflections
* undo redundant changes
* remove cast
* switch to js
* MultiLanguageTransformer
* comments
* comments
* fix test
* suffixInflection
* fix bench
* substring instead of replace
* without heuristic
* suffixMap
* add other language deinflections
* wip
* catch cycles
* fix tests
* uninflect to deinflect
* use less regex
* add suru masu stem deinflection
Diffstat (limited to 'types')
-rw-r--r-- | types/ext/language-descriptors.d.ts | 5 | ||||
-rw-r--r-- | types/ext/language-transformer-internal.d.ts | 8 | ||||
-rw-r--r-- | types/ext/language-transformer.d.ts | 14 | ||||
-rw-r--r-- | types/ext/language.d.ts | 7 | ||||
-rw-r--r-- | types/ext/offscreen.d.ts | 5 |
5 files changed, 27 insertions, 12 deletions
```diff
diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts
index 79668da4..c85b5fc0 100644
--- a/types/ext/language-descriptors.d.ts
+++ b/types/ext/language-descriptors.d.ts
@@ -16,6 +16,7 @@
  */
 
 import type {TextPreprocessor, BidirectionalConversionPreprocessor} from './language';
+import type {LanguageTransformDescriptor} from './language-transformer';
 import type {SafeAny} from './core';
 
 export type IsTextLookupWorthyFunction = (text: string) => boolean;
@@ -32,7 +33,7 @@ type LanguageDescriptor<TIso extends string, TTextPreprocessorDescriptor extends
      */
     isTextLookupWorthy?: IsTextLookupWorthyFunction;
     textPreprocessors: TTextPreprocessorDescriptor;
-    languageTransformsFile?: string;
+    languageTransforms?: LanguageTransformDescriptor;
 };
 
 type TextPreprocessorDescriptor = {
@@ -72,7 +73,7 @@ type AllTextPreprocessors = {
     hu: CapitalizationPreprocessors;
     id: CapitalizationPreprocessors;
     it: CapitalizationPreprocessors;
-    la: {
+    la: CapitalizationPreprocessors & {
         removeLatinDiacritics: TextPreprocessor<boolean>;
     };
     ja: {
diff --git a/types/ext/language-transformer-internal.d.ts b/types/ext/language-transformer-internal.d.ts
index fdabc0fb..9ae412d3 100644
--- a/types/ext/language-transformer-internal.d.ts
+++ b/types/ext/language-transformer-internal.d.ts
@@ -18,12 +18,13 @@
 export type Transform = {
     name: string;
     rules: Rule[];
-    suffixHeuristic: RegExp;
+    heuristic: RegExp;
 };
 
 export type Rule = {
-    suffixIn: string;
-    suffixOut: string;
+    type: 'suffix' | 'prefix' | 'other';
+    isInflected: RegExp;
+    deinflect: (inflectedWord: string) => string;
     conditionsIn: number;
     conditionsOut: number;
 };
@@ -37,6 +38,7 @@ export type TransformedText = {
 export type Trace = TraceFrame[];
 
 export type TraceFrame = {
+    text: string;
     transform: string;
     ruleIndex: number;
 };
diff --git a/types/ext/language-transformer.d.ts b/types/ext/language-transformer.d.ts
index 88dce0b5..95da602d 100644
--- a/types/ext/language-transformer.d.ts
+++ b/types/ext/language-transformer.d.ts
@@ -55,8 +55,18 @@ export type TransformI18n = {
 };
 
 export type Rule = {
-    suffixIn: string;
-    suffixOut: string;
+    type: 'suffix' | 'prefix' | 'other';
+    isInflected: RegExp;
+    deinflect: (inflectedWord: string) => string;
+    conditionsIn: string[];
+    conditionsOut: string[];
+};
+
+export type SuffixRule = {
+    type: 'suffix';
+    isInflected: RegExp;
+    deinflected: string;
+    deinflect: (inflectedWord: string) => string;
     conditionsIn: string[];
     conditionsOut: string[];
 };
diff --git a/types/ext/language.d.ts b/types/ext/language.d.ts
index a50d3d4b..8fa6f0e7 100644
--- a/types/ext/language.d.ts
+++ b/types/ext/language.d.ts
@@ -16,6 +16,7 @@
  */
 
 import type {TextSourceMap} from '../../ext/js/general/text-source-map.js';
+import type {LanguageTransformDescriptor} from './language-transformer.js';
 
 export type TextPreprocessorOptions<T = unknown> = T[];
 
@@ -42,6 +43,11 @@ export type LanguageAndPreprocessors = {
     textPreprocessors: TextPreprocessorWithId<unknown>[];
 };
 
+export type LanguageAndTransforms = {
+    iso: string;
+    languageTransforms: LanguageTransformDescriptor;
+};
+
 export type TextPreprocessorWithId<T = unknown> = {
     id: string;
     textPreprocessor: TextPreprocessor<T>;
@@ -51,5 +57,4 @@ export type LanguageSummary = {
     name: string;
     iso: string;
     exampleText: string;
-    languageTransformsFile?: string;
 };
diff --git a/types/ext/offscreen.d.ts b/types/ext/offscreen.d.ts
index 777756b9..2f180e21 100644
--- a/types/ext/offscreen.d.ts
+++ b/types/ext/offscreen.d.ts
@@ -29,7 +29,6 @@ import type {
     ApiReturn as BaseApiReturn,
     ApiNames as BaseApiNames,
 } from './api-map';
-import type {LanguageTransformDescriptor} from './language-transformer';
 
 type ApiSurface = {
     databasePrepareOffscreen: {
@@ -51,9 +50,7 @@ type ApiSurface = {
         return: DictionaryDatabase.Media<string>[];
     };
     translatorPrepareOffscreen: {
-        params: {
-            descriptors: LanguageTransformDescriptor[];
-        };
+        params: void;
         return: void;
     };
     findKanjiOffscreen: {
```