From d2e9841f96ebff61d4a5c26a322484f6268115f1 Mon Sep 17 00:00:00 2001 From: StefanVukovic99 Date: Fri, 22 Mar 2024 15:27:35 +0100 Subject: expand deinflection format (#745) * abstract deinflections * undo redundant changes * remove cast * switch to js * MultiLanguageTransformer * comments * comments * fix test * suffixInflection * fix bench * substring instead of replace * without heuristic * suffixMap * add other language deinflections * wip * catch cycles * fix tests * uninflect to deinflect * use less regex * add suru masu stem deinflection --- types/ext/language-descriptors.d.ts | 5 +++-- types/ext/language-transformer-internal.d.ts | 8 +++++--- types/ext/language-transformer.d.ts | 14 ++++++++++++-- types/ext/language.d.ts | 7 ++++++- types/ext/offscreen.d.ts | 5 +---- 5 files changed, 27 insertions(+), 12 deletions(-) (limited to 'types/ext') diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index 79668da4..c85b5fc0 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -16,6 +16,7 @@ */ import type {TextPreprocessor, BidirectionalConversionPreprocessor} from './language'; +import type {LanguageTransformDescriptor} from './language-transformer'; import type {SafeAny} from './core'; export type IsTextLookupWorthyFunction = (text: string) => boolean; @@ -32,7 +33,7 @@ type LanguageDescriptor; }; ja: { diff --git a/types/ext/language-transformer-internal.d.ts b/types/ext/language-transformer-internal.d.ts index fdabc0fb..9ae412d3 100644 --- a/types/ext/language-transformer-internal.d.ts +++ b/types/ext/language-transformer-internal.d.ts @@ -18,12 +18,13 @@ export type Transform = { name: string; rules: Rule[]; - suffixHeuristic: RegExp; + heuristic: RegExp; }; export type Rule = { - suffixIn: string; - suffixOut: string; + type: 'suffix' | 'prefix' | 'other'; + isInflected: RegExp; + deinflect: (inflectedWord: string) => string; conditionsIn: number; conditionsOut: number; }; @@ -37,6 +38,7 @@ export type TransformedText = { export type Trace = TraceFrame[]; export type TraceFrame = { + text: string; transform: string; ruleIndex: number; }; diff --git a/types/ext/language-transformer.d.ts b/types/ext/language-transformer.d.ts index 88dce0b5..95da602d 100644 --- a/types/ext/language-transformer.d.ts +++ b/types/ext/language-transformer.d.ts @@ -55,8 +55,18 @@ export type TransformI18n = { }; export type Rule = { - suffixIn: string; - suffixOut: string; + type: 'suffix' | 'prefix' | 'other'; + isInflected: RegExp; + deinflect: (inflectedWord: string) => string; + conditionsIn: string[]; + conditionsOut: string[]; +}; + +export type SuffixRule = { + type: 'suffix'; + isInflected: RegExp; + deinflected: string; + deinflect: (inflectedWord: string) => string; conditionsIn: string[]; conditionsOut: string[]; }; diff --git a/types/ext/language.d.ts b/types/ext/language.d.ts index a50d3d4b..8fa6f0e7 100644 --- a/types/ext/language.d.ts +++ b/types/ext/language.d.ts @@ -16,6 +16,7 @@ */ import type {TextSourceMap} from '../../ext/js/general/text-source-map.js'; +import type {LanguageTransformDescriptor} from './language-transformer.js'; export type TextPreprocessorOptions = T[]; @@ -42,6 +43,11 @@ export type LanguageAndPreprocessors = { textPreprocessors: TextPreprocessorWithId[]; }; +export type LanguageAndTransforms = { + iso: string; + languageTransforms: LanguageTransformDescriptor; +}; + export type TextPreprocessorWithId = { id: string; textPreprocessor: TextPreprocessor; @@ -51,5 +57,4 @@ export type LanguageSummary = { name: string; iso: string; exampleText: string; - languageTransformsFile?: string; }; diff --git a/types/ext/offscreen.d.ts b/types/ext/offscreen.d.ts index 777756b9..2f180e21 100644 --- a/types/ext/offscreen.d.ts +++ b/types/ext/offscreen.d.ts @@ -29,7 +29,6 @@ import type { ApiReturn as BaseApiReturn, ApiNames as BaseApiNames, } from './api-map'; -import type {LanguageTransformDescriptor} from './language-transformer'; type ApiSurface = { databasePrepareOffscreen: { @@ -51,9 +50,7 @@ type ApiSurface = { return: DictionaryDatabase.Media[]; }; translatorPrepareOffscreen: { - params: { - descriptors: LanguageTransformDescriptor[]; - }; + params: void; return: void; }; findKanjiOffscreen: { -- cgit v1.2.3