summary refs log tree commit diff
path: root/ext/js/language
diff options
context:
space:
mode:
author StefanVukovic99 <stefanvukovic44@gmail.com> 2024-05-22 20:52:43 +0200
committer GitHub <noreply@github.com> 2024-05-22 18:52:43 +0000
commit 125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (patch)
tree 1deb93d435442964a93af7442d147c89be8bc237 /ext/js/language
parent 590b35fb19e031e37dee340d09e0802fcd3a97a1 (diff)
improve preprocessing performance (#973)
* mvp * cleanup * cleanup types * descriptive variable * type
Diffstat (limited to 'ext/js/language')
-rw-r--r-- ext/js/language/translator.js 146
1 files changed, 54 insertions, 92 deletions
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index 350cbc57..51e1823b 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -50,18 +50,7 @@ export class Translator {
prepare() {
this._multiLanguageTransformer.prepare();
for (const {iso, textPreprocessors = [], textPostprocessors = []} of getAllLanguageTextProcessors()) {
- /** @type {import('translation-internal').TextProcessorOptionsSpace}>} */
- const preprocessorOptionsSpace = new Map();
- /** @type {import('translation-internal').TextProcessorOptionsSpace}>} */
- const postprocessorOptionsSpace = new Map();
-
- for (const {id, textProcessor} of textPreprocessors) {
- preprocessorOptionsSpace.set(id, textProcessor.options);
- }
- for (const {id, textProcessor} of textPostprocessors) {
- postprocessorOptionsSpace.set(id, textProcessor.options);
- }
- this._textProcessors.set(iso, {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace});
+ this._textProcessors.set(iso, {textPreprocessors, textPostprocessors}); // only the processor lists are stored per language; option spaces are no longer precomputed here (_getTextVariants reads each processor's `options` directly)
}
}
@@ -445,18 +434,12 @@ export class Translator {
*/
_getAlgorithmDeinflections(text, options) {
const {language} = options;
- const info = this._textProcessors.get(language);
- if (typeof info === 'undefined') { throw new Error(`Unsupported language: ${language}`); }
- const {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace} = info;
-
- const preprocessorVariantSpace = new Map(preprocessorOptionsSpace);
- preprocessorVariantSpace.set('textReplacements', this._getTextReplacementsVariants(options));
- const preprocessorVariants = this._getArrayVariants(preprocessorVariantSpace);
- const postprocessorVariants = this._getArrayVariants(postprocessorOptionsSpace);
+ const processorsForLanguage = this._textProcessors.get(language);
+ if (typeof processorsForLanguage === 'undefined') { throw new Error(`Unsupported language: ${language}`); }
+ const {textPreprocessors, textPostprocessors} = processorsForLanguage;
/** @type {import('translation-internal').DatabaseDeinflection[]} */
const deinflections = [];
- const used = new Set();
/** @type {import('translation-internal').TextCache} */
const sourceCache = new Map(); // For reusing text processors' outputs
@@ -465,24 +448,13 @@ export class Translator {
rawSource.length > 0;
rawSource = this._getNextSubstring(options.searchResolution, rawSource)
) {
- for (const preprocessorVariant of preprocessorVariants) {
- let source = rawSource;
+ const preprocessedTextVariants = this._getTextVariants(rawSource, textPreprocessors, this._getTextReplacementsVariants(options), sourceCache);
- const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (preprocessorVariant.get('textReplacements'));
- if (textReplacements !== null) {
- source = this._applyTextReplacements(source, textReplacements);
- }
-
- source = this._applyTextProcessors(textPreprocessors, preprocessorVariant, source, sourceCache);
-
- if (used.has(source)) { continue; }
- used.add(source);
+ for (const source of preprocessedTextVariants) {
for (const deinflection of this._multiLanguageTransformer.transform(language, source)) {
const {trace, conditions} = deinflection;
- for (const postprocessorVariant of postprocessorVariants) {
- let {text: transformedText} = deinflection;
- transformedText = this._applyTextProcessors(textPostprocessors, postprocessorVariant, transformedText, sourceCache);
-
+ const postprocessedTextVariants = this._getTextVariants(deinflection.text, textPostprocessors, [null], sourceCache);
+ for (const transformedText of postprocessedTextVariants) {
/** @type {import('dictionary').InflectionRuleChainCandidate} */
const inflectionRuleChainCandidate = {
source: 'algorithm',
@@ -497,36 +469,59 @@ export class Translator {
}
/**
- * @param {import('language').TextProcessorWithId<unknown>[]} textProcessors
- * @param {import('translation-internal').TextProcessorVariant} processorVariant
* @param {string} text
+ * @param {import('language').TextProcessorWithId<unknown>[]} textProcessors
+ * @param {(import('translation').FindTermsTextReplacement[] | null)[]} textReplacements
* @param {import('translation-internal').TextCache} textCache
- * @returns {string}
- */
- _applyTextProcessors(textProcessors, processorVariant, text, textCache) {
- for (const {id, textProcessor: {process}} of textProcessors) {
- const setting = processorVariant.get(id);
-
- let level1 = textCache.get(text);
- if (!level1) {
- level1 = new Map();
- textCache.set(text, level1);
+ * @returns {Set<string>}
+ */
+ _getTextVariants(text, textProcessors, textReplacements, textCache) { // Collects the distinct strings obtained from `text` via the replacement groups and every processor option.
+ let variants = new Set([text]); // seed with the unmodified text; the Set drops duplicate variants
+ for (const textReplacement of textReplacements) {
+ if (textReplacement === null) { continue; } // a null entry contributes no additional variant
+ variants.add(this._applyTextReplacements(text, textReplacement)); // each group is applied to the original text, not to other variants
+ }
+ for (const {id, textProcessor: {process, options}} of textProcessors) {
+ /** @type {Set<string>} */
+ const newVariants = new Set();
+ for (const variant of variants) {
+ for (const option of options) { // try every option value of this processor on every current variant
+ const processed = this._getProcessedText(textCache, variant, id, option, process);
+ newVariants.add(processed);
+ }
}
+ variants = newVariants; // the next processor runs on this processor's outputs
+ }
+ return variants;
+ }
- let level2 = level1.get(id);
- if (!level2) {
- level2 = new Map();
- level1.set(id, level2);
- }
+ /**
+ * @param {import('translation-internal').TextCache} textCache
+ * @param {string} text
+ * @param {string} id
+ * @param {unknown} setting
+ * @param {import('language').TextProcessorFunction} process
+ * @returns {string}
+ */
+ _getProcessedText(textCache, text, id, setting, process) { // Memoized process(text, setting); the cache is nested text -> id -> setting.
+ let level1 = textCache.get(text); // first level: keyed by the input text
+ if (!level1) {
+ level1 = new Map();
+ textCache.set(text, level1);
+ }
- if (!level2.has(setting)) {
- text = process(text, setting);
- level2.set(setting, text);
- } else {
- text = level2.get(setting) || '';
- }
+ let level2 = level1.get(id); // second level: keyed by the processor id
+ if (!level2) {
+ level2 = new Map();
+ level1.set(id, level2);
}
+ if (!level2.has(setting)) { // cache miss: run the processor and remember its output
+ text = process(text, setting);
+ level2.set(setting, text);
+ } else {
+ text = level2.get(setting) || ''; // cache hit; NOTE(review): the `|| ''` fallback presumably only satisfies the type checker, since has() was already checked - confirm
+ }
return text;
}
@@ -1361,39 +1356,6 @@ export class Translator {
}
/**
- * @param {Map<string, unknown[]>} arrayVariants
- * @returns {import('translation-internal').TextProcessorVariant[]}
- */
- _getArrayVariants(arrayVariants) {
- /** @type {import('translation-internal').TextProcessorVariant[]} */
- const results = [];
- const variantKeys = [...arrayVariants.keys()];
- const entryVariantLengths = [];
- for (const key of variantKeys) {
- const entryVariants = /** @type {unknown[]} */ (arrayVariants.get(key));
- entryVariantLengths.push(entryVariants.length);
- }
- const totalVariants = entryVariantLengths.reduce((acc, length) => acc * length, 1);
-
- for (let variantIndex = 0; variantIndex < totalVariants; ++variantIndex) {
- /** @type {import('translation-internal').TextProcessorVariant}} */
- const variant = new Map();
- let remainingIndex = variantIndex;
-
- for (let keyIndex = 0; keyIndex < variantKeys.length; ++keyIndex) {
- const key = variantKeys[keyIndex];
- const entryVariants = /** @type {unknown[]} */ (arrayVariants.get(key));
- const entryIndex = remainingIndex % entryVariants.length;
- variant.set(key, entryVariants[entryIndex]);
- remainingIndex = Math.floor(remainingIndex / entryVariants.length);
- }
-
- results.push(variant);
- }
- return results;
- }
-
- /**
* @param {unknown[]} array
* @returns {string}
*/