summaryrefslogtreecommitdiff
path: root/ext/js/language/translator.js
diff options
context:
space:
mode:
author StefanVukovic99 <stefanvukovic44@gmail.com> 2024-04-21 17:15:08 +0200
committer GitHub <noreply@github.com> 2024-04-21 15:15:08 +0000
commit 07258ecc35c1a05aa1581a54c9f47a40ce3d76c9 (patch)
tree 0a73bc6c1224710906ef3cded2a19399fc626f12 /ext/js/language/translator.js
parent 22904d166d5ea33667458ccd0fde36e77d0ff65d (diff)
rework text processors (#793) 24.4.21.0
* rework text processors * rename text-preprocessors file * Fix search header left margins on small screens (#839) * Refocuses search input on backspace (#840) Fixes #775. Note that this behavior gets overridden if backspace is set as a shortcut action. * Change hotkey triggering condition to account for IME usage (#837) _isKeyCharacterInput only worked when not using an IME, as inside of an IME when a keydown event is fired, the key is reported as "Process", which does not have a key.length equal to 1. This resulted in hotkeys being triggered while typing, which this commit fixes. --------- Co-authored-by: James Maa <jmaa@berkeley.edu> Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com> Co-authored-by: Andrew Thomas Sartor <andrew@sartor.net>
Diffstat (limited to 'ext/js/language/translator.js')
-rw-r--r-- ext/js/language/translator.js 147
1 files changed, 93 insertions, 54 deletions
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index 6132ee82..845d53d5 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -17,10 +17,9 @@
*/
import {applyTextReplacement} from '../general/regex-util.js';
-import {TextSourceMap} from '../general/text-source-map.js';
import {isCodePointJapanese} from './ja/japanese.js';
import {LanguageTransformer} from './language-transformer.js';
-import {getAllLanguageTextPreprocessors} from './languages.js';
+import {getAllLanguageTextProcessors} from './languages.js';
import {MultiLanguageTransformer} from './multi-language-transformer.js';
/**
@@ -41,8 +40,8 @@ export class Translator {
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
/** @type {RegExp} */
this._numberRegex = /[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/;
- /** @type {Map<string, {textPreprocessors: import('language').TextPreprocessorWithId<unknown>[], optionSpace: import('translation-internal').PreprocessorOptionsSpace}>} */
- this._textPreprocessors = new Map();
+ /** @type {import('translation-internal').TextProcessorMap} */
+ this._textProcessors = new Map();
}
/**
@@ -50,13 +49,19 @@ export class Translator {
*/
prepare() {
this._multiLanguageTransformer.prepare();
- for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) {
- /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */
- const optionSpace = new Map();
- for (const {id, textPreprocessor} of textPreprocessors) {
- optionSpace.set(id, textPreprocessor.options);
+ for (const {iso, textPreprocessors = [], textPostprocessors = []} of getAllLanguageTextProcessors()) {
+ /** @type {import('translation-internal').TextProcessorOptionsSpace} */
+ const preprocessorOptionsSpace = new Map();
+ /** @type {import('translation-internal').TextProcessorOptionsSpace} */
+ const postprocessorOptionsSpace = new Map();
+
+ for (const {id, textProcessor} of textPreprocessors) {
+ preprocessorOptionsSpace.set(id, textProcessor.options);
}
- this._textPreprocessors.set(iso, {textPreprocessors, optionSpace});
+ for (const {id, textProcessor} of textPostprocessors) {
+ postprocessorOptionsSpace.set(id, textProcessor.options);
+ }
+ this._textProcessors.set(iso, {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace});
}
}
@@ -428,7 +433,7 @@ export class Translator {
}
}
- // Deinflections and text preprocessing
+ // Deinflections and text processing
/**
* @param {string} text
@@ -438,57 +443,90 @@ export class Translator {
*/
_getAlgorithmDeinflections(text, options) {
const {language} = options;
- const info = this._textPreprocessors.get(language);
+ const info = this._textProcessors.get(language);
if (typeof info === 'undefined') { throw new Error(`Unsupported language: ${language}`); }
- const {textPreprocessors, optionSpace: textPreprocessorOptionsSpace} = info;
+ const {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace} = info;
- /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */
- const variantSpace = new Map();
- variantSpace.set('textReplacements', this._getTextReplacementsVariants(options));
- for (const [key, value] of textPreprocessorOptionsSpace) {
- variantSpace.set(key, value);
- }
+ const preprocessorVariantSpace = new Map(preprocessorOptionsSpace);
+ preprocessorVariantSpace.set('textReplacements', this._getTextReplacementsVariants(options));
+ const preprocessorVariants = this._getArrayVariants(preprocessorVariantSpace);
+ const postprocessorVariants = this._getArrayVariants(postprocessorOptionsSpace);
/** @type {import('translation-internal').DatabaseDeinflection[]} */
const deinflections = [];
const used = new Set();
+ /** @type {Map<string, import('core').SafeAny>} */
+ const sourceCache = new Map(); // For reusing text processors' outputs
+
+ for (
+ let i = text.length;
+ i > 0;
+ i = this._getNextSubstringLength(options.searchResolution, i, text)
+ ) {
+ const rawSource = text.substring(0, i);
+
+ for (const preprocessorVariant of preprocessorVariants) {
+ let source = rawSource;
+
+ const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (preprocessorVariant.get('textReplacements'));
+ if (textReplacements !== null) {
+ source = this._applyTextReplacements(source, textReplacements);
+ }
- for (const arrayVariant of this._generateArrayVariants(variantSpace)) {
- const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (arrayVariant.get('textReplacements'));
+ source = this._applyTextProcessors(textPreprocessors, preprocessorVariant, source, sourceCache);
- let text2 = text;
- const sourceMap = new TextSourceMap(text2);
+ if (used.has(source)) { continue; }
+ used.add(source);
+ for (const deinflection of this._multiLanguageTransformer.transform(language, source)) {
+ const {trace, conditions} = deinflection;
+ for (const postprocessorVariant of postprocessorVariants) {
+ let {text: transformedText} = deinflection;
+ transformedText = this._applyTextProcessors(textPostprocessors, postprocessorVariant, transformedText, sourceCache);
+
+ /** @type {import('dictionary').InflectionRuleChainCandidate} */
+ const inflectionRuleChainCandidate = {
+ source: 'algorithm',
+ inflectionRules: trace.map((frame) => frame.transform)
+ };
+ deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, [inflectionRuleChainCandidate]));
+ }
+ }
+ }
+ }
+ return deinflections;
+ }
- if (textReplacements !== null) {
- text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
+ /**
+ * @param {import('language').TextProcessorWithId<unknown>[]} textProcessors
+ * @param {Map<string, unknown>} processorVariant
+ * @param {string} text
+ * @param {Map<string, import('core').SafeAny>} textCache
+ * @returns {string}
+ */
+ _applyTextProcessors(textProcessors, processorVariant, text, textCache) {
+ for (const {id, textProcessor: {process}} of textProcessors) {
+ const setting = processorVariant.get(id);
+ let level1 = textCache.get(text);
+ if (!level1) {
+ level1 = new Map();
+ textCache.set(text, level1);
}
- for (const preprocessor of textPreprocessors.values()) {
- const {id, textPreprocessor} = preprocessor;
- const setting = arrayVariant.get(id);
- text2 = textPreprocessor.process(text2, setting, sourceMap);
+ let level2 = level1.get(id);
+ if (!level2) {
+ level2 = new Map();
+ level1.set(id, level2);
}
- for (
- let source = text2, i = text2.length;
- i > 0;
- i = this._getNextSubstringLength(options.searchResolution, i, source)
- ) {
- source = text2.substring(0, i);
- if (used.has(source)) { break; }
- used.add(source);
- const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
- for (const {text: transformedText, conditions, trace} of this._multiLanguageTransformer.transform(language, source)) {
- /** @type {import('dictionary').InflectionRuleChainCandidate} */
- const inflectionRuleChainCandidate = {
- source: 'algorithm',
- inflectionRules: trace.map((frame) => frame.transform)
- };
- deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, [inflectionRuleChainCandidate]));
- }
+ if (!level2.has(setting)) {
+ text = process(text, setting);
+ level2.set(setting, text);
+ } else {
+ text = level2.get(setting);
}
}
- return deinflections;
+
+ return text;
}
/**
@@ -507,13 +545,12 @@ export class Translator {
/**
* @param {string} text
- * @param {TextSourceMap} sourceMap
* @param {import('translation').FindTermsTextReplacement[]} replacements
* @returns {string}
*/
- _applyTextReplacements(text, sourceMap, replacements) {
+ _applyTextReplacements(text, replacements) {
for (const {pattern, replacement} of replacements) {
- text = applyTextReplacement(text, sourceMap, pattern, replacement);
+ text = applyTextReplacement(text, pattern, replacement);
}
return text;
}
@@ -1325,10 +1362,11 @@ export class Translator {
/**
* @param {Map<string, unknown[]>} arrayVariants
- * @yields {Map<string, unknown>}
- * @returns {Generator<Map<string, unknown>, void, void>}
+ * @returns {Map<string, unknown>[]}
*/
- *_generateArrayVariants(arrayVariants) {
+ _getArrayVariants(arrayVariants) {
+ /** @type {Map<string, unknown>[]} */
+ const results = [];
const variantKeys = [...arrayVariants.keys()];
const entryVariantLengths = [];
for (const key of variantKeys) {
@@ -1350,8 +1388,9 @@ export class Translator {
remainingIndex = Math.floor(remainingIndex / entryVariants.length);
}
- yield variant;
+ results.push(variant);
}
+ return results;
}
/**