diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-04-21 17:15:08 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-21 15:15:08 +0000 |
commit | 07258ecc35c1a05aa1581a54c9f47a40ce3d76c9 (patch) | |
tree | 0a73bc6c1224710906ef3cded2a19399fc626f12 /ext/js/language/translator.js | |
parent | 22904d166d5ea33667458ccd0fde36e77d0ff65d (diff) |
rework text processors (#793)24.4.21.0
* rework text processors
* rename text-preprocessors file
* Fix search header left margins on small screens (#839)
* Refocuses search input on backspace (#840)
Fixes #775. Note that this behavior gets overridden if backspace is set
as a shortcut action.
* Change hotkey triggering condition to account for IME usage (#837)
_isKeyCharacterInput only worked when not using an IME, as inside of an
IME when a keydown event is fired, the key is reported as "Process",
which does not have a key.length equal to 1. This resulted in hotkeys
being triggered while typing, which this commit fixes.
---------
Co-authored-by: James Maa <jmaa@berkeley.edu>
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Co-authored-by: Andrew Thomas Sartor <andrew@sartor.net>
Diffstat (limited to 'ext/js/language/translator.js')
-rw-r--r-- | ext/js/language/translator.js | 147 |
1 files changed, 93 insertions, 54 deletions
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 6132ee82..845d53d5 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -17,10 +17,9 @@ */ import {applyTextReplacement} from '../general/regex-util.js'; -import {TextSourceMap} from '../general/text-source-map.js'; import {isCodePointJapanese} from './ja/japanese.js'; import {LanguageTransformer} from './language-transformer.js'; -import {getAllLanguageTextPreprocessors} from './languages.js'; +import {getAllLanguageTextProcessors} from './languages.js'; import {MultiLanguageTransformer} from './multi-language-transformer.js'; /** @@ -41,8 +40,8 @@ export class Translator { this._stringComparer = new Intl.Collator('en-US'); // Invariant locale /** @type {RegExp} */ this._numberRegex = /[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/; - /** @type {Map<string, {textPreprocessors: import('language').TextPreprocessorWithId<unknown>[], optionSpace: import('translation-internal').PreprocessorOptionsSpace}>} */ - this._textPreprocessors = new Map(); + /** @type {import('translation-internal').TextProcessorMap} */ + this._textProcessors = new Map(); } /** @@ -50,13 +49,19 @@ export class Translator { */ prepare() { this._multiLanguageTransformer.prepare(); - for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) { - /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */ - const optionSpace = new Map(); - for (const {id, textPreprocessor} of textPreprocessors) { - optionSpace.set(id, textPreprocessor.options); + for (const {iso, textPreprocessors = [], textPostprocessors = []} of getAllLanguageTextProcessors()) { + /** @type {import('translation-internal').TextProcessorOptionsSpace}>} */ + const preprocessorOptionsSpace = new Map(); + /** @type {import('translation-internal').TextProcessorOptionsSpace}>} */ + const postprocessorOptionsSpace = new Map(); + + for (const {id, textProcessor} of textPreprocessors) { + preprocessorOptionsSpace.set(id, textProcessor.options); } - this._textPreprocessors.set(iso, {textPreprocessors, optionSpace}); + for (const {id, textProcessor} of textPostprocessors) { + postprocessorOptionsSpace.set(id, textProcessor.options); + } + this._textProcessors.set(iso, {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace}); } } @@ -428,7 +433,7 @@ export class Translator { } } - // Deinflections and text preprocessing + // Deinflections and text processing /** * @param {string} text @@ -438,57 +443,90 @@ export class Translator { */ _getAlgorithmDeinflections(text, options) { const {language} = options; - const info = this._textPreprocessors.get(language); + const info = this._textProcessors.get(language); if (typeof info === 'undefined') { throw new Error(`Unsupported language: ${language}`); } - const {textPreprocessors, optionSpace: textPreprocessorOptionsSpace} = info; + const {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace} = info; - /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */ - const variantSpace = new Map(); - variantSpace.set('textReplacements', this._getTextReplacementsVariants(options)); - for (const [key, value] of textPreprocessorOptionsSpace) { - variantSpace.set(key, value); - } + const preprocessorVariantSpace = new Map(preprocessorOptionsSpace); + preprocessorVariantSpace.set('textReplacements', this._getTextReplacementsVariants(options)); + const preprocessorVariants = this._getArrayVariants(preprocessorVariantSpace); + const postprocessorVariants = this._getArrayVariants(postprocessorOptionsSpace); /** @type {import('translation-internal').DatabaseDeinflection[]} */ const deinflections = []; const used = new Set(); + /** @type {Map<string, import('core').SafeAny>} */ + const sourceCache = new Map(); // For reusing text processors' outputs + + for ( + let i = text.length; + i > 0; + i = this._getNextSubstringLength(options.searchResolution, i, text) + ) { + const rawSource = text.substring(0, i); + + for (const preprocessorVariant of preprocessorVariants) { + let source = rawSource; + + const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (preprocessorVariant.get('textReplacements')); + if (textReplacements !== null) { + source = this._applyTextReplacements(source, textReplacements); + } - for (const arrayVariant of this._generateArrayVariants(variantSpace)) { - const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (arrayVariant.get('textReplacements')); + source = this._applyTextProcessors(textPreprocessors, preprocessorVariant, source, sourceCache); - let text2 = text; - const sourceMap = new TextSourceMap(text2); + if (used.has(source)) { continue; } + used.add(source); + for (const deinflection of this._multiLanguageTransformer.transform(language, source)) { + const {trace, conditions} = deinflection; + for (const postprocessorVariant of postprocessorVariants) { + let {text: transformedText} = deinflection; + transformedText = this._applyTextProcessors(textPostprocessors, postprocessorVariant, transformedText, sourceCache); + + /** @type {import('dictionary').InflectionRuleChainCandidate} */ + const inflectionRuleChainCandidate = { + source: 'algorithm', + inflectionRules: trace.map((frame) => frame.transform) + }; + deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, [inflectionRuleChainCandidate])); + } + } + } + } + return deinflections; + } - if (textReplacements !== null) { - text2 = this._applyTextReplacements(text2, sourceMap, textReplacements); + /** + * @param {import('language').TextProcessorWithId<unknown>[]} textProcessors + * @param {Map<string, unknown>} processorVariant + * @param {string} text + * @param {Map<string, import('core').SafeAny>} textCache + * @returns {string} + */ + _applyTextProcessors(textProcessors, processorVariant, text, textCache) { + for (const {id, textProcessor: {process}} of textProcessors) { + const setting = processorVariant.get(id); + let level1 = textCache.get(text); + if (!level1) { + level1 = new Map(); + textCache.set(text, level1); } - for (const preprocessor of textPreprocessors.values()) { - const {id, textPreprocessor} = preprocessor; - const setting = arrayVariant.get(id); - text2 = textPreprocessor.process(text2, setting, sourceMap); + let level2 = level1.get(id); + if (!level2) { + level2 = new Map(); + level1.set(id, level2); } - for ( - let source = text2, i = text2.length; - i > 0; - i = this._getNextSubstringLength(options.searchResolution, i, source) - ) { - source = text2.substring(0, i); - if (used.has(source)) { break; } - used.add(source); - const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); - for (const {text: transformedText, conditions, trace} of this._multiLanguageTransformer.transform(language, source)) { - /** @type {import('dictionary').InflectionRuleChainCandidate} */ - const inflectionRuleChainCandidate = { - source: 'algorithm', - inflectionRules: trace.map((frame) => frame.transform) - }; - deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, [inflectionRuleChainCandidate])); - } + if (!level2.has(setting)) { + text = process(text, setting); + level2.set(setting, text); + } else { + text = level2.get(setting); } } - return deinflections; + + return text; } /** @@ -507,13 +545,12 @@ export class Translator { /** * @param {string} text - * @param {TextSourceMap} sourceMap * @param {import('translation').FindTermsTextReplacement[]} replacements * @returns {string} */ - _applyTextReplacements(text, sourceMap, replacements) { + _applyTextReplacements(text, replacements) { for (const {pattern, replacement} of replacements) { - text = applyTextReplacement(text, sourceMap, pattern, replacement); + text = applyTextReplacement(text, pattern, replacement); } return text; } @@ -1325,10 +1362,11 @@ export class Translator { /** * @param {Map<string, unknown[]>} arrayVariants - * @yields {Map<string, unknown>} - * @returns {Generator<Map<string, unknown>, void, void>} + * @returns {Map<string, unknown>[]} */ - *_generateArrayVariants(arrayVariants) { + _getArrayVariants(arrayVariants) { + /** @type {Map<string, unknown>[]} */ + const results = []; const variantKeys = [...arrayVariants.keys()]; const entryVariantLengths = []; for (const key of variantKeys) { @@ -1350,8 +1388,9 @@ export class Translator { remainingIndex = Math.floor(remainingIndex / entryVariants.length); } - yield variant; + results.push(variant); } + return results; } /** |