diff options
Diffstat (limited to 'ext/js')
-rw-r--r-- | ext/js/general/regex-util.js | 12 | ||||
-rw-r--r-- | ext/js/general/text-source-map.js | 153 | ||||
-rw-r--r-- | ext/js/language/ar/arabic-text-preprocessors.js | 6 | ||||
-rw-r--r-- | ext/js/language/ja/japanese-text-preprocessors.js | 24 | ||||
-rw-r--r-- | ext/js/language/ja/japanese-wanakana.js | 52 | ||||
-rw-r--r-- | ext/js/language/ja/japanese.js | 15 | ||||
-rw-r--r-- | ext/js/language/language-descriptors.js | 11 | ||||
-rwxr-xr-x | ext/js/language/languages.js | 20 | ||||
-rw-r--r-- | ext/js/language/ru/russian-text-preprocessors.js | 10 | ||||
-rwxr-xr-x | ext/js/language/text-processors.js (renamed from ext/js/language/text-preprocessors.js) | 16 | ||||
-rw-r--r-- | ext/js/language/translator.js | 147 |
11 files changed, 147 insertions, 319 deletions
diff --git a/ext/js/general/regex-util.js b/ext/js/general/regex-util.js index e0982154..c633ec06 100644 --- a/ext/js/general/regex-util.js +++ b/ext/js/general/regex-util.js @@ -23,13 +23,12 @@ const matchReplacementPattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g; * Applies string.replace using a regular expression and replacement string as arguments. * A source map of the changes is also maintained. * @param {string} text A string of the text to replace. - * @param {import('./text-source-map.js').TextSourceMap} sourceMap An instance of `TextSourceMap` which corresponds to `text`. * @param {RegExp} pattern A regular expression to use as the replacement. * @param {string} replacement A replacement string that follows the format of the standard * JavaScript regular expression replacement string. * @returns {string} A new string with the pattern replacements applied and the source map updated. */ -export function applyTextReplacement(text, sourceMap, pattern, replacement) { +export function applyTextReplacement(text, pattern, replacement) { const isGlobal = pattern.global; if (isGlobal) { pattern.lastIndex = 0; } for (let loop = true; loop; loop = isGlobal) { @@ -44,15 +43,6 @@ export function applyTextReplacement(text, sourceMap, pattern, replacement) { text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`; pattern.lastIndex += delta; - - if (actualReplacementLength > 0) { - /** @type {number[]} */ - const zeroes = new Array(actualReplacementLength).fill(0); - sourceMap.insert(index, ...zeroes); - sourceMap.combine(index - 1 + actualReplacementLength, matchText.length); - } else { - sourceMap.combine(index, matchText.length); - } } return text; } diff --git a/ext/js/general/text-source-map.js b/ext/js/general/text-source-map.js deleted file mode 100644 index 527c232b..00000000 --- a/ext/js/general/text-source-map.js +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (C) 2023-2024 Yomitan Authors - * Copyright (C) 2020-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -export class TextSourceMap { - /** - * @param {string} source - * @param {number[]|null} [mapping=null] - */ - constructor(source, mapping = null) { - /** @type {string} */ - this._source = source; - /** @type {?number[]} */ - this._mapping = (mapping !== null ? TextSourceMap.normalizeMapping(mapping) : null); - } - - /** @type {string} */ - get source() { - return this._source; - } - - /** - * @param {unknown} other - * @returns {boolean} - */ - equals(other) { - if (this === other) { - return true; - } - - const source = this._source; - if (!(other instanceof TextSourceMap && source === other.source)) { - return false; - } - - let mapping = this._mapping; - let otherMapping = other.getMappingCopy(); - if (mapping === null) { - if (otherMapping === null) { - return true; - } - mapping = TextSourceMap.createMapping(source); - } else if (otherMapping === null) { - otherMapping = TextSourceMap.createMapping(source); - } - - const mappingLength = mapping.length; - if (mappingLength !== otherMapping.length) { - return false; - } - - for (let i = 0; i < mappingLength; ++i) { - if (mapping[i] !== otherMapping[i]) { - return false; - } - } - - return true; - } - - /** - * @param {number} finalLength - * @returns {number} - */ - getSourceLength(finalLength) { - const mapping = this._mapping; - if (mapping === null) { - return finalLength; - } - - let sourceLength = 0; - for (let i = 0; i < finalLength; ++i) { - sourceLength += mapping[i]; - } - return sourceLength; - } - - /** - * @param {number} index - * @param {number} count - */ - combine(index, count) { - if (count <= 0) { return; } - - if (this._mapping === null) { - this._mapping = TextSourceMap.createMapping(this._source); - } - - let sum = this._mapping[index]; - const parts = this._mapping.splice(index + 1, count); - for (const part of parts) { - sum += part; - } - this._mapping[index] = sum; - } - - /** - * @param {number} index - * @param {number[]} items - */ - insert(index, ...items) { - if (this._mapping === null) { - this._mapping = TextSourceMap.createMapping(this._source); - } - - this._mapping.splice(index, 0, ...items); - } - - /** - * @returns {?number[]} - */ - getMappingCopy() { - return this._mapping !== null ? [...this._mapping] : null; - } - - /** - * @param {string} text - * @returns {number[]} - */ - static createMapping(text) { - return new Array(text.length).fill(1); - } - - /** - * @param {number[]} mapping - * @returns {number[]} - */ - static normalizeMapping(mapping) { - const result = []; - for (const value of mapping) { - result.push( - (typeof value === 'number' && Number.isFinite(value)) ? - Math.floor(value) : - 0 - ); - } - return result; - } -} diff --git a/ext/js/language/ar/arabic-text-preprocessors.js b/ext/js/language/ar/arabic-text-preprocessors.js index 6007d770..91535ccd 100644 --- a/ext/js/language/ar/arabic-text-preprocessors.js +++ b/ext/js/language/ar/arabic-text-preprocessors.js @@ -15,7 +15,7 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -import {basicTextPreprocessorOptions} from '../text-preprocessors.js'; +import {basicTextProcessorOptions} from '../text-processors.js'; const optionalDiacritics = [ '\u0618', // Small Fatha @@ -38,11 +38,11 @@ const optionalDiacritics = [ const diacriticsRegex = new RegExp(`[${optionalDiacritics.join('')}]`, 'g'); -/** @type {import('language').TextPreprocessor<boolean>} */ +/** @type {import('language').TextProcessor<boolean>} */ export const removeArabicScriptDiacritics = { name: 'Remove diacritics', description: 'وَلَدَ ⬅️ ولد', - options: basicTextPreprocessorOptions, + options: basicTextProcessorOptions, process: (text, setting) => { return setting ? text.replace(diacriticsRegex, '') : text; } diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js index 06f944c1..b3d50817 100644 --- a/ext/js/language/ja/japanese-text-preprocessors.js +++ b/ext/js/language/ja/japanese-text-preprocessors.js @@ -15,7 +15,7 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -import {basicTextPreprocessorOptions} from '../text-preprocessors.js'; +import {basicTextProcessorOptions} from '../text-processors.js'; import {convertAlphabeticToKana} from './japanese-wanakana.js'; import { collapseEmphaticSequences as collapseEmphaticSequencesFunction, @@ -25,28 +25,28 @@ import { convertNumericToFullWidth } from './japanese.js'; -/** @type {import('language').TextPreprocessor<boolean>} */ +/** @type {import('language').TextProcessor<boolean>} */ export const convertHalfWidthCharacters = { name: 'Convert half width characters to full width', description: 'ヨミチャン → ヨミチャン', - options: basicTextPreprocessorOptions, - process: (str, setting, sourceMap) => (setting ? convertHalfWidthKanaToFullWidth(str, sourceMap) : str) + options: basicTextProcessorOptions, + process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str) }; -/** @type {import('language').TextPreprocessor<boolean>} */ +/** @type {import('language').TextProcessor<boolean>} */ export const convertNumericCharacters = { name: 'Convert numeric characters to full width', description: '1234 → 1234', - options: basicTextPreprocessorOptions, + options: basicTextProcessorOptions, process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str) }; -/** @type {import('language').TextPreprocessor<boolean>} */ +/** @type {import('language').TextProcessor<boolean>} */ export const convertAlphabeticCharacters = { name: 'Convert alphabetic characters to hiragana', description: 'yomichan → よみちゃん', - options: basicTextPreprocessorOptions, - process: (str, setting, sourceMap) => (setting ? convertAlphabeticToKana(str, sourceMap) : str) + options: basicTextProcessorOptions, + process: (str, setting) => (setting ? convertAlphabeticToKana(str) : str) }; /** @type {import('language').BidirectionalConversionPreprocessor} */ @@ -66,15 +66,15 @@ export const convertHiraganaToKatakana = { } }; -/** @type {import('language').TextPreprocessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */ +/** @type {import('language').TextProcessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */ export const collapseEmphaticSequences = { name: 'Collapse emphatic character sequences', description: 'すっっごーーい → すっごーい / すごい', options: [[false, false], [true, false], [true, true]], - process: (str, setting, sourceMap) => { + process: (str, setting) => { const [collapseEmphatic, collapseEmphaticFull] = setting; if (collapseEmphatic) { - str = collapseEmphaticSequencesFunction(str, collapseEmphaticFull, sourceMap); + str = collapseEmphaticSequencesFunction(str, collapseEmphaticFull); } return str; } diff --git a/ext/js/language/ja/japanese-wanakana.js b/ext/js/language/ja/japanese-wanakana.js index 32260489..a87db6b7 100644 --- a/ext/js/language/ja/japanese-wanakana.js +++ b/ext/js/language/ja/japanese-wanakana.js @@ -19,51 +19,10 @@ import * as wanakana from '../../../lib/wanakana.js'; /** * @param {string} text - * @param {?import('../../general/text-source-map.js').TextSourceMap} sourceMap - * @param {number} sourceMapStart * @returns {string} */ -function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) { - const result = wanakana.toHiragana(text); - - // Generate source mapping - if (sourceMap !== null) { - let i = 0; - let resultPos = 0; - const ii = text.length; - while (i < ii) { - // Find smallest matching substring - let iNext = i + 1; - let resultPosNext = result.length; - while (iNext < ii) { - const t = wanakana.toHiragana(text.substring(0, iNext)); - if (t === result.substring(0, t.length)) { - resultPosNext = t.length; - break; - } - ++iNext; - } - - // Merge characters - const removals = iNext - i - 1; - if (removals > 0) { - sourceMap.combine(sourceMapStart, removals); - } - ++sourceMapStart; - - // Empty elements - const additions = resultPosNext - resultPos - 1; - for (let j = 0; j < additions; ++j) { - sourceMap.insert(sourceMapStart, 0); - ++sourceMapStart; - } - - i = iNext; - resultPos = resultPosNext; - } - } - - return result; +function convertAlphabeticPartToKana(text) { + return wanakana.toHiragana(text); } /** @@ -84,10 +43,9 @@ export function convertToRomaji(text) { /** * @param {string} text - * @param {?import('../../general/text-source-map.js').TextSourceMap} sourceMap * @returns {string} */ -export function convertAlphabeticToKana(text, sourceMap = null) { +export function convertAlphabeticToKana(text) { let part = ''; let result = ''; @@ -106,7 +64,7 @@ export function convertAlphabeticToKana(text, sourceMap = null) { c = 0x2d; // '-' } else { if (part.length > 0) { - result += convertAlphabeticPartToKana(part, sourceMap, result.length); + result += convertAlphabeticPartToKana(part); part = ''; } result += char; @@ -116,7 +74,7 @@ export function convertAlphabeticToKana(text, sourceMap = null) { } if (part.length > 0) { - result += convertAlphabeticPartToKana(part, sourceMap, result.length); + result += convertAlphabeticPartToKana(part); } return result; } diff --git a/ext/js/language/ja/japanese.js b/ext/js/language/ja/japanese.js index 2c9a1f7f..3507e5df 100644 --- a/ext/js/language/ja/japanese.js +++ b/ext/js/language/ja/japanese.js @@ -539,10 +539,9 @@ export function convertNumericToFullWidth(text) { /** * @param {string} text - * @param {?import('../../general/text-source-map.js').TextSourceMap} [sourceMap] * @returns {string} */ -export function convertHalfWidthKanaToFullWidth(text, sourceMap = null) { +export function convertHalfWidthKanaToFullWidth(text) { let result = ''; // This function is safe to use charCodeAt instead of codePointAt, since all @@ -575,9 +574,6 @@ export function convertHalfWidthKanaToFullWidth(text, sourceMap = null) { } } - if (sourceMap !== null && index > 0) { - sourceMap.combine(result.length, 1); - } result += c2; } @@ -705,13 +701,11 @@ export function distributeFuriganaInflected(term, reading, source) { /** * @param {string} text * @param {boolean} fullCollapse - * @param {?import('../../general/text-source-map.js').TextSourceMap} [sourceMap] * @returns {string} */ -export function collapseEmphaticSequences(text, fullCollapse, sourceMap = null) { +export function collapseEmphaticSequences(text, fullCollapse) { let result = ''; let collapseCodePoint = -1; - const hasSourceMap = (sourceMap !== null); for (const char of text) { const c = char.codePointAt(0); if ( @@ -729,11 +723,6 @@ export function collapseEmphaticSequences(text, fullCollapse, sourceMap = null) } else { collapseCodePoint = -1; result += char; - continue; - } - - if (hasSourceMap) { - sourceMap.combine(Math.max(0, result.length - 1), 1); } } return result; diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index c5c3e01e..2df2f794 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -25,7 +25,7 @@ import {isStringPartiallyJapanese} from './ja/japanese.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {oldIrishTransforms} from './sga/old-irish-transforms.js'; import {albanianTransforms} from './sq/albanian-transforms.js'; -import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-preprocessors.js'; +import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-processors.js'; const capitalizationPreprocessors = { decapitalize, @@ -138,8 +138,7 @@ const languageDescriptors = [ { iso: 'km', name: 'Khmer', - exampleText: 'អាន', - textPreprocessors: {} + exampleText: 'អាន' }, { iso: 'pl', @@ -201,8 +200,7 @@ const languageDescriptors = [ { iso: 'th', name: 'Thai', - exampleText: 'อ่าน', - textPreprocessors: {} + exampleText: 'อ่าน' }, { iso: 'tr', @@ -219,8 +217,7 @@ const languageDescriptors = [ { iso: 'zh', name: 'Chinese', - exampleText: '读', - textPreprocessors: {} + exampleText: '读' } ]; diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js index 4b196c2c..b3890aa6 100755 --- a/ext/js/language/languages.js +++ b/ext/js/language/languages.js @@ -29,21 +29,29 @@ export function getLanguageSummaries() { } /** - * @returns {import('language').LanguageAndPreprocessors[]} + * @returns {import('language').LanguageAndProcessors[]} * @throws {Error} */ -export function getAllLanguageTextPreprocessors() { +export function getAllLanguageTextProcessors() { const results = []; - for (const {iso, textPreprocessors} of languageDescriptorMap.values()) { - /** @type {import('language').TextPreprocessorWithId<unknown>[]} */ + for (const {iso, textPreprocessors = {}, textPostprocessors = {}} of languageDescriptorMap.values()) { + /** @type {import('language').TextProcessorWithId<unknown>[]} */ const textPreprocessorsArray = []; for (const [id, textPreprocessor] of Object.entries(textPreprocessors)) { textPreprocessorsArray.push({ id, - textPreprocessor: /** @type {import('language').TextPreprocessor<unknown>} */ (textPreprocessor) + textProcessor: /** @type {import('language').TextProcessor<unknown>} */ (textPreprocessor) }); } - results.push({iso, textPreprocessors: textPreprocessorsArray}); + /** @type {import('language').TextProcessorWithId<unknown>[]} */ + const textPostprocessorsArray = []; + for (const [id, textPostprocessor] of Object.entries(textPostprocessors)) { + textPostprocessorsArray.push({ + id, + textProcessor: /** @type {import('language').TextProcessor<unknown>} */ (textPostprocessor) + }); + } + results.push({iso, textPreprocessors: textPreprocessorsArray, textPostprocessors: textPostprocessorsArray}); } return results; } diff --git a/ext/js/language/ru/russian-text-preprocessors.js b/ext/js/language/ru/russian-text-preprocessors.js index fc4472e9..fbda38c7 100644 --- a/ext/js/language/ru/russian-text-preprocessors.js +++ b/ext/js/language/ru/russian-text-preprocessors.js @@ -15,23 +15,23 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -import {basicTextPreprocessorOptions} from '../text-preprocessors.js'; +import {basicTextProcessorOptions} from '../text-processors.js'; -/** @type {import('language').TextPreprocessor<boolean>} */ +/** @type {import('language').TextProcessor<boolean>} */ export const removeRussianDiacritics = { name: 'Remove diacritics', description: 'A\u0301 → A, a\u0301 → a', - options: basicTextPreprocessorOptions, + options: basicTextProcessorOptions, process: (str, setting) => { return setting ? str.replace(/\u0301/g, '') : str; } }; -/** @type {import('language').TextPreprocessor<boolean>} */ +/** @type {import('language').TextProcessor<boolean>} */ export const yoToE = { name: 'Yo to E', description: 'ё → е, Ё → Е', - options: basicTextPreprocessorOptions, + options: basicTextProcessorOptions, process: (str, setting) => { return setting ? str.replace(/ё/g, 'е').replace(/Ё/g, 'Е') : str; } diff --git a/ext/js/language/text-preprocessors.js b/ext/js/language/text-processors.js index e33fccda..e7855df2 100755 --- a/ext/js/language/text-preprocessors.js +++ b/ext/js/language/text-processors.js @@ -15,22 +15,22 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -/** @type {import('language').TextPreprocessorOptions<boolean>} */ -export const basicTextPreprocessorOptions = [false, true]; +/** @type {import('language').TextProcessorOptions<boolean>} */ +export const basicTextProcessorOptions = [false, true]; -/** @type {import('language').TextPreprocessor<boolean>} */ +/** @type {import('language').TextProcessor<boolean>} */ export const decapitalize = { name: 'Decapitalize text', description: 'CAPITALIZED TEXT → capitalized text', - options: basicTextPreprocessorOptions, + options: basicTextProcessorOptions, process: (str, setting) => (setting ? str.toLowerCase() : str) }; -/** @type {import('language').TextPreprocessor<boolean>} */ +/** @type {import('language').TextProcessor<boolean>} */ export const capitalizeFirstLetter = { name: 'Capitalize first letter', description: 'lowercase text → Lowercase text', - options: basicTextPreprocessorOptions, + options: basicTextProcessorOptions, process: (str, setting) => (setting ? str.charAt(0).toUpperCase() + str.slice(1) : str) }; @@ -39,11 +39,11 @@ export const capitalizeFirstLetter = { * as it can result in undesirable normalization: * - '\u9038'.normalize('NFD') => '\u9038' (逸) * - '\ufa67'.normalize('NFD') => '\u9038' (逸 => 逸) - * @type {import('language').TextPreprocessor<boolean>} + * @type {import('language').TextProcessor<boolean>} */ export const removeAlphabeticDiacritics = { name: 'Remove Alphabetic Diacritics', description: 'ἄήé -> αηe', - options: basicTextPreprocessorOptions, + options: basicTextProcessorOptions, process: (str, setting) => (setting ? str.normalize('NFD').replace(/[\u0300-\u036f]/g, '') : str) }; diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 6132ee82..845d53d5 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -17,10 +17,9 @@ */ import {applyTextReplacement} from '../general/regex-util.js'; -import {TextSourceMap} from '../general/text-source-map.js'; import {isCodePointJapanese} from './ja/japanese.js'; import {LanguageTransformer} from './language-transformer.js'; -import {getAllLanguageTextPreprocessors} from './languages.js'; +import {getAllLanguageTextProcessors} from './languages.js'; import {MultiLanguageTransformer} from './multi-language-transformer.js'; /** @@ -41,8 +40,8 @@ export class Translator { this._stringComparer = new Intl.Collator('en-US'); // Invariant locale /** @type {RegExp} */ this._numberRegex = /[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/; - /** @type {Map<string, {textPreprocessors: import('language').TextPreprocessorWithId<unknown>[], optionSpace: import('translation-internal').PreprocessorOptionsSpace}>} */ - this._textPreprocessors = new Map(); + /** @type {import('translation-internal').TextProcessorMap} */ + this._textProcessors = new Map(); } /** @@ -50,13 +49,19 @@ export class Translator { */ prepare() { this._multiLanguageTransformer.prepare(); - for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) { - /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */ - const optionSpace = new Map(); - for (const {id, textPreprocessor} of textPreprocessors) { - optionSpace.set(id, textPreprocessor.options); + for (const {iso, textPreprocessors = [], textPostprocessors = []} of getAllLanguageTextProcessors()) { + /** @type {import('translation-internal').TextProcessorOptionsSpace}>} */ + const preprocessorOptionsSpace = new Map(); + /** @type {import('translation-internal').TextProcessorOptionsSpace}>} */ + const postprocessorOptionsSpace = new Map(); + + for (const {id, textProcessor} of textPreprocessors) { + preprocessorOptionsSpace.set(id, textProcessor.options); } - this._textPreprocessors.set(iso, {textPreprocessors, optionSpace}); + for (const {id, textProcessor} of textPostprocessors) { + postprocessorOptionsSpace.set(id, textProcessor.options); + } + this._textProcessors.set(iso, {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace}); } } @@ -428,7 +433,7 @@ export class Translator { } } - // Deinflections and text preprocessing + // Deinflections and text processing /** * @param {string} text @@ -438,57 +443,90 @@ export class Translator { */ _getAlgorithmDeinflections(text, options) { const {language} = options; - const info = this._textPreprocessors.get(language); + const info = this._textProcessors.get(language); if (typeof info === 'undefined') { throw new Error(`Unsupported language: ${language}`); } - const {textPreprocessors, optionSpace: textPreprocessorOptionsSpace} = info; + const {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace} = info; - /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */ - const variantSpace = new Map(); - variantSpace.set('textReplacements', this._getTextReplacementsVariants(options)); - for (const [key, value] of textPreprocessorOptionsSpace) { - variantSpace.set(key, value); - } + const preprocessorVariantSpace = new Map(preprocessorOptionsSpace); + preprocessorVariantSpace.set('textReplacements', this._getTextReplacementsVariants(options)); + const preprocessorVariants = this._getArrayVariants(preprocessorVariantSpace); + const postprocessorVariants = this._getArrayVariants(postprocessorOptionsSpace); /** @type {import('translation-internal').DatabaseDeinflection[]} */ const deinflections = []; const used = new Set(); + /** @type {Map<string, import('core').SafeAny>} */ + const sourceCache = new Map(); // For reusing text processors' outputs + + for ( + let i = text.length; + i > 0; + i = this._getNextSubstringLength(options.searchResolution, i, text) + ) { + const rawSource = text.substring(0, i); + + for (const preprocessorVariant of preprocessorVariants) { + let source = rawSource; + + const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (preprocessorVariant.get('textReplacements')); + if (textReplacements !== null) { + source = this._applyTextReplacements(source, textReplacements); + } - for (const arrayVariant of this._generateArrayVariants(variantSpace)) { - const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (arrayVariant.get('textReplacements')); + source = this._applyTextProcessors(textPreprocessors, preprocessorVariant, source, sourceCache); - let text2 = text; - const sourceMap = new TextSourceMap(text2); + if (used.has(source)) { continue; } + used.add(source); + for (const deinflection of this._multiLanguageTransformer.transform(language, source)) { + const {trace, conditions} = deinflection; + for (const postprocessorVariant of postprocessorVariants) { + let {text: transformedText} = deinflection; + transformedText = this._applyTextProcessors(textPostprocessors, postprocessorVariant, transformedText, sourceCache); + + /** @type {import('dictionary').InflectionRuleChainCandidate} */ + const inflectionRuleChainCandidate = { + source: 'algorithm', + inflectionRules: trace.map((frame) => frame.transform) + }; + deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, [inflectionRuleChainCandidate])); + } + } + } + } + return deinflections; + } - if (textReplacements !== null) { - text2 = this._applyTextReplacements(text2, sourceMap, textReplacements); + /** + * @param {import('language').TextProcessorWithId<unknown>[]} textProcessors + * @param {Map<string, unknown>} processorVariant + * @param {string} text + * @param {Map<string, import('core').SafeAny>} textCache + * @returns {string} + */ + _applyTextProcessors(textProcessors, processorVariant, text, textCache) { + for (const {id, textProcessor: {process}} of textProcessors) { + const setting = processorVariant.get(id); + let level1 = textCache.get(text); + if (!level1) { + level1 = new Map(); + textCache.set(text, level1); } - for (const preprocessor of textPreprocessors.values()) { - const {id, textPreprocessor} = preprocessor; - const setting = arrayVariant.get(id); - text2 = textPreprocessor.process(text2, setting, sourceMap); + let level2 = level1.get(id); + if (!level2) { + level2 = new Map(); + level1.set(id, level2); } - for ( - let source = text2, i = text2.length; - i > 0; - i = this._getNextSubstringLength(options.searchResolution, i, source) - ) { - source = text2.substring(0, i); - if (used.has(source)) { break; } - used.add(source); - const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); - for (const {text: transformedText, conditions, trace} of this._multiLanguageTransformer.transform(language, source)) { - /** @type {import('dictionary').InflectionRuleChainCandidate} */ - const inflectionRuleChainCandidate = { - source: 'algorithm', - inflectionRules: trace.map((frame) => frame.transform) - }; - deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, [inflectionRuleChainCandidate])); - } + if (!level2.has(setting)) { + text = process(text, setting); + level2.set(setting, text); + } else { + text = level2.get(setting); } } - return deinflections; + + return text; } /** @@ -507,13 +545,12 @@ export class Translator { /** * @param {string} text - * @param {TextSourceMap} sourceMap * @param {import('translation').FindTermsTextReplacement[]} replacements * @returns {string} */ - _applyTextReplacements(text, sourceMap, replacements) { + _applyTextReplacements(text, replacements) { for (const {pattern, replacement} of replacements) { - text = applyTextReplacement(text, sourceMap, pattern, replacement); + text = applyTextReplacement(text, pattern, replacement); } return text; } @@ -1325,10 +1362,11 @@ export class Translator { /** * @param {Map<string, unknown[]>} arrayVariants - * @yields {Map<string, unknown>} - * @returns {Generator<Map<string, unknown>, void, void>} + * @returns {Map<string, unknown>[]} */ - *_generateArrayVariants(arrayVariants) { + _getArrayVariants(arrayVariants) { + /** @type {Map<string, unknown>[]} */ + const results = []; const variantKeys = [...arrayVariants.keys()]; const entryVariantLengths = []; for (const key of variantKeys) { @@ -1350,8 +1388,9 @@ export class Translator { remainingIndex = Math.floor(remainingIndex / entryVariants.length); } - yield variant; + results.push(variant); } + return results; } /** |