summaryrefslogtreecommitdiff
path: root/ext/js
diff options
context:
space:
mode:
Diffstat (limited to 'ext/js')
-rw-r--r--ext/js/general/regex-util.js12
-rw-r--r--ext/js/general/text-source-map.js153
-rw-r--r--ext/js/language/ar/arabic-text-preprocessors.js6
-rw-r--r--ext/js/language/ja/japanese-text-preprocessors.js24
-rw-r--r--ext/js/language/ja/japanese-wanakana.js52
-rw-r--r--ext/js/language/ja/japanese.js15
-rw-r--r--ext/js/language/language-descriptors.js11
-rwxr-xr-xext/js/language/languages.js20
-rw-r--r--ext/js/language/ru/russian-text-preprocessors.js10
-rwxr-xr-xext/js/language/text-processors.js (renamed from ext/js/language/text-preprocessors.js)16
-rw-r--r--ext/js/language/translator.js147
11 files changed, 147 insertions, 319 deletions
diff --git a/ext/js/general/regex-util.js b/ext/js/general/regex-util.js
index e0982154..c633ec06 100644
--- a/ext/js/general/regex-util.js
+++ b/ext/js/general/regex-util.js
@@ -23,13 +23,12 @@ const matchReplacementPattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g;
* Applies string.replace using a regular expression and replacement string as arguments.
* A source map of the changes is also maintained.
* @param {string} text A string of the text to replace.
- * @param {import('./text-source-map.js').TextSourceMap} sourceMap An instance of `TextSourceMap` which corresponds to `text`.
* @param {RegExp} pattern A regular expression to use as the replacement.
* @param {string} replacement A replacement string that follows the format of the standard
* JavaScript regular expression replacement string.
* @returns {string} A new string with the pattern replacements applied and the source map updated.
*/
-export function applyTextReplacement(text, sourceMap, pattern, replacement) {
+export function applyTextReplacement(text, pattern, replacement) {
const isGlobal = pattern.global;
if (isGlobal) { pattern.lastIndex = 0; }
for (let loop = true; loop; loop = isGlobal) {
@@ -44,15 +43,6 @@ export function applyTextReplacement(text, sourceMap, pattern, replacement) {
text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`;
pattern.lastIndex += delta;
-
- if (actualReplacementLength > 0) {
- /** @type {number[]} */
- const zeroes = new Array(actualReplacementLength).fill(0);
- sourceMap.insert(index, ...zeroes);
- sourceMap.combine(index - 1 + actualReplacementLength, matchText.length);
- } else {
- sourceMap.combine(index, matchText.length);
- }
}
return text;
}
diff --git a/ext/js/general/text-source-map.js b/ext/js/general/text-source-map.js
deleted file mode 100644
index 527c232b..00000000
--- a/ext/js/general/text-source-map.js
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (C) 2023-2024 Yomitan Authors
- * Copyright (C) 2020-2022 Yomichan Authors
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-export class TextSourceMap {
- /**
- * @param {string} source
- * @param {number[]|null} [mapping=null]
- */
- constructor(source, mapping = null) {
- /** @type {string} */
- this._source = source;
- /** @type {?number[]} */
- this._mapping = (mapping !== null ? TextSourceMap.normalizeMapping(mapping) : null);
- }
-
- /** @type {string} */
- get source() {
- return this._source;
- }
-
- /**
- * @param {unknown} other
- * @returns {boolean}
- */
- equals(other) {
- if (this === other) {
- return true;
- }
-
- const source = this._source;
- if (!(other instanceof TextSourceMap && source === other.source)) {
- return false;
- }
-
- let mapping = this._mapping;
- let otherMapping = other.getMappingCopy();
- if (mapping === null) {
- if (otherMapping === null) {
- return true;
- }
- mapping = TextSourceMap.createMapping(source);
- } else if (otherMapping === null) {
- otherMapping = TextSourceMap.createMapping(source);
- }
-
- const mappingLength = mapping.length;
- if (mappingLength !== otherMapping.length) {
- return false;
- }
-
- for (let i = 0; i < mappingLength; ++i) {
- if (mapping[i] !== otherMapping[i]) {
- return false;
- }
- }
-
- return true;
- }
-
- /**
- * @param {number} finalLength
- * @returns {number}
- */
- getSourceLength(finalLength) {
- const mapping = this._mapping;
- if (mapping === null) {
- return finalLength;
- }
-
- let sourceLength = 0;
- for (let i = 0; i < finalLength; ++i) {
- sourceLength += mapping[i];
- }
- return sourceLength;
- }
-
- /**
- * @param {number} index
- * @param {number} count
- */
- combine(index, count) {
- if (count <= 0) { return; }
-
- if (this._mapping === null) {
- this._mapping = TextSourceMap.createMapping(this._source);
- }
-
- let sum = this._mapping[index];
- const parts = this._mapping.splice(index + 1, count);
- for (const part of parts) {
- sum += part;
- }
- this._mapping[index] = sum;
- }
-
- /**
- * @param {number} index
- * @param {number[]} items
- */
- insert(index, ...items) {
- if (this._mapping === null) {
- this._mapping = TextSourceMap.createMapping(this._source);
- }
-
- this._mapping.splice(index, 0, ...items);
- }
-
- /**
- * @returns {?number[]}
- */
- getMappingCopy() {
- return this._mapping !== null ? [...this._mapping] : null;
- }
-
- /**
- * @param {string} text
- * @returns {number[]}
- */
- static createMapping(text) {
- return new Array(text.length).fill(1);
- }
-
- /**
- * @param {number[]} mapping
- * @returns {number[]}
- */
- static normalizeMapping(mapping) {
- const result = [];
- for (const value of mapping) {
- result.push(
- (typeof value === 'number' && Number.isFinite(value)) ?
- Math.floor(value) :
- 0
- );
- }
- return result;
- }
-}
diff --git a/ext/js/language/ar/arabic-text-preprocessors.js b/ext/js/language/ar/arabic-text-preprocessors.js
index 6007d770..91535ccd 100644
--- a/ext/js/language/ar/arabic-text-preprocessors.js
+++ b/ext/js/language/ar/arabic-text-preprocessors.js
@@ -15,7 +15,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+import {basicTextProcessorOptions} from '../text-processors.js';
const optionalDiacritics = [
'\u0618', // Small Fatha
@@ -38,11 +38,11 @@ const optionalDiacritics = [
const diacriticsRegex = new RegExp(`[${optionalDiacritics.join('')}]`, 'g');
-/** @type {import('language').TextPreprocessor<boolean>} */
+/** @type {import('language').TextProcessor<boolean>} */
export const removeArabicScriptDiacritics = {
name: 'Remove diacritics',
description: 'وَلَدَ ⬅️ ولد',
- options: basicTextPreprocessorOptions,
+ options: basicTextProcessorOptions,
process: (text, setting) => {
return setting ? text.replace(diacriticsRegex, '') : text;
}
diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js
index 06f944c1..b3d50817 100644
--- a/ext/js/language/ja/japanese-text-preprocessors.js
+++ b/ext/js/language/ja/japanese-text-preprocessors.js
@@ -15,7 +15,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+import {basicTextProcessorOptions} from '../text-processors.js';
import {convertAlphabeticToKana} from './japanese-wanakana.js';
import {
collapseEmphaticSequences as collapseEmphaticSequencesFunction,
@@ -25,28 +25,28 @@ import {
convertNumericToFullWidth
} from './japanese.js';
-/** @type {import('language').TextPreprocessor<boolean>} */
+/** @type {import('language').TextProcessor<boolean>} */
export const convertHalfWidthCharacters = {
name: 'Convert half width characters to full width',
description: 'ヨミチャン → ヨミチャン',
- options: basicTextPreprocessorOptions,
- process: (str, setting, sourceMap) => (setting ? convertHalfWidthKanaToFullWidth(str, sourceMap) : str)
+ options: basicTextProcessorOptions,
+ process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str)
};
-/** @type {import('language').TextPreprocessor<boolean>} */
+/** @type {import('language').TextProcessor<boolean>} */
export const convertNumericCharacters = {
name: 'Convert numeric characters to full width',
description: '1234 → 1234',
- options: basicTextPreprocessorOptions,
+ options: basicTextProcessorOptions,
process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str)
};
-/** @type {import('language').TextPreprocessor<boolean>} */
+/** @type {import('language').TextProcessor<boolean>} */
export const convertAlphabeticCharacters = {
name: 'Convert alphabetic characters to hiragana',
description: 'yomichan → よみちゃん',
- options: basicTextPreprocessorOptions,
- process: (str, setting, sourceMap) => (setting ? convertAlphabeticToKana(str, sourceMap) : str)
+ options: basicTextProcessorOptions,
+ process: (str, setting) => (setting ? convertAlphabeticToKana(str) : str)
};
/** @type {import('language').BidirectionalConversionPreprocessor} */
@@ -66,15 +66,15 @@ export const convertHiraganaToKatakana = {
}
};
-/** @type {import('language').TextPreprocessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */
+/** @type {import('language').TextProcessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */
export const collapseEmphaticSequences = {
name: 'Collapse emphatic character sequences',
description: 'すっっごーーい → すっごーい / すごい',
options: [[false, false], [true, false], [true, true]],
- process: (str, setting, sourceMap) => {
+ process: (str, setting) => {
const [collapseEmphatic, collapseEmphaticFull] = setting;
if (collapseEmphatic) {
- str = collapseEmphaticSequencesFunction(str, collapseEmphaticFull, sourceMap);
+ str = collapseEmphaticSequencesFunction(str, collapseEmphaticFull);
}
return str;
}
diff --git a/ext/js/language/ja/japanese-wanakana.js b/ext/js/language/ja/japanese-wanakana.js
index 32260489..a87db6b7 100644
--- a/ext/js/language/ja/japanese-wanakana.js
+++ b/ext/js/language/ja/japanese-wanakana.js
@@ -19,51 +19,10 @@ import * as wanakana from '../../../lib/wanakana.js';
/**
* @param {string} text
- * @param {?import('../../general/text-source-map.js').TextSourceMap} sourceMap
- * @param {number} sourceMapStart
* @returns {string}
*/
-function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
- const result = wanakana.toHiragana(text);
-
- // Generate source mapping
- if (sourceMap !== null) {
- let i = 0;
- let resultPos = 0;
- const ii = text.length;
- while (i < ii) {
- // Find smallest matching substring
- let iNext = i + 1;
- let resultPosNext = result.length;
- while (iNext < ii) {
- const t = wanakana.toHiragana(text.substring(0, iNext));
- if (t === result.substring(0, t.length)) {
- resultPosNext = t.length;
- break;
- }
- ++iNext;
- }
-
- // Merge characters
- const removals = iNext - i - 1;
- if (removals > 0) {
- sourceMap.combine(sourceMapStart, removals);
- }
- ++sourceMapStart;
-
- // Empty elements
- const additions = resultPosNext - resultPos - 1;
- for (let j = 0; j < additions; ++j) {
- sourceMap.insert(sourceMapStart, 0);
- ++sourceMapStart;
- }
-
- i = iNext;
- resultPos = resultPosNext;
- }
- }
-
- return result;
+function convertAlphabeticPartToKana(text) {
+ return wanakana.toHiragana(text);
}
/**
@@ -84,10 +43,9 @@ export function convertToRomaji(text) {
/**
* @param {string} text
- * @param {?import('../../general/text-source-map.js').TextSourceMap} sourceMap
* @returns {string}
*/
-export function convertAlphabeticToKana(text, sourceMap = null) {
+export function convertAlphabeticToKana(text) {
let part = '';
let result = '';
@@ -106,7 +64,7 @@ export function convertAlphabeticToKana(text, sourceMap = null) {
c = 0x2d; // '-'
} else {
if (part.length > 0) {
- result += convertAlphabeticPartToKana(part, sourceMap, result.length);
+ result += convertAlphabeticPartToKana(part);
part = '';
}
result += char;
@@ -116,7 +74,7 @@ export function convertAlphabeticToKana(text, sourceMap = null) {
}
if (part.length > 0) {
- result += convertAlphabeticPartToKana(part, sourceMap, result.length);
+ result += convertAlphabeticPartToKana(part);
}
return result;
}
diff --git a/ext/js/language/ja/japanese.js b/ext/js/language/ja/japanese.js
index 2c9a1f7f..3507e5df 100644
--- a/ext/js/language/ja/japanese.js
+++ b/ext/js/language/ja/japanese.js
@@ -539,10 +539,9 @@ export function convertNumericToFullWidth(text) {
/**
* @param {string} text
- * @param {?import('../../general/text-source-map.js').TextSourceMap} [sourceMap]
* @returns {string}
*/
-export function convertHalfWidthKanaToFullWidth(text, sourceMap = null) {
+export function convertHalfWidthKanaToFullWidth(text) {
let result = '';
// This function is safe to use charCodeAt instead of codePointAt, since all
@@ -575,9 +574,6 @@ export function convertHalfWidthKanaToFullWidth(text, sourceMap = null) {
}
}
- if (sourceMap !== null && index > 0) {
- sourceMap.combine(result.length, 1);
- }
result += c2;
}
@@ -705,13 +701,11 @@ export function distributeFuriganaInflected(term, reading, source) {
/**
* @param {string} text
* @param {boolean} fullCollapse
- * @param {?import('../../general/text-source-map.js').TextSourceMap} [sourceMap]
* @returns {string}
*/
-export function collapseEmphaticSequences(text, fullCollapse, sourceMap = null) {
+export function collapseEmphaticSequences(text, fullCollapse) {
let result = '';
let collapseCodePoint = -1;
- const hasSourceMap = (sourceMap !== null);
for (const char of text) {
const c = char.codePointAt(0);
if (
@@ -729,11 +723,6 @@ export function collapseEmphaticSequences(text, fullCollapse, sourceMap = null)
} else {
collapseCodePoint = -1;
result += char;
- continue;
- }
-
- if (hasSourceMap) {
- sourceMap.combine(Math.max(0, result.length - 1), 1);
}
}
return result;
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index c5c3e01e..2df2f794 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -25,7 +25,7 @@ import {isStringPartiallyJapanese} from './ja/japanese.js';
import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
import {oldIrishTransforms} from './sga/old-irish-transforms.js';
import {albanianTransforms} from './sq/albanian-transforms.js';
-import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-preprocessors.js';
+import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-processors.js';
const capitalizationPreprocessors = {
decapitalize,
@@ -138,8 +138,7 @@ const languageDescriptors = [
{
iso: 'km',
name: 'Khmer',
- exampleText: 'អាន',
- textPreprocessors: {}
+ exampleText: 'អាន'
},
{
iso: 'pl',
@@ -201,8 +200,7 @@ const languageDescriptors = [
{
iso: 'th',
name: 'Thai',
- exampleText: 'อ่าน',
- textPreprocessors: {}
+ exampleText: 'อ่าน'
},
{
iso: 'tr',
@@ -219,8 +217,7 @@ const languageDescriptors = [
{
iso: 'zh',
name: 'Chinese',
- exampleText: '读',
- textPreprocessors: {}
+ exampleText: '读'
}
];
diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js
index 4b196c2c..b3890aa6 100755
--- a/ext/js/language/languages.js
+++ b/ext/js/language/languages.js
@@ -29,21 +29,29 @@ export function getLanguageSummaries() {
}
/**
- * @returns {import('language').LanguageAndPreprocessors[]}
+ * @returns {import('language').LanguageAndProcessors[]}
* @throws {Error}
*/
-export function getAllLanguageTextPreprocessors() {
+export function getAllLanguageTextProcessors() {
const results = [];
- for (const {iso, textPreprocessors} of languageDescriptorMap.values()) {
- /** @type {import('language').TextPreprocessorWithId<unknown>[]} */
+ for (const {iso, textPreprocessors = {}, textPostprocessors = {}} of languageDescriptorMap.values()) {
+ /** @type {import('language').TextProcessorWithId<unknown>[]} */
const textPreprocessorsArray = [];
for (const [id, textPreprocessor] of Object.entries(textPreprocessors)) {
textPreprocessorsArray.push({
id,
- textPreprocessor: /** @type {import('language').TextPreprocessor<unknown>} */ (textPreprocessor)
+ textProcessor: /** @type {import('language').TextProcessor<unknown>} */ (textPreprocessor)
});
}
- results.push({iso, textPreprocessors: textPreprocessorsArray});
+ /** @type {import('language').TextProcessorWithId<unknown>[]} */
+ const textPostprocessorsArray = [];
+ for (const [id, textPostprocessor] of Object.entries(textPostprocessors)) {
+ textPostprocessorsArray.push({
+ id,
+ textProcessor: /** @type {import('language').TextProcessor<unknown>} */ (textPostprocessor)
+ });
+ }
+ results.push({iso, textPreprocessors: textPreprocessorsArray, textPostprocessors: textPostprocessorsArray});
}
return results;
}
diff --git a/ext/js/language/ru/russian-text-preprocessors.js b/ext/js/language/ru/russian-text-preprocessors.js
index fc4472e9..fbda38c7 100644
--- a/ext/js/language/ru/russian-text-preprocessors.js
+++ b/ext/js/language/ru/russian-text-preprocessors.js
@@ -15,23 +15,23 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+import {basicTextProcessorOptions} from '../text-processors.js';
-/** @type {import('language').TextPreprocessor<boolean>} */
+/** @type {import('language').TextProcessor<boolean>} */
export const removeRussianDiacritics = {
name: 'Remove diacritics',
description: 'A\u0301 → A, a\u0301 → a',
- options: basicTextPreprocessorOptions,
+ options: basicTextProcessorOptions,
process: (str, setting) => {
return setting ? str.replace(/\u0301/g, '') : str;
}
};
-/** @type {import('language').TextPreprocessor<boolean>} */
+/** @type {import('language').TextProcessor<boolean>} */
export const yoToE = {
name: 'Yo to E',
description: 'ё → е, Ё → Е',
- options: basicTextPreprocessorOptions,
+ options: basicTextProcessorOptions,
process: (str, setting) => {
return setting ? str.replace(/ё/g, 'е').replace(/Ё/g, 'Е') : str;
}
diff --git a/ext/js/language/text-preprocessors.js b/ext/js/language/text-processors.js
index e33fccda..e7855df2 100755
--- a/ext/js/language/text-preprocessors.js
+++ b/ext/js/language/text-processors.js
@@ -15,22 +15,22 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-/** @type {import('language').TextPreprocessorOptions<boolean>} */
-export const basicTextPreprocessorOptions = [false, true];
+/** @type {import('language').TextProcessorOptions<boolean>} */
+export const basicTextProcessorOptions = [false, true];
-/** @type {import('language').TextPreprocessor<boolean>} */
+/** @type {import('language').TextProcessor<boolean>} */
export const decapitalize = {
name: 'Decapitalize text',
description: 'CAPITALIZED TEXT → capitalized text',
- options: basicTextPreprocessorOptions,
+ options: basicTextProcessorOptions,
process: (str, setting) => (setting ? str.toLowerCase() : str)
};
-/** @type {import('language').TextPreprocessor<boolean>} */
+/** @type {import('language').TextProcessor<boolean>} */
export const capitalizeFirstLetter = {
name: 'Capitalize first letter',
description: 'lowercase text → Lowercase text',
- options: basicTextPreprocessorOptions,
+ options: basicTextProcessorOptions,
process: (str, setting) => (setting ? str.charAt(0).toUpperCase() + str.slice(1) : str)
};
@@ -39,11 +39,11 @@ export const capitalizeFirstLetter = {
* as it can result in undesirable normalization:
* - '\u9038'.normalize('NFD') => '\u9038' (逸)
* - '\ufa67'.normalize('NFD') => '\u9038' (逸 => 逸)
- * @type {import('language').TextPreprocessor<boolean>}
+ * @type {import('language').TextProcessor<boolean>}
*/
export const removeAlphabeticDiacritics = {
name: 'Remove Alphabetic Diacritics',
description: 'ἄήé -> αηe',
- options: basicTextPreprocessorOptions,
+ options: basicTextProcessorOptions,
process: (str, setting) => (setting ? str.normalize('NFD').replace(/[\u0300-\u036f]/g, '') : str)
};
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index 6132ee82..845d53d5 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -17,10 +17,9 @@
*/
import {applyTextReplacement} from '../general/regex-util.js';
-import {TextSourceMap} from '../general/text-source-map.js';
import {isCodePointJapanese} from './ja/japanese.js';
import {LanguageTransformer} from './language-transformer.js';
-import {getAllLanguageTextPreprocessors} from './languages.js';
+import {getAllLanguageTextProcessors} from './languages.js';
import {MultiLanguageTransformer} from './multi-language-transformer.js';
/**
@@ -41,8 +40,8 @@ export class Translator {
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
/** @type {RegExp} */
this._numberRegex = /[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/;
- /** @type {Map<string, {textPreprocessors: import('language').TextPreprocessorWithId<unknown>[], optionSpace: import('translation-internal').PreprocessorOptionsSpace}>} */
- this._textPreprocessors = new Map();
+ /** @type {import('translation-internal').TextProcessorMap} */
+ this._textProcessors = new Map();
}
/**
@@ -50,13 +49,19 @@ export class Translator {
*/
prepare() {
this._multiLanguageTransformer.prepare();
- for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) {
- /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */
- const optionSpace = new Map();
- for (const {id, textPreprocessor} of textPreprocessors) {
- optionSpace.set(id, textPreprocessor.options);
+ for (const {iso, textPreprocessors = [], textPostprocessors = []} of getAllLanguageTextProcessors()) {
+ /** @type {import('translation-internal').TextProcessorOptionsSpace} */
+ const preprocessorOptionsSpace = new Map();
+ /** @type {import('translation-internal').TextProcessorOptionsSpace} */
+ const postprocessorOptionsSpace = new Map();
+
+ for (const {id, textProcessor} of textPreprocessors) {
+ preprocessorOptionsSpace.set(id, textProcessor.options);
}
- this._textPreprocessors.set(iso, {textPreprocessors, optionSpace});
+ for (const {id, textProcessor} of textPostprocessors) {
+ postprocessorOptionsSpace.set(id, textProcessor.options);
+ }
+ this._textProcessors.set(iso, {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace});
}
}
@@ -428,7 +433,7 @@ export class Translator {
}
}
- // Deinflections and text preprocessing
+ // Deinflections and text processing
/**
* @param {string} text
@@ -438,57 +443,90 @@ export class Translator {
*/
_getAlgorithmDeinflections(text, options) {
const {language} = options;
- const info = this._textPreprocessors.get(language);
+ const info = this._textProcessors.get(language);
if (typeof info === 'undefined') { throw new Error(`Unsupported language: ${language}`); }
- const {textPreprocessors, optionSpace: textPreprocessorOptionsSpace} = info;
+ const {textPreprocessors, preprocessorOptionsSpace, textPostprocessors, postprocessorOptionsSpace} = info;
- /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */
- const variantSpace = new Map();
- variantSpace.set('textReplacements', this._getTextReplacementsVariants(options));
- for (const [key, value] of textPreprocessorOptionsSpace) {
- variantSpace.set(key, value);
- }
+ const preprocessorVariantSpace = new Map(preprocessorOptionsSpace);
+ preprocessorVariantSpace.set('textReplacements', this._getTextReplacementsVariants(options));
+ const preprocessorVariants = this._getArrayVariants(preprocessorVariantSpace);
+ const postprocessorVariants = this._getArrayVariants(postprocessorOptionsSpace);
/** @type {import('translation-internal').DatabaseDeinflection[]} */
const deinflections = [];
const used = new Set();
+ /** @type {Map<string, import('core').SafeAny>} */
+ const sourceCache = new Map(); // For reusing text processors' outputs
+
+ for (
+ let i = text.length;
+ i > 0;
+ i = this._getNextSubstringLength(options.searchResolution, i, text)
+ ) {
+ const rawSource = text.substring(0, i);
+
+ for (const preprocessorVariant of preprocessorVariants) {
+ let source = rawSource;
+
+ const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (preprocessorVariant.get('textReplacements'));
+ if (textReplacements !== null) {
+ source = this._applyTextReplacements(source, textReplacements);
+ }
- for (const arrayVariant of this._generateArrayVariants(variantSpace)) {
- const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (arrayVariant.get('textReplacements'));
+ source = this._applyTextProcessors(textPreprocessors, preprocessorVariant, source, sourceCache);
- let text2 = text;
- const sourceMap = new TextSourceMap(text2);
+ if (used.has(source)) { continue; }
+ used.add(source);
+ for (const deinflection of this._multiLanguageTransformer.transform(language, source)) {
+ const {trace, conditions} = deinflection;
+ for (const postprocessorVariant of postprocessorVariants) {
+ let {text: transformedText} = deinflection;
+ transformedText = this._applyTextProcessors(textPostprocessors, postprocessorVariant, transformedText, sourceCache);
+
+ /** @type {import('dictionary').InflectionRuleChainCandidate} */
+ const inflectionRuleChainCandidate = {
+ source: 'algorithm',
+ inflectionRules: trace.map((frame) => frame.transform)
+ };
+ deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, [inflectionRuleChainCandidate]));
+ }
+ }
+ }
+ }
+ return deinflections;
+ }
- if (textReplacements !== null) {
- text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
+ /**
+ * @param {import('language').TextProcessorWithId<unknown>[]} textProcessors
+ * @param {Map<string, unknown>} processorVariant
+ * @param {string} text
+ * @param {Map<string, import('core').SafeAny>} textCache
+ * @returns {string}
+ */
+ _applyTextProcessors(textProcessors, processorVariant, text, textCache) {
+ for (const {id, textProcessor: {process}} of textProcessors) {
+ const setting = processorVariant.get(id);
+ let level1 = textCache.get(text);
+ if (!level1) {
+ level1 = new Map();
+ textCache.set(text, level1);
}
- for (const preprocessor of textPreprocessors.values()) {
- const {id, textPreprocessor} = preprocessor;
- const setting = arrayVariant.get(id);
- text2 = textPreprocessor.process(text2, setting, sourceMap);
+ let level2 = level1.get(id);
+ if (!level2) {
+ level2 = new Map();
+ level1.set(id, level2);
}
- for (
- let source = text2, i = text2.length;
- i > 0;
- i = this._getNextSubstringLength(options.searchResolution, i, source)
- ) {
- source = text2.substring(0, i);
- if (used.has(source)) { break; }
- used.add(source);
- const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
- for (const {text: transformedText, conditions, trace} of this._multiLanguageTransformer.transform(language, source)) {
- /** @type {import('dictionary').InflectionRuleChainCandidate} */
- const inflectionRuleChainCandidate = {
- source: 'algorithm',
- inflectionRules: trace.map((frame) => frame.transform)
- };
- deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, [inflectionRuleChainCandidate]));
- }
+ if (!level2.has(setting)) {
+ text = process(text, setting);
+ level2.set(setting, text);
+ } else {
+ text = level2.get(setting);
}
}
- return deinflections;
+
+ return text;
}
/**
@@ -507,13 +545,12 @@ export class Translator {
/**
* @param {string} text
- * @param {TextSourceMap} sourceMap
* @param {import('translation').FindTermsTextReplacement[]} replacements
* @returns {string}
*/
- _applyTextReplacements(text, sourceMap, replacements) {
+ _applyTextReplacements(text, replacements) {
for (const {pattern, replacement} of replacements) {
- text = applyTextReplacement(text, sourceMap, pattern, replacement);
+ text = applyTextReplacement(text, pattern, replacement);
}
return text;
}
@@ -1325,10 +1362,11 @@ export class Translator {
/**
* @param {Map<string, unknown[]>} arrayVariants
- * @yields {Map<string, unknown>}
- * @returns {Generator<Map<string, unknown>, void, void>}
+ * @returns {Map<string, unknown>[]}
*/
- *_generateArrayVariants(arrayVariants) {
+ _getArrayVariants(arrayVariants) {
+ /** @type {Map<string, unknown>[]} */
+ const results = [];
const variantKeys = [...arrayVariants.keys()];
const entryVariantLengths = [];
for (const key of variantKeys) {
@@ -1350,8 +1388,9 @@ export class Translator {
remainingIndex = Math.floor(remainingIndex / entryVariants.length);
}
- yield variant;
+ results.push(variant);
}
+ return results;
}
/**