summaryrefslogtreecommitdiff
path: root/ext/bg/js/translator.js
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-01-03 12:12:55 -0500
committer: GitHub <noreply@github.com> 2021-01-03 12:12:55 -0500
commit: 8e304b83c685dde17a00d402877a21303b7c11f2 (patch)
tree: 0b2123575502c3e3cb5127582b03e9c196c9891d /ext/bg/js/translator.js
parent: eda8534e195d653ee0dea36f70caed0d8d49acf1 (diff)
Translator regex replacements (#1199)
* Add support for regex replacements during the translation process
* Allow assignment of textReplacements
* Rename
* Set up test data
* Write expected data
* Set up options
* Prevent infinite loop if regex matches empty string
* Implement setting controller
* Add support for testing pattern replacements
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r-- ext/bg/js/translator.js 77
1 file changed, 76 insertions, 1 deletion
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index c23649e1..8cc520a8 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -68,6 +68,13 @@ class Translator {
* convertHiraganaToKatakana: (enum: 'false', 'true', 'variant'),
* convertKatakanaToHiragana: (enum: 'false', 'true', 'variant'),
* collapseEmphaticSequences: (enum: 'false', 'true', 'full'),
+ * textReplacements: [
+ * (null or [
+ * {pattern: (RegExp), replacement: (string)}
+ * ...
+ * ])
+ * ...
+ * ],
* enabledDictionaryMap: (Map of [
* (string),
* {
@@ -302,6 +309,7 @@ class Translator {
_getAllDeinflections(text, options) {
const textOptionVariantArray = [
+ this._getTextReplacementsVariants(options),
this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
this._getTextOptionEntryVariants(options.convertNumericCharacters),
this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
@@ -313,9 +321,12 @@ class Translator {
const jp = this._japaneseUtil;
const deinflections = [];
const used = new Set();
- for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
+ for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
let text2 = text;
const sourceMap = new TextSourceMap(text2);
+ if (textReplacements !== null) {
+ text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
+ }
if (halfWidth) {
text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
}
@@ -879,6 +890,10 @@ class Translator {
return collapseEmphaticOptions;
}
+ _getTextReplacementsVariants(options) {
+ return options.textReplacements;
+ }
+
_getSecondarySearchDictionaryMap(enabledDictionaryMap) {
const secondarySearchDictionaryMap = new Map();
for (const [dictionary, details] of enabledDictionaryMap.entries()) {
@@ -1304,4 +1319,64 @@ class Translator {
return stringComparer.compare(v1.notes, v2.notes);
});
}
+
+ // Regex functions
+
+ _applyTextReplacements(text, sourceMap, replacements) {
+ for (const {pattern, replacement} of replacements) {
+ text = this._applyTextReplacement(text, sourceMap, pattern, replacement);
+ }
+ return text;
+ }
+
+ _applyTextReplacement(text, sourceMap, pattern, replacement) {
+ const isGlobal = pattern.global;
+ if (isGlobal) { pattern.lastIndex = 0; }
+ for (let loop = true; loop; loop = isGlobal) {
+ const match = pattern.exec(text);
+ if (match === null) { break; }
+
+ const matchText = match[0];
+ const index = match.index;
+ const actualReplacement = this._applyMatchReplacement(replacement, match);
+ const actualReplacementLength = actualReplacement.length;
+ const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1);
+
+ text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`;
+ pattern.lastIndex += delta;
+
+ if (actualReplacementLength > 0) {
+ sourceMap.combine(Math.max(0, index - 1), matchText.length);
+ sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0)));
+ } else {
+ sourceMap.combine(index, matchText.length);
+ }
+ }
+ return text;
+ }
+
+ _applyMatchReplacement(replacement, match) {
+ const pattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g;
+ return replacement.replace(pattern, (g0, g1, g2) => {
+ if (typeof g1 !== 'undefined') {
+ const matchIndex = Number.parseInt(g1, 10);
+ if (matchIndex >= 1 && matchIndex <= match.length) {
+ return match[matchIndex];
+ }
+ } else if (typeof g2 !== 'undefined') {
+ const {groups} = match;
+ if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) {
+ return groups[g2];
+ }
+ } else {
+ switch (g0) {
+ case '$': return '$';
+ case '&': return match[0];
+ case '`': return replacement.substring(0, match.index);
+ case '\'': return replacement.substring(match.index + g0.length);
+ }
+ }
+ return g0;
+ });
+ }
}