diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-01-03 12:12:55 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-03 12:12:55 -0500 |
commit | 8e304b83c685dde17a00d402877a21303b7c11f2 (patch) | |
tree | 0b2123575502c3e3cb5127582b03e9c196c9891d /ext/bg/js/translator.js | |
parent | eda8534e195d653ee0dea36f70caed0d8d49acf1 (diff) |
Translator regex replacements (#1199)
* Add support for regex replacements during the translation process
* Allow assignment of textReplacements
* Rename
* Set up test data
* Write expected data
* Set up options
* Prevent infinite loop if regex matches empty string
* Implement setting controller
* Add support for testing pattern replacements
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r-- | ext/bg/js/translator.js | 77 |
1 files changed, 76 insertions, 1 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index c23649e1..8cc520a8 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -68,6 +68,13 @@ class Translator { * convertHiraganaToKatakana: (enum: 'false', 'true', 'variant'), * convertKatakanaToHiragana: (enum: 'false', 'true', 'variant'), * collapseEmphaticSequences: (enum: 'false', 'true', 'full'), + * textReplacements: [ + * (null or [ + * {pattern: (RegExp), replacement: (string)} + * ... + * ]) + * ... + * ], * enabledDictionaryMap: (Map of [ * (string), * { @@ -302,6 +309,7 @@ class Translator { _getAllDeinflections(text, options) { const textOptionVariantArray = [ + this._getTextReplacementsVariants(options), this._getTextOptionEntryVariants(options.convertHalfWidthCharacters), this._getTextOptionEntryVariants(options.convertNumericCharacters), this._getTextOptionEntryVariants(options.convertAlphabeticCharacters), @@ -313,9 +321,12 @@ class Translator { const jp = this._japaneseUtil; const deinflections = []; const used = new Set(); - for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) { + for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) { let text2 = text; const sourceMap = new TextSourceMap(text2); + if (textReplacements !== null) { + text2 = this._applyTextReplacements(text2, sourceMap, textReplacements); + } if (halfWidth) { text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap); } @@ -879,6 +890,10 @@ class Translator { return collapseEmphaticOptions; } + _getTextReplacementsVariants(options) { + return options.textReplacements; + } + _getSecondarySearchDictionaryMap(enabledDictionaryMap) { const secondarySearchDictionaryMap = new Map(); for (const [dictionary, details] of enabledDictionaryMap.entries()) { @@ -1304,4 +1319,64 @@ class 
class Translator {
    // NOTE: only the tail of the class is reproduced here. The members that
    // precede it are outside this view; the last visible context before this
    // section is the end of a sort comparator
    // (`return stringComparer.compare(v1.notes, v2.notes);`).

    // Regex functions

    /**
     * Applies a list of regex replacements to `text`, one after another.
     * Each replacement operates on the output of the previous one.
     * @param {string} text The text to transform.
     * @param {object} sourceMap Object exposing `combine` and `insert`; it is
     *   updated in step with every edit made to the text. Presumably a
     *   TextSourceMap - confirm against the caller.
     * @param {Array<{pattern: RegExp, replacement: string}>} replacements
     *   The replacements to apply, in order.
     * @returns {string} The transformed text.
     */
    _applyTextReplacements(text, sourceMap, replacements) {
        for (const {pattern, replacement} of replacements) {
            text = this._applyTextReplacement(text, sourceMap, pattern, replacement);
        }
        return text;
    }

    /**
     * Applies a single regex replacement to `text`. A pattern with the global
     * flag replaces every match; any other pattern replaces at most one.
     * @param {string} text The text to transform.
     * @param {object} sourceMap Object exposing `combine(index, count)` and
     *   `insert(index, ...items)`; called once or twice per match as shown
     *   below. The exact meaning of those calls is defined by the source map
     *   implementation, which is not visible here.
     * @param {RegExp} pattern The pattern to search for. Its `lastIndex`
     *   property is modified.
     * @param {string} replacement Replacement template; see
     *   `_applyMatchReplacement` for the supported `$` tokens.
     * @returns {string} The transformed text.
     */
    _applyTextReplacement(text, sourceMap, pattern, replacement) {
        const isGlobal = pattern.global;
        // A global pattern keeps scan state in lastIndex; start from the beginning.
        if (isGlobal) { pattern.lastIndex = 0; }
        // Runs once for a non-global pattern, until no match remains for a global one.
        for (let loop = true; loop; loop = isGlobal) {
            const match = pattern.exec(text);
            if (match === null) { break; }

            const matchText = match[0];
            const index = match.index;
            const actualReplacement = this._applyMatchReplacement(replacement, match);
            const actualReplacementLength = actualReplacement.length;
            // An empty match is treated as length -1 so that lastIndex moves one
            // extra position forward; otherwise a global pattern that matches
            // the empty string would match at the same place forever.
            const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1);

            text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`;
            // Account for the length change so the next search resumes after
            // the text that was just inserted.
            pattern.lastIndex += delta;

            if (actualReplacementLength > 0) {
                sourceMap.combine(Math.max(0, index - 1), matchText.length);
                sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0)));
            } else {
                sourceMap.combine(index, matchText.length);
            }
        }
        return text;
    }

    /**
     * Expands the `$` tokens of a replacement template for one regex match,
     * following the conventions of `String.prototype.replace`:
     *   `$$`       a literal `$`
     *   `$&`       the matched text
     *   `` $` ``   the searched text before the match
     *   `$'`       the searched text after the match
     *   `$N`/`$NN` the numbered capture group
     *   `$<name>`  the named capture group
     * A capture group that exists but did not participate in the match expands
     * to an empty string. A token that refers to a group the pattern does not
     * have is left in the output unchanged.
     * @param {string} replacement The replacement template.
     * @param {Array} match A match result as returned by `RegExp.prototype.exec`;
     *   `index`, `input` and (optionally) `groups` are read from it.
     * @returns {string} The expanded replacement text.
     */
    _applyMatchReplacement(replacement, match) {
        const pattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g;
        return replacement.replace(pattern, (g0, g1, g2) => {
            if (typeof g1 !== 'undefined') {
                const matchIndex = Number.parseInt(g1, 10);
                // match[0] is the whole match, so valid groups are 1..length-1.
                if (matchIndex >= 1 && matchIndex < match.length) {
                    const value = match[matchIndex];
                    // A non-participating group is undefined; returning that
                    // from a replacer would emit the text "undefined".
                    return typeof value === 'string' ? value : '';
                }
            } else if (typeof g2 !== 'undefined') {
                const {groups} = match;
                if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) {
                    const value = groups[g2];
                    return typeof value === 'string' ? value : '';
                }
            } else {
                // g0 is the complete token, including the leading "$".
                switch (g0) {
                    case '$$': return '$';
                    case '$&': return match[0];
                    case '$`': return match.input.substring(0, match.index);
                    case '$\'': return match.input.substring(match.index + match[0].length);
                }
            }
            return g0;
        });
    }
}