From 8e304b83c685dde17a00d402877a21303b7c11f2 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 3 Jan 2021 12:12:55 -0500 Subject: Translator regex replacements (#1199) * Add support for regex replacements during the translation process * Allow assignment of textReplacements * Rename * Set up test data * Write expected data * Set up options * Prevent infinite loop if regex matches empty string * Implement setting controller * Add support for testing pattern replacements --- ext/bg/css/settings2.css | 107 +++++++++ ext/bg/data/options-schema.json | 43 +++- ext/bg/js/backend.js | 28 ++- ext/bg/js/options.js | 16 ++ ext/bg/js/settings2/settings-main.js | 4 + .../translation-text-replacements-controller.js | 241 +++++++++++++++++++++ ext/bg/js/translator.js | 77 ++++++- ext/bg/settings2.html | 94 ++++++++ 8 files changed, 607 insertions(+), 3 deletions(-) create mode 100644 ext/bg/js/settings2/translation-text-replacements-controller.js (limited to 'ext/bg') diff --git a/ext/bg/css/settings2.css b/ext/bg/css/settings2.css index 8759e941..9d3d081d 100644 --- a/ext/bg/css/settings2.css +++ b/ext/bg/css/settings2.css @@ -1732,6 +1732,113 @@ code.anki-field-marker { height: calc(0.425em * 4 + 1em * var(--line-height-default) * 3); } +#translation-text-replacement-list-empty { + display: none; +} +#translation-text-replacement-list:empty+#translation-text-replacement-list-empty { + display: block; +} +.translation-text-replacement-entry { + display: grid; + grid-template-columns: auto auto 1fr auto; + grid-template-rows: auto; + grid-template-areas: + "index pattern-label pattern button" + ". replacement-label replacement button" + ". 
test-label test ."; + column-gap: 0.25em; + row-gap: 0.25em; + align-items: stretch; + justify-items: stretch; +} +.translation-text-replacement-entry+.translation-text-replacement-entry { + margin-top: 0.5em; +} +.translation-text-replacement-index { + grid-area: index; + align-self: center; + justify-self: start; + padding-right: 0.5em; +} +.translation-text-replacement-pattern-label { + grid-area: pattern-label; + align-self: center; + justify-self: start; + padding-right: 0.5em; +} +.translation-text-replacement-replacement-label { + grid-area: replacement-label; + align-self: center; + justify-self: start; + padding-right: 0.5em; +} +.translation-text-replacement-pattern-container { + grid-area: pattern; + align-self: stretch; + justify-self: stretch; + display: flex; + flex-flow: row nowrap; + align-items: stretch; +} +.translation-text-replacement-replacement-container { + grid-area: replacement; + align-self: stretch; + justify-self: stretch; + display: flex; + flex-flow: row nowrap; + align-items: stretch; +} +input.translation-text-replacement-pattern, +input.translation-text-replacement-replacement { + flex: 1 1 auto; + width: auto; +} +.translation-text-replacement-checkbox-container { + white-space: nowrap; + display: flex; + flex-flow: row nowrap; + align-items: center; + padding-left: 0.5em; +} +.translation-text-replacement-checkbox-label { + padding-left: 0.5em; +} +.translation-text-replacement-button { + grid-area: button; + align-self: center; + justify-self: start; +} +.translation-text-replacement-test-label { + grid-area: test-label; + align-self: center; + justify-self: start; + padding-right: 0.5em; +} +.translation-text-replacement-test-container { + grid-area: test; + align-self: stretch; + justify-self: stretch; + display: flex; + flex-flow: row nowrap; + align-items: stretch; +} +input.translation-text-replacement-test-input, +input.translation-text-replacement-test-output { + flex: 1 1 auto; + width: auto; +} 
+.translation-text-replacement-test-label-inner { + grid-area: button; + align-self: center; + justify-self: start; + flex: 0 0 auto; + padding: 0 0.5em; + white-space: nowrap; +} +.translation-text-replacement-entry:not([data-test-visible=true]) .translation-text-replacement-test-node { + display: none; +} + /* Generic layouts */ .margin-above { diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json index a22ae8a1..12c4097d 100644 --- a/ext/bg/data/options-schema.json +++ b/ext/bg/data/options-schema.json @@ -592,7 +592,8 @@ "convertAlphabeticCharacters", "convertHiraganaToKatakana", "convertKatakanaToHiragana", - "collapseEmphaticSequences" + "collapseEmphaticSequences", + "textReplacements" ], "properties": { "convertHalfWidthCharacters": { @@ -624,6 +625,46 @@ "type": "string", "enum": ["false", "true", "full"], "default": "false" + }, + "textReplacements": { + "type": "object", + "required": [ + "searchOriginal", + "groups" + ], + "properties": { + "searchOriginal": { + "type": "boolean", + "default": true + }, + "groups": { + "type": "array", + "items": { + "type": "array", + "items": { + "required": [ + "pattern", + "ignoreCase", + "replacement" + ], + "properties": { + "pattern": { + "type": "string", + "default": "" + }, + "ignoreCase": { + "type": "boolean", + "default": false + }, + "replacement": { + "type": "string", + "default": "" + } + } + } + } + } + } } } }, diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index c7131728..690f6a3c 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -1655,9 +1655,11 @@ class Backend { convertAlphabeticCharacters, convertHiraganaToKatakana, convertKatakanaToHiragana, - collapseEmphaticSequences + collapseEmphaticSequences, + textReplacements: textReplacementsOptions } } = options; + const textReplacements = this._getTranslatorTextReplacements(textReplacementsOptions); return { wildcard, mainDictionary, @@ -1668,6 +1670,7 @@ class Backend { convertHiraganaToKatakana, 
convertKatakanaToHiragana, collapseEmphaticSequences, + textReplacements, enabledDictionaryMap }; } @@ -1686,6 +1689,29 @@ class Backend { return enabledDictionaryMap; } + _getTranslatorTextReplacements(textReplacementsOptions) { + const textReplacements = []; + for (const group of textReplacementsOptions.groups) { + const textReplacementsEntries = []; + for (let {pattern, ignoreCase, replacement} of group) { + try { + pattern = new RegExp(pattern, ignoreCase ? 'gi' : 'g'); + } catch (e) { + // Invalid pattern + continue; + } + textReplacementsEntries.push({pattern, replacement}); + } + if (textReplacementsEntries.length > 0) { + textReplacements.push(textReplacementsEntries); + } + } + if (textReplacements.length === 0 || textReplacementsOptions.searchOriginal) { + textReplacements.unshift(null); + } + return textReplacements; + } + async _openWelcomeGuidePage() { await this._createTab(chrome.runtime.getURL('/bg/welcome.html')); } diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index 5c68e77c..16168e38 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -485,6 +485,10 @@ class OptionsUtil { { async: false, update: this._updateVersion7.bind(this) + }, + { + async: false, + update: this._updateVersion8.bind(this) } ]; } @@ -675,4 +679,16 @@ class OptionsUtil { } return options; } + + _updateVersion8(options) { + // Version 8 changes: + // Added translation.textReplacements. 
+ for (const profile of options.profiles) { + profile.options.translation.textReplacements = { + searchOriginal: true, + groups: [] + }; + } + return options; + } } diff --git a/ext/bg/js/settings2/settings-main.js b/ext/bg/js/settings2/settings-main.js index 1a719edd..1b3bfaa0 100644 --- a/ext/bg/js/settings2/settings-main.js +++ b/ext/bg/js/settings2/settings-main.js @@ -36,6 +36,7 @@ * SettingsDisplayController * StatusFooter * StorageController + * TranslationTextReplacementsController * api */ @@ -120,6 +121,9 @@ async function setupGenericSettingsController(genericSettingController) { const secondarySearchDictionaryController = new SecondarySearchDictionaryController(settingsController); secondarySearchDictionaryController.prepare(); + const translationTextReplacementsController = new TranslationTextReplacementsController(settingsController); + translationTextReplacementsController.prepare(); + await Promise.all(preparePromises); document.documentElement.dataset.loaded = 'true'; diff --git a/ext/bg/js/settings2/translation-text-replacements-controller.js b/ext/bg/js/settings2/translation-text-replacements-controller.js new file mode 100644 index 00000000..41ee8e3f --- /dev/null +++ b/ext/bg/js/settings2/translation-text-replacements-controller.js @@ -0,0 +1,241 @@ +/* + * Copyright (C) 2021 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
/**
 * Settings controller for the list of translation text replacement rules.
 * Only the first replacement group (`groups[0]`) is displayed and edited.
 */
class TranslationTextReplacementsController {
    /**
     * @param settingsController Object whose `getOptions`, `modifyProfileSettings`,
     *   `instantiateTemplate` and `on` methods are used by this controller.
     */
    constructor(settingsController) {
        this._settingsController = settingsController;
        this._entryContainer = null;
        this._entries = [];
    }

    /**
     * Locates the list container and the add button, registers the event
     * handlers, and renders entries for the current options.
     */
    async prepare() {
        this._entryContainer = document.querySelector('#translation-text-replacement-list');
        const addButton = document.querySelector('#translation-text-replacement-add');

        const onAdd = this._onAdd.bind(this);
        const onOptionsChanged = this._onOptionsChanged.bind(this);
        addButton.addEventListener('click', onAdd, false);
        this._settingsController.on('optionsChanged', onOptionsChanged);

        await this._updateOptions();
    }

    /**
     * Appends a new blank rule to the first group, creating that group when
     * no group exists yet, and then re-renders the list.
     */
    async addGroup() {
        const options = await this._settingsController.getOptions();
        const {groups} = options.translation.textReplacements;
        const blankEntry = this._createNewEntry();

        let modification;
        if (groups.length === 0) {
            // No group yet: insert a new group holding just the blank rule.
            modification = {
                action: 'splice',
                path: 'translation.textReplacements.groups',
                start: 0,
                deleteCount: 0,
                items: [[blankEntry]]
            };
        } else {
            // Append the blank rule to the end of the first group.
            modification = {
                action: 'splice',
                path: 'translation.textReplacements.groups[0]',
                start: groups[0].length,
                deleteCount: 0,
                items: [blankEntry]
            };
        }

        await this._settingsController.modifyProfileSettings([modification]);
        await this._updateOptions();
    }

    /**
     * Removes the rule at `index` from the first group; when it is the only
     * rule left, the group itself is removed instead.
     *
     * @param {number} index Position of the rule inside the first group.
     * @returns {Promise<boolean>} `false` when there is no group or the index
     *   is out of range, `true` after the removal has been applied.
     */
    async deleteGroup(index) {
        const options = await this._settingsController.getOptions();
        const {groups} = options.translation.textReplacements;
        if (groups.length === 0) { return false; }

        const firstGroup = groups[0];
        if (index < 0 || index >= firstGroup.length) { return false; }

        let modification;
        if (firstGroup.length > 1) {
            modification = {
                action: 'splice',
                path: 'translation.textReplacements.groups[0]',
                start: index,
                deleteCount: 1,
                items: []
            };
        } else {
            modification = {
                action: 'splice',
                path: 'translation.textReplacements.groups',
                start: 0,
                deleteCount: firstGroup.length,
                items: []
            };
        }

        await this._settingsController.modifyProfileSettings([modification]);
        await this._updateOptions();
        return true;
    }

    // Private

    /**
     * Discards every rendered entry and rebuilds the list from the first
     * group of the supplied options.
     */
    _onOptionsChanged({options}) {
        this._entries.forEach((entry) => entry.cleanup());
        this._entries = [];

        const {groups} = options.translation.textReplacements;
        if (groups.length === 0) { return; }

        const firstGroup = groups[0];
        const count = firstGroup.length;
        for (let index = 0; index < count; ++index) {
            const node = this._settingsController.instantiateTemplate('translation-text-replacement-entry');
            this._entryContainer.appendChild(node);
            const entry = new TranslationTextReplacementsEntry(this, node, index, firstGroup[index]);
            this._entries.push(entry);
            entry.prepare();
        }
    }

    _onAdd() {
        this.addGroup();
    }

    async _updateOptions() {
        const options = await this._settingsController.getOptions();
        this._onOptionsChanged({options});
    }

    /** Returns the default contents of a newly added rule. */
    _createNewEntry() {
        return {pattern: '', ignoreCase: false, replacement: ''};
    }
}

/**
 * One rendered replacement rule: binds its inputs to the settings paths of
 * the rule at `index` and drives the optional test input/output row.
 */
class TranslationTextReplacementsEntry {
    /**
     * @param parent The owning controller; its `deleteGroup` is called on removal.
     * @param node Root node of the instantiated entry template.
     * @param {number} index Position of the rule inside the first group.
     */
    constructor(parent, node, index) {
        this._parent = parent;
        this._node = node;
        this._index = index;
        this._eventListeners = new EventListenerCollection();
        this._patternInput = null;
        this._replacementInput = null;
        this._ignoreCaseToggle = null;
        this._testInput = null;
        this._testOutput = null;
    }

    /**
     * Looks up the entry's controls, points them at their settings paths and
     * registers all event listeners.
     */
    prepare() {
        const find = (selector) => this._node.querySelector(selector);
        const patternInput = find('.translation-text-replacement-pattern');
        const replacementInput = find('.translation-text-replacement-replacement');
        const ignoreCaseToggle = find('.translation-text-replacement-pattern-ignore-case');
        const menuButton = find('.translation-text-replacement-button');
        const testInput = find('.translation-text-replacement-test-input');
        const testOutput = find('.translation-text-replacement-test-output');

        this._patternInput = patternInput;
        this._replacementInput = replacementInput;
        this._ignoreCaseToggle = ignoreCaseToggle;
        this._testInput = testInput;
        this._testOutput = testOutput;

        const pathBase = `translation.textReplacements.groups[0][${this._index}]`;
        patternInput.dataset.setting = `${pathBase}.pattern`;
        replacementInput.dataset.setting = `${pathBase}.replacement`;
        ignoreCaseToggle.dataset.setting = `${pathBase}.ignoreCase`;

        const listeners = this._eventListeners;
        const refreshTest = this._updateTestInput.bind(this);
        listeners.addEventListener(menuButton, 'menuOpened', this._onMenuOpened.bind(this), false);
        listeners.addEventListener(menuButton, 'menuClosed', this._onMenuClosed.bind(this), false);
        listeners.addEventListener(patternInput, 'settingChanged', this._onPatternChanged.bind(this), false);
        listeners.addEventListener(ignoreCaseToggle, 'settingChanged', refreshTest, false);
        listeners.addEventListener(replacementInput, 'settingChanged', refreshTest, false);
        listeners.addEventListener(testInput, 'input', refreshTest, false);
    }

    /** Removes all listeners and detaches the entry's node from its parent, if any. */
    cleanup() {
        this._eventListeners.removeAllEventListeners();
        const container = this._node.parentNode;
        if (container !== null) {
            container.removeChild(this._node);
        }
    }

    // Private

    /** Shows only the menu item that would toggle the current test visibility. */
    _onMenuOpened({detail: {menu}}) {
        const visible = this._isTestVisible();
        menu.querySelector('[data-menu-action=showTest]').hidden = visible;
        menu.querySelector('[data-menu-action=hideTest]').hidden = !visible;
    }

    _onMenuClosed({detail: {action}}) {
        if (action === 'remove') {
            this._parent.deleteGroup(this._index);
        } else if (action === 'showTest') {
            this._setTestVisible(true);
        } else if (action === 'hideTest') {
            this._setTestVisible(false);
        }
    }

    _onPatternChanged({detail: {value}}) {
        this._validatePattern(value);
        this._updateTestInput();
    }

    /** Records on the pattern input whether `value` compiles as a regular expression. */
    _validatePattern(value) {
        let invalid = true;
        try {
            new RegExp(value, 'g');
            invalid = false;
        } catch (e) {
            // The pattern does not compile; it stays flagged as invalid.
        }

        this._patternInput.dataset.invalid = `${invalid}`;
    }

    _isTestVisible() {
        return this._node.dataset.testVisible === 'true';
    }

    _setTestVisible(visible) {
        this._node.dataset.testVisible = `${visible}`;
        this._updateTestInput();
    }

    /**
     * Recomputes the test output from the current pattern, flags, replacement
     * and test input. Does nothing while the test row is hidden, and leaves
     * the output untouched when the pattern does not compile.
     */
    _updateTestInput() {
        if (!this._isTestVisible()) { return; }

        const flags = this._ignoreCaseToggle.checked ? 'gi' : 'g';
        let regex;
        try {
            regex = new RegExp(this._patternInput.value, flags);
        } catch (e) {
            return;
        }

        const replacement = this._replacementInput.value;
        this._testOutput.value = this._testInput.value.replace(regex, replacement);
    }
}
+ * ], * enabledDictionaryMap: (Map of [ * (string), * { @@ -302,6 +309,7 @@ class Translator { _getAllDeinflections(text, options) { const textOptionVariantArray = [ + this._getTextReplacementsVariants(options), this._getTextOptionEntryVariants(options.convertHalfWidthCharacters), this._getTextOptionEntryVariants(options.convertNumericCharacters), this._getTextOptionEntryVariants(options.convertAlphabeticCharacters), @@ -313,9 +321,12 @@ class Translator { const jp = this._japaneseUtil; const deinflections = []; const used = new Set(); - for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) { + for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) { let text2 = text; const sourceMap = new TextSourceMap(text2); + if (textReplacements !== null) { + text2 = this._applyTextReplacements(text2, sourceMap, textReplacements); + } if (halfWidth) { text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap); } @@ -879,6 +890,10 @@ class Translator { return collapseEmphaticOptions; } + _getTextReplacementsVariants(options) { + return options.textReplacements; + } + _getSecondarySearchDictionaryMap(enabledDictionaryMap) { const secondarySearchDictionaryMap = new Map(); for (const [dictionary, details] of enabledDictionaryMap.entries()) { @@ -1304,4 +1319,64 @@ class Translator { return stringComparer.compare(v1.notes, v2.notes); }); } + + // Regex functions + + _applyTextReplacements(text, sourceMap, replacements) { + for (const {pattern, replacement} of replacements) { + text = this._applyTextReplacement(text, sourceMap, pattern, replacement); + } + return text; + } + + _applyTextReplacement(text, sourceMap, pattern, replacement) { + const isGlobal = pattern.global; + if (isGlobal) { pattern.lastIndex = 0; } + for (let loop = true; loop; loop = isGlobal) 
{ + const match = pattern.exec(text); + if (match === null) { break; } + + const matchText = match[0]; + const index = match.index; + const actualReplacement = this._applyMatchReplacement(replacement, match); + const actualReplacementLength = actualReplacement.length; + const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1); + + text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`; + pattern.lastIndex += delta; + + if (actualReplacementLength > 0) { + sourceMap.combine(Math.max(0, index - 1), matchText.length); + sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0))); + } else { + sourceMap.combine(index, matchText.length); + } + } + return text; + } + + _applyMatchReplacement(replacement, match) { + const pattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g; + return replacement.replace(pattern, (g0, g1, g2) => { + if (typeof g1 !== 'undefined') { + const matchIndex = Number.parseInt(g1, 10); + if (matchIndex >= 1 && matchIndex <= match.length) { + return match[matchIndex]; + } + } else if (typeof g2 !== 'undefined') { + const {groups} = match; + if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) { + return groups[g2]; + } + } else { + switch (g0) { + case '$': return '$'; + case '&': return match[0]; + case '`': return replacement.substring(0, match.index); + case '\'': return replacement.substring(match.index + g0.length); + } + } + return g0; + }); + } } diff --git a/ext/bg/settings2.html b/ext/bg/settings2.html index 1ffe466e..98dbd608 100644 --- a/ext/bg/settings2.html +++ b/ext/bg/settings2.html @@ -1144,6 +1144,14 @@
+
+
+
Configure custom text replacement patterns…
+
+
+ +
+
Convert half width characters to full width
@@ -2612,6 +2620,91 @@
+ + + + + + + + + + @@ -2671,6 +2764,7 @@ + -- cgit v1.2.3