diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-01-03 12:12:55 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-01-03 12:12:55 -0500 | 
| commit | 8e304b83c685dde17a00d402877a21303b7c11f2 (patch) | |
| tree | 0b2123575502c3e3cb5127582b03e9c196c9891d /ext/bg/js | |
| parent | eda8534e195d653ee0dea36f70caed0d8d49acf1 (diff) | |
Translator regex replacements (#1199)
* Add support for regex replacements during the translation process
* Allow assignment of textReplacements
* Rename
* Set up test data
* Write expected data
* Set up options
* Prevent infinite loop if regex matches empty string
* Implement setting controller
* Add support for testing pattern replacements
Diffstat (limited to 'ext/bg/js')
| -rw-r--r-- | ext/bg/js/backend.js | 28 | ||||
| -rw-r--r-- | ext/bg/js/options.js | 16 | ||||
| -rw-r--r-- | ext/bg/js/settings2/settings-main.js | 4 | ||||
| -rw-r--r-- | ext/bg/js/settings2/translation-text-replacements-controller.js | 241 | ||||
| -rw-r--r-- | ext/bg/js/translator.js | 77 | 
5 files changed, 364 insertions, 2 deletions
| diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index c7131728..690f6a3c 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -1655,9 +1655,11 @@ class Backend {                  convertAlphabeticCharacters,                  convertHiraganaToKatakana,                  convertKatakanaToHiragana, -                collapseEmphaticSequences +                collapseEmphaticSequences, +                textReplacements: textReplacementsOptions              }          } = options; +        const textReplacements = this._getTranslatorTextReplacements(textReplacementsOptions);          return {              wildcard,              mainDictionary, @@ -1668,6 +1670,7 @@ class Backend {              convertHiraganaToKatakana,              convertKatakanaToHiragana,              collapseEmphaticSequences, +            textReplacements,              enabledDictionaryMap          };      } @@ -1686,6 +1689,29 @@ class Backend {          return enabledDictionaryMap;      } +    _getTranslatorTextReplacements(textReplacementsOptions) { +        const textReplacements = []; +        for (const group of textReplacementsOptions.groups) { +            const textReplacementsEntries = []; +            for (let {pattern, ignoreCase, replacement} of group) { +                try { +                    pattern = new RegExp(pattern, ignoreCase ? 'gi' : 'g'); +                } catch (e) { +                    // Invalid pattern +                    continue; +                } +                textReplacementsEntries.push({pattern, replacement}); +            } +            if (textReplacementsEntries.length > 0) { +                textReplacements.push(textReplacementsEntries); +            } +        } +        if (textReplacements.length === 0 || textReplacementsOptions.searchOriginal) { +            textReplacements.unshift(null); +        } +        return textReplacements; +    } +      async _openWelcomeGuidePage() {          await this._createTab(chrome.runtime.getURL('/bg/welcome.html'));      } diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index 5c68e77c..16168e38 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -485,6 +485,10 @@ class OptionsUtil {              {                  async: false,                  update: this._updateVersion7.bind(this) +            }, +            { +                async: false, +                update: this._updateVersion8.bind(this)              }          ];      } @@ -675,4 +679,16 @@ class OptionsUtil {          }          return options;      } + +    _updateVersion8(options) { +        // Version 8 changes: +        //  Added translation.textReplacements. +        for (const profile of options.profiles) { +            profile.options.translation.textReplacements = { +                searchOriginal: true, +                groups: [] +            }; +        } +        return options; +    }  } diff --git a/ext/bg/js/settings2/settings-main.js b/ext/bg/js/settings2/settings-main.js index 1a719edd..1b3bfaa0 100644 --- a/ext/bg/js/settings2/settings-main.js +++ b/ext/bg/js/settings2/settings-main.js @@ -36,6 +36,7 @@   * SettingsDisplayController   * StatusFooter   * StorageController + * TranslationTextReplacementsController   * api   */ @@ -120,6 +121,9 @@ async function setupGenericSettingsController(genericSettingController) {          const secondarySearchDictionaryController = new SecondarySearchDictionaryController(settingsController);          secondarySearchDictionaryController.prepare(); +        const translationTextReplacementsController = new TranslationTextReplacementsController(settingsController); +        translationTextReplacementsController.prepare(); +          await Promise.all(preparePromises);          document.documentElement.dataset.loaded = 'true'; diff --git a/ext/bg/js/settings2/translation-text-replacements-controller.js b/ext/bg/js/settings2/translation-text-replacements-controller.js new file mode 100644 index 00000000..41ee8e3f --- /dev/null +++ b/ext/bg/js/settings2/translation-text-replacements-controller.js @@ -0,0 +1,241 @@ +/* + * Copyright (C) 2021  Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +class TranslationTextReplacementsController { +    constructor(settingsController) { +        this._settingsController = settingsController; +        this._entryContainer = null; +        this._entries = []; +    } + +    async prepare() { +        this._entryContainer = document.querySelector('#translation-text-replacement-list'); +        const addButton = document.querySelector('#translation-text-replacement-add'); + +        addButton.addEventListener('click', this._onAdd.bind(this), false); +        this._settingsController.on('optionsChanged', this._onOptionsChanged.bind(this)); + +        await this._updateOptions(); +    } + + +    async addGroup() { +        const options = await this._settingsController.getOptions(); +        const {groups} = options.translation.textReplacements; +        const newEntry = this._createNewEntry(); +        const target = ( +            (groups.length === 0) ? +            { +                action: 'splice', +                path: 'translation.textReplacements.groups', +                start: 0, +                deleteCount: 0, +                items: [[newEntry]] +            } : +            { +                action: 'splice', +                path: 'translation.textReplacements.groups[0]', +                start: groups[0].length, +                deleteCount: 0, +                items: [newEntry] +            } +        ); + +        await this._settingsController.modifyProfileSettings([target]); +        await this._updateOptions(); +    } + +    async deleteGroup(index) { +        const options = await this._settingsController.getOptions(); +        const {groups} = options.translation.textReplacements; +        if (groups.length === 0) { return false; } + +        const group0 = groups[0]; +        if (index < 0 || index >= group0.length) { return false; } + +        const target = ( +            (group0.length > 1) ? +            { +                action: 'splice', +                path: 'translation.textReplacements.groups[0]', +                start: index, +                deleteCount: 1, +                items: [] +            } : +            { +                action: 'splice', +                path: 'translation.textReplacements.groups', +                start: 0, +                deleteCount: group0.length, +                items: [] +            } +        ); + +        await this._settingsController.modifyProfileSettings([target]); +        await this._updateOptions(); +        return true; +    } + +    // Private + +    _onOptionsChanged({options}) { +        for (const entry of this._entries) { +            entry.cleanup(); +        } +        this._entries = []; + +        const {groups} = options.translation.textReplacements; +        if (groups.length > 0) { +            const group0 = groups[0]; +            for (let i = 0, ii = group0.length; i < ii; ++i) { +                const data = group0[i]; +                const node = this._settingsController.instantiateTemplate('translation-text-replacement-entry'); +                this._entryContainer.appendChild(node); +                const entry = new TranslationTextReplacementsEntry(this, node, i, data); +                this._entries.push(entry); +                entry.prepare(); +            } +        } +    } + +    _onAdd() { +        this.addGroup(); +    } + +    async _updateOptions() { +        const options = await this._settingsController.getOptions(); +        this._onOptionsChanged({options}); +    } + +    _createNewEntry() { +        return {pattern: '', ignoreCase: false, replacement: ''}; +    } +} + +class TranslationTextReplacementsEntry { +    constructor(parent, node, index) { +        this._parent = parent; +        this._node = node; +        this._index = index; +        this._eventListeners = new EventListenerCollection(); +        this._patternInput = null; +        this._replacementInput = null; +        this._ignoreCaseToggle = null; +        this._testInput = null; +        this._testOutput = null; +    } + +    prepare() { +        const patternInput = this._node.querySelector('.translation-text-replacement-pattern'); +        const replacementInput = this._node.querySelector('.translation-text-replacement-replacement'); +        const ignoreCaseToggle = this._node.querySelector('.translation-text-replacement-pattern-ignore-case'); +        const menuButton = this._node.querySelector('.translation-text-replacement-button'); +        const testInput = this._node.querySelector('.translation-text-replacement-test-input'); +        const testOutput = this._node.querySelector('.translation-text-replacement-test-output'); + +        this._patternInput = patternInput; +        this._replacementInput = replacementInput; +        this._ignoreCaseToggle = ignoreCaseToggle; +        this._testInput = testInput; +        this._testOutput = testOutput; + +        const pathBase = `translation.textReplacements.groups[0][${this._index}]`; +        patternInput.dataset.setting = `${pathBase}.pattern`; +        replacementInput.dataset.setting = `${pathBase}.replacement`; +        ignoreCaseToggle.dataset.setting = `${pathBase}.ignoreCase`; + +        this._eventListeners.addEventListener(menuButton, 'menuOpened', this._onMenuOpened.bind(this), false); +        this._eventListeners.addEventListener(menuButton, 'menuClosed', this._onMenuClosed.bind(this), false); +        this._eventListeners.addEventListener(patternInput, 'settingChanged', this._onPatternChanged.bind(this), false); +        this._eventListeners.addEventListener(ignoreCaseToggle, 'settingChanged', this._updateTestInput.bind(this), false); +        this._eventListeners.addEventListener(replacementInput, 'settingChanged', this._updateTestInput.bind(this), false); +        this._eventListeners.addEventListener(testInput, 'input', this._updateTestInput.bind(this), false); +    } + +    cleanup() { +        this._eventListeners.removeAllEventListeners(); +        if (this._node.parentNode !== null) { +            this._node.parentNode.removeChild(this._node); +        } +    } + +    // Private + +    _onMenuOpened({detail: {menu}}) { +        const testVisible = this._isTestVisible(); +        menu.querySelector('[data-menu-action=showTest]').hidden = testVisible; +        menu.querySelector('[data-menu-action=hideTest]').hidden = !testVisible; +    } + +    _onMenuClosed({detail: {action}}) { +        switch (action) { +            case 'remove': +                this._parent.deleteGroup(this._index); +                break; +            case 'showTest': +                this._setTestVisible(true); +                break; +            case 'hideTest': +                this._setTestVisible(false); +                break; +        } +    } + +    _onPatternChanged({detail: {value}}) { +        this._validatePattern(value); +        this._updateTestInput(); +    } + +    _validatePattern(value) { +        let okay = false; +        try { +            new RegExp(value, 'g'); +            okay = true; +        } catch (e) { +            // NOP +        } + +        this._patternInput.dataset.invalid = `${!okay}`; +    } + +    _isTestVisible() { +        return this._node.dataset.testVisible === 'true'; +    } + +    _setTestVisible(visible) { +        this._node.dataset.testVisible = `${visible}`; +        this._updateTestInput(); +    } + +    _updateTestInput() { +        if (!this._isTestVisible()) { return; } + +        const ignoreCase = this._ignoreCaseToggle.checked; +        const pattern = this._patternInput.value; +        let regex; +        try { +            regex = new RegExp(pattern, ignoreCase ? 'gi' : 'g'); +        } catch (e) { +            return; +        } + +        const replacement = this._replacementInput.value; +        const input = this._testInput.value; +        const output = input.replace(regex, replacement); +        this._testOutput.value = output; +    } +} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index c23649e1..8cc520a8 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -68,6 +68,13 @@ class Translator {       *     convertHiraganaToKatakana: (enum: 'false', 'true', 'variant'),       *     convertKatakanaToHiragana: (enum: 'false', 'true', 'variant'),       *     collapseEmphaticSequences: (enum: 'false', 'true', 'full'), +     *     textReplacements: [ +     *       (null or [ +     *         {pattern: (RegExp), replacement: (string)} +     *         ... +     *       ]) +     *       ... +     *     ],       *     enabledDictionaryMap: (Map of [       *       (string),       *       { @@ -302,6 +309,7 @@ class Translator {      _getAllDeinflections(text, options) {          const textOptionVariantArray = [ +            this._getTextReplacementsVariants(options),              this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),              this._getTextOptionEntryVariants(options.convertNumericCharacters),              this._getTextOptionEntryVariants(options.convertAlphabeticCharacters), @@ -313,9 +321,12 @@ class Translator {          const jp = this._japaneseUtil;          const deinflections = [];          const used = new Set(); -        for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) { +        for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {              let text2 = text;              const sourceMap = new TextSourceMap(text2); +            if (textReplacements !== null) { +                text2 = this._applyTextReplacements(text2, sourceMap, textReplacements); +            }              if (halfWidth) {                  text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);              } @@ -879,6 +890,10 @@ class Translator {          return collapseEmphaticOptions;      } +    _getTextReplacementsVariants(options) { +        return options.textReplacements; +    } +      _getSecondarySearchDictionaryMap(enabledDictionaryMap) {          const secondarySearchDictionaryMap = new Map();          for (const [dictionary, details] of enabledDictionaryMap.entries()) { @@ -1304,4 +1319,64 @@ class Translator {              return stringComparer.compare(v1.notes, v2.notes);          });      } + +    // Regex functions + +    _applyTextReplacements(text, sourceMap, replacements) { +        for (const {pattern, replacement} of replacements) { +            text = this._applyTextReplacement(text, sourceMap, pattern, replacement); +        } +        return text; +    } + +    _applyTextReplacement(text, sourceMap, pattern, replacement) { +        const isGlobal = pattern.global; +        if (isGlobal) { pattern.lastIndex = 0; } +        for (let loop = true; loop; loop = isGlobal) { +            const match = pattern.exec(text); +            if (match === null) { break; } + +            const matchText = match[0]; +            const index = match.index; +            const actualReplacement = this._applyMatchReplacement(replacement, match); +            const actualReplacementLength = actualReplacement.length; +            const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1); + +            text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`; +            pattern.lastIndex += delta; + +            if (actualReplacementLength > 0) { +                sourceMap.combine(Math.max(0, index - 1), matchText.length); +                sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0))); +            } else { +                sourceMap.combine(index, matchText.length); +            } +        } +        return text; +    } + +    _applyMatchReplacement(replacement, match) { +        const pattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g; +        return replacement.replace(pattern, (g0, g1, g2) => { +            if (typeof g1 !== 'undefined') { +                const matchIndex = Number.parseInt(g1, 10); +                if (matchIndex >= 1 && matchIndex <= match.length) { +                    return match[matchIndex]; +                } +            } else if (typeof g2 !== 'undefined') { +                const {groups} = match; +                if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) { +                    return groups[g2]; +                } +            } else { +                switch (g0) { +                    case '$': return '$'; +                    case '&': return match[0]; +                    case '`': return replacement.substring(0, match.index); +                    case '\'': return replacement.substring(match.index + g0.length); +                } +            } +            return g0; +        }); +    }  } |