diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-03-08 21:01:55 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-03-08 21:01:55 -0500 | 
| commit | c6f4144fda0f07ddfc84ebee1960264c968d156b (patch) | |
| tree | 23e1a7de5c3d6ff0ce1743726322d1d5005434dc /ext/js | |
| parent | 643dbfb12a66b98b2fabe82416322f0218474567 (diff) | |
Clean up translator (#1505)
* Remove unused: _removeUsedDefinitions
* Remove unused: _scoreToTermFrequency
* Remove unused: _getTermTagsScoreSum
* Add RegexUtil
* Update Translator to use RegexUtil
* Update sw.js
* Update tests
Diffstat (limited to 'ext/js')
| -rw-r--r-- | ext/js/general/regex-util.js | 93 | ||||
| -rw-r--r-- | ext/js/language/translator.js | 90 | 
2 files changed, 95 insertions, 88 deletions
| diff --git a/ext/js/general/regex-util.js b/ext/js/general/regex-util.js new file mode 100644 index 00000000..35b1c2b8 --- /dev/null +++ b/ext/js/general/regex-util.js @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2021  Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +/** + * This class provides some general utility functions for regular expressions. + */ +class RegexUtil { +    /** +     * Applies string.replace using a regular expression and replacement string as arguments. +     * A source map of the changes is also maintained. +     * @param text A string of the text to replace. +     * @param sourceMap An instance of `TextSourceMap` which corresponds to `text`. +     * @param pattern A regular expression to use as the replacement. +     * @param replacement A replacement string that follows the format of the standard +     *   JavaScript regular expression replacement string. +     * @return A new string with the pattern replacements applied and the source map updated. +     */ +    static applyTextReplacement(text, sourceMap, pattern, replacement) { +        const isGlobal = pattern.global; +        if (isGlobal) { pattern.lastIndex = 0; } +        for (let loop = true; loop; loop = isGlobal) { +            const match = pattern.exec(text); +            if (match === null) { break; } + +            const matchText = match[0]; +            const index = match.index; +            const actualReplacement = this.applyMatchReplacement(replacement, match); +            const actualReplacementLength = actualReplacement.length; +            const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1); + +            text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`; +            pattern.lastIndex += delta; + +            if (actualReplacementLength > 0) { +                sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0))); +                sourceMap.combine(index - 1 + actualReplacementLength, matchText.length); +            } else { +                sourceMap.combine(index, matchText.length); +            } +        } +        return text; +    } + +    /** +     * Applies the replacement string for a given regular expression match. +     * @param replacement The replacement string that follows the format of the standard +     *   JavaScript regular expression replacement string. +     * @param match A match object returned from RegExp.match. +     * @return A new string with the pattern replacement applied. +     */ +    static applyMatchReplacement(replacement, match) { +        const pattern = this._matchReplacementPattern; +        pattern.lastIndex = 0; +        return replacement.replace(pattern, (g0, g1, g2) => { +            if (typeof g1 !== 'undefined') { +                const matchIndex = Number.parseInt(g1, 10); +                if (matchIndex >= 1 && matchIndex <= match.length) { +                    return match[matchIndex]; +                } +            } else if (typeof g2 !== 'undefined') { +                const {groups} = match; +                if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) { +                    return groups[g2]; +                } +            } else { +                switch (g0) { +                    case '$': return '$'; +                    case '&': return match[0]; +                    case '`': return replacement.substring(0, match.index); +                    case '\'': return replacement.substring(match.index + g0.length); +                } +            } +            return g0; +        }); +    } +} + +// eslint-disable-next-line no-underscore-dangle +RegexUtil._matchReplacementPattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g; diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index e8aba5b1..c885cd4d 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -17,6 +17,7 @@  /* global   * Deinflector + * RegexUtil   * TextSourceMap   */ @@ -534,24 +535,6 @@ class Translator {          );      } -    _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) { -        for (let i = 0, ii = definitions.length; i < ii; ++i) { -            const definition = definitions[i]; -            const {expression, reading} = definition; -            const expressionMap = termInfoMap.get(expression); -            if ( -                typeof expressionMap !== 'undefined' && -                typeof expressionMap.get(reading) !== 'undefined' -            ) { -                usedDefinitions.add(definition); -            } else { -                definitions.splice(i, 1); -                --i; -                --ii; -            } -        } -    } -      _getUniqueDefinitionTags(definitions) {          const definitionTagsMap = new Map();          for (const {definitionTags} of definitions) { @@ -794,16 +777,6 @@ class Translator {      // Simple helpers -    _scoreToTermFrequency(score) { -        if (score > 0) { -            return 'popular'; -        } else if (score < 0) { -            return 'rare'; -        } else { -            return 'normal'; -        } -    } -      _getNameBase(name) {          const pos = name.indexOf(':');          return (pos >= 0 ? name.substring(0, pos) : name); @@ -974,14 +947,6 @@ class Translator {      // Reduction functions -    _getTermTagsScoreSum(termTags) { -        let result = 0; -        for (const {score} of termTags) { -            result += score; -        } -        return result; -    } -      _getSourceTermMatchCountSum(definitions) {          let result = 0;          for (const {sourceTermExactMatchCount} of definitions) { @@ -1408,59 +1373,8 @@ class Translator {      _applyTextReplacements(text, sourceMap, replacements) {          for (const {pattern, replacement} of replacements) { -            text = this._applyTextReplacement(text, sourceMap, pattern, replacement); +            text = RegexUtil.applyTextReplacement(text, sourceMap, pattern, replacement);          }          return text;      } - -    _applyTextReplacement(text, sourceMap, pattern, replacement) { -        const isGlobal = pattern.global; -        if (isGlobal) { pattern.lastIndex = 0; } -        for (let loop = true; loop; loop = isGlobal) { -            const match = pattern.exec(text); -            if (match === null) { break; } - -            const matchText = match[0]; -            const index = match.index; -            const actualReplacement = this._applyMatchReplacement(replacement, match); -            const actualReplacementLength = actualReplacement.length; -            const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1); - -            text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`; -            pattern.lastIndex += delta; - -            if (actualReplacementLength > 0) { -                sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0))); -                sourceMap.combine(index - 1 + actualReplacementLength, matchText.length); -            } else { -                sourceMap.combine(index, matchText.length); -            } -        } -        return text; -    } - -    _applyMatchReplacement(replacement, match) { -        const pattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g; -        return replacement.replace(pattern, (g0, g1, g2) => { -            if (typeof g1 !== 'undefined') { -                const matchIndex = Number.parseInt(g1, 10); -                if (matchIndex >= 1 && matchIndex <= match.length) { -                    return match[matchIndex]; -                } -            } else if (typeof g2 !== 'undefined') { -                const {groups} = match; -                if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) { -                    return groups[g2]; -                } -            } else { -                switch (g0) { -                    case '$': return '$'; -                    case '&': return match[0]; -                    case '`': return replacement.substring(0, match.index); -                    case '\'': return replacement.substring(match.index + g0.length); -                } -            } -            return g0; -        }); -    }  } |