diff options
| -rw-r--r-- | ext/bg/background.html | 1 | ||||
| -rw-r--r-- | ext/bg/js/japanese.js | 31 | ||||
| -rw-r--r-- | ext/bg/js/text-source-map.js | 115 | ||||
| -rw-r--r-- | ext/bg/js/translator.js | 30 | ||||
| -rw-r--r-- | package.json | 2 | ||||
| -rw-r--r-- | test/test-japanese.js | 18 | ||||
| -rw-r--r-- | test/test-text-source-map.js | 234 | 
7 files changed, 378 insertions, 53 deletions
| diff --git a/ext/bg/background.html b/ext/bg/background.html index f7cf6e55..e456717e 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -38,6 +38,7 @@          <script src="/bg/js/options.js"></script>          <script src="/bg/js/profile-conditions.js"></script>          <script src="/bg/js/request.js"></script> +        <script src="/bg/js/text-source-map.js"></script>          <script src="/bg/js/translator.js"></script>          <script src="/bg/js/util.js"></script>          <script src="/mixed/js/audio-system.js"></script> diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index c5873cf1..2a2b39fd 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -158,9 +158,8 @@          return result;      } -    function convertHalfWidthKanaToFullWidth(text, sourceMapping) { +    function convertHalfWidthKanaToFullWidth(text, sourceMap=null) {          let result = ''; -        const hasSourceMapping = Array.isArray(sourceMapping);          // This function is safe to use charCodeAt instead of codePointAt, since all          // the relevant characters are represented with a single UTF-16 character code. @@ -192,10 +191,8 @@                  }              } -            if (hasSourceMapping && index > 0) { -                index = result.length; -                const v = sourceMapping.splice(index + 1, 1)[0]; -                sourceMapping[index] += v; +            if (sourceMap !== null && index > 0) { +                sourceMap.combine(result.length, 1);              }              result += c2;          } @@ -203,7 +200,7 @@          return result;      } -    function convertAlphabeticToKana(text, sourceMapping) { +    function convertAlphabeticToKana(text, sourceMap=null) {          let part = '';          let result = ''; @@ -222,7 +219,7 @@                  c = 0x2d; // '-'              } else {                  if (part.length > 0) { -                    result += convertAlphabeticPartToKana(part, sourceMapping, result.length); +                    result += convertAlphabeticPartToKana(part, sourceMap, result.length);                      part = '';                  }                  result += char; @@ -232,17 +229,16 @@          }          if (part.length > 0) { -            result += convertAlphabeticPartToKana(part, sourceMapping, result.length); +            result += convertAlphabeticPartToKana(part, sourceMap, result.length);          }          return result;      } -    function convertAlphabeticPartToKana(text, sourceMapping, sourceMappingStart) { +    function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {          const result = wanakana.toHiragana(text);          // Generate source mapping -        if (Array.isArray(sourceMapping)) { -            if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; } +        if (sourceMap !== null) {              let i = 0;              let resultPos = 0;              const ii = text.length; @@ -262,18 +258,15 @@                  // Merge characters                  const removals = iNext - i - 1;                  if (removals > 0) { -                    let sum = 0; -                    const vs = sourceMapping.splice(sourceMappingStart + 1, removals); -                    for (const v of vs) { sum += v; } -                    sourceMapping[sourceMappingStart] += sum; +                    sourceMap.combine(sourceMapStart, removals);                  } -                ++sourceMappingStart; +                ++sourceMapStart;                  // Empty elements                  const additions = resultPosNext - resultPos - 1;                  for (let j = 0; j < additions; ++j) { -                    sourceMapping.splice(sourceMappingStart, 0, 0); -                    ++sourceMappingStart; +                    sourceMap.insert(sourceMapStart, 0); +                    ++sourceMapStart;                  }                  i = iNext; diff --git a/ext/bg/js/text-source-map.js b/ext/bg/js/text-source-map.js new file mode 100644 index 00000000..24970978 --- /dev/null +++ b/ext/bg/js/text-source-map.js @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2020  Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +class TextSourceMap { +    constructor(source, mapping=null) { +        this._source = source; +        this._mapping = (Array.isArray(mapping) ? TextSourceMap._normalizeMapping(mapping) : null); +    } + +    get source() { +        return this._source; +    } + +    equals(other) { +        if (this === other) { +            return true; +        } + +        const source = this._source; +        if (!(other instanceof TextSourceMap && source === other._source)) { +            return false; +        } + +        let mapping = this._mapping; +        let otherMapping = other._mapping; +        if (mapping === null) { +            if (otherMapping === null) { +                return true; +            } +            mapping = TextSourceMap._createMapping(source); +        } else if (otherMapping === null) { +            otherMapping = TextSourceMap._createMapping(source); +        } + +        const mappingLength = mapping.length; +        if (mappingLength !== otherMapping.length) { +            return false; +        } + +        for (let i = 0; i < mappingLength; ++i) { +            if (mapping[i] !== otherMapping[i]) { +                return false; +            } +        } + +        return true; +    } + +    getSourceLength(finalLength) { +        const mapping = this._mapping; +        if (mapping === null) { +            return finalLength; +        } + +        let sourceLength = 0; +        for (let i = 0; i < finalLength; ++i) { +            sourceLength += mapping[i]; +        } +        return sourceLength; +    } + +    combine(index, count) { +        if (count <= 0) { return; } + +        if (this._mapping === null) { +            this._mapping = TextSourceMap._createMapping(this._source); +        } + +        let sum = this._mapping[index]; +        const parts = this._mapping.splice(index + 1, count); +        for (const part of parts) { +            sum += part; +        } +        this._mapping[index] = sum; +    } + +    insert(index, ...items) { +        if (this._mapping === null) { +            this._mapping = TextSourceMap._createMapping(this._source); +        } + +        this._mapping.splice(index, 0, ...items); +    } + +    static _createMapping(text) { +        return new Array(text.length).fill(1); +    } + +    static _normalizeMapping(mapping) { +        const result = []; +        for (const value of mapping) { +            result.push( +                (typeof value === 'number' && Number.isFinite(value)) ? +                Math.floor(value) : +                0 +            ); +        } +        return result; +    } +} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index f16889ce..cd991efa 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -19,6 +19,7 @@  /* global   * Database   * Deinflector + * TextSourceMap   * dictEnabledSet   * dictTagBuildSource   * dictTagSanitize @@ -367,17 +368,15 @@ class Translator {          const used = new Set();          for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) {              let text2 = text; -            let sourceMapping = null; +            const sourceMap = new TextSourceMap(text2);              if (halfWidth) { -                if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } -                text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMapping); +                text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);              }              if (numeric) {                  text2 = jp.convertNumericToFullWidth(text2);              }              if (alphabetic) { -                if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } -                text2 = jp.convertAlphabeticToKana(text2, sourceMapping); +                text2 = jp.convertAlphabeticToKana(text2, sourceMap);              }              if (katakana) {                  text2 = jp.convertHiraganaToKatakana(text2); @@ -391,7 +390,7 @@ class Translator {                  if (used.has(text2Substring)) { break; }                  used.add(text2Substring);                  for (const deinflection of this.deinflector.deinflect(text2Substring)) { -                    deinflection.rawSource = Translator.getDeinflectionRawSource(text, i, sourceMapping); +                    deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));                      deinflections.push(deinflection);                  }              } @@ -407,25 +406,6 @@ class Translator {          }      } -    static getDeinflectionRawSource(source, length, sourceMapping) { -        if (sourceMapping === null) { -            return source.substring(0, length); -        } - -        let result = ''; -        let index = 0; -        for (let i = 0; i < length; ++i) { -            const c = sourceMapping[i]; -            result += source.substring(index, index + c); -            index += c; -        } -        return result; -    } - -    static createTextSourceMapping(text) { -        return new Array(text.length).fill(1); -    } -      async findKanji(text, options) {          const dictionaries = dictEnabledSet(options);          const kanjiUnique = new Set(); diff --git a/package.json b/package.json index 8ae103a0..b02ec179 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@      "scripts": {          "test": "npm run test-lint && npm run test-code",          "test-lint": "eslint . && node ./test/lint/global-declarations.js", -        "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js" +        "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js && node ./test/test-text-source-map.js"      },      "repository": {          "type": "git", diff --git a/test/test-japanese.js b/test/test-japanese.js index eab632bf..ca65dde2 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -23,9 +23,11 @@ const vm = new VM();  vm.execute([      'mixed/lib/wanakana.min.js',      'mixed/js/japanese.js', +    'bg/js/text-source-map.js',      'bg/js/japanese.js'  ]);  const jp = vm.get('jp'); +const TextSourceMap = vm.get('TextSourceMap');  function testIsCodePointKanji() { @@ -262,13 +264,13 @@ function testConvertHalfWidthKanaToFullWidth() {      ];      for (const [string, expected, expectedSourceMapping] of data) { -        const sourceMapping = new Array(string.length).fill(1); +        const sourceMap = new TextSourceMap(string);          const actual1 = jp.convertHalfWidthKanaToFullWidth(string, null); -        const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMapping); +        const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMap);          assert.strictEqual(actual1, expected);          assert.strictEqual(actual2, expected); -        if (Array.isArray(expectedSourceMapping)) { -            vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); +        if (typeof expectedSourceMapping !== 'undefined') { +            assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping)));          }      }  } @@ -285,13 +287,13 @@ function testConvertAlphabeticToKana() {      ];      for (const [string, expected, expectedSourceMapping] of data) { -        const sourceMapping = new Array(string.length).fill(1); +        const sourceMap = new TextSourceMap(string);          const actual1 = jp.convertAlphabeticToKana(string, null); -        const actual2 = jp.convertAlphabeticToKana(string, sourceMapping); +        const actual2 = jp.convertAlphabeticToKana(string, sourceMap);          assert.strictEqual(actual1, expected);          assert.strictEqual(actual2, expected); -        if (Array.isArray(expectedSourceMapping)) { -            vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); +        if (typeof expectedSourceMapping !== 'undefined') { +            assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping)));          }      }  } diff --git a/test/test-text-source-map.js b/test/test-text-source-map.js new file mode 100644 index 00000000..25bd8fc2 --- /dev/null +++ b/test/test-text-source-map.js @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2020  Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +const assert = require('assert'); +const {VM} = require('./yomichan-vm'); + +const vm = new VM(); +vm.execute(['bg/js/text-source-map.js']); +const TextSourceMap = vm.get('TextSourceMap'); + + +function testSource() { +    const data = [ +        ['source1'], +        ['source2'], +        ['source3'] +    ]; + +    for (const [source] of data) { +        const sourceMap = new TextSourceMap(source); +        assert.strictEqual(source, sourceMap.source); +    } +} + +function testEquals() { +    const data = [ +        [['source1', null], ['source1', null], true], +        [['source2', null], ['source2', null], true], +        [['source3', null], ['source3', null], true], + +        [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source1', null], true], +        [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source2', null], true], +        [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source3', null], true], + +        [['source1', null], ['source1', [1, 1, 1, 1, 1, 1, 1]], true], +        [['source2', null], ['source2', [1, 1, 1, 1, 1, 1, 1]], true], +        [['source3', null], ['source3', [1, 1, 1, 1, 1, 1, 1]], true], + +        [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source1', [1, 1, 1, 1, 1, 1, 1]], true], +        [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source2', [1, 1, 1, 1, 1, 1, 1]], true], +        [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source3', [1, 1, 1, 1, 1, 1, 1]], true], + +        [['source1', [1, 2, 1, 3]], ['source1', [1, 2, 1, 3]], true], +        [['source2', [1, 2, 1, 3]], ['source2', [1, 2, 1, 3]], true], +        [['source3', [1, 2, 1, 3]], ['source3', [1, 2, 1, 3]], true], + +        [['source1', [1, 3, 1, 2]], ['source1', [1, 2, 1, 3]], false], +        [['source2', [1, 3, 1, 2]], ['source2', [1, 2, 1, 3]], false], +        [['source3', [1, 3, 1, 2]], ['source3', [1, 2, 1, 3]], false], + +        [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source4', [1, 1, 1, 1, 1, 1, 1]], false], +        [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source5', [1, 1, 1, 1, 1, 1, 1]], false], +        [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source6', [1, 1, 1, 1, 1, 1, 1]], false] +    ]; + +    for (const [[source1, mapping1], [source2, mapping2], expectedEquals] of data) { +        const sourceMap1 = new TextSourceMap(source1, mapping1); +        const sourceMap2 = new TextSourceMap(source2, mapping2); +        assert.ok(sourceMap1.equals(sourceMap1)); +        assert.ok(sourceMap2.equals(sourceMap2)); +        assert.strictEqual(sourceMap1.equals(sourceMap2), expectedEquals); +    } +} + +function testGetSourceLength() { +    const data = [ +        [['source', [1, 1, 1, 1, 1, 1]], 1, 1], +        [['source', [1, 1, 1, 1, 1, 1]], 2, 2], +        [['source', [1, 1, 1, 1, 1, 1]], 3, 3], +        [['source', [1, 1, 1, 1, 1, 1]], 4, 4], +        [['source', [1, 1, 1, 1, 1, 1]], 5, 5], +        [['source', [1, 1, 1, 1, 1, 1]], 6, 6], + +        [['source', [2, 2, 2]], 1, 2], +        [['source', [2, 2, 2]], 2, 4], +        [['source', [2, 2, 2]], 3, 6], + +        [['source', [3, 3]], 1, 3], +        [['source', [3, 3]], 2, 6], + +        [['source', [6, 6]], 1, 6] +    ]; + +    for (const [[source, mapping], finalLength, expectedValue] of data) { +        const sourceMap = new TextSourceMap(source, mapping); +        assert.strictEqual(sourceMap.getSourceLength(finalLength), expectedValue); +    } +} + +function testCombineInsert() { +    const data = [ +        // No operations +        [ +            ['source', null], +            ['source', [1, 1, 1, 1, 1, 1]], +            [] +        ], + +        // Combine +        [ +            ['source', null], +            ['source', [3, 1, 1, 1]], +            [ +                ['combine', 0, 2] +            ] +        ], +        [ +            ['source', null], +            ['source', [1, 1, 1, 3]], +            [ +                ['combine', 3, 2] +            ] +        ], +        [ +            ['source', null], +            ['source', [3, 3]], +            [ +                ['combine', 0, 2], +                ['combine', 1, 2] +            ] +        ], +        [ +            ['source', null], +            ['source', [3, 3]], +            [ +                ['combine', 3, 2], +                ['combine', 0, 2] +            ] +        ], + +        // Insert +        [ +            ['source', null], +            ['source', [0, 1, 1, 1, 1, 1, 1]], +            [ +                ['insert', 0, 0] +            ] +        ], +        [ +            ['source', null], +            ['source', [1, 1, 1, 1, 1, 1, 0]], +            [ +                ['insert', 6, 0] +            ] +        ], +        [ +            ['source', null], +            ['source', [0, 1, 1, 1, 1, 1, 1, 0]], +            [ +                ['insert', 0, 0], +                ['insert', 7, 0] +            ] +        ], +        [ +            ['source', null], +            ['source', [0, 1, 1, 1, 1, 1, 1, 0]], +            [ +                ['insert', 6, 0], +                ['insert', 0, 0] +            ] +        ], + +        // Mixed +        [ +            ['source', null], +            ['source', [3, 0, 3]], +            [ +                ['combine', 0, 2], +                ['insert', 1, 0], +                ['combine', 2, 2] +            ] +        ], +        [ +            ['source', null], +            ['source', [3, 0, 3]], +            [ +                ['combine', 0, 2], +                ['combine', 1, 2], +                ['insert', 1, 0] +            ] +        ], +        [ +            ['source', null], +            ['source', [3, 0, 3]], +            [ +                ['insert', 3, 0], +                ['combine', 0, 2], +                ['combine', 2, 2] +            ] +        ] +    ]; + +    for (const [[source, mapping], [expectedSource, expectedMapping], operations] of data) { +        const sourceMap = new TextSourceMap(source, mapping); +        const expectedSourceMap = new TextSourceMap(expectedSource, expectedMapping); +        for (const [operation, ...args] of operations) { +            switch (operation) { +                case 'combine': +                    sourceMap.combine(...args); +                    break; +                case 'insert': +                    sourceMap.insert(...args); +                    break; +            } +        } +        assert.ok(sourceMap.equals(expectedSourceMap)); +    } +} + + +function main() { +    testSource(); +    testEquals(); +    testGetSourceLength(); +    testCombineInsert(); +} + + +if (require.main === module) { main(); } |