summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2020-04-05 12:51:56 -0400
committer GitHub <noreply@github.com> 2020-04-05 12:51:56 -0400
commit 058f626efd6c5d6fae66346d487c10930d769971 (patch)
tree 13045c5866953d6886db6faaf87fbff715fd7ceb
parent f439d12718247411ccd0575af0d1de82aa22564a (diff)
parent 7225201fb6776664d7a820e45e85c3500e83c80f (diff)
Merge pull request #423 from toasted-nutbread/text-source-map
Text source map
-rw-r--r-- ext/bg/background.html 1
-rw-r--r-- ext/bg/js/japanese.js 31
-rw-r--r-- ext/bg/js/text-source-map.js 115
-rw-r--r-- ext/bg/js/translator.js 30
-rw-r--r-- package.json 2
-rw-r--r-- test/test-japanese.js 18
-rw-r--r-- test/test-text-source-map.js 234
7 files changed, 378 insertions, 53 deletions
diff --git a/ext/bg/background.html b/ext/bg/background.html
index f7cf6e55..e456717e 100644
--- a/ext/bg/background.html
+++ b/ext/bg/background.html
@@ -38,6 +38,7 @@
<script src="/bg/js/options.js"></script>
<script src="/bg/js/profile-conditions.js"></script>
<script src="/bg/js/request.js"></script>
+ <script src="/bg/js/text-source-map.js"></script>
<script src="/bg/js/translator.js"></script>
<script src="/bg/js/util.js"></script>
<script src="/mixed/js/audio-system.js"></script>
diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js
index c5873cf1..2a2b39fd 100644
--- a/ext/bg/js/japanese.js
+++ b/ext/bg/js/japanese.js
@@ -158,9 +158,8 @@
return result;
}
- function convertHalfWidthKanaToFullWidth(text, sourceMapping) {
+ function convertHalfWidthKanaToFullWidth(text, sourceMap=null) {
let result = '';
- const hasSourceMapping = Array.isArray(sourceMapping);
// This function is safe to use charCodeAt instead of codePointAt, since all
// the relevant characters are represented with a single UTF-16 character code.
@@ -192,10 +191,8 @@
}
}
- if (hasSourceMapping && index > 0) {
- index = result.length;
- const v = sourceMapping.splice(index + 1, 1)[0];
- sourceMapping[index] += v;
+ if (sourceMap !== null && index > 0) {
+ sourceMap.combine(result.length, 1);
}
result += c2;
}
@@ -203,7 +200,7 @@
return result;
}
- function convertAlphabeticToKana(text, sourceMapping) {
+ function convertAlphabeticToKana(text, sourceMap=null) {
let part = '';
let result = '';
@@ -222,7 +219,7 @@
c = 0x2d; // '-'
} else {
if (part.length > 0) {
- result += convertAlphabeticPartToKana(part, sourceMapping, result.length);
+ result += convertAlphabeticPartToKana(part, sourceMap, result.length);
part = '';
}
result += char;
@@ -232,17 +229,16 @@
}
if (part.length > 0) {
- result += convertAlphabeticPartToKana(part, sourceMapping, result.length);
+ result += convertAlphabeticPartToKana(part, sourceMap, result.length);
}
return result;
}
- function convertAlphabeticPartToKana(text, sourceMapping, sourceMappingStart) {
+ function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
const result = wanakana.toHiragana(text);
// Generate source mapping
- if (Array.isArray(sourceMapping)) {
- if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; }
+ if (sourceMap !== null) {
let i = 0;
let resultPos = 0;
const ii = text.length;
@@ -262,18 +258,15 @@
// Merge characters
const removals = iNext - i - 1;
if (removals > 0) {
- let sum = 0;
- const vs = sourceMapping.splice(sourceMappingStart + 1, removals);
- for (const v of vs) { sum += v; }
- sourceMapping[sourceMappingStart] += sum;
+ sourceMap.combine(sourceMapStart, removals);
}
- ++sourceMappingStart;
+ ++sourceMapStart;
// Empty elements
const additions = resultPosNext - resultPos - 1;
for (let j = 0; j < additions; ++j) {
- sourceMapping.splice(sourceMappingStart, 0, 0);
- ++sourceMappingStart;
+ sourceMap.insert(sourceMapStart, 0);
+ ++sourceMapStart;
}
i = iNext;
diff --git a/ext/bg/js/text-source-map.js b/ext/bg/js/text-source-map.js
new file mode 100644
index 00000000..24970978
--- /dev/null
+++ b/ext/bg/js/text-source-map.js
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2020 Alex Yatskov <alex@foosoft.net>
+ * Author: Alex Yatskov <alex@foosoft.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+/**
+ * Records how a transformed string relates to the source text it was derived from.
+ *
+ * The mapping is an array with one entry per character of the transformed text;
+ * each entry is the number of source characters that character accounts for
+ * (getSourceLength sums these entries). A null mapping stands for the untouched
+ * one-to-one case: combine() and insert() replace it with an array of 1s on
+ * first use, while equals() only expands a temporary copy for the comparison.
+ *
+ * constructor(source, mapping=null):
+ *   source  - the original text.
+ *   mapping - optional array of per-character source lengths; it is copied and
+ *             normalized by _normalizeMapping. Any non-array value is stored
+ *             as null.
+ */
+class TextSourceMap {
+ constructor(source, mapping=null) {
+ this._source = source;
+ this._mapping = (Array.isArray(mapping) ? TextSourceMap._normalizeMapping(mapping) : null);
+ }
+ /**
+ * The source text passed to the constructor, unchanged.
+ */
+ get source() {
+ return this._source;
+ }
+ /**
+ * Compares this map with another value.
+ *
+ * Returns true for the same instance. Returns false when `other` is not a
+ * TextSourceMap or its source string differs. Otherwise the mappings are
+ * compared entry by entry, with a null mapping treated as an array of 1s
+ * of the source's length, so an explicit all-1s mapping equals a null one.
+ */
+ equals(other) {
+ if (this === other) {
+ return true;
+ }
+
+ const source = this._source;
+ if (!(other instanceof TextSourceMap && source === other._source)) {
+ return false;
+ }
+
+ let mapping = this._mapping;
+ let otherMapping = other._mapping;
+ if (mapping === null) {
+ if (otherMapping === null) {
+ return true; // both implicit and the sources already matched
+ }
+ mapping = TextSourceMap._createMapping(source); // local expansion only; this._mapping stays null
+ } else if (otherMapping === null) {
+ otherMapping = TextSourceMap._createMapping(source); // sources are equal here, so either source works
+ }
+
+ const mappingLength = mapping.length;
+ if (mappingLength !== otherMapping.length) {
+ return false;
+ }
+
+ for (let i = 0; i < mappingLength; ++i) {
+ if (mapping[i] !== otherMapping[i]) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+ /**
+ * Returns how many source characters correspond to the first `finalLength`
+ * characters of the transformed text.
+ *
+ * With a null mapping this is `finalLength` itself; otherwise it is the sum
+ * of the first `finalLength` mapping entries.
+ *
+ * NOTE(review): `finalLength` is not range-checked. If it exceeds the
+ * mapping's length, `undefined` is added to the sum and the result is NaN.
+ */
+ getSourceLength(finalLength) {
+ const mapping = this._mapping;
+ if (mapping === null) {
+ return finalLength;
+ }
+
+ let sourceLength = 0;
+ for (let i = 0; i < finalLength; ++i) {
+ sourceLength += mapping[i];
+ }
+ return sourceLength;
+ }
+ /**
+ * Merges the entry at `index` with the `count` entries that follow it: the
+ * following entries are removed from the mapping and their sum is added to
+ * the entry at `index`. Does nothing when `count` is zero or negative.
+ *
+ * NOTE(review): `index` is not range-checked.
+ */
+ combine(index, count) {
+ if (count <= 0) { return; }
+
+ if (this._mapping === null) {
+ this._mapping = TextSourceMap._createMapping(this._source); // materialize the implicit mapping before editing it
+ }
+
+ let sum = this._mapping[index];
+ const parts = this._mapping.splice(index + 1, count); // splice returns the removed entries
+ for (const part of parts) {
+ sum += part;
+ }
+ this._mapping[index] = sum;
+ }
+ /**
+ * Inserts the given entries into the mapping at `index` without removing
+ * any existing entries. The items are stored as given; they are not passed
+ * through _normalizeMapping.
+ */
+ insert(index, ...items) {
+ if (this._mapping === null) {
+ this._mapping = TextSourceMap._createMapping(this._source); // materialize the implicit mapping before editing it
+ }
+
+ this._mapping.splice(index, 0, ...items);
+ }
+ /**
+ * Builds the one-to-one mapping for `text`: an array of `text.length` 1s.
+ */
+ static _createMapping(text) {
+ return new Array(text.length).fill(1);
+ }
+ /**
+ * Returns a sanitized copy of `mapping`: finite numbers are floored, and
+ * every other value (non-numbers, NaN, +/-Infinity) becomes 0.
+ */
+ static _normalizeMapping(mapping) {
+ const result = [];
+ for (const value of mapping) {
+ result.push(
+ (typeof value === 'number' && Number.isFinite(value)) ?
+ Math.floor(value) :
+ 0
+ );
+ }
+ return result;
+ }
+}
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index f16889ce..cd991efa 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -19,6 +19,7 @@
/* global
* Database
* Deinflector
+ * TextSourceMap
* dictEnabledSet
* dictTagBuildSource
* dictTagSanitize
@@ -367,17 +368,15 @@ class Translator {
const used = new Set();
for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) {
let text2 = text;
- let sourceMapping = null;
+ const sourceMap = new TextSourceMap(text2);
if (halfWidth) {
- if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); }
- text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMapping);
+ text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
}
if (numeric) {
text2 = jp.convertNumericToFullWidth(text2);
}
if (alphabetic) {
- if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); }
- text2 = jp.convertAlphabeticToKana(text2, sourceMapping);
+ text2 = jp.convertAlphabeticToKana(text2, sourceMap);
}
if (katakana) {
text2 = jp.convertHiraganaToKatakana(text2);
@@ -391,7 +390,7 @@ class Translator {
if (used.has(text2Substring)) { break; }
used.add(text2Substring);
for (const deinflection of this.deinflector.deinflect(text2Substring)) {
- deinflection.rawSource = Translator.getDeinflectionRawSource(text, i, sourceMapping);
+ deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
deinflections.push(deinflection);
}
}
@@ -407,25 +406,6 @@ class Translator {
}
}
- static getDeinflectionRawSource(source, length, sourceMapping) {
- if (sourceMapping === null) {
- return source.substring(0, length);
- }
-
- let result = '';
- let index = 0;
- for (let i = 0; i < length; ++i) {
- const c = sourceMapping[i];
- result += source.substring(index, index + c);
- index += c;
- }
- return result;
- }
-
- static createTextSourceMapping(text) {
- return new Array(text.length).fill(1);
- }
-
async findKanji(text, options) {
const dictionaries = dictEnabledSet(options);
const kanjiUnique = new Set();
diff --git a/package.json b/package.json
index 8ae103a0..b02ec179 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
"scripts": {
"test": "npm run test-lint && npm run test-code",
"test-lint": "eslint . && node ./test/lint/global-declarations.js",
- "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js"
+ "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js && node ./test/test-text-source-map.js"
},
"repository": {
"type": "git",
diff --git a/test/test-japanese.js b/test/test-japanese.js
index eab632bf..ca65dde2 100644
--- a/test/test-japanese.js
+++ b/test/test-japanese.js
@@ -23,9 +23,11 @@ const vm = new VM();
vm.execute([
'mixed/lib/wanakana.min.js',
'mixed/js/japanese.js',
+ 'bg/js/text-source-map.js',
'bg/js/japanese.js'
]);
const jp = vm.get('jp');
+const TextSourceMap = vm.get('TextSourceMap');
function testIsCodePointKanji() {
@@ -262,13 +264,13 @@ function testConvertHalfWidthKanaToFullWidth() {
];
for (const [string, expected, expectedSourceMapping] of data) {
- const sourceMapping = new Array(string.length).fill(1);
+ const sourceMap = new TextSourceMap(string);
const actual1 = jp.convertHalfWidthKanaToFullWidth(string, null);
- const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMapping);
+ const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMap);
assert.strictEqual(actual1, expected);
assert.strictEqual(actual2, expected);
- if (Array.isArray(expectedSourceMapping)) {
- vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping);
+ if (typeof expectedSourceMapping !== 'undefined') {
+ assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping)));
}
}
}
@@ -285,13 +287,13 @@ function testConvertAlphabeticToKana() {
];
for (const [string, expected, expectedSourceMapping] of data) {
- const sourceMapping = new Array(string.length).fill(1);
+ const sourceMap = new TextSourceMap(string);
const actual1 = jp.convertAlphabeticToKana(string, null);
- const actual2 = jp.convertAlphabeticToKana(string, sourceMapping);
+ const actual2 = jp.convertAlphabeticToKana(string, sourceMap);
assert.strictEqual(actual1, expected);
assert.strictEqual(actual2, expected);
- if (Array.isArray(expectedSourceMapping)) {
- vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping);
+ if (typeof expectedSourceMapping !== 'undefined') {
+ assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping)));
}
}
}
diff --git a/test/test-text-source-map.js b/test/test-text-source-map.js
new file mode 100644
index 00000000..25bd8fc2
--- /dev/null
+++ b/test/test-text-source-map.js
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2020 Alex Yatskov <alex@foosoft.net>
+ * Author: Alex Yatskov <alex@foosoft.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+const assert = require('assert');
+const {VM} = require('./yomichan-vm');
+
+const vm = new VM(); // VM helper imported from ./yomichan-vm
+vm.execute(['bg/js/text-source-map.js']); // run the script under test through the VM helper
+const TextSourceMap = vm.get('TextSourceMap'); // look up TextSourceMap from the VM (presumably defined by the script above)
+
+/**
+ * Checks that the `source` getter returns exactly the string that was passed
+ * to the TextSourceMap constructor.
+ */
+function testSource() {
+ const data = [
+ ['source1'],
+ ['source2'],
+ ['source3']
+ ];
+
+ for (const [source] of data) {
+ const sourceMap = new TextSourceMap(source);
+ assert.strictEqual(source, sourceMap.source);
+ }
+}
+/**
+ * Checks TextSourceMap.equals().
+ *
+ * Each row is [[source1, mapping1], [source2, mapping2], expectedEquals].
+ * The rows cover: both mappings null, a null mapping against an explicit
+ * all-1s mapping (in both orders), identical explicit mappings, explicit
+ * mappings whose entries differ, and identical mappings over different
+ * source strings. Every map is also checked to equal itself.
+ */
+function testEquals() {
+ const data = [
+ [['source1', null], ['source1', null], true],
+ [['source2', null], ['source2', null], true],
+ [['source3', null], ['source3', null], true],
+
+ [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source1', null], true],
+ [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source2', null], true],
+ [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source3', null], true],
+
+ [['source1', null], ['source1', [1, 1, 1, 1, 1, 1, 1]], true],
+ [['source2', null], ['source2', [1, 1, 1, 1, 1, 1, 1]], true],
+ [['source3', null], ['source3', [1, 1, 1, 1, 1, 1, 1]], true],
+
+ [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source1', [1, 1, 1, 1, 1, 1, 1]], true],
+ [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source2', [1, 1, 1, 1, 1, 1, 1]], true],
+ [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source3', [1, 1, 1, 1, 1, 1, 1]], true],
+
+ [['source1', [1, 2, 1, 3]], ['source1', [1, 2, 1, 3]], true],
+ [['source2', [1, 2, 1, 3]], ['source2', [1, 2, 1, 3]], true],
+ [['source3', [1, 2, 1, 3]], ['source3', [1, 2, 1, 3]], true],
+
+ [['source1', [1, 3, 1, 2]], ['source1', [1, 2, 1, 3]], false],
+ [['source2', [1, 3, 1, 2]], ['source2', [1, 2, 1, 3]], false],
+ [['source3', [1, 3, 1, 2]], ['source3', [1, 2, 1, 3]], false],
+
+ [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source4', [1, 1, 1, 1, 1, 1, 1]], false],
+ [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source5', [1, 1, 1, 1, 1, 1, 1]], false],
+ [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source6', [1, 1, 1, 1, 1, 1, 1]], false]
+ ];
+
+ for (const [[source1, mapping1], [source2, mapping2], expectedEquals] of data) {
+ const sourceMap1 = new TextSourceMap(source1, mapping1);
+ const sourceMap2 = new TextSourceMap(source2, mapping2);
+ assert.ok(sourceMap1.equals(sourceMap1)); // reflexive check
+ assert.ok(sourceMap2.equals(sourceMap2)); // reflexive check
+ assert.strictEqual(sourceMap1.equals(sourceMap2), expectedEquals);
+ }
+}
+/**
+ * Checks TextSourceMap.getSourceLength().
+ *
+ * Each row is [[source, mapping], finalLength, expectedValue], where
+ * expectedValue is the sum of the first `finalLength` mapping entries.
+ */
+function testGetSourceLength() {
+ const data = [
+ [['source', [1, 1, 1, 1, 1, 1]], 1, 1],
+ [['source', [1, 1, 1, 1, 1, 1]], 2, 2],
+ [['source', [1, 1, 1, 1, 1, 1]], 3, 3],
+ [['source', [1, 1, 1, 1, 1, 1]], 4, 4],
+ [['source', [1, 1, 1, 1, 1, 1]], 5, 5],
+ [['source', [1, 1, 1, 1, 1, 1]], 6, 6],
+
+ [['source', [2, 2, 2]], 1, 2],
+ [['source', [2, 2, 2]], 2, 4],
+ [['source', [2, 2, 2]], 3, 6],
+
+ [['source', [3, 3]], 1, 3],
+ [['source', [3, 3]], 2, 6],
+
+ [['source', [6, 6]], 1, 6]
+ ];
+
+ for (const [[source, mapping], finalLength, expectedValue] of data) {
+ const sourceMap = new TextSourceMap(source, mapping);
+ assert.strictEqual(sourceMap.getSourceLength(finalLength), expectedValue);
+ }
+}
+/**
+ * Checks TextSourceMap.combine() and TextSourceMap.insert().
+ *
+ * Each row is [[source, mapping], [expectedSource, expectedMapping], operations].
+ * `operations` is a list of [name, ...args] entries applied in order to a map
+ * built from the first pair; the result must equal (via equals()) a map built
+ * from the second pair. Operation names other than 'combine' and 'insert'
+ * are ignored by the switch below.
+ */
+function testCombineInsert() {
+ const data = [
+ // No operations
+ [
+ ['source', null],
+ ['source', [1, 1, 1, 1, 1, 1]],
+ []
+ ],
+
+ // Combine
+ [
+ ['source', null],
+ ['source', [3, 1, 1, 1]],
+ [
+ ['combine', 0, 2]
+ ]
+ ],
+ [
+ ['source', null],
+ ['source', [1, 1, 1, 3]],
+ [
+ ['combine', 3, 2]
+ ]
+ ],
+ [
+ ['source', null],
+ ['source', [3, 3]],
+ [
+ ['combine', 0, 2],
+ ['combine', 1, 2]
+ ]
+ ],
+ [
+ ['source', null],
+ ['source', [3, 3]],
+ [
+ ['combine', 3, 2],
+ ['combine', 0, 2]
+ ]
+ ],
+
+ // Insert
+ [
+ ['source', null],
+ ['source', [0, 1, 1, 1, 1, 1, 1]],
+ [
+ ['insert', 0, 0]
+ ]
+ ],
+ [
+ ['source', null],
+ ['source', [1, 1, 1, 1, 1, 1, 0]],
+ [
+ ['insert', 6, 0]
+ ]
+ ],
+ [
+ ['source', null],
+ ['source', [0, 1, 1, 1, 1, 1, 1, 0]],
+ [
+ ['insert', 0, 0],
+ ['insert', 7, 0]
+ ]
+ ],
+ [
+ ['source', null],
+ ['source', [0, 1, 1, 1, 1, 1, 1, 0]],
+ [
+ ['insert', 6, 0],
+ ['insert', 0, 0]
+ ]
+ ],
+
+ // Mixed
+ [
+ ['source', null],
+ ['source', [3, 0, 3]],
+ [
+ ['combine', 0, 2],
+ ['insert', 1, 0],
+ ['combine', 2, 2]
+ ]
+ ],
+ [
+ ['source', null],
+ ['source', [3, 0, 3]],
+ [
+ ['combine', 0, 2],
+ ['combine', 1, 2],
+ ['insert', 1, 0]
+ ]
+ ],
+ [
+ ['source', null],
+ ['source', [3, 0, 3]],
+ [
+ ['insert', 3, 0],
+ ['combine', 0, 2],
+ ['combine', 2, 2]
+ ]
+ ]
+ ];
+
+ for (const [[source, mapping], [expectedSource, expectedMapping], operations] of data) {
+ const sourceMap = new TextSourceMap(source, mapping);
+ const expectedSourceMap = new TextSourceMap(expectedSource, expectedMapping);
+ for (const [operation, ...args] of operations) {
+ switch (operation) {
+ case 'combine':
+ sourceMap.combine(...args); // args are [index, count]
+ break;
+ case 'insert':
+ sourceMap.insert(...args); // args are [index, ...items]
+ break;
+ }
+ }
+ assert.ok(sourceMap.equals(expectedSourceMap));
+ }
+}
+
+/**
+ * Runs every TextSourceMap test in this file in sequence; a failed assertion
+ * throws and stops the run.
+ */
+function main() {
+ testSource();
+ testEquals();
+ testGetSourceLength();
+ testCombineInsert();
+}
+
+// Run the tests only when this file is executed directly, not when it is loaded via require().
+if (require.main === module) { main(); }