From 6865fc0b526896a79cb68ee912af8a742a13bedd Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 8 Mar 2020 18:24:35 -0400 Subject: Add ObjectPropertyAccessor --- ext/mixed/js/object-property-accessor.js | 244 +++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 ext/mixed/js/object-property-accessor.js diff --git a/ext/mixed/js/object-property-accessor.js b/ext/mixed/js/object-property-accessor.js new file mode 100644 index 00000000..6b5f9678 --- /dev/null +++ b/ext/mixed/js/object-property-accessor.js @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2016-2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * Class used to get and set generic properties of an object by using path strings. + */ +class ObjectPropertyAccessor { + constructor(target, setter=null) { + this._target = target; + this._setter = (typeof setter === 'function' ? setter : null); + } + + getProperty(pathArray, pathLength) { + let target = this._target; + const ii = typeof pathLength === 'number' ? Math.min(pathArray.length, pathLength) : pathArray.length; + for (let i = 0; i < ii; ++i) { + const key = pathArray[i]; + if (!ObjectPropertyAccessor.hasProperty(target, key)) { + throw new Error(`Invalid path: ${this.getPathString(pathArray.slice(0, i + 1))}`); + } + target = target[key]; + } + return target; + } + + setProperty(pathArray, value) { + if (pathArray.length === 0) { + throw new Error('Invalid path'); + } + + const target = this.getProperty(pathArray, pathArray.length - 1); + const key = pathArray[pathArray.length - 1]; + if (!ObjectPropertyAccessor.isValidPropertyType(target, key)) { + throw new Error(`Invalid path: ${this.getPathString(pathArray)}`); + } + + if (this._setter !== null) { + this._setter(target, key, value, pathArray); + } else { + target[key] = value; + } + } + + static getPathString(pathArray) { + const regexShort = /^[a-zA-Z_][a-zA-Z0-9_]*$/; + let pathString = ''; + let first = true; + for (let part of pathArray) { + switch (typeof part) { + case 'number': + if (Math.floor(part) !== part || part < 0) { + throw new Error('Invalid index'); + } + part = `[${part}]`; + break; + case 'string': + if (!regexShort.test(part)) { + const escapedPart = part.replace(/["\\]/g, '\\$&'); + part = `["${escapedPart}"]`; + } else { + if (!first) { + part = `.${part}`; + } + } + break; + default: + throw new Error(`Invalid type: ${typeof part}`); + } + pathString += part; + first = false; + } + return pathString; + } + + static getPathArray(pathString) { + const pathArray = []; + let state = 0; + let quote = 0; + let value = ''; + let escaped = false; + for (const c of pathString) { + const v = c.codePointAt(0); + switch (state) { + case 0: // Empty + case 1: // Expecting identifier start + if (v === 0x5b) { // '[' + if (state === 1) { + throw new Error(`Unexpected character: ${c}`); + } + state = 3; + } else if ( + (v >= 0x41 && v <= 0x5a) || // ['A', 'Z'] + (v >= 0x61 && v <= 0x7a) || // ['a', 'z'] + v === 0x5f // '_' + ) { + state = 2; + value += c; + } else { + throw new Error(`Unexpected character: ${c}`); + } + break; + case 2: // Identifier + if ( + (v >= 0x41 && v <= 0x5a) || // ['A', 'Z'] + (v >= 0x61 && v <= 0x7a) || // ['a', 'z'] + (v >= 0x30 && v <= 0x39) || // ['0', '9'] + v === 0x5f // '_' + ) { + value += c; + } else if (v === 0x5b) { // '[' + pathArray.push(value); + value = ''; + state = 3; + } else if (v === 0x2e) { // '.' + pathArray.push(value); + value = ''; + state = 1; + } else { + throw new Error(`Unexpected character: ${c}`); + } + break; + case 3: // Open bracket + if (v === 0x22 || v === 0x27) { // '"' or '\'' + quote = v; + state = 4; + } else if (v >= 0x30 && v <= 0x39) { // ['0', '9'] + state = 5; + value += c; + } else { + throw new Error(`Unexpected character: ${c}`); + } + break; + case 4: // Quoted string + if (escaped) { + value += c; + escaped = false; + } else if (v === 0x5c) { // '\\' + escaped = true; + } else if (v !== quote) { + value += c; + } else { + state = 6; + } + break; + case 5: // Number + if (v >= 0x30 && v <= 0x39) { // ['0', '9'] + value += c; + } else if (v === 0x5d) { // ']' + pathArray.push(Number.parseInt(value, 10)); + value = ''; + state = 7; + } else { + throw new Error(`Unexpected character: ${c}`); + } + break; + case 6: // Expecting closing bracket after quoted string + if (v === 0x5d) { // ']' + pathArray.push(value); + value = ''; + state = 7; + } else { + throw new Error(`Unexpected character: ${c}`); + } + break; + case 7: // Expecting . or [ + if (v === 0x5b) { // '[' + state = 3; + } else if (v === 0x2e) { // '.' + state = 1; + } else { + throw new Error(`Unexpected character: ${c}`); + } + break; + } + } + switch (state) { + case 0: + case 7: + break; + case 2: + pathArray.push(value); + value = ''; + break; + default: + throw new Error('Path not terminated correctly'); + } + return pathArray; + } + + static hasProperty(object, property) { + switch (typeof property) { + case 'string': + return ( + typeof object === 'object' && + object !== null && + !Array.isArray(object) && + Object.prototype.hasOwnProperty.call(object, property) + ); + case 'number': + return ( + Array.isArray(object) && + property >= 0 && + property < object.length && + property === Math.floor(property) + ); + default: + return false; + } + } + + static isValidPropertyType(object, property) { + switch (typeof property) { + case 'string': + return ( + typeof object === 'object' && + object !== null && + !Array.isArray(object) + ); + case 'number': + return ( + Array.isArray(object) && + property >= 0 && + property === Math.floor(property) + ); + default: + return false; + } + } +} -- cgit v1.2.3 From 701f73440c661b19cecefeb02ce03dfa9db76fb3 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 8 Mar 2020 21:24:05 -0400 Subject: Add tests --- package.json | 2 +- test/test-object-property-accessor.js | 287 ++++++++++++++++++++++++++++++++++ 2 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 test/test-object-property-accessor.js diff --git a/package.json b/package.json index eb449ea9..23f0eb25 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "scripts": { "test": "npm run test-lint && npm run test-code", "test-lint": "eslint . && node ./test/lint/global-declarations.js", - "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js" + "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js" }, "repository": { "type": "git", diff --git a/test/test-object-property-accessor.js b/test/test-object-property-accessor.js new file mode 100644 index 00000000..69e5dbdb --- /dev/null +++ b/test/test-object-property-accessor.js @@ -0,0 +1,287 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +const assert = require('assert'); +const {VM} = require('./yomichan-vm'); + +const vm = new VM({}); +vm.execute('mixed/js/object-property-accessor.js'); +const ObjectPropertyAccessor = vm.get('ObjectPropertyAccessor'); + + +function createTestObject() { + return { + 0: null, + value1: { + value2: {}, + value3: [], + value4: null + }, + value5: [ + {}, + [], + null + ] + }; +} + + +function testGetProperty1() { + const object = createTestObject(); + const accessor = new ObjectPropertyAccessor(object); + + const data = [ + [[], object], + [['0'], object['0']], + [['value1'], object.value1], + [['value1', 'value2'], object.value1.value2], + [['value1', 'value3'], object.value1.value3], + [['value1', 'value4'], object.value1.value4], + [['value5'], object.value5], + [['value5', 0], object.value5[0]], + [['value5', 1], object.value5[1]], + [['value5', 2], object.value5[2]] + ]; + + for (const [pathArray, expected] of data) { + assert.strictEqual(accessor.getProperty(pathArray), expected); + } +} + +function testGetProperty2() { + const object = createTestObject(); + const accessor = new ObjectPropertyAccessor(object); + + const data = [ + [0], + ['0', 'invalid'], + ['invalid'], + ['value1', 'invalid'], + ['value1', 'value2', 'invalid'], + ['value1', 'value2', 0], + ['value1', 'value3', 'invalid'], + ['value1', 'value3', 0], + ['value1', 'value4', 'invalid'], + ['value1', 'value4', 0], + ['value5', 'length'], + ['value5', 0, 'invalid'], + ['value5', 0, 0], + ['value5', 1, 'invalid'], + ['value5', 1, 0], + ['value5', 2, 'invalid'], + ['value5', 2, 0], + ['value5', 2.5] + ]; + + for (const pathArray of data) { + assert.throws(() => accessor.getProperty(pathArray)); + } +} + + +function testSetProperty1() { + const object = createTestObject(); + const accessor = new ObjectPropertyAccessor(object); + + const testValue = {}; + const data = [ + ['0'], + ['value1', 'value2'], + ['value1', 'value3'], + ['value1', 'value4'], + ['value1'], + ['value5', 0], + ['value5', 1], + ['value5', 2], + ['value5'] + ]; + + for (const pathArray of data) { + accessor.setProperty(pathArray, testValue); + assert.strictEqual(accessor.getProperty(pathArray), testValue); + } +} + +function testSetProperty2() { + const object = createTestObject(); + const accessor = new ObjectPropertyAccessor(object); + + const testValue = {}; + const data = [ + [0], + ['0', 'invalid'], + ['value1', 'value2', 0], + ['value1', 'value3', 'invalid'], + ['value1', 'value4', 'invalid'], + ['value1', 'value4', 0], + ['value5', 1, 'invalid'], + ['value5', 2, 'invalid'], + ['value5', 2, 0], + ['value5', 2.5] + ]; + + for (const pathArray of data) { + assert.throws(() => accessor.setProperty(pathArray, testValue)); + } +} + + +function testGetPathString1() { + const data = [ + [[], ''], + [[0], '[0]'], + [['escape\\'], '["escape\\\\"]'], + [['\'quote\''], '["\'quote\'"]'], + [['"quote"'], '["\\"quote\\""]'], + [['part1', 'part2'], 'part1.part2'], + [['part1', 'part2', 3], 'part1.part2[3]'], + [['part1', 'part2', '3'], 'part1.part2["3"]'], + [['part1', 'part2', '3part'], 'part1.part2["3part"]'], + [['part1', 'part2', '3part', 'part4'], 'part1.part2["3part"].part4'], + [['part1', 'part2', '3part', '4part'], 'part1.part2["3part"]["4part"]'] + ]; + + for (const [pathArray, expected] of data) { + assert.strictEqual(ObjectPropertyAccessor.getPathString(pathArray), expected); + } +} + +function testGetPathString2() { + const data = [ + [1.5], + [null] + ]; + + for (const pathArray of data) { + assert.throws(() => ObjectPropertyAccessor.getPathString(pathArray)); + } +} + + +function testGetPathArray1() { + const data = [ + ['', []], + ['[0]', [0]], + ['["escape\\\\"]', ['escape\\']], + ['["\'quote\'"]', ['\'quote\'']], + ['["\\"quote\\""]', ['"quote"']], + ['part1.part2', ['part1', 'part2']], + ['part1.part2[3]', ['part1', 'part2', 3]], + ['part1.part2["3"]', ['part1', 'part2', '3']], + ['part1.part2[\'3\']', ['part1', 'part2', '3']], + ['part1.part2["3part"]', ['part1', 'part2', '3part']], + ['part1.part2[\'3part\']', ['part1', 'part2', '3part']], + ['part1.part2["3part"].part4', ['part1', 'part2', '3part', 'part4']], + ['part1.part2[\'3part\'].part4', ['part1', 'part2', '3part', 'part4']], + ['part1.part2["3part"]["4part"]', ['part1', 'part2', '3part', '4part']], + ['part1.part2[\'3part\'][\'4part\']', ['part1', 'part2', '3part', '4part']] + ]; + + for (const [pathString, expected] of data) { + vm.assert.deepStrictEqual(ObjectPropertyAccessor.getPathArray(pathString), expected); + } +} + +function testGetPathArray2() { + const data = [ + ['?', 'Unexpected character: ?'], + ['.', 'Unexpected character: .'], + ['0', 'Unexpected character: 0'], + ['part1.[0]', 'Unexpected character: ['], + ['part1?', 'Unexpected character: ?'], + ['[part1]', 'Unexpected character: p'], + ['[0a]', 'Unexpected character: a'], + ['["part1"x]', 'Unexpected character: x'], + ['[\'part1\'x]', 'Unexpected character: x'], + ['["part1"]x', 'Unexpected character: x'], + ['[\'part1\']x', 'Unexpected character: x'], + ['part1..part2', 'Unexpected character: .'], + + ['[', 'Path not terminated correctly'], + ['part1.', 'Path not terminated correctly'], + ['part1[', 'Path not terminated correctly'], + ['part1["', 'Path not terminated correctly'], + ['part1[\'', 'Path not terminated correctly'], + ['part1[""', 'Path not terminated correctly'], + ['part1[\'\'', 'Path not terminated correctly'], + ['part1[0', 'Path not terminated correctly'], + ['part1[0].', 'Path not terminated correctly'] + ]; + + for (const [pathString, message] of data) { + assert.throws(() => ObjectPropertyAccessor.getPathArray(pathString), {message}); + } +} + + +function testHasProperty() { + const data = [ + [{}, 'invalid', false], + [{}, 0, false], + [{valid: 0}, 'valid', true], + [{null: 0}, null, false], + [[], 'invalid', false], + [[], 0, false], + [[0], 0, true], + [[0], null, false], + ['string', 0, false], + ['string', 'length', false], + ['string', null, false] + ]; + + for (const [object, property, expected] of data) { + assert.strictEqual(ObjectPropertyAccessor.hasProperty(object, property), expected); + } +} + +function testIsValidPropertyType() { + const data = [ + [{}, 'invalid', true], + [{}, 0, false], + [{valid: 0}, 'valid', true], + [{null: 0}, null, false], + [[], 'invalid', false], + [[], 0, true], + [[0], 0, true], + [[0], null, false], + ['string', 0, false], + ['string', 'length', false], + ['string', null, false] + ]; + + for (const [object, property, expected] of data) { + assert.strictEqual(ObjectPropertyAccessor.isValidPropertyType(object, property), expected); + } +} + + +function main() { + testGetProperty1(); + testGetProperty2(); + testSetProperty1(); + testSetProperty2(); + testGetPathString1(); + testGetPathString2(); + testGetPathArray1(); + testGetPathArray2(); + testHasProperty(); + testIsValidPropertyType(); +} + + +if (require.main === module) { main(); } -- cgit v1.2.3 From 174a942e07b80b419729dbc9f8832e0b5b2f9b36 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 13 Mar 2020 18:17:29 -0400 Subject: Fix misuse of getPathString --- ext/mixed/js/object-property-accessor.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/mixed/js/object-property-accessor.js b/ext/mixed/js/object-property-accessor.js index 6b5f9678..aedf6dcb 100644 --- a/ext/mixed/js/object-property-accessor.js +++ b/ext/mixed/js/object-property-accessor.js @@ -31,7 +31,7 @@ class ObjectPropertyAccessor { for (let i = 0; i < ii; ++i) { const key = pathArray[i]; if (!ObjectPropertyAccessor.hasProperty(target, key)) { - throw new Error(`Invalid path: ${this.getPathString(pathArray.slice(0, i + 1))}`); + throw new Error(`Invalid path: ${ObjectPropertyAccessor.getPathString(pathArray.slice(0, i + 1))}`); } target = target[key]; } @@ -46,7 +46,7 @@ class ObjectPropertyAccessor { const target = this.getProperty(pathArray, pathArray.length - 1); const key = pathArray[pathArray.length - 1]; if (!ObjectPropertyAccessor.isValidPropertyType(target, key)) { - throw new Error(`Invalid path: ${this.getPathString(pathArray)}`); + throw new Error(`Invalid path: ${ObjectPropertyAccessor.getPathString(pathArray)}`); } if (this._setter !== null) { -- cgit v1.2.3 From 7e1e7d59cd8f076b8ee07c354ed11724364cc9fa Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 13 Mar 2020 18:17:40 -0400 Subject: Add error message checking --- test/test-object-property-accessor.js | 72 +++++++++++++++++------------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/test/test-object-property-accessor.js b/test/test-object-property-accessor.js index 69e5dbdb..fb912b21 100644 --- a/test/test-object-property-accessor.js +++ b/test/test-object-property-accessor.js @@ -68,28 +68,28 @@ function testGetProperty2() { const accessor = new ObjectPropertyAccessor(object); const data = [ - [0], - ['0', 'invalid'], - ['invalid'], - ['value1', 'invalid'], - ['value1', 'value2', 'invalid'], - ['value1', 'value2', 0], - ['value1', 'value3', 'invalid'], - ['value1', 'value3', 0], - ['value1', 'value4', 'invalid'], - ['value1', 'value4', 0], - ['value5', 'length'], - ['value5', 0, 'invalid'], - ['value5', 0, 0], - ['value5', 1, 'invalid'], - ['value5', 1, 0], - ['value5', 2, 'invalid'], - ['value5', 2, 0], - ['value5', 2.5] + [[0], 'Invalid path: [0]'], + [['0', 'invalid'], 'Invalid path: ["0"].invalid'], + [['invalid'], 'Invalid path: invalid'], + [['value1', 'invalid'], 'Invalid path: value1.invalid'], + [['value1', 'value2', 'invalid'], 'Invalid path: value1.value2.invalid'], + [['value1', 'value2', 0], 'Invalid path: value1.value2[0]'], + [['value1', 'value3', 'invalid'], 'Invalid path: value1.value3.invalid'], + [['value1', 'value3', 0], 'Invalid path: value1.value3[0]'], + [['value1', 'value4', 'invalid'], 'Invalid path: value1.value4.invalid'], + [['value1', 'value4', 0], 'Invalid path: value1.value4[0]'], + [['value5', 'length'], 'Invalid path: value5.length'], + [['value5', 0, 'invalid'], 'Invalid path: value5[0].invalid'], + [['value5', 0, 0], 'Invalid path: value5[0][0]'], + [['value5', 1, 'invalid'], 'Invalid path: value5[1].invalid'], + [['value5', 1, 0], 'Invalid path: value5[1][0]'], + [['value5', 2, 'invalid'], 'Invalid path: value5[2].invalid'], + [['value5', 2, 0], 'Invalid path: value5[2][0]'], + [['value5', 2.5], 'Invalid index'] ]; - for (const pathArray of data) { - assert.throws(() => accessor.getProperty(pathArray)); + for (const [pathArray, message] of data) { + assert.throws(() => accessor.getProperty(pathArray), {message}); } } @@ -123,20 +123,20 @@ function testSetProperty2() { const testValue = {}; const data = [ - [0], - ['0', 'invalid'], - ['value1', 'value2', 0], - ['value1', 'value3', 'invalid'], - ['value1', 'value4', 'invalid'], - ['value1', 'value4', 0], - ['value5', 1, 'invalid'], - ['value5', 2, 'invalid'], - ['value5', 2, 0], - ['value5', 2.5] + [[0], 'Invalid path: [0]'], + [['0', 'invalid'], 'Invalid path: ["0"].invalid'], + [['value1', 'value2', 0], 'Invalid path: value1.value2[0]'], + [['value1', 'value3', 'invalid'], 'Invalid path: value1.value3.invalid'], + [['value1', 'value4', 'invalid'], 'Invalid path: value1.value4.invalid'], + [['value1', 'value4', 0], 'Invalid path: value1.value4[0]'], + [['value5', 1, 'invalid'], 'Invalid path: value5[1].invalid'], + [['value5', 2, 'invalid'], 'Invalid path: value5[2].invalid'], + [['value5', 2, 0], 'Invalid path: value5[2][0]'], + [['value5', 2.5], 'Invalid index'] ]; - for (const pathArray of data) { - assert.throws(() => accessor.setProperty(pathArray, testValue)); + for (const [pathArray, message] of data) { + assert.throws(() => accessor.setProperty(pathArray, testValue), {message}); } } @@ -163,12 +163,12 @@ function testGetPathString1() { function testGetPathString2() { const data = [ - [1.5], - [null] + [[1.5], 'Invalid index'], + [[null], 'Invalid type: object'] ]; - for (const pathArray of data) { - assert.throws(() => ObjectPropertyAccessor.getPathString(pathArray)); + for (const [pathArray, message] of data) { + assert.throws(() => ObjectPropertyAccessor.getPathString(pathArray), {message}); } } -- cgit v1.2.3 From a267799cd91e6d7e23395abd110f2348413cad58 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 13 Mar 2020 18:20:22 -0400 Subject: Add some extra tests --- test/test-object-property-accessor.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test-object-property-accessor.js b/test/test-object-property-accessor.js index fb912b21..47d2e451 100644 --- a/test/test-object-property-accessor.js +++ b/test/test-object-property-accessor.js @@ -85,6 +85,7 @@ function testGetProperty2() { [['value5', 1, 0], 'Invalid path: value5[1][0]'], [['value5', 2, 'invalid'], 'Invalid path: value5[2].invalid'], [['value5', 2, 0], 'Invalid path: value5[2][0]'], + [['value5', 2, 0, 'invalid'], 'Invalid path: value5[2][0]'], [['value5', 2.5], 'Invalid index'] ]; @@ -132,6 +133,7 @@ function testSetProperty2() { [['value5', 1, 'invalid'], 'Invalid path: value5[1].invalid'], [['value5', 2, 'invalid'], 'Invalid path: value5[2].invalid'], [['value5', 2, 0], 'Invalid path: value5[2][0]'], + [['value5', 2, 0, 'invalid'], 'Invalid path: value5[2][0]'], [['value5', 2.5], 'Invalid index'] ]; -- cgit v1.2.3 From 4b699a6b46869d5766f331d863aea38374ece50a Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 15 Mar 2020 12:26:38 -0400 Subject: Change integer state IDs to strings --- ext/mixed/js/object-property-accessor.js | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/ext/mixed/js/object-property-accessor.js b/ext/mixed/js/object-property-accessor.js index aedf6dcb..108afc0d 100644 --- a/ext/mixed/js/object-property-accessor.js +++ b/ext/mixed/js/object-property-accessor.js @@ -89,32 +89,32 @@ class ObjectPropertyAccessor { static getPathArray(pathString) { const pathArray = []; - let state = 0; + let state = 'empty'; let quote = 0; let value = ''; let escaped = false; for (const c of pathString) { const v = c.codePointAt(0); switch (state) { - case 0: // Empty - case 1: // Expecting identifier start + case 'empty': // Empty + case 'id-start': // Expecting identifier start if (v === 0x5b) { // '[' - if (state === 1) { + if (state === 'id-start') { throw new Error(`Unexpected character: ${c}`); } - state = 3; + state = 'open-bracket'; } else if ( (v >= 0x41 && v <= 0x5a) || // ['A', 'Z'] (v >= 0x61 && v <= 0x7a) || // ['a', 'z'] v === 0x5f // '_' ) { - state = 2; + state = 'id'; value += c; } else { throw new Error(`Unexpected character: ${c}`); } break; - case 2: // Identifier + case 'id': // Identifier if ( (v >= 0x41 && v <= 0x5a) || // ['A', 'Z'] (v >= 0x61 && v <= 0x7a) || // ['a', 'z'] @@ -125,27 +125,27 @@ class ObjectPropertyAccessor { } else if (v === 0x5b) { // '[' pathArray.push(value); value = ''; - state = 3; + state = 'open-bracket'; } else if (v === 0x2e) { // '.' pathArray.push(value); value = ''; - state = 1; + state = 'id-start'; } else { throw new Error(`Unexpected character: ${c}`); } break; - case 3: // Open bracket + case 'open-bracket': // Open bracket if (v === 0x22 || v === 0x27) { // '"' or '\'' quote = v; - state = 4; + state = 'string'; } else if (v >= 0x30 && v <= 0x39) { // ['0', '9'] - state = 5; + state = 'number'; value += c; } else { throw new Error(`Unexpected character: ${c}`); } break; - case 4: // Quoted string + case 'string': // Quoted string if (escaped) { value += c; escaped = false; @@ -154,34 +154,34 @@ class ObjectPropertyAccessor { } else if (v !== quote) { value += c; } else { - state = 6; + state = 'close-bracket'; } break; - case 5: // Number + case 'number': // Number if (v >= 0x30 && v <= 0x39) { // ['0', '9'] value += c; } else if (v === 0x5d) { // ']' pathArray.push(Number.parseInt(value, 10)); value = ''; - state = 7; + state = 'next'; } else { throw new Error(`Unexpected character: ${c}`); } break; - case 6: // Expecting closing bracket after quoted string + case 'close-bracket': // Expecting closing bracket after quoted string if (v === 0x5d) { // ']' pathArray.push(value); value = ''; - state = 7; + state = 'next'; } else { throw new Error(`Unexpected character: ${c}`); } break; - case 7: // Expecting . or [ + case 'next': // Expecting . or [ if (v === 0x5b) { // '[' - state = 3; + state = 'open-bracket'; } else if (v === 0x2e) { // '.' - state = 1; + state = 'id-start'; } else { throw new Error(`Unexpected character: ${c}`); } @@ -189,10 +189,10 @@ class ObjectPropertyAccessor { } } switch (state) { - case 0: - case 7: + case 'empty': + case 'next': break; - case 2: + case 'id': pathArray.push(value); value = ''; break; -- cgit v1.2.3 From 6182b53142cb99d7f52cf99a3c25bb4e688ba447 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 15 Mar 2020 17:33:39 -0400 Subject: Update EOL definition for handlebars files --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 2000050e..cb9d99bb 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -tmpl/*.html text eol=lf +*.handlebars text eol=lf -- cgit v1.2.3 From 8585b8216bc4c4314ec08c8c6afeb0c82397d579 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2020 02:24:02 +0000 Subject: Bump acorn from 6.4.0 to 6.4.1 Bumps [acorn](https://github.com/acornjs/acorn) from 6.4.0 to 6.4.1. - [Release notes](https://github.com/acornjs/acorn/releases) - [Commits](https://github.com/acornjs/acorn/compare/6.4.0...6.4.1) Signed-off-by: dependabot[bot] --- package-lock.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/package-lock.json b/package-lock.json index 88ba43f6..a09abcd0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -31,9 +31,9 @@ "dev": true }, "acorn": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.1.0.tgz", - "integrity": "sha512-kL5CuoXA/dgxlBbVrflsflzQ3PAas7RYZB52NOm/6839iVYJgKMJ3cQJD+t2i5+qFa8h3MDpEOJiS64E8JLnSQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.1.1.tgz", + "integrity": "sha512-add7dgA5ppRPxCFJoAGfMDi7PIBXq1RtGo7BhbLaxwrXPOmw8gq48Y9ozT01hUKy9byMjlR20EJhu5zlkErEkg==", "dev": true }, "acorn-globals": { @@ -47,9 +47,9 @@ }, "dependencies": { "acorn": { - "version": "6.4.0", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.4.0.tgz", - "integrity": "sha512-gac8OEcQ2Li1dxIEWGZzsp2BitJxwkwcOm0zHAJLcPJaVvm58FRnk6RkuLRpU1EujipU2ZFODv2P9DLMfnV8mw==", + "version": "6.4.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.4.1.tgz", + "integrity": "sha512-ZVA9k326Nwrj3Cj9jlh3wGFutC2ZornPNARZwsNYqQYgN0EsV2d53w5RN/co65Ohn4sUAUtb1rSUAOD6XN9idA==", "dev": true } } -- cgit v1.2.3 From 0f15cca2dff995218a52ff7066008da4cd414e3f Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Wed, 11 Mar 2020 20:33:01 -0400 Subject: Convert Japanese utilities to a module-like style --- ext/bg/js/audio-uri-builder.js | 4 +- ext/bg/js/backend.js | 18 +- ext/bg/js/clipboard-monitor.js | 4 +- ext/bg/js/handlebars.js | 9 +- ext/bg/js/japanese.js | 770 +++++++++++++++++++++-------------------- ext/bg/js/translator.js | 24 +- 6 files changed, 421 insertions(+), 408 deletions(-) diff --git a/ext/bg/js/audio-uri-builder.js b/ext/bg/js/audio-uri-builder.js index 499c3441..158006bb 100644 --- a/ext/bg/js/audio-uri-builder.js +++ b/ext/bg/js/audio-uri-builder.js @@ -17,7 +17,7 @@ */ /* global - * jpIsStringEntirelyKana + * jp */ class AudioUriBuilder { @@ -66,7 +66,7 @@ class AudioUriBuilder { let kana = definition.reading; let kanji = definition.expression; - if (!kana && jpIsStringEntirelyKana(kanji)) { + if (!kana && jp.isStringEntirelyKana(kanji)) { kana = kanji; kanji = null; } diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index 978c5a4a..b217e64d 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -32,9 +32,7 @@ * dictEnabledSet * dictTermsSort * handlebarsRenderDynamic - * jpConvertReading - * jpDistributeFuriganaInflected - * jpKatakanaToHiragana + * jp * optionsLoad * optionsSave * profileConditionsDescriptor @@ -402,13 +400,13 @@ class Backend { dictTermsSort(definitions); const {expression, reading} = definitions[0]; const source = text.substring(0, sourceLength); - for (const {text: text2, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) { - const reading2 = jpConvertReading(text2, furigana, options.parsing.readingMode); + for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); term.push({text: text2, reading: reading2}); } text = text.substring(source.length); } else { - const reading = jpConvertReading(text[0], null, options.parsing.readingMode); + const reading = jp.convertReading(text[0], null, options.parsing.readingMode); term.push({text: text[0], reading}); text = text.substring(1); } @@ -427,16 +425,16 @@ class Backend { for (const {expression, reading, source} of parsedLine) { const term = []; if (expression !== null && reading !== null) { - for (const {text: text2, furigana} of jpDistributeFuriganaInflected( + for (const {text: text2, furigana} of jp.distributeFuriganaInflected( expression, - jpKatakanaToHiragana(reading), + jp.convertKatakanaToHiragana(reading), source )) { - const reading2 = jpConvertReading(text2, furigana, options.parsing.readingMode); + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); term.push({text: text2, reading: reading2}); } } else { - const reading2 = jpConvertReading(source, null, options.parsing.readingMode); + const reading2 = jp.convertReading(source, null, options.parsing.readingMode); term.push({text: source, reading: reading2}); } result.push(term); diff --git a/ext/bg/js/clipboard-monitor.js b/ext/bg/js/clipboard-monitor.js index 9a881f57..c67525fc 100644 --- a/ext/bg/js/clipboard-monitor.js +++ b/ext/bg/js/clipboard-monitor.js @@ -17,7 +17,7 @@ */ /* global - * jpIsStringPartiallyJapanese + * jp */ class ClipboardMonitor extends EventDispatcher { @@ -54,7 +54,7 @@ class ClipboardMonitor extends EventDispatcher { text !== this._previousText ) { this._previousText = text; - if (jpIsStringPartiallyJapanese(text)) { + if (jp.isStringPartiallyJapanese(text)) { this.trigger('change', {text}); } } diff --git a/ext/bg/js/handlebars.js b/ext/bg/js/handlebars.js index e3ce6bd0..5fda5baa 100644 --- a/ext/bg/js/handlebars.js +++ b/ext/bg/js/handlebars.js @@ -18,8 +18,7 @@ /* global * Handlebars - * jpDistributeFurigana - * jpIsCodePointKanji + * jp */ function handlebarsEscape(text) { @@ -33,7 +32,7 @@ function handlebarsDumpObject(options) { function handlebarsFurigana(options) { const definition = options.fn(this); - const segs = jpDistributeFurigana(definition.expression, definition.reading); + const segs = jp.distributeFurigana(definition.expression, definition.reading); let result = ''; for (const seg of segs) { @@ -49,7 +48,7 @@ function handlebarsFurigana(options) { function handlebarsFuriganaPlain(options) { const definition = options.fn(this); - const segs = jpDistributeFurigana(definition.expression, definition.reading); + const segs = jp.distributeFurigana(definition.expression, definition.reading); let result = ''; for (const seg of segs) { @@ -66,7 +65,7 @@ function handlebarsFuriganaPlain(options) { function handlebarsKanjiLinks(options) { let result = ''; for (const c of options.fn(this)) { - if (jpIsCodePointKanji(c.codePointAt(0))) { + if (jp.isCodePointKanji(c.codePointAt(0))) { result += `${c}`; } else { result += c; diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 3b37754d..182d5b98 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -20,439 +20,461 @@ * wanakana */ -const JP_HALFWIDTH_KATAKANA_MAPPING = new Map([ - ['ヲ', 'ヲヺ-'], - ['ァ', 'ァ--'], - ['ィ', 'ィ--'], - ['ゥ', 'ゥ--'], - ['ェ', 'ェ--'], - ['ォ', 'ォ--'], - ['ャ', 'ャ--'], - ['ュ', 'ュ--'], - ['ョ', 'ョ--'], - ['ッ', 'ッ--'], - ['ー', 'ー--'], - ['ア', 'ア--'], - ['イ', 'イ--'], - ['ウ', 'ウヴ-'], - ['エ', 'エ--'], - ['オ', 'オ--'], - ['カ', 'カガ-'], - ['キ', 'キギ-'], - ['ク', 'クグ-'], - ['ケ', 'ケゲ-'], - ['コ', 'コゴ-'], - ['サ', 'サザ-'], - ['シ', 'シジ-'], - ['ス', 'スズ-'], - ['セ', 'セゼ-'], - ['ソ', 'ソゾ-'], - ['タ', 'タダ-'], - ['チ', 'チヂ-'], - ['ツ', 'ツヅ-'], - ['テ', 'テデ-'], - ['ト', 'トド-'], - ['ナ', 'ナ--'], - ['ニ', 'ニ--'], - ['ヌ', 'ヌ--'], - ['ネ', 'ネ--'], - ['ノ', 'ノ--'], - ['ハ', 'ハバパ'], - ['ヒ', 'ヒビピ'], - ['フ', 'フブプ'], - ['ヘ', 'ヘベペ'], - ['ホ', 'ホボポ'], - ['マ', 'マ--'], - ['ミ', 'ミ--'], - ['ム', 'ム--'], - ['メ', 'メ--'], - ['モ', 'モ--'], - ['ヤ', 'ヤ--'], - ['ユ', 'ユ--'], - ['ヨ', 'ヨ--'], - ['ラ', 'ラ--'], - ['リ', 'リ--'], - ['ル', 'ル--'], - ['レ', 'レ--'], - ['ロ', 'ロ--'], - ['ワ', 'ワ--'], - ['ン', 'ン--'] -]); - -const JP_HIRAGANA_RANGE = [0x3040, 0x309f]; -const JP_KATAKANA_RANGE = [0x30a0, 0x30ff]; -const JP_KANA_RANGES = [JP_HIRAGANA_RANGE, JP_KATAKANA_RANGE]; - -const JP_CJK_COMMON_RANGE = [0x4e00, 0x9fff]; -const JP_CJK_RARE_RANGE = [0x3400, 0x4dbf]; -const JP_CJK_RANGES = [JP_CJK_COMMON_RANGE, JP_CJK_RARE_RANGE]; - -const JP_ITERATION_MARK_CHAR_CODE = 0x3005; - -// Japanese character ranges, roughly ordered in order of expected frequency -const JP_JAPANESE_RANGES = [ - JP_HIRAGANA_RANGE, - JP_KATAKANA_RANGE, - - JP_CJK_COMMON_RANGE, - JP_CJK_RARE_RANGE, - - [0xff66, 0xff9f], // Halfwidth katakana - - [0x30fb, 0x30fc], // Katakana punctuation - [0xff61, 0xff65], // Kana punctuation - [0x3000, 0x303f], // CJK punctuation - - [0xff10, 0xff19], // Fullwidth numbers - [0xff21, 0xff3a], // Fullwidth upper case Latin letters - [0xff41, 0xff5a], // Fullwidth lower case Latin letters - - [0xff01, 0xff0f], // Fullwidth punctuation 1 - [0xff1a, 0xff1f], // Fullwidth punctuation 2 - [0xff3b, 0xff3f], // Fullwidth punctuation 3 - [0xff5b, 0xff60], // Fullwidth punctuation 4 - [0xffe0, 0xffee] // Currency markers -]; - - -// Helper functions - -function _jpIsCodePointInRanges(codePoint, ranges) { - for (const [min, max] of ranges) { - if (codePoint >= min && codePoint <= max) { - return true; - } +const jp = (() => { + const HALFWIDTH_KATAKANA_MAPPING = new Map([ + ['ヲ', 'ヲヺ-'], + ['ァ', 'ァ--'], + ['ィ', 'ィ--'], + ['ゥ', 'ゥ--'], + ['ェ', 'ェ--'], + ['ォ', 'ォ--'], + ['ャ', 'ャ--'], + ['ュ', 'ュ--'], + ['ョ', 'ョ--'], + ['ッ', 'ッ--'], + ['ー', 'ー--'], + ['ア', 'ア--'], + ['イ', 'イ--'], + ['ウ', 'ウヴ-'], + ['エ', 'エ--'], + ['オ', 'オ--'], + ['カ', 'カガ-'], + ['キ', 'キギ-'], + ['ク', 'クグ-'], + ['ケ', 'ケゲ-'], + ['コ', 'コゴ-'], + ['サ', 'サザ-'], + ['シ', 'シジ-'], + ['ス', 'スズ-'], + ['セ', 'セゼ-'], + ['ソ', 'ソゾ-'], + ['タ', 'タダ-'], + ['チ', 'チヂ-'], + ['ツ', 'ツヅ-'], + ['テ', 'テデ-'], + ['ト', 'トド-'], + ['ナ', 'ナ--'], + ['ニ', 'ニ--'], + ['ヌ', 'ヌ--'], + ['ネ', 'ネ--'], + ['ノ', 'ノ--'], + ['ハ', 'ハバパ'], + ['ヒ', 'ヒビピ'], + ['フ', 'フブプ'], + ['ヘ', 'ヘベペ'], + ['ホ', 'ホボポ'], + ['マ', 'マ--'], + ['ミ', 'ミ--'], + ['ム', 'ム--'], + ['メ', 'メ--'], + ['モ', 'モ--'], + ['ヤ', 'ヤ--'], + ['ユ', 'ユ--'], + ['ヨ', 'ヨ--'], + ['ラ', 'ラ--'], + ['リ', 'リ--'], + ['ル', 'ル--'], + ['レ', 'レ--'], + ['ロ', 'ロ--'], + ['ワ', 'ワ--'], + ['ン', 'ン--'] + ]); + + const HIRAGANA_RANGE = [0x3040, 0x309f]; + const KATAKANA_RANGE = [0x30a0, 0x30ff]; + const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; + + const CJK_COMMON_RANGE = [0x4e00, 0x9fff]; + const CJK_RARE_RANGE = [0x3400, 0x4dbf]; + const CJK_RANGES = [CJK_COMMON_RANGE, CJK_RARE_RANGE]; + + const ITERATION_MARK_CODE_POINT = 0x3005; + + // Japanese character ranges, roughly ordered in order of expected frequency + const JAPANESE_RANGES = [ + HIRAGANA_RANGE, + KATAKANA_RANGE, + + CJK_COMMON_RANGE, + CJK_RARE_RANGE, + + [0xff66, 0xff9f], // Halfwidth katakana + + [0x30fb, 0x30fc], // Katakana punctuation + [0xff61, 0xff65], // Kana punctuation + [0x3000, 0x303f], // CJK punctuation + + [0xff10, 0xff19], // Fullwidth numbers + [0xff21, 0xff3a], // Fullwidth upper case Latin letters + [0xff41, 0xff5a], // Fullwidth lower case Latin letters + + [0xff01, 0xff0f], // Fullwidth punctuation 1 + [0xff1a, 0xff1f], // Fullwidth punctuation 2 + [0xff3b, 0xff3f], // Fullwidth punctuation 3 + [0xff5b, 0xff60], // Fullwidth punctuation 4 + [0xffe0, 0xffee] // Currency markers + ]; + + + // Character code testing functions + + function isCodePointKanji(codePoint) { + return isCodePointInRanges(codePoint, CJK_RANGES); } - return false; -} - - -// Character code testing functions -function jpIsCodePointKanji(codePoint) { - return _jpIsCodePointInRanges(codePoint, JP_CJK_RANGES); -} + function isCodePointKana(codePoint) { + return isCodePointInRanges(codePoint, KANA_RANGES); + } -function jpIsCodePointKana(codePoint) { - return _jpIsCodePointInRanges(codePoint, JP_KANA_RANGES); -} + function isCodePointJapanese(codePoint) { + return isCodePointInRanges(codePoint, JAPANESE_RANGES); + } -function jpIsCodePointJapanese(codePoint) { - return _jpIsCodePointInRanges(codePoint, JP_JAPANESE_RANGES); -} + function isCodePointInRanges(codePoint, ranges) { + for (const [min, max] of ranges) { + if (codePoint >= min && codePoint <= max) { + return true; + } + } + return false; + } -// String testing functions + // String testing functions -function jpIsStringEntirelyKana(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (!jpIsCodePointKana(c.codePointAt(0))) { - return false; + function isStringEntirelyKana(str) { + if (str.length === 0) { return false; } + for (const c of str) { + if (!isCodePointKana(c.codePointAt(0))) { + return false; + } } + return true; } - return true; -} - -function jpIsStringPartiallyJapanese(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (jpIsCodePointJapanese(c.codePointAt(0))) { - return true; + + function isStringPartiallyJapanese(str) { + if (str.length === 0) { return false; } + for (const c of str) { + if (isCodePointJapanese(c.codePointAt(0))) { + return true; + } } + return false; } - return false; -} -// Conversion functions + // Conversion functions -function jpKatakanaToHiragana(text) { - let result = ''; - for (const c of text) { - if (wanakana.isKatakana(c)) { - result += wanakana.toHiragana(c); - } else { - result += c; + function convertKatakanaToHiragana(text) { + let result = ''; + for (const c of text) { + if (wanakana.isKatakana(c)) { + result += wanakana.toHiragana(c); + } else { + result += c; + } } - } - return result; -} - -function jpHiraganaToKatakana(text) { - let result = ''; - for (const c of text) { - if (wanakana.isHiragana(c)) { - result += wanakana.toKatakana(c); - } else { - result += c; - } + return result; } - return result; -} - -function jpToRomaji(text) { - return wanakana.toRomaji(text); -} - -function jpConvertReading(expressionFragment, readingFragment, readingMode) { - switch (readingMode) { - case 'hiragana': - return jpKatakanaToHiragana(readingFragment || ''); - case 'katakana': - return jpHiraganaToKatakana(readingFragment || ''); - case 'romaji': - if (readingFragment) { - return jpToRomaji(readingFragment); + function convertHiraganaToKatakana(text) { + let result = ''; + for (const c of text) { + if (wanakana.isHiragana(c)) { + result += wanakana.toKatakana(c); } else { - if (jpIsStringEntirelyKana(expressionFragment)) { - return jpToRomaji(expressionFragment); - } + result += c; } - return readingFragment; - case 'none': - return null; - default: - return readingFragment; + } + + return result; } -} -function jpDistributeFurigana(expression, reading) { - const fallback = [{furigana: reading, text: expression}]; - if (!reading) { - return fallback; + function convertToRomaji(text) { + return wanakana.toRomaji(text); } - let isAmbiguous = false; - const segmentize = (reading2, groups) => { - if (groups.length === 0 || isAmbiguous) { - return []; + function convertReading(expressionFragment, readingFragment, readingMode) { + switch (readingMode) { + case 'hiragana': + return convertKatakanaToHiragana(readingFragment || ''); + case 'katakana': + return convertHiraganaToKatakana(readingFragment || ''); + case 'romaji': + if (readingFragment) { + return convertToRomaji(readingFragment); + } else { + if (isStringEntirelyKana(expressionFragment)) { + return convertToRomaji(expressionFragment); + } + } + return readingFragment; + case 'none': + return null; + default: + return readingFragment; } + } - const group = groups[0]; - if (group.mode === 'kana') { - if (jpKatakanaToHiragana(reading2).startsWith(jpKatakanaToHiragana(group.text))) { - const readingLeft = reading2.substring(group.text.length); - const segs = segmentize(readingLeft, groups.splice(1)); - if (segs) { - return [{text: group.text}].concat(segs); - } + function convertNumericTofullWidth(text) { + let result = ''; + for (const char of text) { + let c = char.codePointAt(0); + if (c >= 0x30 && c <= 0x39) { // ['0', '9'] + c += 0xff10 - 0x30; // 0xff10 = '0' full width + result += String.fromCodePoint(c); + } else { + result += char; } - } else { - let foundSegments = null; - for (let i = reading2.length; i >= group.text.length; --i) { - const readingUsed = reading2.substring(0, i); - const readingLeft = reading2.substring(i); - const segs = segmentize(readingLeft, groups.slice(1)); - if (segs) { - if (foundSegments !== null) { - // more than one way to segmentize the tail, mark as ambiguous - isAmbiguous = true; - return null; - } - foundSegments = [{text: group.text, furigana: readingUsed}].concat(segs); - } - // there is only one way to segmentize the last non-kana group - if (groups.length === 1) { + } + return result; + } + + function convertHalfWidthKanaToFullWidth(text, sourceMapping) { + let result = ''; + const hasSourceMapping = Array.isArray(sourceMapping); + + // This function is safe to use charCodeAt instead of codePointAt, since all + // the relevant characters are represented with a single UTF-16 character code. + for (let i = 0, ii = text.length; i < ii; ++i) { + const c = text[i]; + const mapping = HALFWIDTH_KATAKANA_MAPPING.get(c); + if (typeof mapping !== 'string') { + result += c; + continue; + } + + let index = 0; + switch (text.charCodeAt(i + 1)) { + case 0xff9e: // dakuten + index = 1; + break; + case 0xff9f: // handakuten + index = 2; break; + } + + let c2 = mapping[index]; + if (index > 0) { + if (c2 === '-') { // invalid + index = 0; + c2 = mapping[0]; + } else { + ++i; } } - return foundSegments; - } - }; - const groups = []; - let modePrev = null; - for (const c of expression) { - const codePoint = c.codePointAt(0); - const modeCurr = jpIsCodePointKanji(codePoint) || codePoint === JP_ITERATION_MARK_CHAR_CODE ? 'kanji' : 'kana'; - if (modeCurr === modePrev) { - groups[groups.length - 1].text += c; - } else { - groups.push({mode: modeCurr, text: c}); - modePrev = modeCurr; + if (hasSourceMapping && index > 0) { + index = result.length; + const v = sourceMapping.splice(index + 1, 1)[0]; + sourceMapping[index] += v; + } + result += c2; } - } - - const segments = segmentize(reading, groups); - if (segments && !isAmbiguous) { - return segments; - } - return fallback; -} - -function jpDistributeFuriganaInflected(expression, reading, source) { - const output = []; - - let stemLength = 0; - const shortest = Math.min(source.length, expression.length); - const sourceHiragana = jpKatakanaToHiragana(source); - const expressionHiragana = jpKatakanaToHiragana(expression); - while (stemLength < shortest && sourceHiragana[stemLength] === expressionHiragana[stemLength]) { - ++stemLength; - } - const offset = source.length - stemLength; - - const stemExpression = source.substring(0, source.length - offset); - const stemReading = reading.substring( - 0, - offset === 0 ? reading.length : reading.length - expression.length + stemLength - ); - for (const segment of jpDistributeFurigana(stemExpression, stemReading)) { - output.push(segment); - } - if (stemLength !== source.length) { - output.push({text: source.substring(stemLength)}); + return result; } - return output; -} - -function jpConvertHalfWidthKanaToFullWidth(text, sourceMapping) { - let result = ''; - const hasSourceMapping = Array.isArray(sourceMapping); - - // This function is safe to use charCodeAt instead of codePointAt, since all - // the relevant characters are represented with a single UTF-16 character code. - for (let i = 0, ii = text.length; i < ii; ++i) { - const c = text[i]; - const mapping = JP_HALFWIDTH_KATAKANA_MAPPING.get(c); - if (typeof mapping !== 'string') { - result += c; - continue; - } + function convertAlphabeticToKana(text, sourceMapping) { + let part = ''; + let result = ''; + const ii = text.length; - let index = 0; - switch (text.charCodeAt(i + 1)) { - case 0xff9e: // dakuten - index = 1; - break; - case 0xff9f: // handakuten - index = 2; - break; + if (sourceMapping.length === ii) { + sourceMapping.length = ii; + sourceMapping.fill(1); } - let c2 = mapping[index]; - if (index > 0) { - if (c2 === '-') { // invalid - index = 0; - c2 = mapping[0]; + for (const char of text) { + // Note: 0x61 is the character code for 'a' + let c = char.codePointAt(0); + if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z'] + c += (0x61 - 0x41); + } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z'] + // NOP; c += (0x61 - 0x61); + } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth + c += (0x61 - 0xff21); + } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth + c += (0x61 - 0xff41); + } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash + c = 0x2d; // '-' } else { - ++i; + if (part.length > 0) { + result += convertAlphabeticPartToKana(part, sourceMapping, result.length); + part = ''; + } + result += char; + continue; } + part += String.fromCodePoint(c); } - if (hasSourceMapping && index > 0) { - index = result.length; - const v = sourceMapping.splice(index + 1, 1)[0]; - sourceMapping[index] += v; + if (part.length > 0) { + result += convertAlphabeticPartToKana(part, sourceMapping, result.length); } - result += c2; + return result; } - return result; -} - -function jpConvertNumericTofullWidth(text) { - let result = ''; - for (const char of text) { - let c = char.codePointAt(0); - if (c >= 0x30 && c <= 0x39) { // ['0', '9'] - c += 0xff10 - 0x30; // 0xff10 = '0' full width - result += String.fromCodePoint(c); - } else { - result += char; - } - } - return result; -} + function convertAlphabeticPartToKana(text, sourceMapping, sourceMappingStart) { + const result = wanakana.toHiragana(text); + + // Generate source mapping + if (Array.isArray(sourceMapping)) { + if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; } + let i = 0; + let resultPos = 0; + const ii = text.length; + while (i < ii) { + // Find smallest matching substring + let iNext = i + 1; + let resultPosNext = result.length; + while (iNext < ii) { + const t = wanakana.toHiragana(text.substring(0, iNext)); + if (t === result.substring(0, t.length)) { + resultPosNext = t.length; + break; + } + ++iNext; + } -function jpConvertAlphabeticToKana(text, sourceMapping) { - let part = ''; - let result = ''; - const ii = text.length; + // Merge characters + const removals = iNext - i - 1; + if (removals > 0) { + let sum = 0; + const vs = sourceMapping.splice(sourceMappingStart + 1, removals); + for (const v of vs) { sum += v; } + sourceMapping[sourceMappingStart] += sum; + } + ++sourceMappingStart; - if (sourceMapping.length === ii) { - sourceMapping.length = ii; - sourceMapping.fill(1); - } + // Empty elements + const additions = resultPosNext - resultPos - 1; + for (let j = 0; j < additions; ++j) { + sourceMapping.splice(sourceMappingStart, 0, 0); + ++sourceMappingStart; + } - for (const char of text) { - // Note: 0x61 is the character code for 'a' - let c = char.codePointAt(0); - if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z'] - c += (0x61 - 0x41); - } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z'] - // NOP; c += (0x61 - 0x61); - } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth - c += (0x61 - 0xff21); - } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth - c += (0x61 - 0xff41); - } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash - c = 0x2d; // '-' - } else { - if (part.length > 0) { - result += jpToHiragana(part, sourceMapping, result.length); - part = ''; + i = iNext; + resultPos = resultPosNext; } - result += char; - continue; } - part += String.fromCodePoint(c); - } - if (part.length > 0) { - result += jpToHiragana(part, sourceMapping, result.length); + return result; } - return result; -} -function jpToHiragana(text, sourceMapping, sourceMappingStart) { - const result = wanakana.toHiragana(text); - // Generate source mapping - if (Array.isArray(sourceMapping)) { - if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; } - let i = 0; - let resultPos = 0; - const ii = text.length; - while (i < ii) { - // Find smallest matching substring - let iNext = i + 1; - let resultPosNext = result.length; - while (iNext < ii) { - const t = wanakana.toHiragana(text.substring(0, iNext)); - if (t === result.substring(0, t.length)) { - resultPosNext = t.length; - break; - } - ++iNext; - } + // Furigana distribution - // Merge characters - const removals = iNext - i - 1; - if (removals > 0) { - let sum = 0; - const vs = sourceMapping.splice(sourceMappingStart + 1, removals); - for (const v of vs) { sum += v; } - sourceMapping[sourceMappingStart] += sum; + function distributeFurigana(expression, reading) { + const fallback = [{furigana: reading, text: expression}]; + if (!reading) { + return fallback; + } + + let isAmbiguous = false; + const segmentize = (reading2, groups) => { + if (groups.length === 0 || isAmbiguous) { + return []; } - ++sourceMappingStart; - // Empty elements - const additions = resultPosNext - resultPos - 1; - for (let j = 0; j < additions; ++j) { - sourceMapping.splice(sourceMappingStart, 0, 0); - ++sourceMappingStart; + const group = groups[0]; + if (group.mode === 'kana') { + if (convertKatakanaToHiragana(reading2).startsWith(convertKatakanaToHiragana(group.text))) { + const readingLeft = reading2.substring(group.text.length); + const segs = segmentize(readingLeft, groups.splice(1)); + if (segs) { + return [{text: group.text}].concat(segs); + } + } + } else { + let foundSegments = null; + for (let i = reading2.length; i >= group.text.length; --i) { + const readingUsed = reading2.substring(0, i); + const readingLeft = reading2.substring(i); + const segs = segmentize(readingLeft, groups.slice(1)); + if (segs) { + if (foundSegments !== null) { + // more than one way to segmentize the tail, mark as ambiguous + isAmbiguous = true; + return null; + } + foundSegments = [{text: group.text, furigana: readingUsed}].concat(segs); + } + // there is only one way to segmentize the last non-kana group + if (groups.length === 1) { + break; + } + } + return foundSegments; + } + }; + + const groups = []; + let modePrev = null; + for (const c of expression) { + const codePoint = c.codePointAt(0); + const modeCurr = isCodePointKanji(codePoint) || codePoint === ITERATION_MARK_CODE_POINT ? 'kanji' : 'kana'; + if (modeCurr === modePrev) { + groups[groups.length - 1].text += c; + } else { + groups.push({mode: modeCurr, text: c}); + modePrev = modeCurr; } + } + + const segments = segmentize(reading, groups); + if (segments && !isAmbiguous) { + return segments; + } + return fallback; + } + + function distributeFuriganaInflected(expression, reading, source) { + const output = []; + + let stemLength = 0; + const shortest = Math.min(source.length, expression.length); + const sourceHiragana = convertKatakanaToHiragana(source); + const expressionHiragana = convertKatakanaToHiragana(expression); + while (stemLength < shortest && sourceHiragana[stemLength] === expressionHiragana[stemLength]) { + ++stemLength; + } + const offset = source.length - stemLength; + + const stemExpression = source.substring(0, source.length - offset); + const stemReading = reading.substring( + 0, + offset === 0 ? reading.length : reading.length - expression.length + stemLength + ); + for (const segment of distributeFurigana(stemExpression, stemReading)) { + output.push(segment); + } - i = iNext; - resultPos = resultPosNext; + if (stemLength !== source.length) { + output.push({text: source.substring(stemLength)}); } + + return output; } - return result; -} + + // Exports + + return { + isCodePointKanji, + isCodePointKana, + isCodePointJapanese, + isStringEntirelyKana, + isStringPartiallyJapanese, + convertKatakanaToHiragana, + convertHiraganaToKatakana, + convertToRomaji, + convertReading, + convertNumericTofullWidth, + convertHalfWidthKanaToFullWidth, + convertAlphabeticToKana, + distributeFurigana, + distributeFuriganaInflected + }; +})(); diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 25da9bf0..54d046cf 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -29,13 +29,7 @@ * dictTermsMergeBySequence * dictTermsSort * dictTermsUndupe - * jpConvertAlphabeticToKana - * jpConvertHalfWidthKanaToFullWidth - * jpConvertNumericTofullWidth - * jpDistributeFurigana - * jpHiraganaToKatakana - * jpIsCodePointJapanese - * jpKatakanaToHiragana + * jp * requestJson */ @@ -275,7 +269,7 @@ class Translator { const termTags = await this.expandTags(definition.termTags, definition.dictionary); const {expression, reading} = definition; - const furiganaSegments = jpDistributeFurigana(expression, reading); + const furiganaSegments = jp.distributeFurigana(expression, reading); definitions.push({ source: deinflection.source, @@ -376,20 +370,20 @@ class Translator { let sourceMapping = null; if (halfWidth) { if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jpConvertHalfWidthKanaToFullWidth(text2, sourceMapping); + text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMapping); } if (numeric) { - text2 = jpConvertNumericTofullWidth(text2); + text2 = jp.convertNumericTofullWidth(text2); } if (alphabetic) { if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jpConvertAlphabeticToKana(text2, sourceMapping); + text2 = jp.convertAlphabeticToKana(text2, sourceMapping); } if (katakana) { - text2 = jpHiraganaToKatakana(text2); + text2 = jp.convertHiraganaToKatakana(text2); } if (hiragana) { - text2 = jpKatakanaToHiragana(text2); + text2 = jp.convertKatakanaToHiragana(text2); } for (let i = text2.length; i > 0; --i) { @@ -590,7 +584,7 @@ class Translator { } static createExpression(expression, reading, termTags=null, termFrequency=null) { - const furiganaSegments = jpDistributeFurigana(expression, reading); + const furiganaSegments = jp.distributeFurigana(expression, reading); return { expression, reading, @@ -639,7 +633,7 @@ class Translator { if (!options.scanning.alphanumeric) { let newText = ''; for (const c of text) { - if (!jpIsCodePointJapanese(c.codePointAt(0))) { + if (!jp.isCodePointJapanese(c.codePointAt(0))) { break; } newText += c; -- cgit v1.2.3 From 264820f2087e7dee13e358ba703d3dd863ed7faa Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 14 Mar 2020 16:11:07 -0400 Subject: Add more unicode code point ranges --- ext/bg/js/japanese.js | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 182d5b98..4c2df674 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -84,9 +84,24 @@ const jp = (() => { const KATAKANA_RANGE = [0x30a0, 0x30ff]; const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; - const CJK_COMMON_RANGE = [0x4e00, 0x9fff]; - const CJK_RARE_RANGE = [0x3400, 0x4dbf]; - const CJK_RANGES = [CJK_COMMON_RANGE, CJK_RARE_RANGE]; + const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; + const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; + const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ + CJK_UNIFIED_IDEOGRAPHS_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, + CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE + ]; const ITERATION_MARK_CODE_POINT = 0x3005; @@ -95,8 +110,7 @@ const jp = (() => { HIRAGANA_RANGE, KATAKANA_RANGE, - CJK_COMMON_RANGE, - CJK_RARE_RANGE, + ...CJK_UNIFIED_IDEOGRAPHS_RANGES, [0xff66, 0xff9f], // Halfwidth katakana @@ -119,7 +133,7 @@ const jp = (() => { // Character code testing functions function isCodePointKanji(codePoint) { - return isCodePointInRanges(codePoint, CJK_RANGES); + return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); } function isCodePointKana(codePoint) { -- cgit v1.2.3 From 248a18dd72c687a470246c26d5c74e440058bf55 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 14 Mar 2020 16:38:12 -0400 Subject: Fix case issue --- ext/bg/js/japanese.js | 4 ++-- ext/bg/js/translator.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 4c2df674..fa40fc98 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -231,7 +231,7 @@ const jp = (() => { } } - function convertNumericTofullWidth(text) { + function convertNumericToFullWidth(text) { let result = ''; for (const char of text) { let c = char.codePointAt(0); @@ -485,7 +485,7 @@ const jp = (() => { convertHiraganaToKatakana, convertToRomaji, convertReading, - convertNumericTofullWidth, + convertNumericToFullWidth, convertHalfWidthKanaToFullWidth, convertAlphabeticToKana, distributeFurigana, diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 54d046cf..6f43f7b0 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -373,7 +373,7 @@ class Translator { text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMapping); } if (numeric) { - text2 = jp.convertNumericTofullWidth(text2); + text2 = jp.convertNumericToFullWidth(text2); } if (alphabetic) { if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } -- cgit v1.2.3 From a50b76fd219b873df7bb7e3b6a1b03850c59f239 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 14 Mar 2020 17:10:53 -0400 Subject: Remove unnecessary sourceMapping population in convertAlphabeticToKana --- ext/bg/js/japanese.js | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index fa40fc98..d2a577e6 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -293,12 +293,6 @@ const jp = (() => { function convertAlphabeticToKana(text, sourceMapping) { let part = ''; let result = ''; - const ii = text.length; - - if (sourceMapping.length === ii) { - sourceMapping.length = ii; - sourceMapping.fill(1); - } for (const char of text) { // Note: 0x61 is the character code for 'a' -- cgit v1.2.3 From 487d4b239b88fd57fefb0ec3a9d1bd0d25984660 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 14 Mar 2020 17:48:40 -0400 Subject: Add unit tests --- package.json | 2 +- test/test-japanese.js | 373 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 374 insertions(+), 1 deletion(-) create mode 100644 test/test-japanese.js diff --git a/package.json b/package.json index 23f0eb25..1b2104f4 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "scripts": { "test": "npm run test-lint && npm run test-code", "test-lint": "eslint . && node ./test/lint/global-declarations.js", - "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js" + "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js" }, "repository": { "type": "git", diff --git a/test/test-japanese.js b/test/test-japanese.js new file mode 100644 index 00000000..78f63c0b --- /dev/null +++ b/test/test-japanese.js @@ -0,0 +1,373 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +const assert = require('assert'); +const {VM} = require('./yomichan-vm'); + +const vm = new VM(); +vm.execute([ + 'mixed/lib/wanakana.min.js', + 'bg/js/japanese.js' +]); +const jp = vm.get('jp'); + + +function testIsCodePointKanji() { + const data = [ + ['力方', true], + ['\u53f1\u{20b9f}', true], + ['かたカタ々kata、。?,.?', false] + ]; + + for (const [characters, expected] of data) { + for (const character of characters) { + const codePoint = character.codePointAt(0); + const actual = jp.isCodePointKanji(codePoint); + assert.strictEqual(actual, expected, `isCodePointKanji failed for ${character} (\\u{${codePoint.toString(16)}})`); + } + } +} + +function testIsCodePointKana() { + const data = [ + ['かたカタ', true], + ['力方々kata、。?,.?', false], + ['\u53f1\u{20b9f}', false] + ]; + + for (const [characters, expected] of data) { + for (const character of characters) { + const codePoint = character.codePointAt(0); + const actual = jp.isCodePointKana(codePoint); + assert.strictEqual(actual, expected, `isCodePointKana failed for ${character} (\\u{${codePoint.toString(16)}})`); + } + } +} + +function testIsCodePointJapanese() { + const data = [ + ['かたカタ力方々、。?', true], + ['\u53f1\u{20b9f}', true], + ['kata,.?', false] + ]; + + for (const [characters, expected] of data) { + for (const character of characters) { + const codePoint = character.codePointAt(0); + const actual = jp.isCodePointJapanese(codePoint); + assert.strictEqual(actual, expected, `isCodePointJapanese failed for ${character} (\\u{${codePoint.toString(16)}})`); + } + } +} + +function testIsStringEntirelyKana() { + const data = [ + ['かたかな', true], + ['カタカナ', true], + ['ひらがな', true], + ['ヒラガナ', true], + ['カタカナひらがな', true], + ['かたカタ力方々、。?', false], + ['\u53f1\u{20b9f}', false], + ['kata,.?', false], + ['かたカタ力方々、。?invalid', false], + ['\u53f1\u{20b9f}invalid', false], + ['kata,.?かた', false] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.isStringEntirelyKana(string), expected); + } +} + +function testIsStringPartiallyJapanese() { + const data = [ + ['かたかな', true], + ['カタカナ', true], + ['ひらがな', true], + ['ヒラガナ', true], + ['カタカナひらがな', true], + ['かたカタ力方々、。?', true], + ['\u53f1\u{20b9f}', true], + ['kata,.?', false], + ['かたカタ力方々、。?invalid', true], + ['\u53f1\u{20b9f}invalid', true], + ['kata,.?かた', true] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.isStringPartiallyJapanese(string), expected); + } +} + +function testConvertKatakanaToHiragana() { + const data = [ + ['かたかな', 'かたかな'], + ['ひらがな', 'ひらがな'], + ['カタカナ', 'かたかな'], + ['ヒラガナ', 'ひらがな'], + ['カタカナかたかな', 'かたかなかたかな'], + ['ヒラガナひらがな', 'ひらがなひらがな'], + ['chikaraちからチカラ力', 'chikaraちからちから力'], + ['katakana', 'katakana'], + ['hiragana', 'hiragana'] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.convertKatakanaToHiragana(string), expected); + } +} + +function testConvertHiraganaToKatakana() { + const data = [ + ['かたかな', 'カタカナ'], + ['ひらがな', 'ヒラガナ'], + ['カタカナ', 'カタカナ'], + ['ヒラガナ', 'ヒラガナ'], + ['カタカナかたかな', 'カタカナカタカナ'], + ['ヒラガナひらがな', 'ヒラガナヒラガナ'], + ['chikaraちからチカラ力', 'chikaraチカラチカラ力'], + ['katakana', 'katakana'], + ['hiragana', 'hiragana'] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.convertHiraganaToKatakana(string), expected); + } +} + +function testConvertToRomaji() { + const data = [ + ['かたかな', 'katakana'], + ['ひらがな', 'hiragana'], + ['カタカナ', 'katakana'], + ['ヒラガナ', 'hiragana'], + ['カタカナかたかな', 'katakanakatakana'], + ['ヒラガナひらがな', 'hiraganahiragana'], + ['chikaraちからチカラ力', 'chikarachikarachikara力'], + ['katakana', 'katakana'], + ['hiragana', 'hiragana'] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.convertToRomaji(string), expected); + } +} + +function testConvertReading() { + const data = [ + [['アリガトウ', 'アリガトウ', 'hiragana'], 'ありがとう'], + [['アリガトウ', 'アリガトウ', 'katakana'], 'アリガトウ'], + [['アリガトウ', 'アリガトウ', 'romaji'], 'arigatou'], + [['アリガトウ', 'アリガトウ', 'none'], null], + [['アリガトウ', 'アリガトウ', 'default'], 'アリガトウ'], + + [['ありがとう', 'ありがとう', 'hiragana'], 'ありがとう'], + [['ありがとう', 'ありがとう', 'katakana'], 'アリガトウ'], + [['ありがとう', 'ありがとう', 'romaji'], 'arigatou'], + [['ありがとう', 'ありがとう', 'none'], null], + [['ありがとう', 'ありがとう', 'default'], 'ありがとう'], + + [['有り難う', 'ありがとう', 'hiragana'], 'ありがとう'], + [['有り難う', 'ありがとう', 'katakana'], 'アリガトウ'], + [['有り難う', 'ありがとう', 'romaji'], 'arigatou'], + [['有り難う', 'ありがとう', 'none'], null], + [['有り難う', 'ありがとう', 'default'], 'ありがとう'] + ]; + + for (const [[expressionFragment, readingFragment, readingMode], expected] of data) { + assert.strictEqual(jp.convertReading(expressionFragment, readingFragment, readingMode), expected); + } +} + +function testConvertNumericToFullWidth() { + const data = [ + ['0123456789', '0123456789'], + ['abcdefghij', 'abcdefghij'], + ['カタカナ', 'カタカナ'], + ['ひらがな', 'ひらがな'] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.convertNumericToFullWidth(string), expected); + } +} + +function testConvertHalfWidthKanaToFullWidth() { + const data = [ + ['0123456789', '0123456789'], + ['abcdefghij', 'abcdefghij'], + ['カタカナ', 'カタカナ'], + ['ひらがな', 'ひらがな'], + ['カキ', 'カキ', [1, 1]], + ['ガキ', 'ガキ', [2, 1]], + ['ニホン', 'ニホン', [1, 1, 1]], + ['ニッポン', 'ニッポン', [1, 1, 2, 1]] + ]; + + for (const [string, expected, expectedSourceMapping] of data) { + const sourceMapping = new Array(string.length).fill(1); + const actual1 = jp.convertHalfWidthKanaToFullWidth(string, null); + const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMapping); + assert.strictEqual(actual1, expected); + assert.strictEqual(actual2, expected); + if (Array.isArray(expectedSourceMapping)) { + vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); + } + } +} + +function testConvertAlphabeticToKana() { + const data = [ + ['0123456789', '0123456789'], + ['abcdefghij', 'あbcでfgひj', [1, 1, 1, 2, 1, 1, 2, 1]], + ['ABCDEFGHIJ', 'あbcでfgひj', [1, 1, 1, 2, 1, 1, 2, 1]], // wanakana.toHiragana converts text to lower case + ['カタカナ', 'カタカナ'], + ['ひらがな', 'ひらがな'], + ['chikara', 'ちから', [3, 2, 2]], + ['CHIKARA', 'ちから', [3, 2, 2]] + ]; + + for (const [string, expected, expectedSourceMapping] of data) { + const sourceMapping = new Array(string.length).fill(1); + const actual1 = jp.convertAlphabeticToKana(string, null); + const actual2 = jp.convertAlphabeticToKana(string, sourceMapping); + assert.strictEqual(actual1, expected); + assert.strictEqual(actual2, expected); + if (Array.isArray(expectedSourceMapping)) { + vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); + } + } +} + +function testDistributeFurigana() { + const data = [ + [ + ['有り難う', 'ありがとう'], + [ + {text: '有', furigana: 'あ'}, + {text: 'り'}, + {text: '難', furigana: 'がと'}, + {text: 'う'} + ] + ], + [ + ['方々', 'かたがた'], + [ + {text: '方々', furigana: 'かたがた'} + ] + ], + [ + ['お祝い', 'おいわい'], + [ + {text: 'お'}, + {text: '祝', furigana: 'いわ'}, + {text: 'い'} + ] + ], + [ + ['美味しい', 'おいしい'], + [ + {text: '美味', furigana: 'おい'}, + {text: 'しい'} + ] + ], + [ + ['食べ物', 'たべもの'], + [ + {text: '食', furigana: 'た'}, + {text: 'べ'}, + {text: '物', furigana: 'もの'} + ] + ], + [ + ['試し切り', 'ためしぎり'], + [ + {text: '試', furigana: 'ため'}, + {text: 'し'}, + {text: '切', furigana: 'ぎ'}, + {text: 'り'} + ] + ], + // Ambiguous + [ + ['飼い犬', 'かいいぬ'], + [ + {text: '飼い犬', furigana: 'かいいぬ'} + ] + ], + [ + ['長い間', 'ながいあいだ'], + [ + {text: '長い間', furigana: 'ながいあいだ'} + ] + ] + ]; + + for (const [[expression, reading], expected] of data) { + const actual = jp.distributeFurigana(expression, reading); + vm.assert.deepStrictEqual(actual, expected); + } +} + +function testDistributeFuriganaInflected() { + const data = [ + [ + ['美味しい', 'おいしい', '美味しかた'], + [ + {text: '美味', furigana: 'おい'}, + {text: 'し'}, + {text: 'かた'} + ] + ], + [ + ['食べる', 'たべる', '食べた'], + [ + {text: '食', furigana: 'た'}, + {text: 'べ'}, + {text: 'た'} + ] + ] + ]; + + for (const [[expression, reading, source], expected] of data) { + const actual = jp.distributeFuriganaInflected(expression, reading, source); + vm.assert.deepStrictEqual(actual, expected); + } +} + + +function main() { + testIsCodePointKanji(); + testIsCodePointKana(); + testIsCodePointJapanese(); + testIsStringEntirelyKana(); + testIsStringPartiallyJapanese(); + testConvertKatakanaToHiragana(); + testConvertHiraganaToKatakana(); + testConvertToRomaji(); + testConvertReading(); + testConvertNumericToFullWidth(); + testConvertHalfWidthKanaToFullWidth(); + testConvertAlphabeticToKana(); + testDistributeFurigana(); + testDistributeFuriganaInflected(); +} + + +if (require.main === module) { main(); } -- cgit v1.2.3 From 647769f2baae338f201690f21c4bf8983bcb10b0 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 15 Mar 2020 22:35:34 -0400 Subject: Update jsdom to version 16.2.1 --- package-lock.json | 76 +++++++++++++++++++++++++------------------------------ package.json | 2 +- 2 files changed, 35 insertions(+), 43 deletions(-) diff --git a/package-lock.json b/package-lock.json index a09abcd0..920263d2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -37,21 +37,13 @@ "dev": true }, "acorn-globals": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/acorn-globals/-/acorn-globals-4.3.4.tgz", - "integrity": "sha512-clfQEh21R+D0leSbUdWf3OcfqyaCSAQ8Ryq00bofSekfr9W8u1jyYZo6ir0xu9Gtcf7BjcHJpnbZH7JOCpP60A==", + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/acorn-globals/-/acorn-globals-6.0.0.tgz", + "integrity": "sha512-ZQl7LOWaF5ePqqcX4hLuv/bLXYQNfNWw2c0/yX/TsPRKamzHcTGQnlCjHT3TsmkOUVEPS3crCxiPfdzE/Trlhg==", "dev": true, "requires": { - "acorn": "^6.0.1", - "acorn-walk": "^6.0.1" - }, - "dependencies": { - "acorn": { - "version": "6.4.1", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.4.1.tgz", - "integrity": "sha512-ZVA9k326Nwrj3Cj9jlh3wGFutC2ZornPNARZwsNYqQYgN0EsV2d53w5RN/co65Ohn4sUAUtb1rSUAOD6XN9idA==", - "dev": true - } + "acorn": "^7.1.1", + "acorn-walk": "^7.1.1" } }, "acorn-jsx": { @@ -61,9 +53,9 @@ "dev": true }, "acorn-walk": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-6.2.0.tgz", - "integrity": "sha512-7evsyfH1cLOCdAzZAd43Cic04yKydNx0cF+7tiA19p1XnLLPU4dpCQOqpjqwokFe//vS0QqfqqjCS2JkiIs0cA==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-7.1.1.tgz", + "integrity": "sha512-wdlPY2tm/9XBr7QkKlq0WQVgiuGTX6YWPyRyBviSoScBuLfTVQhvwg6wJ369GJ/1nPfTLMfnrFIfjqVg6d+jQQ==", "dev": true }, "ajv": { @@ -182,9 +174,9 @@ } }, "browser-process-hrtime": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/browser-process-hrtime/-/browser-process-hrtime-0.1.3.tgz", - "integrity": "sha512-bRFnI4NnjO6cnyLmOV/7PVoDEMJChlcfN0z4s1YMBY989/SvlfMI1lgCnkFUs53e9gQF+w7qu7XdllSTiSl8Aw==", + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/browser-process-hrtime/-/browser-process-hrtime-1.0.0.tgz", + "integrity": "sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==", "dev": true }, "callsites": { @@ -747,9 +739,9 @@ "dev": true }, "html-encoding-sniffer": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-2.0.0.tgz", - "integrity": "sha512-Y9prnPKkM7FXxQevZ5UH8Z6aVTY0ede1tHquck5UxGmKWDshxXh95gSa2xXYjS8AsGO5iOvrCI5+GttRKnLdNA==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-2.0.1.tgz", + "integrity": "sha512-D5JbOMBIR/TVZkubHT+OyT2705QvogUW4IBn6nHd756OwieSF9aDYFj4dv6HHEVGYbHaLETa3WggZYWWMyy3ZQ==", "dev": true, "requires": { "whatwg-encoding": "^1.0.5" @@ -914,30 +906,30 @@ "dev": true }, "jsdom": { - "version": "16.2.0", - "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-16.2.0.tgz", - "integrity": "sha512-6VaW3UWyKbm9DFVIAgTfhuwnvqiqlRYNg5Rk6dINTVoZT0eKz+N86vQZr+nqt1ny1lSB1TWZJWSEWQAfu8oTpA==", + "version": "16.2.1", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-16.2.1.tgz", + "integrity": "sha512-3p0gHs5EfT7PxW9v8Phz3mrq//4Dy8MQenU/PoKxhdT+c45S7NjIjKbGT3Ph0nkICweE1r36+yaknXA5WfVNAg==", "dev": true, "requires": { "abab": "^2.0.3", - "acorn": "^7.1.0", - "acorn-globals": "^4.3.4", + "acorn": "^7.1.1", + "acorn-globals": "^6.0.0", "cssom": "^0.4.4", "cssstyle": "^2.2.0", "data-urls": "^2.0.0", "decimal.js": "^10.2.0", "domexception": "^2.0.1", - "escodegen": "^1.13.0", - "html-encoding-sniffer": "^2.0.0", + "escodegen": "^1.14.1", + "html-encoding-sniffer": "^2.0.1", "is-potential-custom-element-name": "^1.0.0", "nwsapi": "^2.2.0", "parse5": "5.1.1", - "request": "^2.88.0", + "request": "^2.88.2", "request-promise-native": "^1.0.8", - "saxes": "^4.0.2", + "saxes": "^5.0.0", "symbol-tree": "^3.2.4", "tough-cookie": "^3.0.1", - "w3c-hr-time": "^1.0.1", + "w3c-hr-time": "^1.0.2", "w3c-xmlserializer": "^2.0.0", "webidl-conversions": "^5.0.0", "whatwg-encoding": "^1.0.5", @@ -1370,9 +1362,9 @@ "dev": true }, "saxes": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/saxes/-/saxes-4.0.2.tgz", - "integrity": "sha512-EZOTeQ4bgkOaGCDaTKux+LaRNcLNbdbvMH7R3/yjEEULPEmqvkFbFub6DJhJTub2iGMT93CfpZ5LTdKZmAbVeQ==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-5.0.0.tgz", + "integrity": "sha512-LXTZygxhf8lfwKaTP/8N9CsVdjTlea3teze4lL6u37ivbgGbV0GGMuNtS/I9rnD/HC2/txUM7Df4S2LVl1qhiA==", "dev": true, "requires": { "xmlchars": "^2.2.0" @@ -1690,12 +1682,12 @@ } }, "w3c-hr-time": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.1.tgz", - "integrity": "sha1-gqwr/2PZUOqeMYmlimViX+3xkEU=", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.2.tgz", + "integrity": "sha512-z8P5DvDNjKDoFIHK7q8r8lackT6l+jo/Ye3HOle7l9nICP9lf1Ci25fy9vHd0JOWewkIFzXIEig3TdKT7JQ5fQ==", "dev": true, "requires": { - "browser-process-hrtime": "^0.1.2" + "browser-process-hrtime": "^1.0.0" } }, "w3c-xmlserializer": { @@ -1770,9 +1762,9 @@ } }, "ws": { - "version": "7.2.1", - "resolved": "https://registry.npmjs.org/ws/-/ws-7.2.1.tgz", - "integrity": "sha512-sucePNSafamSKoOqoNfBd8V0StlkzJKL2ZAhGQinCfNQ+oacw+Pk7lcdAElecBF2VkLNZRiIb5Oi1Q5lVUVt2A==", + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/ws/-/ws-7.2.3.tgz", + "integrity": "sha512-HTDl9G9hbkNDk98naoR/cHDws7+EyYMOdL1BmjsZXRUjf7d+MficC4B7HLUPlSiho0vg+CWKrGIt/VJBd1xunQ==", "dev": true }, "xml-name-validator": { diff --git a/package.json b/package.json index 23f0eb25..10aad3a9 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,6 @@ "eslint": "^6.8.0", "eslint-plugin-no-unsanitized": "^3.0.2", "fake-indexeddb": "^3.0.0", - "jsdom": "^16.2.0" + "jsdom": "^16.2.1" } } -- cgit v1.2.3 From 77a2cc60e9a4a89da354cadb1bf060204ee3b951 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 21 Mar 2020 13:18:34 -0400 Subject: Move basic string/character testing functions into a mixed/js/japanese.js --- ext/bg/background.html | 1 + ext/bg/js/japanese.js | 106 +++------------------------------------- ext/bg/search.html | 1 + ext/bg/settings.html | 1 + ext/mixed/js/japanese.js | 124 +++++++++++++++++++++++++++++++++++++++++++++++ test/test-japanese.js | 1 + 6 files changed, 135 insertions(+), 99 deletions(-) create mode 100644 ext/mixed/js/japanese.js diff --git a/ext/bg/background.html b/ext/bg/background.html index 44abe8fd..f7cf6e55 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -20,6 +20,7 @@ + diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index d2a577e6..c5873cf1 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -17,10 +17,11 @@ */ /* global + * jp * wanakana */ -const jp = (() => { +(() => { const HALFWIDTH_KATAKANA_MAPPING = new Map([ ['ヲ', 'ヲヺ-'], ['ァ', 'ァ--'], @@ -80,101 +81,13 @@ const jp = (() => { ['ン', 'ン--'] ]); - const HIRAGANA_RANGE = [0x3040, 0x309f]; - const KATAKANA_RANGE = [0x30a0, 0x30ff]; - const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; - - const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; - const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; - const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ - CJK_UNIFIED_IDEOGRAPHS_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, - CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE - ]; - const ITERATION_MARK_CODE_POINT = 0x3005; - // Japanese character ranges, roughly ordered in order of expected frequency - const JAPANESE_RANGES = [ - HIRAGANA_RANGE, - KATAKANA_RANGE, - - ...CJK_UNIFIED_IDEOGRAPHS_RANGES, - - [0xff66, 0xff9f], // Halfwidth katakana - - [0x30fb, 0x30fc], // Katakana punctuation - [0xff61, 0xff65], // Kana punctuation - [0x3000, 0x303f], // CJK punctuation - - [0xff10, 0xff19], // Fullwidth numbers - [0xff21, 0xff3a], // Fullwidth upper case Latin letters - [0xff41, 0xff5a], // Fullwidth lower case Latin letters - - [0xff01, 0xff0f], // Fullwidth punctuation 1 - [0xff1a, 0xff1f], // Fullwidth punctuation 2 - [0xff3b, 0xff3f], // Fullwidth punctuation 3 - [0xff5b, 0xff60], // Fullwidth punctuation 4 - [0xffe0, 0xffee] // Currency markers - ]; - - - // Character code testing functions - - function isCodePointKanji(codePoint) { - return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); - } - - function isCodePointKana(codePoint) { - return isCodePointInRanges(codePoint, KANA_RANGES); - } - - function isCodePointJapanese(codePoint) { - return isCodePointInRanges(codePoint, JAPANESE_RANGES); - } - function isCodePointInRanges(codePoint, ranges) { - for (const [min, max] of ranges) { - if (codePoint >= min && codePoint <= max) { - return true; - } - } - return false; - } + // Existing functions - - // String testing functions - - function isStringEntirelyKana(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (!isCodePointKana(c.codePointAt(0))) { - return false; - } - } - return true; - } - - function isStringPartiallyJapanese(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (isCodePointJapanese(c.codePointAt(0))) { - return true; - } - } - return false; - } + const isCodePointKanji = jp.isCodePointKanji; + const isStringEntirelyKana = jp.isStringEntirelyKana; // Conversion functions @@ -469,12 +382,7 @@ const jp = (() => { // Exports - return { - isCodePointKanji, - isCodePointKana, - isCodePointJapanese, - isStringEntirelyKana, - isStringPartiallyJapanese, + Object.assign(jp, { convertKatakanaToHiragana, convertHiraganaToKatakana, convertToRomaji, @@ -484,5 +392,5 @@ const jp = (() => { convertAlphabeticToKana, distributeFurigana, distributeFuriganaInflected - }; + }); })(); diff --git a/ext/bg/search.html b/ext/bg/search.html index f4c1a737..eacc1893 100644 --- a/ext/bg/search.html +++ b/ext/bg/search.html @@ -74,6 +74,7 @@ + diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 0db76d71..cfe20be4 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -1088,6 +1088,7 @@ + diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js new file mode 100644 index 00000000..61a247b2 --- /dev/null +++ b/ext/mixed/js/japanese.js @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +const jp = (() => { + const HIRAGANA_RANGE = [0x3040, 0x309f]; + const KATAKANA_RANGE = [0x30a0, 0x30ff]; + const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; + + const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; + const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; + const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ + CJK_UNIFIED_IDEOGRAPHS_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, + CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE + ]; + + // Japanese character ranges, roughly ordered in order of expected frequency + const JAPANESE_RANGES = [ + HIRAGANA_RANGE, + KATAKANA_RANGE, + + ...CJK_UNIFIED_IDEOGRAPHS_RANGES, + + [0xff66, 0xff9f], // Halfwidth katakana + + [0x30fb, 0x30fc], // Katakana punctuation + [0xff61, 0xff65], // Kana punctuation + [0x3000, 0x303f], // CJK punctuation + + [0xff10, 0xff19], // Fullwidth numbers + [0xff21, 0xff3a], // Fullwidth upper case Latin letters + [0xff41, 0xff5a], // Fullwidth lower case Latin letters + + [0xff01, 0xff0f], // Fullwidth punctuation 1 + [0xff1a, 0xff1f], // Fullwidth punctuation 2 + [0xff3b, 0xff3f], // Fullwidth punctuation 3 + [0xff5b, 0xff60], // Fullwidth punctuation 4 + [0xffe0, 0xffee] // Currency markers + ]; + + + // Character code testing functions + + function isCodePointKanji(codePoint) { + return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); + } + + function isCodePointKana(codePoint) { + return isCodePointInRanges(codePoint, KANA_RANGES); + } + + function isCodePointJapanese(codePoint) { + return isCodePointInRanges(codePoint, JAPANESE_RANGES); + } + + function isCodePointInRanges(codePoint, ranges) { + for (const [min, max] of ranges) { + if (codePoint >= min && codePoint <= max) { + return true; + } + } + return false; + } + + + // String testing functions + + function isStringEntirelyKana(str) { + if (str.length === 0) { return false; } + for (const c of str) { + if (!isCodePointKana(c.codePointAt(0))) { + return false; + } + } + return true; + } + + function isStringPartiallyJapanese(str) { + if (str.length === 0) { return false; } + for (const c of str) { + if (isCodePointJapanese(c.codePointAt(0))) { + return true; + } + } + return false; + } + + + // Exports + + return { + isCodePointKanji, + isCodePointKana, + isCodePointJapanese, + isStringEntirelyKana, + isStringPartiallyJapanese + }; +})(); diff --git a/test/test-japanese.js b/test/test-japanese.js index 78f63c0b..32e4d176 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -22,6 +22,7 @@ const {VM} = require('./yomichan-vm'); const vm = new VM(); vm.execute([ 'mixed/lib/wanakana.min.js', + 'mixed/js/japanese.js', 'bg/js/japanese.js' ]); const jp = vm.get('jp'); -- cgit v1.2.3 From 70284c62eea5a5f5dca16bf7b72ee4919c8450cd Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 21 Mar 2020 13:22:14 -0400 Subject: Replace DisplayGenerator._isCharacterKanji with jp.isCodePointKanji --- ext/fg/float.html | 1 + ext/mixed/js/display-generator.js | 11 ++--------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/ext/fg/float.html b/ext/fg/float.html index 7bbed565..3ccf68eb 100644 --- a/ext/fg/float.html +++ b/ext/fg/float.html @@ -43,6 +43,7 @@ + diff --git a/ext/mixed/js/display-generator.js b/ext/mixed/js/display-generator.js index 49afc44b..41f7315a 100644 --- a/ext/mixed/js/display-generator.js +++ b/ext/mixed/js/display-generator.js @@ -19,6 +19,7 @@ /* global * TemplateHandler * apiGetDisplayTemplatesHtml + * jp */ class DisplayGenerator { @@ -283,7 +284,7 @@ class DisplayGenerator { _appendKanjiLinks(container, text) { let part = ''; for (const c of text) { - if (DisplayGenerator._isCharacterKanji(c)) { + if (jp.isCodePointKanji(c.codePointAt(0))) { if (part.length > 0) { container.appendChild(document.createTextNode(part)); part = ''; @@ -300,14 +301,6 @@ class DisplayGenerator { } } - static _isCharacterKanji(c) { - const code = c.codePointAt(0); - return ( - code >= 0x4e00 && code < 0x9fb0 || - code >= 0x3400 && code < 0x4dc0 - ); - } - static _appendMultiple(container, createItem, detailsArray, fallback=[]) { if (container === null) { return 0; } -- cgit v1.2.3 From 780d23b749325da0a95aa9cc7898df19f2ac1b31 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 21 Mar 2020 14:12:22 -0400 Subject: Add more tests for convertReading --- test/test-japanese.js | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/test/test-japanese.js b/test/test-japanese.js index 32e4d176..c5d220e7 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -188,7 +188,47 @@ function testConvertReading() { [['有り難う', 'ありがとう', 'katakana'], 'アリガトウ'], [['有り難う', 'ありがとう', 'romaji'], 'arigatou'], [['有り難う', 'ありがとう', 'none'], null], - [['有り難う', 'ありがとう', 'default'], 'ありがとう'] + [['有り難う', 'ありがとう', 'default'], 'ありがとう'], + + // Cases with falsy readings + + [['ありがとう', '', 'hiragana'], ''], + [['ありがとう', '', 'katakana'], ''], + [['ありがとう', '', 'romaji'], 'arigatou'], + [['ありがとう', '', 'none'], null], + [['ありがとう', '', 'default'], ''], + + [['ありがとう', null, 'hiragana'], ''], + [['ありがとう', null, 'katakana'], ''], + [['ありがとう', null, 'romaji'], 'arigatou'], + [['ありがとう', null, 'none'], null], + [['ありがとう', null, 'default'], null], + + [['ありがとう', void 0, 'hiragana'], ''], + [['ありがとう', void 0, 'katakana'], ''], + [['ありがとう', void 0, 'romaji'], 'arigatou'], + [['ありがとう', void 0, 'none'], null], + [['ありがとう', void 0, 'default'], void 0], + + // Cases with falsy readings and kanji expressions + + [['有り難う', '', 'hiragana'], ''], + [['有り難う', '', 'katakana'], ''], + [['有り難う', '', 'romaji'], ''], + [['有り難う', '', 'none'], null], + [['有り難う', '', 'default'], ''], + + [['有り難う', null, 'hiragana'], ''], + [['有り難う', null, 'katakana'], ''], + [['有り難う', null, 'romaji'], null], + [['有り難う', null, 'none'], null], + [['有り難う', null, 'default'], null], + + [['有り難う', void 0, 'hiragana'], ''], + [['有り難う', void 0, 'katakana'], ''], + [['有り難う', void 0, 'romaji'], void 0], + [['有り難う', void 0, 'none'], null], + [['有り難う', void 0, 'default'], void 0] ]; for (const [[expressionFragment, readingFragment, readingMode], expected] of data) { -- cgit v1.2.3 From 962c2a381f3dace4d97fd0625504ec841e378354 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Fri, 13 Mar 2020 23:23:08 +0200 Subject: apply all options on profile change --- ext/bg/js/search-frontend.js | 7 +------ ext/bg/js/search-query-parser.js | 17 ++++++----------- ext/bg/js/search.js | 16 +++++++--------- ext/fg/js/float.js | 17 ++++++++--------- ext/fg/js/frontend-initialize.js | 4 ++-- ext/fg/js/frontend.js | 18 ++++++++++++++++-- ext/fg/js/popup-nested.js | 7 +------ ext/fg/js/popup-proxy.js | 4 ++++ ext/fg/js/popup.js | 34 +++++++++++++++------------------- ext/mixed/js/display.js | 18 +++++++++--------- ext/mixed/js/text-scanner.js | 7 +++++-- 11 files changed, 74 insertions(+), 75 deletions(-) diff --git a/ext/bg/js/search-frontend.js b/ext/bg/js/search-frontend.js index a470e873..2d2aa8d4 100644 --- a/ext/bg/js/search-frontend.js +++ b/ext/bg/js/search-frontend.js @@ -30,12 +30,7 @@ async function searchFrontendSetup() { const options = await apiOptionsGet(optionsContext); if (!options.scanning.enableOnSearchPage) { return; } - const ignoreNodes = ['.scan-disable', '.scan-disable *']; - if (!options.scanning.enableOnPopupExpressions) { - ignoreNodes.push('.source-text', '.source-text *'); - } - - window.frontendInitializationData = {depth: 1, ignoreNodes, proxy: false}; + window.frontendInitializationData = {depth: 1, proxy: false}; const scriptSrcs = [ '/mixed/js/text-scanner.js', diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 06316ce2..6e18073b 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -28,11 +28,10 @@ class QueryParser extends TextScanner { constructor(search) { - super(document.querySelector('#query-parser-content'), [], [], []); + super(document.querySelector('#query-parser-content'), [], []); this.search = search; this.parseResults = []; - this.selectedParser = null; this.queryParser = document.querySelector('#query-parser-content'); this.queryParserSelect = document.querySelector('#query-parser-select-container'); @@ -79,9 +78,7 @@ class QueryParser extends TextScanner { onParserChange(e) { const selectedParser = e.target.value; - this.selectedParser = selectedParser; apiOptionsSet({parsing: {selectedParser}}, this.search.getOptionsContext()); - this.renderParseResult(); } getMouseEventListeners() { @@ -112,19 +109,16 @@ class QueryParser extends TextScanner { refreshSelectedParser() { if (this.parseResults.length > 0) { - if (this.selectedParser === null) { - this.selectedParser = this.search.options.parsing.selectedParser; - } - if (this.selectedParser === null || !this.getParseResult()) { + if (!this.getParseResult()) { const selectedParser = this.parseResults[0].id; - this.selectedParser = selectedParser; apiOptionsSet({parsing: {selectedParser}}, this.search.getOptionsContext()); } } } getParseResult() { - return this.parseResults.find((r) => r.id === this.selectedParser); + const {selectedParser} = this.options.parsing; + return this.parseResults.find((r) => r.id === selectedParser); } async setText(text) { @@ -176,7 +170,8 @@ class QueryParser extends TextScanner { renderParserSelect() { this.queryParserSelect.textContent = ''; if (this.parseResults.length > 1) { - const select = this.queryParserGenerator.createParserSelect(this.parseResults, this.selectedParser); + const {selectedParser} = this.options.parsing; + const select = this.queryParserGenerator.createParserSelect(this.parseResults, selectedParser); select.addEventListener('change', this.onParserChange.bind(this)); this.queryParserSelect.appendChild(select); } diff --git a/ext/bg/js/search.js b/ext/bg/js/search.js index e2bdff73..8b8ee55e 100644 --- a/ext/bg/js/search.js +++ b/ext/bg/js/search.js @@ -247,15 +247,12 @@ class DisplaySearch extends Display { } onWanakanaEnableChange(e) { - const {queryParams: {query=''}} = parseUrl(window.location.href); const enableWanakana = e.target.checked; if (enableWanakana) { window.wanakana.bind(this.query); } else { window.wanakana.unbind(this.query); } - this.setQuery(query); - this.onSearchQueryUpdated(this.query.value, false); apiOptionsSet({general: {enableWanakana}}, this.getOptionsContext()); } @@ -278,19 +275,20 @@ class DisplaySearch extends Display { } } - async updateOptions(options) { - await super.updateOptions(options); + async updateOptions() { + await super.updateOptions(); this.queryParser.setOptions(this.options); + const query = this.query.value; + if (query) { + this.setQuery(query); + this.onSearchQueryUpdated(query, false); + } } isWanakanaEnabled() { return this.wanakanaEnable !== null && this.wanakanaEnable.checked; } - getOptionsContext() { - return this.optionsContext; - } - setQuery(query) { const interpretedQuery = this.isWanakanaEnabled() ? window.wanakana.toKana(query) : query; this.query.value = interpretedQuery; diff --git a/ext/fg/js/float.js b/ext/fg/js/float.js index 393c2719..9b720ebe 100644 --- a/ext/fg/js/float.js +++ b/ext/fg/js/float.js @@ -28,6 +28,8 @@ class DisplayFloat extends Display { super(document.querySelector('#spinner'), document.querySelector('#definitions')); this.autoPlayAudioTimer = null; + this._popupId = null; + this.optionsContext = { depth: 0, url: window.location.href @@ -53,7 +55,7 @@ class DisplayFloat extends Display { ['setContent', ({type, details}) => this.setContent(type, details)], ['clearAutoPlayTimer', () => this.clearAutoPlayTimer()], ['setCustomCss', ({css}) => this.setCustomCss(css)], - ['prepare', ({options, popupInfo, url, childrenSupported, scale, uniqueId}) => this.prepare(options, popupInfo, url, childrenSupported, scale, uniqueId)], + ['prepare', ({popupInfo, url, childrenSupported, scale}) => this.prepare(popupInfo, url, childrenSupported, scale)], ['setContentScale', ({scale}) => this.setContentScale(scale)] ]); @@ -61,23 +63,24 @@ class DisplayFloat extends Display { window.addEventListener('message', this.onMessage.bind(this), false); } - async prepare(options, popupInfo, url, childrenSupported, scale, uniqueId) { + async prepare(popupInfo, url, childrenSupported, scale) { if (this._prepareInvoked) { return; } this._prepareInvoked = true; - await super.prepare(options); - const {id, depth, parentFrameId} = popupInfo; + this._popupId = id; this.optionsContext.depth = depth; this.optionsContext.url = url; + await super.prepare(); + if (childrenSupported) { popupNestedInitialize(id, depth, parentFrameId, url); } this.setContentScale(scale); - apiForward('popupPrepareCompleted', {uniqueId}); + apiForward('popupPrepareCompleted', {targetPopupId: this._popupId}); } onError(error) { @@ -144,10 +147,6 @@ class DisplayFloat extends Display { handler(params); } - getOptionsContext() { - return this.optionsContext; - } - autoPlayAudio() { this.clearAutoPlayTimer(); this.autoPlayAudioTimer = window.setTimeout(() => super.autoPlayAudio(), 400); diff --git a/ext/fg/js/frontend-initialize.js b/ext/fg/js/frontend-initialize.js index 8424b21d..3a191247 100644 --- a/ext/fg/js/frontend-initialize.js +++ b/ext/fg/js/frontend-initialize.js @@ -26,7 +26,7 @@ async function main() { await yomichan.prepare(); const data = window.frontendInitializationData || {}; - const {id, depth=0, parentFrameId, ignoreNodes, url, proxy=false} = data; + const {id, depth=0, parentFrameId, url, proxy=false} = data; let popup; if (proxy) { @@ -38,7 +38,7 @@ async function main() { popup = popupHost.getOrCreatePopup(null, null, depth); } - const frontend = new Frontend(popup, ignoreNodes); + const frontend = new Frontend(popup); await frontend.prepare(); } diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js index 768b9326..d7bc02cc 100644 --- a/ext/fg/js/frontend.js +++ b/ext/fg/js/frontend.js @@ -26,10 +26,9 @@ */ class Frontend extends TextScanner { - constructor(popup, ignoreNodes) { + constructor(popup) { super( window, - ignoreNodes, popup.isProxy() ? [] : [popup.getContainer()], [(x, y) => this.popup.containsPoint(x, y)] ); @@ -95,6 +94,9 @@ class Frontend extends TextScanner { } onRuntimeMessage({action, params}, sender, callback) { + const {targetPopupId} = params || {}; + if (targetPopupId !== 'all' && targetPopupId !== this.popup.id) { return; } + const handler = this._runtimeMessageHandlers.get(action); if (typeof handler !== 'function') { return false; } @@ -129,8 +131,20 @@ class Frontend extends TextScanner { async updateOptions() { this.setOptions(await apiOptionsGet(this.getOptionsContext())); + + const ignoreNodes = ['.scan-disable', '.scan-disable *']; + if (!this.options.scanning.enableOnPopupExpressions) { + ignoreNodes.push('.source-text', '.source-text *'); + } + this.ignoreNodes = ignoreNodes.join(','); + await this.popup.setOptions(this.options); + this._updateContentScale(); + + if (this.textSourceCurrent !== null && this.causeCurrent !== null) { + await this.onSearchSource(this.textSourceCurrent, this.causeCurrent); + } } async onSearchSource(textSource, cause) { diff --git a/ext/fg/js/popup-nested.js b/ext/fg/js/popup-nested.js index 06f8fc4b..39d91fd8 100644 --- a/ext/fg/js/popup-nested.js +++ b/ext/fg/js/popup-nested.js @@ -36,12 +36,7 @@ async function popupNestedInitialize(id, depth, parentFrameId, url) { return; } - const ignoreNodes = ['.scan-disable', '.scan-disable *']; - if (!options.scanning.enableOnPopupExpressions) { - ignoreNodes.push('.source-text', '.source-text *'); - } - - window.frontendInitializationData = {id, depth, parentFrameId, ignoreNodes, url, proxy: true}; + window.frontendInitializationData = {id, depth, parentFrameId, url, proxy: true}; const scriptSrcs = [ '/mixed/js/text-scanner.js', diff --git a/ext/fg/js/popup-proxy.js b/ext/fg/js/popup-proxy.js index f7cef214..997b1317 100644 --- a/ext/fg/js/popup-proxy.js +++ b/ext/fg/js/popup-proxy.js @@ -33,6 +33,10 @@ class PopupProxy { // Public properties + get id() { + return this._id; + } + get parent() { return null; } diff --git a/ext/fg/js/popup.js b/ext/fg/js/popup.js index d752812e..e6e93a76 100644 --- a/ext/fg/js/popup.js +++ b/ext/fg/js/popup.js @@ -210,11 +210,9 @@ class Popup { const parentFrameId = (typeof this._frameId === 'number' ? this._frameId : null); this._container.setAttribute('src', chrome.runtime.getURL('/fg/float.html')); this._container.addEventListener('load', () => { - const uniqueId = yomichan.generateId(32); - Popup._listenForDisplayPrepareCompleted(uniqueId, resolve); + this._listenForDisplayPrepareCompleted(resolve); this._invokeApi('prepare', { - options: this._options, popupInfo: { id: this._id, depth: this._depth, @@ -222,8 +220,7 @@ class Popup { }, url: this.url, childrenSupported: this._childrenSupported, - scale: this._contentScale, - uniqueId + scale: this._contentScale }); }); this._observeFullscreen(true); @@ -364,23 +361,12 @@ class Popup { contentWindow.postMessage({action, params, token}, this._targetOrigin); } - static _getFullscreenElement() { - return ( - document.fullscreenElement || - document.msFullscreenElement || - document.mozFullScreenElement || - document.webkitFullscreenElement || - null - ); - } - - static _listenForDisplayPrepareCompleted(uniqueId, resolve) { + _listenForDisplayPrepareCompleted(resolve) { const runtimeMessageCallback = ({action, params}, sender, callback) => { if ( action === 'popupPrepareCompleted' && - typeof params === 'object' && - params !== null && - params.uniqueId === uniqueId + isObject(params) && + params.targetPopupId === this._id ) { chrome.runtime.onMessage.removeListener(runtimeMessageCallback); callback(); @@ -391,6 +377,16 @@ class Popup { chrome.runtime.onMessage.addListener(runtimeMessageCallback); } + static _getFullscreenElement() { + return ( + document.fullscreenElement || + document.msFullscreenElement || + document.mozFullScreenElement || + document.webkitFullscreenElement || + null + ); + } + static _getPositionForHorizontalText(elementRect, width, height, viewport, offsetScale, optionsGeneral) { const preferBelow = (optionsGeneral.popupHorizontalTextPosition === 'below'); const horizontalOffset = optionsGeneral.popupHorizontalOffset * offsetScale; diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 515e28a7..9a7a91f3 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -40,6 +40,7 @@ class Display { this.spinner = spinner; this.container = container; this.definitions = []; + this.optionsContext = null; this.options = null; this.context = null; this.index = 0; @@ -165,12 +166,11 @@ class Display { this.setInteractive(true); } - async prepare(options=null) { + async prepare() { await yomichan.prepare(); - const displayGeneratorPromise = this.displayGenerator.prepare(); - const updateOptionsPromise = this.updateOptions(options); - await Promise.all([displayGeneratorPromise, updateOptionsPromise]); - yomichan.on('optionsUpdated', () => this.updateOptions(null)); + await this.displayGenerator.prepare(); + await this.updateOptions(); + yomichan.on('optionsUpdated', () => this.updateOptions()); } onError(_error) { @@ -369,11 +369,11 @@ class Display { } getOptionsContext() { - throw new Error('Override me'); + return this.optionsContext; } - async updateOptions(options) { - this.options = options ? options : await apiOptionsGet(this.getOptionsContext()); + async updateOptions() { + this.options = await apiOptionsGet(this.getOptionsContext()); this.updateDocumentOptions(this.options); this.updateTheme(this.options.general.popupTheme); this.setCustomCss(this.options.general.customPopupCss); @@ -851,7 +851,7 @@ class Display { } setPopupVisibleOverride(visible) { - return apiForward('popupSetVisibleOverride', {visible}); + return apiForward('popupSetVisibleOverride', {visible, targetPopupId: 'all'}); } setSpinnerVisible(visible) { diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js index a08e09fb..b8156c01 100644 --- a/ext/mixed/js/text-scanner.js +++ b/ext/mixed/js/text-scanner.js @@ -23,13 +23,15 @@ */ class TextScanner { - constructor(node, ignoreNodes, ignoreElements, ignorePoints) { + constructor(node, ignoreElements, ignorePoints) { this.node = node; - this.ignoreNodes = (Array.isArray(ignoreNodes) && ignoreNodes.length > 0 ? ignoreNodes.join(',') : null); this.ignoreElements = ignoreElements; this.ignorePoints = ignorePoints; + this.ignoreNodes = null; + this.scanTimerPromise = null; + this.causeCurrent = null; this.textSourceCurrent = null; this.pendingLookup = false; this.options = null; @@ -298,6 +300,7 @@ class TextScanner { this.pendingLookup = true; const result = await this.onSearchSource(textSource, cause); if (result !== null) { + this.causeCurrent = cause; this.textSourceCurrent = textSource; if (this.options.scanning.selectText) { textSource.select(); -- cgit v1.2.3 From 46c6ad98f33ea1536452beb7e41f78f9a1895997 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Sat, 14 Mar 2020 02:51:39 +0200 Subject: use dependency injection in QueryParser Also fix an issue with settings update triggering a lookup on unprepared QueryParser. --- ext/bg/js/search-query-parser.js | 43 +++++++++++++++++++++++++++------------- ext/bg/js/search.js | 11 +++++++++- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 6e18073b..4a4fcdde 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -27,9 +27,12 @@ */ class QueryParser extends TextScanner { - constructor(search) { + constructor({getOptionsContext, setContent, setSpinnerVisible}) { super(document.querySelector('#query-parser-content'), [], []); - this.search = search; + + this.getOptionsContext = getOptionsContext; + this.setContent = setContent; + this.setSpinnerVisible = setSpinnerVisible; this.parseResults = []; @@ -55,18 +58,18 @@ class QueryParser extends TextScanner { async onSearchSource(textSource, cause) { if (textSource === null) { return null; } - this.setTextSourceScanLength(textSource, this.search.options.scanning.length); + this.setTextSourceScanLength(textSource, this.options.scanning.length); const searchText = textSource.text(); if (searchText.length === 0) { return; } - const {definitions, length} = await apiTermsFind(searchText, {}, this.search.getOptionsContext()); + const {definitions, length} = await apiTermsFind(searchText, {}, this.getOptionsContext()); if (definitions.length === 0) { return null; } - const sentence = docSentenceExtract(textSource, this.search.options.anki.sentenceExt); + const sentence = docSentenceExtract(textSource, this.options.anki.sentenceExt); textSource.setEndOffset(length); - this.search.setContent('terms', {definitions, context: { + this.setContent('terms', {definitions, context: { focus: false, disableHistory: cause === 'mouse', sentence, @@ -78,7 +81,7 @@ class QueryParser extends TextScanner { onParserChange(e) { const selectedParser = e.target.value; - apiOptionsSet({parsing: {selectedParser}}, this.search.getOptionsContext()); + apiOptionsSet({parsing: {selectedParser}}, this.getOptionsContext()); } getMouseEventListeners() { @@ -107,11 +110,23 @@ class QueryParser extends TextScanner { this.queryParser.dataset.termSpacing = `${options.parsing.termSpacing}`; } + getOptionsContext() { + throw new Error('Override me'); + } + + setContent(_type, _details) { + throw new Error('Override me'); + } + + setSpinnerVisible(_visible) { + throw new Error('Override me'); + } + refreshSelectedParser() { if (this.parseResults.length > 0) { if (!this.getParseResult()) { const selectedParser = this.parseResults[0].id; - apiOptionsSet({parsing: {selectedParser}}, this.search.getOptionsContext()); + apiOptionsSet({parsing: {selectedParser}}, this.getOptionsContext()); } } } @@ -122,7 +137,7 @@ class QueryParser extends TextScanner { } async setText(text) { - this.search.setSpinnerVisible(true); + this.setSpinnerVisible(true); this.setPreview(text); @@ -132,20 +147,20 @@ class QueryParser extends TextScanner { this.renderParserSelect(); this.renderParseResult(); - this.search.setSpinnerVisible(false); + this.setSpinnerVisible(false); } async parseText(text) { const results = []; - if (this.search.options.parsing.enableScanningParser) { + if (this.options.parsing.enableScanningParser) { results.push({ name: 'Scanning parser', id: 'scan', - parsedText: await apiTextParse(text, this.search.getOptionsContext()) + parsedText: await apiTextParse(text, this.getOptionsContext()) }); } - if (this.search.options.parsing.enableMecabParser) { - const mecabResults = await apiTextParseMecab(text, this.search.getOptionsContext()); + if (this.options.parsing.enableMecabParser) { + const mecabResults = await apiTextParseMecab(text, this.getOptionsContext()); for (const [mecabDictName, mecabDictResults] of mecabResults) { results.push({ name: `MeCab: ${mecabDictName}`, diff --git a/ext/bg/js/search.js b/ext/bg/js/search.js index 8b8ee55e..9250fdde 100644 --- a/ext/bg/js/search.js +++ b/ext/bg/js/search.js @@ -29,12 +29,18 @@ class DisplaySearch extends Display { constructor() { super(document.querySelector('#spinner'), document.querySelector('#content')); + this._isPrepared = false; + this.optionsContext = { depth: 0, url: window.location.href }; - this.queryParser = new QueryParser(this); + this.queryParser = new QueryParser({ + getOptionsContext: this.getOptionsContext.bind(this), + setContent: this.setContent.bind(this), + setSpinnerVisible: this.setSpinnerVisible.bind(this) + }); this.search = document.querySelector('#search'); this.query = document.querySelector('#query'); @@ -112,6 +118,8 @@ class DisplaySearch extends Display { this.clipboardMonitor.on('change', this.onExternalSearchUpdate.bind(this)); this.updateSearchButton(); + + this._isPrepared = true; } catch (e) { this.onError(e); } @@ -278,6 +286,7 @@ class DisplaySearch extends Display { async updateOptions() { await super.updateOptions(); this.queryParser.setOptions(this.options); + if (!this._isPrepared) { return; } const query = this.query.value; if (query) { this.setQuery(query); -- cgit v1.2.3 From 2c4fd648dbc37d3d5e10acfe2db054d7cc876a63 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Sat, 14 Mar 2020 13:21:05 +0200 Subject: remove stubs --- ext/bg/js/search-query-parser.js | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 4a4fcdde..9f59f2e5 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -110,18 +110,6 @@ class QueryParser extends TextScanner { this.queryParser.dataset.termSpacing = `${options.parsing.termSpacing}`; } - getOptionsContext() { - throw new Error('Override me'); - } - - setContent(_type, _details) { - throw new Error('Override me'); - } - - setSpinnerVisible(_visible) { - throw new Error('Override me'); - } - refreshSelectedParser() { if (this.parseResults.length > 0) { if (!this.getParseResult()) { -- cgit v1.2.3 From 17934cce4bd7c0775ab51b69d81db585b2de14e8 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Sun, 15 Mar 2020 18:18:18 +0200 Subject: use random ID as popup ID --- ext/fg/js/popup-proxy-host.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ext/fg/js/popup-proxy-host.js b/ext/fg/js/popup-proxy-host.js index 793d3949..6f1c13c6 100644 --- a/ext/fg/js/popup-proxy-host.js +++ b/ext/fg/js/popup-proxy-host.js @@ -25,7 +25,6 @@ class PopupProxyHost { constructor() { this._popups = new Map(); - this._nextId = 0; this._apiReceiver = null; this._frameIdPromise = null; } @@ -76,7 +75,7 @@ class PopupProxyHost { // New unique id if (id === null) { - id = this._nextId++; + id = yomichan.generateId(16); } // Create new popup -- cgit v1.2.3 From b616bac66ed0735c1e7ebbaf1ceba20b081f1a6f Mon Sep 17 00:00:00 2001 From: siikamiika Date: Sun, 15 Mar 2020 18:19:00 +0200 Subject: remove targetPopupId 'all' Make unset targetPopupId mean the same thing instead --- ext/fg/js/frontend.js | 2 +- ext/mixed/js/display.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js index d7bc02cc..d6c5eac6 100644 --- a/ext/fg/js/frontend.js +++ b/ext/fg/js/frontend.js @@ -95,7 +95,7 @@ class Frontend extends TextScanner { onRuntimeMessage({action, params}, sender, callback) { const {targetPopupId} = params || {}; - if (targetPopupId !== 'all' && targetPopupId !== this.popup.id) { return; } + if (typeof targetPopupId !== 'undefined' && targetPopupId !== this.popup.id) { return; } const handler = this._runtimeMessageHandlers.get(action); if (typeof handler !== 'function') { return false; } diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 9a7a91f3..6898a6eb 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -851,7 +851,7 @@ class Display { } setPopupVisibleOverride(visible) { - return apiForward('popupSetVisibleOverride', {visible, targetPopupId: 'all'}); + return apiForward('popupSetVisibleOverride', {visible}); } setSpinnerVisible(visible) { -- cgit v1.2.3 From 93f7278586f7b943ae49c00cd14559a2f4b99561 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 23 Feb 2020 14:03:37 -0500 Subject: Update dictionary schema to support pitch accent data --- .../data/dictionary-term-meta-bank-v3-schema.json | 64 +++++++++++++++++++++- .../dictionaries/valid-dictionary1/tag_bank_3.json | 4 ++ .../valid-dictionary1/term_meta_bank_1.json | 36 +++++++++++- test/test-database.js | 9 +-- 4 files changed, 105 insertions(+), 8 deletions(-) create mode 100644 test/data/dictionaries/valid-dictionary1/tag_bank_3.json diff --git a/ext/bg/data/dictionary-term-meta-bank-v3-schema.json b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json index 1cc0557f..8475db81 100644 --- a/ext/bg/data/dictionary-term-meta-bank-v3-schema.json +++ b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json @@ -13,13 +13,71 @@ }, { "type": "string", - "enum": ["freq"], - "description": "Type of data. \"freq\" corresponds to frequency information." + "enum": ["freq", "pitch"], + "description": "Type of data. \"freq\" corresponds to frequency information; \"pitch\" corresponds to pitch information." }, { - "type": ["string", "number"], "description": "Data for the term/expression." } + ], + "oneOf": [ + { + "items": [ + {}, + {"enum": ["freq"]}, + { + "type": ["string", "number"], + "description": "Frequency information for the term or expression." + } + ] + }, + { + "items": [ + {}, + {"enum": ["pitch"]}, + { + "type": ["object"], + "description": "Pitch accent information for the term or expression.", + "required": [ + "reading", + "pitches" + ], + "additionalProperties": false, + "properties": { + "reading": { + "type": "string", + "description": "Reading for the term or expression." + }, + "pitches": { + "type": "array", + "description": "List of different pitch accent information for the term and reading combination.", + "additionalItems": { + "type": "object", + "required": [ + "position" + ], + "additionalProperties": false, + "properties": { + "position": { + "type": "integer", + "description": "Mora position of the pitch accent downstep. A value of 0 indicates that the word does not have a downstep (heiban).", + "minimum": 0 + }, + "tags": { + "type": "array", + "description": "List of tags for this pitch accent.", + "items": { + "type": "string", + "description": "Tag for this pitch accent. This typically corresponds to a certain type of part of speech." + } + } + } + } + } + } + } + ] + } ] } } \ No newline at end of file diff --git a/test/data/dictionaries/valid-dictionary1/tag_bank_3.json b/test/data/dictionaries/valid-dictionary1/tag_bank_3.json new file mode 100644 index 00000000..572221fe --- /dev/null +++ b/test/data/dictionaries/valid-dictionary1/tag_bank_3.json @@ -0,0 +1,4 @@ +[ + ["ptag1", "pcategory1", 0, "ptag1 notes", 0], + ["ptag2", "pcategory2", 0, "ptag2 notes", 0] +] \ No newline at end of file diff --git a/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json index 78096502..26922394 100644 --- a/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json +++ b/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json @@ -1,5 +1,39 @@ [ ["打", "freq", 1], ["打つ", "freq", 2], - ["打ち込む", "freq", 3] + ["打ち込む", "freq", 3], + [ + "打ち込む", + "pitch", + { + "reading": "うちこむ", + "pitches": [ + {"position": 0}, + {"position": 3} + ] + } + ], + [ + "打ち込む", + "pitch", + { + "reading": "ぶちこむ", + "pitches": [ + {"position": 0}, + {"position": 3} + ] + } + ], + [ + "お手前", + "pitch", + { + "reading": "おてまえ", + "pitches": [ + {"position": 2, "tags": ["ptag1"]}, + {"position": 2, "tags": ["ptag2"]}, + {"position": 0, "tags": ["ptag2"]} + ] + } + ] ] \ No newline at end of file diff --git a/test/test-database.js b/test/test-database.js index 833aa75d..dbd67257 100644 --- a/test/test-database.js +++ b/test/test-database.js @@ -231,8 +231,8 @@ async function testDatabase1() { true ); vm.assert.deepStrictEqual(counts, { - counts: [{kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 3, tagMeta: 12}], - total: {kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 3, tagMeta: 12} + counts: [{kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 6, tagMeta: 14}], + total: {kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 6, tagMeta: 14} }); // Test find* functions @@ -648,9 +648,10 @@ async function testFindTermMetaBulk1(database, titles) { } ], expectedResults: { - total: 1, + total: 3, modes: [ - ['freq', 1] + ['freq', 1], + ['pitch', 2] ] } }, -- cgit v1.2.3 From 168bd72d0a5289646c78c57f6f36fe2aa1a194eb Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 25 Jan 2020 00:14:27 -0500 Subject: Update _appendMultiple to support general iterables --- ext/mixed/js/display-generator.js | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/ext/mixed/js/display-generator.js b/ext/mixed/js/display-generator.js index 41f7315a..d88b8648 100644 --- a/ext/mixed/js/display-generator.js +++ b/ext/mixed/js/display-generator.js @@ -301,22 +301,28 @@ class DisplayGenerator { } } - static _appendMultiple(container, createItem, detailsArray, fallback=[]) { + static _appendMultiple(container, createItem, detailsIterable, fallback=[]) { if (container === null) { return 0; } - const isArray = Array.isArray(detailsArray); - if (!isArray) { detailsArray = fallback; } + const multi = ( + detailsIterable !== null && + typeof detailsIterable === 'object' && + typeof detailsIterable[Symbol.iterator] !== 'undefined' + ); + if (!multi) { detailsIterable = fallback; } - container.dataset.multi = `${isArray}`; - container.dataset.count = `${detailsArray.length}`; - - for (const details of detailsArray) { + let count = 0; + for (const details of detailsIterable) { const item = createItem(details); if (item === null) { continue; } container.appendChild(item); + ++count; } - return detailsArray.length; + container.dataset.multi = `${multi}`; + container.dataset.count = `${count}`; + + return count; } static _appendFurigana(container, segments, addText) { -- cgit v1.2.3 From 9e8a22b08a9ea3e746c4c16f0a06beabd2dd7294 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 1 Mar 2020 14:04:54 -0500 Subject: Add support for different sections for term content --- ext/mixed/css/display-dark.css | 2 ++ ext/mixed/css/display-default.css | 2 ++ ext/mixed/css/display.css | 13 +++++++++++++ ext/mixed/display-templates.html | 4 +++- ext/mixed/js/display-generator.js | 10 ++++++++-- 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/ext/mixed/css/display-dark.css b/ext/mixed/css/display-dark.css index c9cd9f90..908d9cc5 100644 --- a/ext/mixed/css/display-dark.css +++ b/ext/mixed/css/display-dark.css @@ -19,6 +19,8 @@ body { background-color: #1e1e1e; color: #d4d4d4; } +h2 { border-bottom-color: #2f2f2f; } + .navigation-header { background-color: #1e1e1e; border-bottom-color: #2f2f2f; diff --git a/ext/mixed/css/display-default.css b/ext/mixed/css/display-default.css index 6eee43c4..e43e3742 100644 --- a/ext/mixed/css/display-default.css +++ b/ext/mixed/css/display-default.css @@ -19,6 +19,8 @@ body { background-color: #ffffff; color: #333333; } +h2 { border-bottom-color: #eeeeee; } + .navigation-header { background-color: #ffffff; border-bottom-color: #eeeeee; diff --git a/ext/mixed/css/display.css b/ext/mixed/css/display.css index 688a357c..51015057 100644 --- a/ext/mixed/css/display.css +++ b/ext/mixed/css/display.css @@ -65,6 +65,14 @@ ol, ul { height: 2.28571428em; /* 14px => 32px */ } +h2 { + font-size: 1.25em; + font-weight: normal; + margin: 0.25em 0 0; + border-bottom-width: 0.05714285714285714em; /* 14px * 1.25em => 1px */ + border-bottom-style: solid; +} + /* * Navigation */ @@ -422,6 +430,11 @@ button.action-button { display: inline; } +.term-entry-body[data-section-count="0"] .term-entry-body-section-header, +.term-entry-body[data-section-count="1"] .term-entry-body-section-header { + display: none; +} + /* * Kanji diff --git a/ext/mixed/display-templates.html b/ext/mixed/display-templates.html index 7ae51a62..837245cf 100644 --- a/ext/mixed/display-templates.html +++ b/ext/mixed/display-templates.html @@ -17,7 +17,9 @@
-
    +
    +

    Pitch Accents

      +
      
       
       
      @@ -36,6 +37,11 @@
       
       
       
      +
      +
      +
      +
      +
       
       
      -
      -
      +
      +