From 701f73440c661b19cecefeb02ce03dfa9db76fb3 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 8 Mar 2020 21:24:05 -0400 Subject: Add tests --- test/test-object-property-accessor.js | 287 ++++++++++++++++++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 test/test-object-property-accessor.js (limited to 'test') diff --git a/test/test-object-property-accessor.js b/test/test-object-property-accessor.js new file mode 100644 index 00000000..69e5dbdb --- /dev/null +++ b/test/test-object-property-accessor.js @@ -0,0 +1,287 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +const assert = require('assert'); +const {VM} = require('./yomichan-vm'); + +const vm = new VM({}); +vm.execute('mixed/js/object-property-accessor.js'); +const ObjectPropertyAccessor = vm.get('ObjectPropertyAccessor'); + + +function createTestObject() { + return { + 0: null, + value1: { + value2: {}, + value3: [], + value4: null + }, + value5: [ + {}, + [], + null + ] + }; +} + + +function testGetProperty1() { + const object = createTestObject(); + const accessor = new ObjectPropertyAccessor(object); + + const data = [ + [[], object], + [['0'], object['0']], + [['value1'], object.value1], + [['value1', 'value2'], object.value1.value2], + [['value1', 'value3'], object.value1.value3], + [['value1', 'value4'], object.value1.value4], + [['value5'], object.value5], + [['value5', 0], object.value5[0]], + [['value5', 1], object.value5[1]], + [['value5', 2], object.value5[2]] + ]; + + for (const [pathArray, expected] of data) { + assert.strictEqual(accessor.getProperty(pathArray), expected); + } +} + +function testGetProperty2() { + const object = createTestObject(); + const accessor = new ObjectPropertyAccessor(object); + + const data = [ + [0], + ['0', 'invalid'], + ['invalid'], + ['value1', 'invalid'], + ['value1', 'value2', 'invalid'], + ['value1', 'value2', 0], + ['value1', 'value3', 'invalid'], + ['value1', 'value3', 0], + ['value1', 'value4', 'invalid'], + ['value1', 'value4', 0], + ['value5', 'length'], + ['value5', 0, 'invalid'], + ['value5', 0, 0], + ['value5', 1, 'invalid'], + ['value5', 1, 0], + ['value5', 2, 'invalid'], + ['value5', 2, 0], + ['value5', 2.5] + ]; + + for (const pathArray of data) { + assert.throws(() => accessor.getProperty(pathArray)); + } +} + + +function testSetProperty1() { + const object = createTestObject(); + const accessor = new ObjectPropertyAccessor(object); + + const testValue = {}; + const data = [ + ['0'], + ['value1', 'value2'], + ['value1', 'value3'], + ['value1', 'value4'], + ['value1'], + ['value5', 0], + ['value5', 1], 
+ ['value5', 2], + ['value5'] + ]; + + for (const pathArray of data) { + accessor.setProperty(pathArray, testValue); + assert.strictEqual(accessor.getProperty(pathArray), testValue); + } +} + +function testSetProperty2() { + const object = createTestObject(); + const accessor = new ObjectPropertyAccessor(object); + + const testValue = {}; + const data = [ + [0], + ['0', 'invalid'], + ['value1', 'value2', 0], + ['value1', 'value3', 'invalid'], + ['value1', 'value4', 'invalid'], + ['value1', 'value4', 0], + ['value5', 1, 'invalid'], + ['value5', 2, 'invalid'], + ['value5', 2, 0], + ['value5', 2.5] + ]; + + for (const pathArray of data) { + assert.throws(() => accessor.setProperty(pathArray, testValue)); + } +} + + +function testGetPathString1() { + const data = [ + [[], ''], + [[0], '[0]'], + [['escape\\'], '["escape\\\\"]'], + [['\'quote\''], '["\'quote\'"]'], + [['"quote"'], '["\\"quote\\""]'], + [['part1', 'part2'], 'part1.part2'], + [['part1', 'part2', 3], 'part1.part2[3]'], + [['part1', 'part2', '3'], 'part1.part2["3"]'], + [['part1', 'part2', '3part'], 'part1.part2["3part"]'], + [['part1', 'part2', '3part', 'part4'], 'part1.part2["3part"].part4'], + [['part1', 'part2', '3part', '4part'], 'part1.part2["3part"]["4part"]'] + ]; + + for (const [pathArray, expected] of data) { + assert.strictEqual(ObjectPropertyAccessor.getPathString(pathArray), expected); + } +} + +function testGetPathString2() { + const data = [ + [1.5], + [null] + ]; + + for (const pathArray of data) { + assert.throws(() => ObjectPropertyAccessor.getPathString(pathArray)); + } +} + + +function testGetPathArray1() { + const data = [ + ['', []], + ['[0]', [0]], + ['["escape\\\\"]', ['escape\\']], + ['["\'quote\'"]', ['\'quote\'']], + ['["\\"quote\\""]', ['"quote"']], + ['part1.part2', ['part1', 'part2']], + ['part1.part2[3]', ['part1', 'part2', 3]], + ['part1.part2["3"]', ['part1', 'part2', '3']], + ['part1.part2[\'3\']', ['part1', 'part2', '3']], + ['part1.part2["3part"]', ['part1', 'part2', 
'3part']], + ['part1.part2[\'3part\']', ['part1', 'part2', '3part']], + ['part1.part2["3part"].part4', ['part1', 'part2', '3part', 'part4']], + ['part1.part2[\'3part\'].part4', ['part1', 'part2', '3part', 'part4']], + ['part1.part2["3part"]["4part"]', ['part1', 'part2', '3part', '4part']], + ['part1.part2[\'3part\'][\'4part\']', ['part1', 'part2', '3part', '4part']] + ]; + + for (const [pathString, expected] of data) { + vm.assert.deepStrictEqual(ObjectPropertyAccessor.getPathArray(pathString), expected); + } +} + +function testGetPathArray2() { + const data = [ + ['?', 'Unexpected character: ?'], + ['.', 'Unexpected character: .'], + ['0', 'Unexpected character: 0'], + ['part1.[0]', 'Unexpected character: ['], + ['part1?', 'Unexpected character: ?'], + ['[part1]', 'Unexpected character: p'], + ['[0a]', 'Unexpected character: a'], + ['["part1"x]', 'Unexpected character: x'], + ['[\'part1\'x]', 'Unexpected character: x'], + ['["part1"]x', 'Unexpected character: x'], + ['[\'part1\']x', 'Unexpected character: x'], + ['part1..part2', 'Unexpected character: .'], + + ['[', 'Path not terminated correctly'], + ['part1.', 'Path not terminated correctly'], + ['part1[', 'Path not terminated correctly'], + ['part1["', 'Path not terminated correctly'], + ['part1[\'', 'Path not terminated correctly'], + ['part1[""', 'Path not terminated correctly'], + ['part1[\'\'', 'Path not terminated correctly'], + ['part1[0', 'Path not terminated correctly'], + ['part1[0].', 'Path not terminated correctly'] + ]; + + for (const [pathString, message] of data) { + assert.throws(() => ObjectPropertyAccessor.getPathArray(pathString), {message}); + } +} + + +function testHasProperty() { + const data = [ + [{}, 'invalid', false], + [{}, 0, false], + [{valid: 0}, 'valid', true], + [{null: 0}, null, false], + [[], 'invalid', false], + [[], 0, false], + [[0], 0, true], + [[0], null, false], + ['string', 0, false], + ['string', 'length', false], + ['string', null, false] + ]; + + for (const [object, 
property, expected] of data) { + assert.strictEqual(ObjectPropertyAccessor.hasProperty(object, property), expected); + } +} + +function testIsValidPropertyType() { + const data = [ + [{}, 'invalid', true], + [{}, 0, false], + [{valid: 0}, 'valid', true], + [{null: 0}, null, false], + [[], 'invalid', false], + [[], 0, true], + [[0], 0, true], + [[0], null, false], + ['string', 0, false], + ['string', 'length', false], + ['string', null, false] + ]; + + for (const [object, property, expected] of data) { + assert.strictEqual(ObjectPropertyAccessor.isValidPropertyType(object, property), expected); + } +} + + +function main() { + testGetProperty1(); + testGetProperty2(); + testSetProperty1(); + testSetProperty2(); + testGetPathString1(); + testGetPathString2(); + testGetPathArray1(); + testGetPathArray2(); + testHasProperty(); + testIsValidPropertyType(); +} + + +if (require.main === module) { main(); } -- cgit v1.2.3 From 7e1e7d59cd8f076b8ee07c354ed11724364cc9fa Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 13 Mar 2020 18:17:40 -0400 Subject: Add error message checking --- test/test-object-property-accessor.js | 72 +++++++++++++++++------------------ 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'test') diff --git a/test/test-object-property-accessor.js b/test/test-object-property-accessor.js index 69e5dbdb..fb912b21 100644 --- a/test/test-object-property-accessor.js +++ b/test/test-object-property-accessor.js @@ -68,28 +68,28 @@ function testGetProperty2() { const accessor = new ObjectPropertyAccessor(object); const data = [ - [0], - ['0', 'invalid'], - ['invalid'], - ['value1', 'invalid'], - ['value1', 'value2', 'invalid'], - ['value1', 'value2', 0], - ['value1', 'value3', 'invalid'], - ['value1', 'value3', 0], - ['value1', 'value4', 'invalid'], - ['value1', 'value4', 0], - ['value5', 'length'], - ['value5', 0, 'invalid'], - ['value5', 0, 0], - ['value5', 1, 'invalid'], - ['value5', 1, 0], - ['value5', 2, 'invalid'], - ['value5', 2, 0], - 
['value5', 2.5] + [[0], 'Invalid path: [0]'], + [['0', 'invalid'], 'Invalid path: ["0"].invalid'], + [['invalid'], 'Invalid path: invalid'], + [['value1', 'invalid'], 'Invalid path: value1.invalid'], + [['value1', 'value2', 'invalid'], 'Invalid path: value1.value2.invalid'], + [['value1', 'value2', 0], 'Invalid path: value1.value2[0]'], + [['value1', 'value3', 'invalid'], 'Invalid path: value1.value3.invalid'], + [['value1', 'value3', 0], 'Invalid path: value1.value3[0]'], + [['value1', 'value4', 'invalid'], 'Invalid path: value1.value4.invalid'], + [['value1', 'value4', 0], 'Invalid path: value1.value4[0]'], + [['value5', 'length'], 'Invalid path: value5.length'], + [['value5', 0, 'invalid'], 'Invalid path: value5[0].invalid'], + [['value5', 0, 0], 'Invalid path: value5[0][0]'], + [['value5', 1, 'invalid'], 'Invalid path: value5[1].invalid'], + [['value5', 1, 0], 'Invalid path: value5[1][0]'], + [['value5', 2, 'invalid'], 'Invalid path: value5[2].invalid'], + [['value5', 2, 0], 'Invalid path: value5[2][0]'], + [['value5', 2.5], 'Invalid index'] ]; - for (const pathArray of data) { - assert.throws(() => accessor.getProperty(pathArray)); + for (const [pathArray, message] of data) { + assert.throws(() => accessor.getProperty(pathArray), {message}); } } @@ -123,20 +123,20 @@ function testSetProperty2() { const testValue = {}; const data = [ - [0], - ['0', 'invalid'], - ['value1', 'value2', 0], - ['value1', 'value3', 'invalid'], - ['value1', 'value4', 'invalid'], - ['value1', 'value4', 0], - ['value5', 1, 'invalid'], - ['value5', 2, 'invalid'], - ['value5', 2, 0], - ['value5', 2.5] + [[0], 'Invalid path: [0]'], + [['0', 'invalid'], 'Invalid path: ["0"].invalid'], + [['value1', 'value2', 0], 'Invalid path: value1.value2[0]'], + [['value1', 'value3', 'invalid'], 'Invalid path: value1.value3.invalid'], + [['value1', 'value4', 'invalid'], 'Invalid path: value1.value4.invalid'], + [['value1', 'value4', 0], 'Invalid path: value1.value4[0]'], + [['value5', 1, 'invalid'], 
'Invalid path: value5[1].invalid'], + [['value5', 2, 'invalid'], 'Invalid path: value5[2].invalid'], + [['value5', 2, 0], 'Invalid path: value5[2][0]'], + [['value5', 2.5], 'Invalid index'] ]; - for (const pathArray of data) { - assert.throws(() => accessor.setProperty(pathArray, testValue)); + for (const [pathArray, message] of data) { + assert.throws(() => accessor.setProperty(pathArray, testValue), {message}); } } @@ -163,12 +163,12 @@ function testGetPathString1() { function testGetPathString2() { const data = [ - [1.5], - [null] + [[1.5], 'Invalid index'], + [[null], 'Invalid type: object'] ]; - for (const pathArray of data) { - assert.throws(() => ObjectPropertyAccessor.getPathString(pathArray)); + for (const [pathArray, message] of data) { + assert.throws(() => ObjectPropertyAccessor.getPathString(pathArray), {message}); } } -- cgit v1.2.3 From a267799cd91e6d7e23395abd110f2348413cad58 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 13 Mar 2020 18:20:22 -0400 Subject: Add some extra tests --- test/test-object-property-accessor.js | 2 ++ 1 file changed, 2 insertions(+) (limited to 'test') diff --git a/test/test-object-property-accessor.js b/test/test-object-property-accessor.js index fb912b21..47d2e451 100644 --- a/test/test-object-property-accessor.js +++ b/test/test-object-property-accessor.js @@ -85,6 +85,7 @@ function testGetProperty2() { [['value5', 1, 0], 'Invalid path: value5[1][0]'], [['value5', 2, 'invalid'], 'Invalid path: value5[2].invalid'], [['value5', 2, 0], 'Invalid path: value5[2][0]'], + [['value5', 2, 0, 'invalid'], 'Invalid path: value5[2][0]'], [['value5', 2.5], 'Invalid index'] ]; @@ -132,6 +133,7 @@ function testSetProperty2() { [['value5', 1, 'invalid'], 'Invalid path: value5[1].invalid'], [['value5', 2, 'invalid'], 'Invalid path: value5[2].invalid'], [['value5', 2, 0], 'Invalid path: value5[2][0]'], + [['value5', 2, 0, 'invalid'], 'Invalid path: value5[2][0]'], [['value5', 2.5], 'Invalid index'] ]; -- cgit v1.2.3 From 
487d4b239b88fd57fefb0ec3a9d1bd0d25984660 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 14 Mar 2020 17:48:40 -0400 Subject: Add unit tests --- package.json | 2 +- test/test-japanese.js | 373 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 374 insertions(+), 1 deletion(-) create mode 100644 test/test-japanese.js (limited to 'test') diff --git a/package.json b/package.json index 23f0eb25..1b2104f4 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "scripts": { "test": "npm run test-lint && npm run test-code", "test-lint": "eslint . && node ./test/lint/global-declarations.js", - "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js" + "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js" }, "repository": { "type": "git", diff --git a/test/test-japanese.js b/test/test-japanese.js new file mode 100644 index 00000000..78f63c0b --- /dev/null +++ b/test/test-japanese.js @@ -0,0 +1,373 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +const assert = require('assert'); +const {VM} = require('./yomichan-vm'); + +const vm = new VM(); +vm.execute([ + 'mixed/lib/wanakana.min.js', + 'bg/js/japanese.js' +]); +const jp = vm.get('jp'); + + +function testIsCodePointKanji() { + const data = [ + ['力方', true], + ['\u53f1\u{20b9f}', true], + ['かたカタ々kata、。?,.?', false] + ]; + + for (const [characters, expected] of data) { + for (const character of characters) { + const codePoint = character.codePointAt(0); + const actual = jp.isCodePointKanji(codePoint); + assert.strictEqual(actual, expected, `isCodePointKanji failed for ${character} (\\u{${codePoint.toString(16)}})`); + } + } +} + +function testIsCodePointKana() { + const data = [ + ['かたカタ', true], + ['力方々kata、。?,.?', false], + ['\u53f1\u{20b9f}', false] + ]; + + for (const [characters, expected] of data) { + for (const character of characters) { + const codePoint = character.codePointAt(0); + const actual = jp.isCodePointKana(codePoint); + assert.strictEqual(actual, expected, `isCodePointKana failed for ${character} (\\u{${codePoint.toString(16)}})`); + } + } +} + +function testIsCodePointJapanese() { + const data = [ + ['かたカタ力方々、。?', true], + ['\u53f1\u{20b9f}', true], + ['kata,.?', false] + ]; + + for (const [characters, expected] of data) { + for (const character of characters) { + const codePoint = character.codePointAt(0); + const actual = jp.isCodePointJapanese(codePoint); + assert.strictEqual(actual, expected, `isCodePointJapanese failed for ${character} (\\u{${codePoint.toString(16)}})`); + } + } +} + +function testIsStringEntirelyKana() { + const data = [ + ['かたかな', true], + ['カタカナ', true], + ['ひらがな', true], + ['ヒラガナ', true], + ['カタカナひらがな', true], + ['かたカタ力方々、。?', false], + ['\u53f1\u{20b9f}', false], + ['kata,.?', false], + ['かたカタ力方々、。?invalid', false], + ['\u53f1\u{20b9f}invalid', false], + ['kata,.?かた', false] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.isStringEntirelyKana(string), expected); + } +} + 
+function testIsStringPartiallyJapanese() { + const data = [ + ['かたかな', true], + ['カタカナ', true], + ['ひらがな', true], + ['ヒラガナ', true], + ['カタカナひらがな', true], + ['かたカタ力方々、。?', true], + ['\u53f1\u{20b9f}', true], + ['kata,.?', false], + ['かたカタ力方々、。?invalid', true], + ['\u53f1\u{20b9f}invalid', true], + ['kata,.?かた', true] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.isStringPartiallyJapanese(string), expected); + } +} + +function testConvertKatakanaToHiragana() { + const data = [ + ['かたかな', 'かたかな'], + ['ひらがな', 'ひらがな'], + ['カタカナ', 'かたかな'], + ['ヒラガナ', 'ひらがな'], + ['カタカナかたかな', 'かたかなかたかな'], + ['ヒラガナひらがな', 'ひらがなひらがな'], + ['chikaraちからチカラ力', 'chikaraちからちから力'], + ['katakana', 'katakana'], + ['hiragana', 'hiragana'] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.convertKatakanaToHiragana(string), expected); + } +} + +function testConvertHiraganaToKatakana() { + const data = [ + ['かたかな', 'カタカナ'], + ['ひらがな', 'ヒラガナ'], + ['カタカナ', 'カタカナ'], + ['ヒラガナ', 'ヒラガナ'], + ['カタカナかたかな', 'カタカナカタカナ'], + ['ヒラガナひらがな', 'ヒラガナヒラガナ'], + ['chikaraちからチカラ力', 'chikaraチカラチカラ力'], + ['katakana', 'katakana'], + ['hiragana', 'hiragana'] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.convertHiraganaToKatakana(string), expected); + } +} + +function testConvertToRomaji() { + const data = [ + ['かたかな', 'katakana'], + ['ひらがな', 'hiragana'], + ['カタカナ', 'katakana'], + ['ヒラガナ', 'hiragana'], + ['カタカナかたかな', 'katakanakatakana'], + ['ヒラガナひらがな', 'hiraganahiragana'], + ['chikaraちからチカラ力', 'chikarachikarachikara力'], + ['katakana', 'katakana'], + ['hiragana', 'hiragana'] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.convertToRomaji(string), expected); + } +} + +function testConvertReading() { + const data = [ + [['アリガトウ', 'アリガトウ', 'hiragana'], 'ありがとう'], + [['アリガトウ', 'アリガトウ', 'katakana'], 'アリガトウ'], + [['アリガトウ', 'アリガトウ', 'romaji'], 'arigatou'], + [['アリガトウ', 'アリガトウ', 'none'], null], + [['アリガトウ', 'アリガトウ', 'default'], 'アリガトウ'], 
+ + [['ありがとう', 'ありがとう', 'hiragana'], 'ありがとう'], + [['ありがとう', 'ありがとう', 'katakana'], 'アリガトウ'], + [['ありがとう', 'ありがとう', 'romaji'], 'arigatou'], + [['ありがとう', 'ありがとう', 'none'], null], + [['ありがとう', 'ありがとう', 'default'], 'ありがとう'], + + [['有り難う', 'ありがとう', 'hiragana'], 'ありがとう'], + [['有り難う', 'ありがとう', 'katakana'], 'アリガトウ'], + [['有り難う', 'ありがとう', 'romaji'], 'arigatou'], + [['有り難う', 'ありがとう', 'none'], null], + [['有り難う', 'ありがとう', 'default'], 'ありがとう'] + ]; + + for (const [[expressionFragment, readingFragment, readingMode], expected] of data) { + assert.strictEqual(jp.convertReading(expressionFragment, readingFragment, readingMode), expected); + } +} + +function testConvertNumericToFullWidth() { + const data = [ + ['0123456789', '0123456789'], + ['abcdefghij', 'abcdefghij'], + ['カタカナ', 'カタカナ'], + ['ひらがな', 'ひらがな'] + ]; + + for (const [string, expected] of data) { + assert.strictEqual(jp.convertNumericToFullWidth(string), expected); + } +} + +function testConvertHalfWidthKanaToFullWidth() { + const data = [ + ['0123456789', '0123456789'], + ['abcdefghij', 'abcdefghij'], + ['カタカナ', 'カタカナ'], + ['ひらがな', 'ひらがな'], + ['カキ', 'カキ', [1, 1]], + ['ガキ', 'ガキ', [2, 1]], + ['ニホン', 'ニホン', [1, 1, 1]], + ['ニッポン', 'ニッポン', [1, 1, 2, 1]] + ]; + + for (const [string, expected, expectedSourceMapping] of data) { + const sourceMapping = new Array(string.length).fill(1); + const actual1 = jp.convertHalfWidthKanaToFullWidth(string, null); + const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMapping); + assert.strictEqual(actual1, expected); + assert.strictEqual(actual2, expected); + if (Array.isArray(expectedSourceMapping)) { + vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); + } + } +} + +function testConvertAlphabeticToKana() { + const data = [ + ['0123456789', '0123456789'], + ['abcdefghij', 'あbcでfgひj', [1, 1, 1, 2, 1, 1, 2, 1]], + ['ABCDEFGHIJ', 'あbcでfgひj', [1, 1, 1, 2, 1, 1, 2, 1]], // wanakana.toHiragana converts text to lower case + ['カタカナ', 'カタカナ'], + ['ひらがな', 'ひらがな'], + 
['chikara', 'ちから', [3, 2, 2]], + ['CHIKARA', 'ちから', [3, 2, 2]] + ]; + + for (const [string, expected, expectedSourceMapping] of data) { + const sourceMapping = new Array(string.length).fill(1); + const actual1 = jp.convertAlphabeticToKana(string, null); + const actual2 = jp.convertAlphabeticToKana(string, sourceMapping); + assert.strictEqual(actual1, expected); + assert.strictEqual(actual2, expected); + if (Array.isArray(expectedSourceMapping)) { + vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); + } + } +} + +function testDistributeFurigana() { + const data = [ + [ + ['有り難う', 'ありがとう'], + [ + {text: '有', furigana: 'あ'}, + {text: 'り'}, + {text: '難', furigana: 'がと'}, + {text: 'う'} + ] + ], + [ + ['方々', 'かたがた'], + [ + {text: '方々', furigana: 'かたがた'} + ] + ], + [ + ['お祝い', 'おいわい'], + [ + {text: 'お'}, + {text: '祝', furigana: 'いわ'}, + {text: 'い'} + ] + ], + [ + ['美味しい', 'おいしい'], + [ + {text: '美味', furigana: 'おい'}, + {text: 'しい'} + ] + ], + [ + ['食べ物', 'たべもの'], + [ + {text: '食', furigana: 'た'}, + {text: 'べ'}, + {text: '物', furigana: 'もの'} + ] + ], + [ + ['試し切り', 'ためしぎり'], + [ + {text: '試', furigana: 'ため'}, + {text: 'し'}, + {text: '切', furigana: 'ぎ'}, + {text: 'り'} + ] + ], + // Ambiguous + [ + ['飼い犬', 'かいいぬ'], + [ + {text: '飼い犬', furigana: 'かいいぬ'} + ] + ], + [ + ['長い間', 'ながいあいだ'], + [ + {text: '長い間', furigana: 'ながいあいだ'} + ] + ] + ]; + + for (const [[expression, reading], expected] of data) { + const actual = jp.distributeFurigana(expression, reading); + vm.assert.deepStrictEqual(actual, expected); + } +} + +function testDistributeFuriganaInflected() { + const data = [ + [ + ['美味しい', 'おいしい', '美味しかた'], + [ + {text: '美味', furigana: 'おい'}, + {text: 'し'}, + {text: 'かた'} + ] + ], + [ + ['食べる', 'たべる', '食べた'], + [ + {text: '食', furigana: 'た'}, + {text: 'べ'}, + {text: 'た'} + ] + ] + ]; + + for (const [[expression, reading, source], expected] of data) { + const actual = jp.distributeFuriganaInflected(expression, reading, source); + vm.assert.deepStrictEqual(actual, 
expected); + } +} + + +function main() { + testIsCodePointKanji(); + testIsCodePointKana(); + testIsCodePointJapanese(); + testIsStringEntirelyKana(); + testIsStringPartiallyJapanese(); + testConvertKatakanaToHiragana(); + testConvertHiraganaToKatakana(); + testConvertToRomaji(); + testConvertReading(); + testConvertNumericToFullWidth(); + testConvertHalfWidthKanaToFullWidth(); + testConvertAlphabeticToKana(); + testDistributeFurigana(); + testDistributeFuriganaInflected(); +} + + +if (require.main === module) { main(); } -- cgit v1.2.3 From 77a2cc60e9a4a89da354cadb1bf060204ee3b951 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 21 Mar 2020 13:18:34 -0400 Subject: Move basic string/character testing functions into a mixed/js/japanese.js --- ext/bg/background.html | 1 + ext/bg/js/japanese.js | 106 +++------------------------------------- ext/bg/search.html | 1 + ext/bg/settings.html | 1 + ext/mixed/js/japanese.js | 124 +++++++++++++++++++++++++++++++++++++++++++++++ test/test-japanese.js | 1 + 6 files changed, 135 insertions(+), 99 deletions(-) create mode 100644 ext/mixed/js/japanese.js (limited to 'test') diff --git a/ext/bg/background.html b/ext/bg/background.html index 44abe8fd..f7cf6e55 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -20,6 +20,7 @@ + diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index d2a577e6..c5873cf1 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -17,10 +17,11 @@ */ /* global + * jp * wanakana */ -const jp = (() => { +(() => { const HALFWIDTH_KATAKANA_MAPPING = new Map([ ['ヲ', 'ヲヺ-'], ['ァ', 'ァ--'], @@ -80,101 +81,13 @@ const jp = (() => { ['ン', 'ン--'] ]); - const HIRAGANA_RANGE = [0x3040, 0x309f]; - const KATAKANA_RANGE = [0x30a0, 0x30ff]; - const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; - - const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 
0x2a6df]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf]; - const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; - const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; - const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ - CJK_UNIFIED_IDEOGRAPHS_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE, - CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, - CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE - ]; - const ITERATION_MARK_CODE_POINT = 0x3005; - // Japanese character ranges, roughly ordered in order of expected frequency - const JAPANESE_RANGES = [ - HIRAGANA_RANGE, - KATAKANA_RANGE, - - ...CJK_UNIFIED_IDEOGRAPHS_RANGES, - - [0xff66, 0xff9f], // Halfwidth katakana - - [0x30fb, 0x30fc], // Katakana punctuation - [0xff61, 0xff65], // Kana punctuation - [0x3000, 0x303f], // CJK punctuation - - [0xff10, 0xff19], // Fullwidth numbers - [0xff21, 0xff3a], // Fullwidth upper case Latin letters - [0xff41, 0xff5a], // Fullwidth lower case Latin letters - - [0xff01, 0xff0f], // Fullwidth punctuation 1 - [0xff1a, 0xff1f], // Fullwidth punctuation 2 - [0xff3b, 0xff3f], // Fullwidth punctuation 3 - [0xff5b, 0xff60], // Fullwidth punctuation 4 - [0xffe0, 0xffee] // Currency markers - ]; - - - // Character code testing functions - - function isCodePointKanji(codePoint) { - return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); - } - - function isCodePointKana(codePoint) { - return isCodePointInRanges(codePoint, KANA_RANGES); - } - - function isCodePointJapanese(codePoint) { - return isCodePointInRanges(codePoint, JAPANESE_RANGES); - } - function isCodePointInRanges(codePoint, ranges) { - for (const [min, max] of 
ranges) { - if (codePoint >= min && codePoint <= max) { - return true; - } - } - return false; - } + // Existing functions - - // String testing functions - - function isStringEntirelyKana(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (!isCodePointKana(c.codePointAt(0))) { - return false; - } - } - return true; - } - - function isStringPartiallyJapanese(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (isCodePointJapanese(c.codePointAt(0))) { - return true; - } - } - return false; - } + const isCodePointKanji = jp.isCodePointKanji; + const isStringEntirelyKana = jp.isStringEntirelyKana; // Conversion functions @@ -469,12 +382,7 @@ const jp = (() => { // Exports - return { - isCodePointKanji, - isCodePointKana, - isCodePointJapanese, - isStringEntirelyKana, - isStringPartiallyJapanese, + Object.assign(jp, { convertKatakanaToHiragana, convertHiraganaToKatakana, convertToRomaji, @@ -484,5 +392,5 @@ const jp = (() => { convertAlphabeticToKana, distributeFurigana, distributeFuriganaInflected - }; + }); })(); diff --git a/ext/bg/search.html b/ext/bg/search.html index f4c1a737..eacc1893 100644 --- a/ext/bg/search.html +++ b/ext/bg/search.html @@ -74,6 +74,7 @@ + diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 0db76d71..cfe20be4 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -1088,6 +1088,7 @@ + diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js new file mode 100644 index 00000000..61a247b2 --- /dev/null +++ b/ext/mixed/js/japanese.js @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +const jp = (() => { + const HIRAGANA_RANGE = [0x3040, 0x309f]; + const KATAKANA_RANGE = [0x30a0, 0x30ff]; + const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; + + const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf]; + const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; + const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; + const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ + CJK_UNIFIED_IDEOGRAPHS_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, + CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE + ]; + + // Japanese character ranges, roughly ordered in order of expected frequency + const JAPANESE_RANGES = [ + HIRAGANA_RANGE, + KATAKANA_RANGE, + + ...CJK_UNIFIED_IDEOGRAPHS_RANGES, + + [0xff66, 0xff9f], // Halfwidth katakana + + [0x30fb, 0x30fc], // Katakana punctuation + [0xff61, 0xff65], // Kana punctuation + [0x3000, 0x303f], // CJK punctuation + + [0xff10, 0xff19], // Fullwidth numbers + [0xff21, 0xff3a], // Fullwidth upper case Latin letters + [0xff41, 0xff5a], // Fullwidth lower case Latin 
letters + + [0xff01, 0xff0f], // Fullwidth punctuation 1 + [0xff1a, 0xff1f], // Fullwidth punctuation 2 + [0xff3b, 0xff3f], // Fullwidth punctuation 3 + [0xff5b, 0xff60], // Fullwidth punctuation 4 + [0xffe0, 0xffee] // Currency markers + ]; + + + // Character code testing functions + + function isCodePointKanji(codePoint) { + return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); + } + + function isCodePointKana(codePoint) { + return isCodePointInRanges(codePoint, KANA_RANGES); + } + + function isCodePointJapanese(codePoint) { + return isCodePointInRanges(codePoint, JAPANESE_RANGES); + } + + function isCodePointInRanges(codePoint, ranges) { + for (const [min, max] of ranges) { + if (codePoint >= min && codePoint <= max) { + return true; + } + } + return false; + } + + + // String testing functions + + function isStringEntirelyKana(str) { + if (str.length === 0) { return false; } + for (const c of str) { + if (!isCodePointKana(c.codePointAt(0))) { + return false; + } + } + return true; + } + + function isStringPartiallyJapanese(str) { + if (str.length === 0) { return false; } + for (const c of str) { + if (isCodePointJapanese(c.codePointAt(0))) { + return true; + } + } + return false; + } + + + // Exports + + return { + isCodePointKanji, + isCodePointKana, + isCodePointJapanese, + isStringEntirelyKana, + isStringPartiallyJapanese + }; +})(); diff --git a/test/test-japanese.js b/test/test-japanese.js index 78f63c0b..32e4d176 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -22,6 +22,7 @@ const {VM} = require('./yomichan-vm'); const vm = new VM(); vm.execute([ 'mixed/lib/wanakana.min.js', + 'mixed/js/japanese.js', 'bg/js/japanese.js' ]); const jp = vm.get('jp'); -- cgit v1.2.3 From 780d23b749325da0a95aa9cc7898df19f2ac1b31 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 21 Mar 2020 14:12:22 -0400 Subject: Add more tests for convertReading --- test/test-japanese.js | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file 
changed, 41 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/test-japanese.js b/test/test-japanese.js index 32e4d176..c5d220e7 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -188,7 +188,47 @@ function testConvertReading() { [['有り難う', 'ありがとう', 'katakana'], 'アリガトウ'], [['有り難う', 'ありがとう', 'romaji'], 'arigatou'], [['有り難う', 'ありがとう', 'none'], null], - [['有り難う', 'ありがとう', 'default'], 'ありがとう'] + [['有り難う', 'ありがとう', 'default'], 'ありがとう'], + + // Cases with falsy readings + + [['ありがとう', '', 'hiragana'], ''], + [['ありがとう', '', 'katakana'], ''], + [['ありがとう', '', 'romaji'], 'arigatou'], + [['ありがとう', '', 'none'], null], + [['ありがとう', '', 'default'], ''], + + [['ありがとう', null, 'hiragana'], ''], + [['ありがとう', null, 'katakana'], ''], + [['ありがとう', null, 'romaji'], 'arigatou'], + [['ありがとう', null, 'none'], null], + [['ありがとう', null, 'default'], null], + + [['ありがとう', void 0, 'hiragana'], ''], + [['ありがとう', void 0, 'katakana'], ''], + [['ありがとう', void 0, 'romaji'], 'arigatou'], + [['ありがとう', void 0, 'none'], null], + [['ありがとう', void 0, 'default'], void 0], + + // Cases with falsy readings and kanji expressions + + [['有り難う', '', 'hiragana'], ''], + [['有り難う', '', 'katakana'], ''], + [['有り難う', '', 'romaji'], ''], + [['有り難う', '', 'none'], null], + [['有り難う', '', 'default'], ''], + + [['有り難う', null, 'hiragana'], ''], + [['有り難う', null, 'katakana'], ''], + [['有り難う', null, 'romaji'], null], + [['有り難う', null, 'none'], null], + [['有り難う', null, 'default'], null], + + [['有り難う', void 0, 'hiragana'], ''], + [['有り難う', void 0, 'katakana'], ''], + [['有り難う', void 0, 'romaji'], void 0], + [['有り難う', void 0, 'none'], null], + [['有り難う', void 0, 'default'], void 0] ]; for (const [[expressionFragment, readingFragment, readingMode], expected] of data) { -- cgit v1.2.3 From 93f7278586f7b943ae49c00cd14559a2f4b99561 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 23 Feb 2020 14:03:37 -0500 Subject: Update dictionary schema to support pitch accent data --- 
.../data/dictionary-term-meta-bank-v3-schema.json | 64 +++++++++++++++++++++- .../dictionaries/valid-dictionary1/tag_bank_3.json | 4 ++ .../valid-dictionary1/term_meta_bank_1.json | 36 +++++++++++- test/test-database.js | 9 +-- 4 files changed, 105 insertions(+), 8 deletions(-) create mode 100644 test/data/dictionaries/valid-dictionary1/tag_bank_3.json (limited to 'test') diff --git a/ext/bg/data/dictionary-term-meta-bank-v3-schema.json b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json index 1cc0557f..8475db81 100644 --- a/ext/bg/data/dictionary-term-meta-bank-v3-schema.json +++ b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json @@ -13,13 +13,71 @@ }, { "type": "string", - "enum": ["freq"], - "description": "Type of data. \"freq\" corresponds to frequency information." + "enum": ["freq", "pitch"], + "description": "Type of data. \"freq\" corresponds to frequency information; \"pitch\" corresponds to pitch information." }, { - "type": ["string", "number"], "description": "Data for the term/expression." } + ], + "oneOf": [ + { + "items": [ + {}, + {"enum": ["freq"]}, + { + "type": ["string", "number"], + "description": "Frequency information for the term or expression." + } + ] + }, + { + "items": [ + {}, + {"enum": ["pitch"]}, + { + "type": ["object"], + "description": "Pitch accent information for the term or expression.", + "required": [ + "reading", + "pitches" + ], + "additionalProperties": false, + "properties": { + "reading": { + "type": "string", + "description": "Reading for the term or expression." + }, + "pitches": { + "type": "array", + "description": "List of different pitch accent information for the term and reading combination.", + "additionalItems": { + "type": "object", + "required": [ + "position" + ], + "additionalProperties": false, + "properties": { + "position": { + "type": "integer", + "description": "Mora position of the pitch accent downstep. 
A value of 0 indicates that the word does not have a downstep (heiban).", + "minimum": 0 + }, + "tags": { + "type": "array", + "description": "List of tags for this pitch accent.", + "items": { + "type": "string", + "description": "Tag for this pitch accent. This typically corresponds to a certain type of part of speech." + } + } + } + } + } + } + } + ] + } ] } } \ No newline at end of file diff --git a/test/data/dictionaries/valid-dictionary1/tag_bank_3.json b/test/data/dictionaries/valid-dictionary1/tag_bank_3.json new file mode 100644 index 00000000..572221fe --- /dev/null +++ b/test/data/dictionaries/valid-dictionary1/tag_bank_3.json @@ -0,0 +1,4 @@ +[ + ["ptag1", "pcategory1", 0, "ptag1 notes", 0], + ["ptag2", "pcategory2", 0, "ptag2 notes", 0] +] \ No newline at end of file diff --git a/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json index 78096502..26922394 100644 --- a/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json +++ b/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json @@ -1,5 +1,39 @@ [ ["打", "freq", 1], ["打つ", "freq", 2], - ["打ち込む", "freq", 3] + ["打ち込む", "freq", 3], + [ + "打ち込む", + "pitch", + { + "reading": "うちこむ", + "pitches": [ + {"position": 0}, + {"position": 3} + ] + } + ], + [ + "打ち込む", + "pitch", + { + "reading": "ぶちこむ", + "pitches": [ + {"position": 0}, + {"position": 3} + ] + } + ], + [ + "お手前", + "pitch", + { + "reading": "おてまえ", + "pitches": [ + {"position": 2, "tags": ["ptag1"]}, + {"position": 2, "tags": ["ptag2"]}, + {"position": 0, "tags": ["ptag2"]} + ] + } + ] ] \ No newline at end of file diff --git a/test/test-database.js b/test/test-database.js index 833aa75d..dbd67257 100644 --- a/test/test-database.js +++ b/test/test-database.js @@ -231,8 +231,8 @@ async function testDatabase1() { true ); vm.assert.deepStrictEqual(counts, { - counts: [{kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 3, tagMeta: 12}], - total: {kanji: 2, 
kanjiMeta: 2, terms: 32, termMeta: 3, tagMeta: 12} + counts: [{kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 6, tagMeta: 14}], + total: {kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 6, tagMeta: 14} }); // Test find* functions @@ -648,9 +648,10 @@ async function testFindTermMetaBulk1(database, titles) { } ], expectedResults: { - total: 1, + total: 3, modes: [ - ['freq', 1] + ['freq', 1], + ['pitch', 2] ] } }, -- cgit v1.2.3 From 0d80fcdf86745da133e4510eeea809a4eeafe120 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 28 Mar 2020 10:47:02 -0400 Subject: Move Japanese utility functions out of display-generator.js --- ext/mixed/js/display-generator.js | 38 +++++---------------------- ext/mixed/js/japanese.js | 26 +++++++++++++++++- test/test-japanese.js | 55 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 33 deletions(-) (limited to 'test') diff --git a/ext/mixed/js/display-generator.js b/ext/mixed/js/display-generator.js index 326f3f54..90361328 100644 --- a/ext/mixed/js/display-generator.js +++ b/ext/mixed/js/display-generator.js @@ -305,7 +305,7 @@ class DisplayGenerator { createPitch(details) { const {expressions, reading, position, tags} = details; - const morae = DisplayGenerator._jpGetKanaMorae(reading); + const morae = jp.getKanaMorae(reading); const node = this._templateHandler.instantiate('term-pitch-accent'); @@ -324,8 +324,8 @@ class DisplayGenerator { n = node.querySelector('.term-pitch-accent-characters'); for (let i = 0, ii = morae.length; i < ii; ++i) { const mora = morae[i]; - const highPitch = DisplayGenerator._jpIsMoraPitchHigh(i, position); - const highPitchNext = DisplayGenerator._jpIsMoraPitchHigh(i + 1, position); + const highPitch = jp.isMoraPitchHigh(i, position); + const highPitchNext = jp.isMoraPitchHigh(i + 1, position); const n1 = this._templateHandler.instantiate('term-pitch-accent-character'); const n2 = n1.querySelector('.term-pitch-accent-character-inner'); @@ -358,8 +358,8 @@ class DisplayGenerator { 
const pathPoints = []; for (let i = 0; i < ii; ++i) { - const highPitch = DisplayGenerator._jpIsMoraPitchHigh(i, position); - const highPitchNext = DisplayGenerator._jpIsMoraPitchHigh(i + 1, position); + const highPitch = jp.isMoraPitchHigh(i, position); + const highPitchNext = jp.isMoraPitchHigh(i + 1, position); const graphic = (highPitch && !highPitchNext ? '#term-pitch-accent-graph-dot-downstep' : '#term-pitch-accent-graph-dot'); const x = `${i * 50 + 25}`; const y = highPitch ? '25' : '75'; @@ -376,7 +376,7 @@ class DisplayGenerator { pathPoints.splice(0, ii - 1); { - const highPitch = DisplayGenerator._jpIsMoraPitchHigh(ii, position); + const highPitch = jp.isMoraPitchHigh(ii, position); const x = `${ii * 50 + 25}`; const y = highPitch ? '25' : '75'; const use = document.createElementNS(svgns, 'use'); @@ -532,30 +532,4 @@ class DisplayGenerator { return true; } - - static _jpGetKanaMorae(text) { - // This function splits Japanese kana reading into its individual mora - // components. It is assumed that the text is well-formed. - const smallKanaSet = DisplayGenerator._smallKanaSet; - const morae = []; - let i; - for (const c of text) { - if (smallKanaSet.has(c) && (i = morae.length) > 0) { - morae[i - 1] += c; - } else { - morae.push(c); - } - } - return morae; - } - - static _jpCreateSmallKanaSet() { - return new Set(Array.from('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ')); - } - - static _jpIsMoraPitchHigh(moraIndex, pitchAccentPosition) { - return pitchAccentPosition === 0 ? 
(moraIndex > 0) : (moraIndex < pitchAccentPosition); - } } - -DisplayGenerator._smallKanaSet = DisplayGenerator._jpCreateSmallKanaSet(); diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js index 61a247b2..e6b9a8a0 100644 --- a/ext/mixed/js/japanese.js +++ b/ext/mixed/js/japanese.js @@ -64,6 +64,8 @@ const jp = (() => { [0xffe0, 0xffee] // Currency markers ]; + const SMALL_KANA_SET = new Set(Array.from('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ')); + // Character code testing functions @@ -112,6 +114,26 @@ const jp = (() => { } + // Mora functions + + function isMoraPitchHigh(moraIndex, pitchAccentPosition) { + return pitchAccentPosition === 0 ? (moraIndex > 0) : (moraIndex < pitchAccentPosition); + } + + function getKanaMorae(text) { + const morae = []; + let i; + for (const c of text) { + if (SMALL_KANA_SET.has(c) && (i = morae.length) > 0) { + morae[i - 1] += c; + } else { + morae.push(c); + } + } + return morae; + } + + // Exports return { @@ -119,6 +141,8 @@ const jp = (() => { isCodePointKana, isCodePointJapanese, isStringEntirelyKana, - isStringPartiallyJapanese + isStringPartiallyJapanese, + isMoraPitchHigh, + getKanaMorae }; })(); diff --git a/test/test-japanese.js b/test/test-japanese.js index c5d220e7..eab632bf 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -392,6 +392,59 @@ function testDistributeFuriganaInflected() { } } +function testIsMoraPitchHigh() { + const data = [ + [[0, 0], false], + [[1, 0], true], + [[2, 0], true], + [[3, 0], true], + + [[0, 1], true], + [[1, 1], false], + [[2, 1], false], + [[3, 1], false], + + [[0, 2], true], + [[1, 2], true], + [[2, 2], false], + [[3, 2], false], + + [[0, 3], true], + [[1, 3], true], + [[2, 3], true], + [[3, 3], false], + + [[0, 4], true], + [[1, 4], true], + [[2, 4], true], + [[3, 4], true] + ]; + + for (const [[moraIndex, pitchAccentPosition], expected] of data) { + const actual = jp.isMoraPitchHigh(moraIndex, pitchAccentPosition); + assert.strictEqual(actual, expected); + } +} + +function 
testGetKanaMorae() { + const data = [ + ['かこ', ['か', 'こ']], + ['かっこ', ['か', 'っ', 'こ']], + ['カコ', ['カ', 'コ']], + ['カッコ', ['カ', 'ッ', 'コ']], + ['コート', ['コ', 'ー', 'ト']], + ['ちゃんと', ['ちゃ', 'ん', 'と']], + ['とうきょう', ['と', 'う', 'きょ', 'う']], + ['ぎゅう', ['ぎゅ', 'う']], + ['ディスコ', ['ディ', 'ス', 'コ']] + ]; + + for (const [text, expected] of data) { + const actual = jp.getKanaMorae(text); + vm.assert.deepStrictEqual(actual, expected); + } +} + function main() { testIsCodePointKanji(); @@ -408,6 +461,8 @@ function main() { testConvertAlphabeticToKana(); testDistributeFurigana(); testDistributeFuriganaInflected(); + testIsMoraPitchHigh(); + testGetKanaMorae(); } -- cgit v1.2.3 From ae84d13757a98e640c8d62f8d856cecbd84dd66f Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 28 Mar 2020 17:51:58 -0400 Subject: Create simplified source map class --- ext/bg/background.html | 1 + ext/bg/js/japanese.js | 31 +++++------- ext/bg/js/text-source-map.js | 115 +++++++++++++++++++++++++++++++++++++++++++ ext/bg/js/translator.js | 30 ++--------- test/test-japanese.js | 18 ++++--- 5 files changed, 143 insertions(+), 52 deletions(-) create mode 100644 ext/bg/js/text-source-map.js (limited to 'test') diff --git a/ext/bg/background.html b/ext/bg/background.html index f7cf6e55..e456717e 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -38,6 +38,7 @@ + diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index c5873cf1..2a2b39fd 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -158,9 +158,8 @@ return result; } - function convertHalfWidthKanaToFullWidth(text, sourceMapping) { + function convertHalfWidthKanaToFullWidth(text, sourceMap=null) { let result = ''; - const hasSourceMapping = Array.isArray(sourceMapping); // This function is safe to use charCodeAt instead of codePointAt, since all // the relevant characters are represented with a single UTF-16 character code. 
@@ -192,10 +191,8 @@ } } - if (hasSourceMapping && index > 0) { - index = result.length; - const v = sourceMapping.splice(index + 1, 1)[0]; - sourceMapping[index] += v; + if (sourceMap !== null && index > 0) { + sourceMap.combine(result.length, 1); } result += c2; } @@ -203,7 +200,7 @@ return result; } - function convertAlphabeticToKana(text, sourceMapping) { + function convertAlphabeticToKana(text, sourceMap=null) { let part = ''; let result = ''; @@ -222,7 +219,7 @@ c = 0x2d; // '-' } else { if (part.length > 0) { - result += convertAlphabeticPartToKana(part, sourceMapping, result.length); + result += convertAlphabeticPartToKana(part, sourceMap, result.length); part = ''; } result += char; @@ -232,17 +229,16 @@ } if (part.length > 0) { - result += convertAlphabeticPartToKana(part, sourceMapping, result.length); + result += convertAlphabeticPartToKana(part, sourceMap, result.length); } return result; } - function convertAlphabeticPartToKana(text, sourceMapping, sourceMappingStart) { + function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) { const result = wanakana.toHiragana(text); // Generate source mapping - if (Array.isArray(sourceMapping)) { - if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; } + if (sourceMap !== null) { let i = 0; let resultPos = 0; const ii = text.length; @@ -262,18 +258,15 @@ // Merge characters const removals = iNext - i - 1; if (removals > 0) { - let sum = 0; - const vs = sourceMapping.splice(sourceMappingStart + 1, removals); - for (const v of vs) { sum += v; } - sourceMapping[sourceMappingStart] += sum; + sourceMap.combine(sourceMapStart, removals); } - ++sourceMappingStart; + ++sourceMapStart; // Empty elements const additions = resultPosNext - resultPos - 1; for (let j = 0; j < additions; ++j) { - sourceMapping.splice(sourceMappingStart, 0, 0); - ++sourceMappingStart; + sourceMap.insert(sourceMapStart, 0); + ++sourceMapStart; } i = iNext; diff --git a/ext/bg/js/text-source-map.js 
b/ext/bg/js/text-source-map.js new file mode 100644 index 00000000..24970978 --- /dev/null +++ b/ext/bg/js/text-source-map.js @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +class TextSourceMap { + constructor(source, mapping=null) { + this._source = source; + this._mapping = (Array.isArray(mapping) ? TextSourceMap._normalizeMapping(mapping) : null); + } + + get source() { + return this._source; + } + + equals(other) { + if (this === other) { + return true; + } + + const source = this._source; + if (!(other instanceof TextSourceMap && source === other._source)) { + return false; + } + + let mapping = this._mapping; + let otherMapping = other._mapping; + if (mapping === null) { + if (otherMapping === null) { + return true; + } + mapping = TextSourceMap._createMapping(source); + } else if (otherMapping === null) { + otherMapping = TextSourceMap._createMapping(source); + } + + const mappingLength = mapping.length; + if (mappingLength !== otherMapping.length) { + return false; + } + + for (let i = 0; i < mappingLength; ++i) { + if (mapping[i] !== otherMapping[i]) { + return false; + } + } + + return true; + } + + getSourceLength(finalLength) { + const mapping = this._mapping; + if (mapping === null) { + return finalLength; + } + + let sourceLength = 0; + for (let i = 0; i < finalLength; ++i) { + sourceLength 
+= mapping[i]; + } + return sourceLength; + } + + combine(index, count) { + if (count <= 0) { return; } + + if (this._mapping === null) { + this._mapping = TextSourceMap._createMapping(this._source); + } + + let sum = this._mapping[index]; + const parts = this._mapping.splice(index + 1, count); + for (const part of parts) { + sum += part; + } + this._mapping[index] = sum; + } + + insert(index, ...items) { + if (this._mapping === null) { + this._mapping = TextSourceMap._createMapping(this._source); + } + + this._mapping.splice(index, 0, ...items); + } + + static _createMapping(text) { + return new Array(text.length).fill(1); + } + + static _normalizeMapping(mapping) { + const result = []; + for (const value of mapping) { + result.push( + (typeof value === 'number' && Number.isFinite(value)) ? + Math.floor(value) : + 0 + ); + } + return result; + } +} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 6f43f7b0..584da02c 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -19,6 +19,7 @@ /* global * Database * Deinflector + * TextSourceMap * dictEnabledSet * dictTagBuildSource * dictTagSanitize @@ -367,17 +368,15 @@ class Translator { const used = new Set(); for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { let text2 = text; - let sourceMapping = null; + const sourceMap = new TextSourceMap(text2); if (halfWidth) { - if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMapping); + text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap); } if (numeric) { text2 = jp.convertNumericToFullWidth(text2); } if (alphabetic) { - if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jp.convertAlphabeticToKana(text2, sourceMapping); + text2 = jp.convertAlphabeticToKana(text2, sourceMap); } if (katakana) { text2 = 
jp.convertHiraganaToKatakana(text2); @@ -391,7 +390,7 @@ class Translator { if (used.has(text2Substring)) { break; } used.add(text2Substring); for (const deinflection of this.deinflector.deinflect(text2Substring)) { - deinflection.rawSource = Translator.getDeinflectionRawSource(text, i, sourceMapping); + deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); deinflections.push(deinflection); } } @@ -407,25 +406,6 @@ class Translator { } } - static getDeinflectionRawSource(source, length, sourceMapping) { - if (sourceMapping === null) { - return source.substring(0, length); - } - - let result = ''; - let index = 0; - for (let i = 0; i < length; ++i) { - const c = sourceMapping[i]; - result += source.substring(index, index + c); - index += c; - } - return result; - } - - static createTextSourceMapping(text) { - return new Array(text.length).fill(1); - } - async findKanji(text, options) { const dictionaries = dictEnabledSet(options); const kanjiUnique = new Set(); diff --git a/test/test-japanese.js b/test/test-japanese.js index c5d220e7..a16a73b7 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -23,9 +23,11 @@ const vm = new VM(); vm.execute([ 'mixed/lib/wanakana.min.js', 'mixed/js/japanese.js', + 'bg/js/text-source-map.js', 'bg/js/japanese.js' ]); const jp = vm.get('jp'); +const TextSourceMap = vm.get('TextSourceMap'); function testIsCodePointKanji() { @@ -262,13 +264,13 @@ function testConvertHalfWidthKanaToFullWidth() { ]; for (const [string, expected, expectedSourceMapping] of data) { - const sourceMapping = new Array(string.length).fill(1); + const sourceMap = new TextSourceMap(string); const actual1 = jp.convertHalfWidthKanaToFullWidth(string, null); - const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMapping); + const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMap); assert.strictEqual(actual1, expected); assert.strictEqual(actual2, expected); - if 
(Array.isArray(expectedSourceMapping)) { - vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); + if (typeof expectedSourceMapping !== 'undefined') { + assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping))); } } } @@ -285,13 +287,13 @@ function testConvertAlphabeticToKana() { ]; for (const [string, expected, expectedSourceMapping] of data) { - const sourceMapping = new Array(string.length).fill(1); + const sourceMap = new TextSourceMap(string); const actual1 = jp.convertAlphabeticToKana(string, null); - const actual2 = jp.convertAlphabeticToKana(string, sourceMapping); + const actual2 = jp.convertAlphabeticToKana(string, sourceMap); assert.strictEqual(actual1, expected); assert.strictEqual(actual2, expected); - if (Array.isArray(expectedSourceMapping)) { - vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); + if (typeof expectedSourceMapping !== 'undefined') { + assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping))); } } } -- cgit v1.2.3 From 7225201fb6776664d7a820e45e85c3500e83c80f Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 28 Mar 2020 18:24:51 -0400 Subject: Add tests --- package.json | 2 +- test/test-text-source-map.js | 234 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 test/test-text-source-map.js (limited to 'test') diff --git a/package.json b/package.json index 8ae103a0..b02ec179 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "scripts": { "test": "npm run test-lint && npm run test-code", "test-lint": "eslint . 
&& node ./test/lint/global-declarations.js", - "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js" + "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js && node ./test/test-text-source-map.js" }, "repository": { "type": "git", diff --git a/test/test-text-source-map.js b/test/test-text-source-map.js new file mode 100644 index 00000000..25bd8fc2 --- /dev/null +++ b/test/test-text-source-map.js @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +const assert = require('assert'); +const {VM} = require('./yomichan-vm'); + +const vm = new VM(); +vm.execute(['bg/js/text-source-map.js']); +const TextSourceMap = vm.get('TextSourceMap'); + + +function testSource() { + const data = [ + ['source1'], + ['source2'], + ['source3'] + ]; + + for (const [source] of data) { + const sourceMap = new TextSourceMap(source); + assert.strictEqual(source, sourceMap.source); + } +} + +function testEquals() { + const data = [ + [['source1', null], ['source1', null], true], + [['source2', null], ['source2', null], true], + [['source3', null], ['source3', null], true], + + [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source1', null], true], + [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source2', null], true], + [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source3', null], true], + + [['source1', null], ['source1', [1, 1, 1, 1, 1, 1, 1]], true], + [['source2', null], ['source2', [1, 1, 1, 1, 1, 1, 1]], true], + [['source3', null], ['source3', [1, 1, 1, 1, 1, 1, 1]], true], + + [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source1', [1, 1, 1, 1, 1, 1, 1]], true], + [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source2', [1, 1, 1, 1, 1, 1, 1]], true], + [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source3', [1, 1, 1, 1, 1, 1, 1]], true], + + [['source1', [1, 2, 1, 3]], ['source1', [1, 2, 1, 3]], true], + [['source2', [1, 2, 1, 3]], ['source2', [1, 2, 1, 3]], true], + [['source3', [1, 2, 1, 3]], ['source3', [1, 2, 1, 3]], true], + + [['source1', [1, 3, 1, 2]], ['source1', [1, 2, 1, 3]], false], + [['source2', [1, 3, 1, 2]], ['source2', [1, 2, 1, 3]], false], + [['source3', [1, 3, 1, 2]], ['source3', [1, 2, 1, 3]], false], + + [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source4', [1, 1, 1, 1, 1, 1, 1]], false], + [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source5', [1, 1, 1, 1, 1, 1, 1]], false], + [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source6', [1, 1, 1, 1, 1, 1, 1]], false] + ]; + + for (const [[source1, mapping1], [source2, mapping2], expectedEquals] of data) { + const 
sourceMap1 = new TextSourceMap(source1, mapping1); + const sourceMap2 = new TextSourceMap(source2, mapping2); + assert.ok(sourceMap1.equals(sourceMap1)); + assert.ok(sourceMap2.equals(sourceMap2)); + assert.strictEqual(sourceMap1.equals(sourceMap2), expectedEquals); + } +} + +function testGetSourceLength() { + const data = [ + [['source', [1, 1, 1, 1, 1, 1]], 1, 1], + [['source', [1, 1, 1, 1, 1, 1]], 2, 2], + [['source', [1, 1, 1, 1, 1, 1]], 3, 3], + [['source', [1, 1, 1, 1, 1, 1]], 4, 4], + [['source', [1, 1, 1, 1, 1, 1]], 5, 5], + [['source', [1, 1, 1, 1, 1, 1]], 6, 6], + + [['source', [2, 2, 2]], 1, 2], + [['source', [2, 2, 2]], 2, 4], + [['source', [2, 2, 2]], 3, 6], + + [['source', [3, 3]], 1, 3], + [['source', [3, 3]], 2, 6], + + [['source', [6, 6]], 1, 6] + ]; + + for (const [[source, mapping], finalLength, expectedValue] of data) { + const sourceMap = new TextSourceMap(source, mapping); + assert.strictEqual(sourceMap.getSourceLength(finalLength), expectedValue); + } +} + +function testCombineInsert() { + const data = [ + // No operations + [ + ['source', null], + ['source', [1, 1, 1, 1, 1, 1]], + [] + ], + + // Combine + [ + ['source', null], + ['source', [3, 1, 1, 1]], + [ + ['combine', 0, 2] + ] + ], + [ + ['source', null], + ['source', [1, 1, 1, 3]], + [ + ['combine', 3, 2] + ] + ], + [ + ['source', null], + ['source', [3, 3]], + [ + ['combine', 0, 2], + ['combine', 1, 2] + ] + ], + [ + ['source', null], + ['source', [3, 3]], + [ + ['combine', 3, 2], + ['combine', 0, 2] + ] + ], + + // Insert + [ + ['source', null], + ['source', [0, 1, 1, 1, 1, 1, 1]], + [ + ['insert', 0, 0] + ] + ], + [ + ['source', null], + ['source', [1, 1, 1, 1, 1, 1, 0]], + [ + ['insert', 6, 0] + ] + ], + [ + ['source', null], + ['source', [0, 1, 1, 1, 1, 1, 1, 0]], + [ + ['insert', 0, 0], + ['insert', 7, 0] + ] + ], + [ + ['source', null], + ['source', [0, 1, 1, 1, 1, 1, 1, 0]], + [ + ['insert', 6, 0], + ['insert', 0, 0] + ] + ], + + // Mixed + [ + ['source', null], + ['source', 
[3, 0, 3]], + [ + ['combine', 0, 2], + ['insert', 1, 0], + ['combine', 2, 2] + ] + ], + [ + ['source', null], + ['source', [3, 0, 3]], + [ + ['combine', 0, 2], + ['combine', 1, 2], + ['insert', 1, 0] + ] + ], + [ + ['source', null], + ['source', [3, 0, 3]], + [ + ['insert', 3, 0], + ['combine', 0, 2], + ['combine', 2, 2] + ] + ] + ]; + + for (const [[source, mapping], [expectedSource, expectedMapping], operations] of data) { + const sourceMap = new TextSourceMap(source, mapping); + const expectedSourceMap = new TextSourceMap(expectedSource, expectedMapping); + for (const [operation, ...args] of operations) { + switch (operation) { + case 'combine': + sourceMap.combine(...args); + break; + case 'insert': + sourceMap.insert(...args); + break; + } + } + assert.ok(sourceMap.equals(expectedSourceMap)); + } +} + + +function main() { + testSource(); + testEquals(); + testGetSourceLength(); + testCombineInsert(); +} + + +if (require.main === module) { main(); } -- cgit v1.2.3 From 9052ab8ebd5af505f1992bfc001b226202e2f393 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Mon, 30 Mar 2020 20:51:20 -0400 Subject: Move dictionary import functionality into a new class --- ext/bg/background.html | 1 + ext/bg/js/backend.js | 4 +- ext/bg/js/database.js | 231 ---------------------------------- ext/bg/js/dictionary-importer.js | 266 +++++++++++++++++++++++++++++++++++++++ test/test-database.js | 16 ++- 5 files changed, 281 insertions(+), 237 deletions(-) create mode 100644 ext/bg/js/dictionary-importer.js (limited to 'test') diff --git a/ext/bg/background.html b/ext/bg/background.html index f7cf6e55..62802341 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -30,6 +30,7 @@ + diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index 3ef7c62c..1e8c979f 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -25,6 +25,7 @@ * BackendApiForwarder * ClipboardMonitor * Database + * DictionaryImporter * JsonSchema * Mecab * Translator @@ -45,6 +46,7 @@ 
class Backend { constructor() { this.database = new Database(); + this.dictionaryImporter = new DictionaryImporter(); this.translator = new Translator(this.database); this.anki = new AnkiNull(); this.mecab = new Mecab(); @@ -300,7 +302,7 @@ class Backend { } async importDictionary(archiveSource, onProgress, details) { - return await this.translator.database.importDictionary(archiveSource, onProgress, details); + return await this.dictionaryImporter.import(this.database, archiveSource, onProgress, details); } // Message handlers diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js index 269ad57e..7a4d094b 100644 --- a/ext/bg/js/database.js +++ b/ext/bg/js/database.js @@ -366,172 +366,6 @@ class Database { }); } - async importDictionary(archiveSource, onProgress, details) { - this._validate(); - const hasOnProgress = (typeof onProgress === 'function'); - - // Read archive - const archive = await JSZip.loadAsync(archiveSource); - - // Read and validate index - const indexFileName = 'index.json'; - const indexFile = archive.files[indexFileName]; - if (!indexFile) { - throw new Error('No dictionary index found in archive'); - } - - const index = JSON.parse(await indexFile.async('string')); - - const indexSchema = await this._getSchema('/bg/data/dictionary-index-schema.json'); - Database._validateJsonSchema(index, indexSchema, indexFileName); - - const dictionaryTitle = index.title; - const version = index.format || index.version; - - if (!dictionaryTitle || !index.revision) { - throw new Error('Unrecognized dictionary format'); - } - - // Verify database is not already imported - if (await this.dictionaryExists(dictionaryTitle)) { - throw new Error('Dictionary is already imported'); - } - - // Data format converters - const convertTermBankEntry = (entry) => { - if (version === 1) { - const [expression, reading, definitionTags, rules, score, ...glossary] = entry; - return {expression, reading, definitionTags, rules, score, glossary}; - } else { - const [expression, 
reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; - return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags}; - } - }; - - const convertTermMetaBankEntry = (entry) => { - const [expression, mode, data] = entry; - return {expression, mode, data}; - }; - - const convertKanjiBankEntry = (entry) => { - if (version === 1) { - const [character, onyomi, kunyomi, tags, ...meanings] = entry; - return {character, onyomi, kunyomi, tags, meanings}; - } else { - const [character, onyomi, kunyomi, tags, meanings, stats] = entry; - return {character, onyomi, kunyomi, tags, meanings, stats}; - } - }; - - const convertKanjiMetaBankEntry = (entry) => { - const [character, mode, data] = entry; - return {character, mode, data}; - }; - - const convertTagBankEntry = (entry) => { - const [name, category, order, notes, score] = entry; - return {name, category, order, notes, score}; - }; - - // Archive file reading - const readFileSequence = async (fileNameFormat, convertEntry, schema) => { - const results = []; - for (let i = 1; true; ++i) { - const fileName = fileNameFormat.replace(/\?/, `${i}`); - const file = archive.files[fileName]; - if (!file) { break; } - - const entries = JSON.parse(await file.async('string')); - Database._validateJsonSchema(entries, schema, fileName); - - for (let entry of entries) { - entry = convertEntry(entry); - entry.dictionary = dictionaryTitle; - results.push(entry); - } - } - return results; - }; - - // Load schemas - const dataBankSchemaPaths = this.constructor._getDataBankSchemaPaths(version); - const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); - - // Load data - const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]); - const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]); - const kanjiList = await readFileSequence('kanji_bank_?.json', 
convertKanjiBankEntry, dataBankSchemas[2]); - const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]); - const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]); - - // Old tags - const indexTagMeta = index.tagMeta; - if (typeof indexTagMeta === 'object' && indexTagMeta !== null) { - for (const name of Object.keys(indexTagMeta)) { - const {category, order, notes, score} = indexTagMeta[name]; - tagList.push({name, category, order, notes, score}); - } - } - - // Prefix wildcard support - const prefixWildcardsSupported = !!details.prefixWildcardsSupported; - if (prefixWildcardsSupported) { - for (const entry of termList) { - entry.expressionReverse = stringReverse(entry.expression); - entry.readingReverse = stringReverse(entry.reading); - } - } - - // Add dictionary - const summary = { - title: dictionaryTitle, - revision: index.revision, - sequenced: index.sequenced, - version, - prefixWildcardsSupported - }; - - await this.bulkAdd('dictionaries', [summary], 0, 1); - - // Add data - const errors = []; - const total = ( - termList.length + - termMetaList.length + - kanjiList.length + - kanjiMetaList.length + - tagList.length - ); - let loadedCount = 0; - const maxTransactionLength = 1000; - - const bulkAdd = async (objectStoreName, entries) => { - const ii = entries.length; - for (let i = 0; i < ii; i += maxTransactionLength) { - const count = Math.min(maxTransactionLength, ii - i); - - try { - await this.bulkAdd(objectStoreName, entries, i, count); - } catch (e) { - errors.push(e); - } - - loadedCount += count; - if (hasOnProgress) { - onProgress(total, loadedCount); - } - } - }; - - await bulkAdd('terms', termList); - await bulkAdd('termMeta', termMetaList); - await bulkAdd('kanji', kanjiList); - await bulkAdd('kanjiMeta', kanjiMetaList); - await bulkAdd('tagMeta', tagList); - - return {result: summary, errors}; - } - // Private _validate() { @@ -540,71 +374,6 @@ 
class Database { } } - async _getSchema(fileName) { - let schemaPromise = this._schemas.get(fileName); - if (typeof schemaPromise !== 'undefined') { - return schemaPromise; - } - - schemaPromise = requestJson(chrome.runtime.getURL(fileName), 'GET'); - this._schemas.set(fileName, schemaPromise); - return schemaPromise; - } - - static _validateJsonSchema(value, schema, fileName) { - try { - JsonSchema.validate(value, schema); - } catch (e) { - throw Database._formatSchemaError(e, fileName); - } - } - - static _formatSchemaError(e, fileName) { - const valuePathString = Database._getSchemaErrorPathString(e.info.valuePath, 'dictionary'); - const schemaPathString = Database._getSchemaErrorPathString(e.info.schemaPath, 'schema'); - - const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); - e2.data = e; - - return e2; - } - - static _getSchemaErrorPathString(infoList, base='') { - let result = base; - for (const [part] of infoList) { - switch (typeof part) { - case 'string': - if (result.length > 0) { - result += '.'; - } - result += part; - break; - case 'number': - result += `[${part}]`; - break; - } - } - return result; - } - - static _getDataBankSchemaPaths(version) { - const termBank = ( - version === 1 ? - '/bg/data/dictionary-term-bank-v1-schema.json' : - '/bg/data/dictionary-term-bank-v3-schema.json' - ); - const termMetaBank = '/bg/data/dictionary-term-meta-bank-v3-schema.json'; - const kanjiBank = ( - version === 1 ? 
- '/bg/data/dictionary-kanji-bank-v1-schema.json' : - '/bg/data/dictionary-kanji-bank-v3-schema.json' - ); - const kanjiMetaBank = '/bg/data/dictionary-kanji-meta-bank-v3-schema.json'; - const tagBank = '/bg/data/dictionary-tag-bank-v3-schema.json'; - - return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; - } - async _findGenericBulk(tableName, indexName, indexValueList, dictionaries, createResult) { this._validate(); diff --git a/ext/bg/js/dictionary-importer.js b/ext/bg/js/dictionary-importer.js new file mode 100644 index 00000000..589e7656 --- /dev/null +++ b/ext/bg/js/dictionary-importer.js @@ -0,0 +1,266 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */ + +/* global + * JSZip + * JsonSchema + * requestJson + */ + +class DictionaryImporter { + constructor() { + this._schemas = new Map(); + } + + async import(database, archiveSource, onProgress, details) { + if (!database) { + throw new Error('Invalid database'); + } + if (!database.isPrepared()) { + throw new Error('Database is not ready'); + } + + const hasOnProgress = (typeof onProgress === 'function'); + + // Read archive + const archive = await JSZip.loadAsync(archiveSource); + + // Read and validate index + const indexFileName = 'index.json'; + const indexFile = archive.files[indexFileName]; + if (!indexFile) { + throw new Error('No dictionary index found in archive'); + } + + const index = JSON.parse(await indexFile.async('string')); + + const indexSchema = await this._getSchema('/bg/data/dictionary-index-schema.json'); + this._validateJsonSchema(index, indexSchema, indexFileName); + + const dictionaryTitle = index.title; + const version = index.format || index.version; + + if (!dictionaryTitle || !index.revision) { + throw new Error('Unrecognized dictionary format'); + } + + // Verify database is not already imported + if (await database.dictionaryExists(dictionaryTitle)) { + throw new Error('Dictionary is already imported'); + } + + // Data format converters + const convertTermBankEntry = (entry) => { + if (version === 1) { + const [expression, reading, definitionTags, rules, score, ...glossary] = entry; + return {expression, reading, definitionTags, rules, score, glossary}; + } else { + const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; + return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags}; + } + }; + + const convertTermMetaBankEntry = (entry) => { + const [expression, mode, data] = entry; + return {expression, mode, data}; + }; + + const convertKanjiBankEntry = (entry) => { + if (version === 1) { + const [character, onyomi, kunyomi, tags, ...meanings] = entry; + return 
{character, onyomi, kunyomi, tags, meanings}; + } else { + const [character, onyomi, kunyomi, tags, meanings, stats] = entry; + return {character, onyomi, kunyomi, tags, meanings, stats}; + } + }; + + const convertKanjiMetaBankEntry = (entry) => { + const [character, mode, data] = entry; + return {character, mode, data}; + }; + + const convertTagBankEntry = (entry) => { + const [name, category, order, notes, score] = entry; + return {name, category, order, notes, score}; + }; + + // Archive file reading + const readFileSequence = async (fileNameFormat, convertEntry, schema) => { + const results = []; + for (let i = 1; true; ++i) { + const fileName = fileNameFormat.replace(/\?/, `${i}`); + const file = archive.files[fileName]; + if (!file) { break; } + + const entries = JSON.parse(await file.async('string')); + this._validateJsonSchema(entries, schema, fileName); + + for (let entry of entries) { + entry = convertEntry(entry); + entry.dictionary = dictionaryTitle; + results.push(entry); + } + } + return results; + }; + + // Load schemas + const dataBankSchemaPaths = this._getDataBankSchemaPaths(version); + const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); + + // Load data + const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]); + const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]); + const kanjiList = await readFileSequence('kanji_bank_?.json', convertKanjiBankEntry, dataBankSchemas[2]); + const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]); + const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]); + + // Old tags + const indexTagMeta = index.tagMeta; + if (typeof indexTagMeta === 'object' && indexTagMeta !== null) { + for (const name of Object.keys(indexTagMeta)) { + const {category, order, notes, 
score} = indexTagMeta[name]; + tagList.push({name, category, order, notes, score}); + } + } + + // Prefix wildcard support + const prefixWildcardsSupported = !!details.prefixWildcardsSupported; + if (prefixWildcardsSupported) { + for (const entry of termList) { + entry.expressionReverse = stringReverse(entry.expression); + entry.readingReverse = stringReverse(entry.reading); + } + } + + // Add dictionary + const summary = { + title: dictionaryTitle, + revision: index.revision, + sequenced: index.sequenced, + version, + prefixWildcardsSupported + }; + + await database.bulkAdd('dictionaries', [summary], 0, 1); + + // Add data + const errors = []; + const total = ( + termList.length + + termMetaList.length + + kanjiList.length + + kanjiMetaList.length + + tagList.length + ); + let loadedCount = 0; + const maxTransactionLength = 1000; + + const bulkAdd = async (objectStoreName, entries) => { + const ii = entries.length; + for (let i = 0; i < ii; i += maxTransactionLength) { + const count = Math.min(maxTransactionLength, ii - i); + + try { + await database.bulkAdd(objectStoreName, entries, i, count); + } catch (e) { + errors.push(e); + } + + loadedCount += count; + if (hasOnProgress) { + onProgress(total, loadedCount); + } + } + }; + + await bulkAdd('terms', termList); + await bulkAdd('termMeta', termMetaList); + await bulkAdd('kanji', kanjiList); + await bulkAdd('kanjiMeta', kanjiMetaList); + await bulkAdd('tagMeta', tagList); + + return {result: summary, errors}; + } + + async _getSchema(fileName) { + let schemaPromise = this._schemas.get(fileName); + if (typeof schemaPromise !== 'undefined') { + return schemaPromise; + } + + schemaPromise = requestJson(chrome.runtime.getURL(fileName), 'GET'); + this._schemas.set(fileName, schemaPromise); + return schemaPromise; + } + + _validateJsonSchema(value, schema, fileName) { + try { + JsonSchema.validate(value, schema); + } catch (e) { + throw this._formatSchemaError(e, fileName); + } + } + + _formatSchemaError(e, fileName) { +
const valuePathString = this._getSchemaErrorPathString(e.info.valuePath, 'dictionary'); + const schemaPathString = this._getSchemaErrorPathString(e.info.schemaPath, 'schema'); + + const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); + e2.data = e; + + return e2; + } + + _getSchemaErrorPathString(infoList, base='') { + let result = base; + for (const [part] of infoList) { + switch (typeof part) { + case 'string': + if (result.length > 0) { + result += '.'; + } + result += part; + break; + case 'number': + result += `[${part}]`; + break; + } + } + return result; + } + + _getDataBankSchemaPaths(version) { + const termBank = ( + version === 1 ? + '/bg/data/dictionary-term-bank-v1-schema.json' : + '/bg/data/dictionary-term-bank-v3-schema.json' + ); + const termMetaBank = '/bg/data/dictionary-term-meta-bank-v3-schema.json'; + const kanjiBank = ( + version === 1 ? + '/bg/data/dictionary-kanji-bank-v1-schema.json' : + '/bg/data/dictionary-kanji-bank-v3-schema.json' + ); + const kanjiMetaBank = '/bg/data/dictionary-kanji-meta-bank-v3-schema.json'; + const tagBank = '/bg/data/dictionary-tag-bank-v3-schema.json'; + + return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; + } +} diff --git a/test/test-database.js b/test/test-database.js index 833aa75d..c3402b73 100644 --- a/test/test-database.js +++ b/test/test-database.js @@ -107,8 +107,10 @@ vm.execute([ 'bg/js/dictionary.js', 'mixed/js/core.js', 'bg/js/request.js', + 'bg/js/dictionary-importer.js', 'bg/js/database.js' ]); +const DictionaryImporter = vm.get('DictionaryImporter'); const Database = vm.get('Database'); @@ -196,6 +198,7 @@ async function testDatabase1() { ]; // Setup database + const dictionaryImporter = new DictionaryImporter(); const database = new Database(); await database.prepare(); @@ -210,7 +213,8 @@ async function testDatabase1() { // Import data let progressEvent = false; - const {result, 
errors} = await database.importDictionary( + const {result, errors} = await dictionaryImporter.import( + database, testDictionarySource, () => { progressEvent = true; @@ -847,6 +851,7 @@ async function testDatabase2() { ]); // Setup database + const dictionaryImporter = new DictionaryImporter(); const database = new Database(); // Error: not prepared @@ -862,17 +867,17 @@ async function testDatabase2() { await assert.rejects(async () => await database.findTagForTitle('tag', title)); await assert.rejects(async () => await database.getDictionaryInfo()); await assert.rejects(async () => await database.getDictionaryCounts(titles, true)); - await assert.rejects(async () => await database.importDictionary(testDictionarySource, () => {}, {})); + await assert.rejects(async () => await dictionaryImporter.import(database, testDictionarySource, () => {}, {})); await database.prepare(); // Error: already prepared await assert.rejects(async () => await database.prepare()); - await database.importDictionary(testDictionarySource, () => {}, {}); + await dictionaryImporter.import(database, testDictionarySource, () => {}, {}); // Error: dictionary already imported - await assert.rejects(async () => await database.importDictionary(testDictionarySource, () => {}, {})); + await assert.rejects(async () => await dictionaryImporter.import(database, testDictionarySource, () => {}, {})); await database.close(); } @@ -889,6 +894,7 @@ async function testDatabase3() { ]; // Setup database + const dictionaryImporter = new DictionaryImporter(); const database = new Database(); await database.prepare(); @@ -898,7 +904,7 @@ async function testDatabase3() { let error = null; try { - await database.importDictionary(testDictionarySource, () => {}, {}); + await dictionaryImporter.import(database, testDictionarySource, () => {}, {}); } catch (e) { error = e; } -- cgit v1.2.3 From ae246093a98ab1386c4422306928091f30a27fda Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 4 Apr 2020 16:56:06 
-0400 Subject: Create some tests for fullscreen elements, + + +
+
<iframe> element inside of a shadow DOM.
+
+ + +
+ + + \ No newline at end of file diff --git a/test/data/html/test-stylesheet.css b/test/data/html/test-stylesheet.css index ab25732e..f63d2481 100644 --- a/test/data/html/test-stylesheet.css +++ b/test/data/html/test-stylesheet.css @@ -7,6 +7,7 @@ body { margin: 0 auto; background-color: #f8f8f8; counter-reset: test-id; + overflow-y: scroll; } h1 { @@ -14,6 +15,19 @@ h1 { margin: 0.67em 0; } +p { + margin: 0.33em 0; +} + +h1+p { + margin-top: -0.67em; +} + +a, a:visited { + color: #1080c0; + text-decoration: underline; +} + .test { background-color: #ffffff; margin: 1em 0; @@ -30,3 +44,8 @@ h1 { border-bottom: 1px solid #d8d8d8; font-weight: bold; } + +.description { + color: #444444; + font-style: italic; +} -- cgit v1.2.3 From a4c7d243f4cddfc4e2b29132fe65fe07ff86bc36 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 4 Apr 2020 20:21:39 -0400 Subject: Add allowfullscreen="true" --- test/data/html/test-document2.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/data/html/test-document2.html b/test/data/html/test-document2.html index 0561a176..3a22a5bf 100644 --- a/test/data/html/test-document2.html +++ b/test/data/html/test-document2.html @@ -57,14 +57,14 @@ document.querySelector('#fullscreen-link1').addEventListener('click', () => togg
<iframe> element.
- +
<iframe> element inside of a shadow DOM.