diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-04-05 12:52:07 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-05 12:52:07 -0400 |
commit | 3684a479c5e12efe63c54e5532a264d157a6816d (patch) | |
tree | bb1a3141c37836ab0f8a1cbff5ed781cc41eac39 /ext/bg/js | |
parent | 058f626efd6c5d6fae66346d487c10930d769971 (diff) | |
parent | 9052ab8ebd5af505f1992bfc001b226202e2f393 (diff) |
Merge pull request #424 from toasted-nutbread/dictionary-importer
Dictionary importer
Diffstat (limited to 'ext/bg/js')
-rw-r--r-- | ext/bg/js/backend.js | 11 | ||||
-rw-r--r-- | ext/bg/js/database.js | 295 | ||||
-rw-r--r-- | ext/bg/js/dictionary-importer.js | 266 | ||||
-rw-r--r-- | ext/bg/js/translator.js | 18 | ||||
-rw-r--r-- | ext/bg/js/util.js | 2 |
5 files changed, 314 insertions, 278 deletions
diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index b217e64d..1e8c979f 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -24,6 +24,8 @@ * AudioUriBuilder * BackendApiForwarder * ClipboardMonitor + * Database + * DictionaryImporter * JsonSchema * Mecab * Translator @@ -43,7 +45,9 @@ class Backend { constructor() { - this.translator = new Translator(); + this.database = new Database(); + this.dictionaryImporter = new DictionaryImporter(); + this.translator = new Translator(this.database); this.anki = new AnkiNull(); this.mecab = new Mecab(); this.clipboardMonitor = new ClipboardMonitor({getClipboard: this._onApiClipboardGet.bind(this)}); @@ -107,6 +111,7 @@ class Backend { } async prepare() { + await this.database.prepare(); await this.translator.prepare(); this.optionsSchema = await requestJson(chrome.runtime.getURL('/bg/data/options-schema.json'), 'GET'); @@ -296,6 +301,10 @@ class Backend { return true; } + async importDictionary(archiveSource, onProgress, details) { + return await this.dictionaryImporter.import(this.database, archiveSource, onProgress, details); + } + // Message handlers _onApiYomichanCoreReady(_params, sender) { diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js index 08a2a39f..7a4d094b 100644 --- a/ext/bg/js/database.js +++ b/ext/bg/js/database.js @@ -110,6 +110,10 @@ class Database { this.db = null; } + isPrepared() { + return this.db !== null; + } + async purge() { this._validate(); @@ -322,177 +326,44 @@ class Database { return result; } - async importDictionary(archiveSource, onProgress, details) { + async dictionaryExists(title) { this._validate(); - const db = this.db; - const hasOnProgress = (typeof onProgress === 'function'); - - // Read archive - const archive = await JSZip.loadAsync(archiveSource); - - // Read and validate index - const indexFileName = 'index.json'; - const indexFile = archive.files[indexFileName]; - if (!indexFile) { - throw new Error('No dictionary index found in archive'); - } - - const index = JSON.parse(await indexFile.async('string')); - - const indexSchema = await this._getSchema('/bg/data/dictionary-index-schema.json'); - Database._validateJsonSchema(index, indexSchema, indexFileName); - - const dictionaryTitle = index.title; - const version = index.format || index.version; - - if (!dictionaryTitle || !index.revision) { - throw new Error('Unrecognized dictionary format'); - } - - // Verify database is not already imported - if (await this._dictionaryExists(dictionaryTitle)) { - throw new Error('Dictionary is already imported'); - } - - // Data format converters - const convertTermBankEntry = (entry) => { - if (version === 1) { - const [expression, reading, definitionTags, rules, score, ...glossary] = entry; - return {expression, reading, definitionTags, rules, score, glossary}; - } else { - const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; - return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags}; - } - }; - - const convertTermMetaBankEntry = (entry) => { - const [expression, mode, data] = entry; - return {expression, mode, data}; - }; - - const convertKanjiBankEntry = (entry) => { - if (version === 1) { - const [character, onyomi, kunyomi, tags, ...meanings] = entry; - return {character, onyomi, kunyomi, tags, meanings}; - } else { - const [character, onyomi, kunyomi, tags, meanings, stats] = entry; - return {character, onyomi, kunyomi, tags, meanings, stats}; - } - }; - - const convertKanjiMetaBankEntry = (entry) => { - const [character, mode, data] = entry; - return {character, mode, data}; - }; - - const convertTagBankEntry = (entry) => { - const [name, category, order, notes, score] = entry; - return {name, category, order, notes, score}; - }; + const transaction = this.db.transaction(['dictionaries'], 'readonly'); + const index = transaction.objectStore('dictionaries').index('title'); + const query = IDBKeyRange.only(title); + const count = await Database._getCount(index, query); + return count > 0; + } - // Archive file reading - const readFileSequence = async (fileNameFormat, convertEntry, schema) => { - const results = []; - for (let i = 1; true; ++i) { - const fileName = fileNameFormat.replace(/\?/, `${i}`); - const file = archive.files[fileName]; - if (!file) { break; } - - const entries = JSON.parse(await file.async('string')); - Database._validateJsonSchema(entries, schema, fileName); - - for (let entry of entries) { - entry = convertEntry(entry); - entry.dictionary = dictionaryTitle; - results.push(entry); - } - } - return results; - }; + bulkAdd(objectStoreName, items, start, count) { + return new Promise((resolve, reject) => { + const transaction = this.db.transaction([objectStoreName], 'readwrite'); + const objectStore = transaction.objectStore(objectStoreName); - // Load schemas - const dataBankSchemaPaths = this.constructor._getDataBankSchemaPaths(version); - const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); - - // Load data - const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]); - const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]); - const kanjiList = await readFileSequence('kanji_bank_?.json', convertKanjiBankEntry, dataBankSchemas[2]); - const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]); - const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]); - - // Old tags - const indexTagMeta = index.tagMeta; - if (typeof indexTagMeta === 'object' && indexTagMeta !== null) { - for (const name of Object.keys(indexTagMeta)) { - const {category, order, notes, score} = indexTagMeta[name]; - tagList.push({name, category, order, notes, score}); + if (start + count > items.length) { + count = items.length - start; } - } - // Prefix wildcard support - const prefixWildcardsSupported = !!details.prefixWildcardsSupported; - if (prefixWildcardsSupported) { - for (const entry of termList) { - entry.expressionReverse = stringReverse(entry.expression); - entry.readingReverse = stringReverse(entry.reading); + if (count <= 0) { + resolve(); + return; } - } - - // Add dictionary - const summary = { - title: dictionaryTitle, - revision: index.revision, - sequenced: index.sequenced, - version, - prefixWildcardsSupported - }; - - { - const transaction = db.transaction(['dictionaries'], 'readwrite'); - const objectStore = transaction.objectStore('dictionaries'); - await Database._bulkAdd(objectStore, [summary], 0, 1); - } - - // Add data - const errors = []; - const total = ( - termList.length + - termMetaList.length + - kanjiList.length + - kanjiMetaList.length + - tagList.length - ); - let loadedCount = 0; - const maxTransactionLength = 1000; - - const bulkAdd = async (objectStoreName, entries) => { - const ii = entries.length; - for (let i = 0; i < ii; i += maxTransactionLength) { - const count = Math.min(maxTransactionLength, ii - i); - try { - const transaction = db.transaction([objectStoreName], 'readwrite'); - const objectStore = transaction.objectStore(objectStoreName); - await Database._bulkAdd(objectStore, entries, i, count); - } catch (e) { - errors.push(e); + const end = start + count; + let completedCount = 0; + const onError = (e) => reject(e); + const onSuccess = () => { + if (++completedCount >= count) { + resolve(); } + }; - loadedCount += count; - if (hasOnProgress) { - onProgress(total, loadedCount); - } + for (let i = start; i < end; ++i) { + const request = objectStore.add(items[i]); + request.onerror = onError; + request.onsuccess = onSuccess; } - }; - - await bulkAdd('terms', termList); - await bulkAdd('termMeta', termMetaList); - await bulkAdd('kanji', kanjiList); - await bulkAdd('kanjiMeta', kanjiMetaList); - await bulkAdd('tagMeta', tagList); - - return {result: summary, errors}; + }); } // Private @@ -503,80 +374,6 @@ class Database { } } - async _getSchema(fileName) { - let schemaPromise = this._schemas.get(fileName); - if (typeof schemaPromise !== 'undefined') { - return schemaPromise; - } - - schemaPromise = requestJson(chrome.runtime.getURL(fileName), 'GET'); - this._schemas.set(fileName, schemaPromise); - return schemaPromise; - } - - static _validateJsonSchema(value, schema, fileName) { - try { - JsonSchema.validate(value, schema); - } catch (e) { - throw Database._formatSchemaError(e, fileName); - } - } - - static _formatSchemaError(e, fileName) { - const valuePathString = Database._getSchemaErrorPathString(e.info.valuePath, 'dictionary'); - const schemaPathString = Database._getSchemaErrorPathString(e.info.schemaPath, 'schema'); - - const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); - e2.data = e; - - return e2; - } - - static _getSchemaErrorPathString(infoList, base='') { - let result = base; - for (const [part] of infoList) { - switch (typeof part) { - case 'string': - if (result.length > 0) { - result += '.'; - } - result += part; - break; - case 'number': - result += `[${part}]`; - break; - } - } - return result; - } - - static _getDataBankSchemaPaths(version) { - const termBank = ( - version === 1 ? - '/bg/data/dictionary-term-bank-v1-schema.json' : - '/bg/data/dictionary-term-bank-v3-schema.json' - ); - const termMetaBank = '/bg/data/dictionary-term-meta-bank-v3-schema.json'; - const kanjiBank = ( - version === 1 ? - '/bg/data/dictionary-kanji-bank-v1-schema.json' : - '/bg/data/dictionary-kanji-bank-v3-schema.json' - ); - const kanjiMetaBank = '/bg/data/dictionary-kanji-meta-bank-v3-schema.json'; - const tagBank = '/bg/data/dictionary-tag-bank-v3-schema.json'; - - return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; - } - - async _dictionaryExists(title) { - const db = this.db; - const dbCountTransaction = db.transaction(['dictionaries'], 'readonly'); - const dbIndex = dbCountTransaction.objectStore('dictionaries').index('title'); - const only = IDBKeyRange.only(title); - const count = await Database._getCount(dbIndex, only); - return count > 0; - } - async _findGenericBulk(tableName, indexName, indexValueList, dictionaries, createResult) { this._validate(); @@ -760,34 +557,6 @@ class Database { }); } - static _bulkAdd(objectStore, items, start, count) { - return new Promise((resolve, reject) => { - if (start + count > items.length) { - count = items.length - start; - } - - if (count <= 0) { - resolve(); - return; - } - - const end = start + count; - let completedCount = 0; - const onError = (e) => reject(e); - const onSuccess = () => { - if (++completedCount >= count) { - resolve(); - } - }; - - for (let i = start; i < end; ++i) { - const request = objectStore.add(items[i]); - request.onerror = onError; - request.onsuccess = onSuccess; - } - }); - } - static _open(name, version, onUpgradeNeeded) { return new Promise((resolve, reject) => { const request = window.indexedDB.open(name, version * 10); diff --git a/ext/bg/js/dictionary-importer.js b/ext/bg/js/dictionary-importer.js new file mode 100644 index 00000000..589e7656 --- /dev/null +++ b/ext/bg/js/dictionary-importer.js @@ -0,0 +1,266 @@ +/* + * Copyright (C) 2020 Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/* global + * JSZip + * JsonSchema + * requestJson + */ + +class DictionaryImporter { + constructor() { + this._schemas = new Map(); + } + + async import(database, archiveSource, onProgress, details) { + if (!database) { + throw new Error('Invalid database'); + } + if (!database.isPrepared()) { + throw new Error('Database is not ready'); + } + + const hasOnProgress = (typeof onProgress === 'function'); + + // Read archive + const archive = await JSZip.loadAsync(archiveSource); + + // Read and validate index + const indexFileName = 'index.json'; + const indexFile = archive.files[indexFileName]; + if (!indexFile) { + throw new Error('No dictionary index found in archive'); + } + + const index = JSON.parse(await indexFile.async('string')); + + const indexSchema = await this._getSchema('/bg/data/dictionary-index-schema.json'); + this._validateJsonSchema(index, indexSchema, indexFileName); + + const dictionaryTitle = index.title; + const version = index.format || index.version; + + if (!dictionaryTitle || !index.revision) { + throw new Error('Unrecognized dictionary format'); + } + + // Verify database is not already imported + if (await database.dictionaryExists(dictionaryTitle)) { + throw new Error('Dictionary is already imported'); + } + + // Data format converters + const convertTermBankEntry = (entry) => { + if (version === 1) { + const [expression, reading, definitionTags, rules, score, ...glossary] = entry; + return {expression, reading, definitionTags, rules, score, glossary}; + } else { + const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; + return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags}; + } + }; + + const convertTermMetaBankEntry = (entry) => { + const [expression, mode, data] = entry; + return {expression, mode, data}; + }; + + const convertKanjiBankEntry = (entry) => { + if (version === 1) { + const [character, onyomi, kunyomi, tags, ...meanings] = entry; + return {character, onyomi, kunyomi, tags, meanings}; + } else { + const [character, onyomi, kunyomi, tags, meanings, stats] = entry; + return {character, onyomi, kunyomi, tags, meanings, stats}; + } + }; + + const convertKanjiMetaBankEntry = (entry) => { + const [character, mode, data] = entry; + return {character, mode, data}; + }; + + const convertTagBankEntry = (entry) => { + const [name, category, order, notes, score] = entry; + return {name, category, order, notes, score}; + }; + + // Archive file reading + const readFileSequence = async (fileNameFormat, convertEntry, schema) => { + const results = []; + for (let i = 1; true; ++i) { + const fileName = fileNameFormat.replace(/\?/, `${i}`); + const file = archive.files[fileName]; + if (!file) { break; } + + const entries = JSON.parse(await file.async('string')); + this._validateJsonSchema(entries, schema, fileName); + + for (let entry of entries) { + entry = convertEntry(entry); + entry.dictionary = dictionaryTitle; + results.push(entry); + } + } + return results; + }; + + // Load schemas + const dataBankSchemaPaths = this._getDataBankSchemaPaths(version); + const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); + + // Load data + const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]); + const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]); + const kanjiList = await readFileSequence('kanji_bank_?.json', convertKanjiBankEntry, dataBankSchemas[2]); + const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]); + const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]); + + // Old tags + const indexTagMeta = index.tagMeta; + if (typeof indexTagMeta === 'object' && indexTagMeta !== null) { + for (const name of Object.keys(indexTagMeta)) { + const {category, order, notes, score} = indexTagMeta[name]; + tagList.push({name, category, order, notes, score}); + } + } + + // Prefix wildcard support + const prefixWildcardsSupported = !!details.prefixWildcardsSupported; + if (prefixWildcardsSupported) { + for (const entry of termList) { + entry.expressionReverse = stringReverse(entry.expression); + entry.readingReverse = stringReverse(entry.reading); + } + } + + // Add dictionary + const summary = { + title: dictionaryTitle, + revision: index.revision, + sequenced: index.sequenced, + version, + prefixWildcardsSupported + }; + + database.bulkAdd('dictionaries', [summary], 0, 1); + + // Add data + const errors = []; + const total = ( + termList.length + + termMetaList.length + + kanjiList.length + + kanjiMetaList.length + + tagList.length + ); + let loadedCount = 0; + const maxTransactionLength = 1000; + + const bulkAdd = async (objectStoreName, entries) => { + const ii = entries.length; + for (let i = 0; i < ii; i += maxTransactionLength) { + const count = Math.min(maxTransactionLength, ii - i); + + try { + await database.bulkAdd(objectStoreName, entries, i, count); + } catch (e) { + errors.push(e); + } + + loadedCount += count; + if (hasOnProgress) { + onProgress(total, loadedCount); + } + } + }; + + await bulkAdd('terms', termList); + await bulkAdd('termMeta', termMetaList); + await bulkAdd('kanji', kanjiList); + await bulkAdd('kanjiMeta', kanjiMetaList); + await bulkAdd('tagMeta', tagList); + + return {result: summary, errors}; + } + + async _getSchema(fileName) { + let schemaPromise = this._schemas.get(fileName); + if (typeof schemaPromise !== 'undefined') { + return schemaPromise; + } + + schemaPromise = requestJson(chrome.runtime.getURL(fileName), 'GET'); + this._schemas.set(fileName, schemaPromise); + return schemaPromise; + } + + _validateJsonSchema(value, schema, fileName) { + try { + JsonSchema.validate(value, schema); + } catch (e) { + throw this._formatSchemaError(e, fileName); + } + } + + _formatSchemaError(e, fileName) { + const valuePathString = this._getSchemaErrorPathString(e.info.valuePath, 'dictionary'); + const schemaPathString = this._getSchemaErrorPathString(e.info.schemaPath, 'schema'); + + const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); + e2.data = e; + + return e2; + } + + _getSchemaErrorPathString(infoList, base='') { + let result = base; + for (const [part] of infoList) { + switch (typeof part) { + case 'string': + if (result.length > 0) { + result += '.'; + } + result += part; + break; + case 'number': + result += `[${part}]`; + break; + } + } + return result; + } + + _getDataBankSchemaPaths(version) { + const termBank = ( + version === 1 ? + '/bg/data/dictionary-term-bank-v1-schema.json' : + '/bg/data/dictionary-term-bank-v3-schema.json' + ); + const termMetaBank = '/bg/data/dictionary-term-meta-bank-v3-schema.json'; + const kanjiBank = ( + version === 1 ? + '/bg/data/dictionary-kanji-bank-v1-schema.json' : + '/bg/data/dictionary-kanji-bank-v3-schema.json' + ); + const kanjiMetaBank = '/bg/data/dictionary-kanji-meta-bank-v3-schema.json'; + const tagBank = '/bg/data/dictionary-tag-bank-v3-schema.json'; + + return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; + } +} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index cd991efa..27f91c05 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -17,7 +17,6 @@ */ /* global - * Database * Deinflector * TextSourceMap * dictEnabledSet @@ -35,23 +34,16 @@ */ class Translator { - constructor() { - this.database = null; + constructor(database) { + this.database = database; this.deinflector = null; this.tagCache = new Map(); } async prepare() { - if (!this.database) { - this.database = new Database(); - await this.database.prepare(); - } - - if (!this.deinflector) { - const url = chrome.runtime.getURL('/bg/lang/deinflect.json'); - const reasons = await requestJson(url, 'GET'); - this.deinflector = new Deinflector(reasons); - } + const url = chrome.runtime.getURL('/bg/lang/deinflect.json'); + const reasons = await requestJson(url, 'GET'); + this.deinflector = new Deinflector(reasons); } async purgeDatabase() { diff --git a/ext/bg/js/util.js b/ext/bg/js/util.js index 79c6af06..a7ed4a34 100644 --- a/ext/bg/js/util.js +++ b/ext/bg/js/util.js @@ -118,7 +118,7 @@ async function utilDatabaseDeleteDictionary(dictionaryName, onProgress) { async function utilDatabaseImport(data, onProgress, details) { data = await utilReadFile(data); - return utilIsolate(await utilBackend().translator.database.importDictionary( + return utilIsolate(await utilBackend().importDictionary( utilBackgroundIsolate(data), utilBackgroundFunctionIsolate(onProgress), utilBackgroundIsolate(details) |