diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-04-05 12:52:07 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-04-05 12:52:07 -0400 | 
| commit | 3684a479c5e12efe63c54e5532a264d157a6816d (patch) | |
| tree | bb1a3141c37836ab0f8a1cbff5ed781cc41eac39 /ext | |
| parent | 058f626efd6c5d6fae66346d487c10930d769971 (diff) | |
| parent | 9052ab8ebd5af505f1992bfc001b226202e2f393 (diff) | |
Merge pull request #424 from toasted-nutbread/dictionary-importer
Dictionary importer
Diffstat (limited to 'ext')
| -rw-r--r-- | ext/bg/background.html | 1 | ||||
| -rw-r--r-- | ext/bg/js/backend.js | 11 | ||||
| -rw-r--r-- | ext/bg/js/database.js | 295 | ||||
| -rw-r--r-- | ext/bg/js/dictionary-importer.js | 266 | ||||
| -rw-r--r-- | ext/bg/js/translator.js | 18 | ||||
| -rw-r--r-- | ext/bg/js/util.js | 2 | 
6 files changed, 315 insertions, 278 deletions
| diff --git a/ext/bg/background.html b/ext/bg/background.html index e456717e..afe9c5d1 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -30,6 +30,7 @@          <script src="/bg/js/clipboard-monitor.js"></script>          <script src="/bg/js/conditions.js"></script>          <script src="/bg/js/database.js"></script> +        <script src="/bg/js/dictionary-importer.js"></script>          <script src="/bg/js/deinflector.js"></script>          <script src="/bg/js/dictionary.js"></script>          <script src="/bg/js/handlebars.js"></script> diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index b217e64d..1e8c979f 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -24,6 +24,8 @@   * AudioUriBuilder   * BackendApiForwarder   * ClipboardMonitor + * Database + * DictionaryImporter   * JsonSchema   * Mecab   * Translator @@ -43,7 +45,9 @@  class Backend {      constructor() { -        this.translator = new Translator(); +        this.database = new Database(); +        this.dictionaryImporter = new DictionaryImporter(); +        this.translator = new Translator(this.database);          this.anki = new AnkiNull();          this.mecab = new Mecab();          this.clipboardMonitor = new ClipboardMonitor({getClipboard: this._onApiClipboardGet.bind(this)}); @@ -107,6 +111,7 @@ class Backend {      }      async prepare() { +        await this.database.prepare();          await this.translator.prepare();          this.optionsSchema = await requestJson(chrome.runtime.getURL('/bg/data/options-schema.json'), 'GET'); @@ -296,6 +301,10 @@ class Backend {          return true;      } +    async importDictionary(archiveSource, onProgress, details) { +        return await this.dictionaryImporter.import(this.database, archiveSource, onProgress, details); +    } +      // Message handlers      _onApiYomichanCoreReady(_params, sender) { diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js index 08a2a39f..7a4d094b 100644 --- a/ext/bg/js/database.js +++ b/ext/bg/js/database.js @@ -110,6 +110,10 @@ class Database {          this.db = null;      } +    isPrepared() { +        return this.db !== null; +    } +      async purge() {          this._validate(); @@ -322,177 +326,44 @@ class Database {          return result;      } -    async importDictionary(archiveSource, onProgress, details) { +    async dictionaryExists(title) {          this._validate(); -        const db = this.db; -        const hasOnProgress = (typeof onProgress === 'function'); - -        // Read archive -        const archive = await JSZip.loadAsync(archiveSource); - -        // Read and validate index -        const indexFileName = 'index.json'; -        const indexFile = archive.files[indexFileName]; -        if (!indexFile) { -            throw new Error('No dictionary index found in archive'); -        } - -        const index = JSON.parse(await indexFile.async('string')); - -        const indexSchema = await this._getSchema('/bg/data/dictionary-index-schema.json'); -        Database._validateJsonSchema(index, indexSchema, indexFileName); - -        const dictionaryTitle = index.title; -        const version = index.format || index.version; - -        if (!dictionaryTitle || !index.revision) { -            throw new Error('Unrecognized dictionary format'); -        } - -        // Verify database is not already imported -        if (await this._dictionaryExists(dictionaryTitle)) { -            throw new Error('Dictionary is already imported'); -        } - -        // Data format converters -        const convertTermBankEntry = (entry) => { -            if (version === 1) { -                const [expression, reading, definitionTags, rules, score, ...glossary] = entry; -                return {expression, reading, definitionTags, rules, score, glossary}; -            } else { -                const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; -                return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags}; -            } -        }; - -        const convertTermMetaBankEntry = (entry) => { -            const [expression, mode, data] = entry; -            return {expression, mode, data}; -        }; - -        const convertKanjiBankEntry = (entry) => { -            if (version === 1) { -                const [character, onyomi, kunyomi, tags, ...meanings] = entry; -                return {character, onyomi, kunyomi, tags, meanings}; -            } else { -                const [character, onyomi, kunyomi, tags, meanings, stats] = entry; -                return {character, onyomi, kunyomi, tags, meanings, stats}; -            } -        }; - -        const convertKanjiMetaBankEntry = (entry) => { -            const [character, mode, data] = entry; -            return {character, mode, data}; -        }; - -        const convertTagBankEntry = (entry) => { -            const [name, category, order, notes, score] = entry; -            return {name, category, order, notes, score}; -        }; +        const transaction = this.db.transaction(['dictionaries'], 'readonly'); +        const index = transaction.objectStore('dictionaries').index('title'); +        const query = IDBKeyRange.only(title); +        const count = await Database._getCount(index, query); +        return count > 0; +    } -        // Archive file reading -        const readFileSequence = async (fileNameFormat, convertEntry, schema) => { -            const results = []; -            for (let i = 1; true; ++i) { -                const fileName = fileNameFormat.replace(/\?/, `${i}`); -                const file = archive.files[fileName]; -                if (!file) { break; } - -                const entries = JSON.parse(await file.async('string')); -                Database._validateJsonSchema(entries, schema, fileName); - -                for (let entry of entries) { -                    entry = convertEntry(entry); -                    entry.dictionary = dictionaryTitle; -                    results.push(entry); -                } -            } -            return results; -        }; +    bulkAdd(objectStoreName, items, start, count) { +        return new Promise((resolve, reject) => { +            const transaction = this.db.transaction([objectStoreName], 'readwrite'); +            const objectStore = transaction.objectStore(objectStoreName); -        // Load schemas -        const dataBankSchemaPaths = this.constructor._getDataBankSchemaPaths(version); -        const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); - -        // Load data -        const termList      = await readFileSequence('term_bank_?.json',       convertTermBankEntry,      dataBankSchemas[0]); -        const termMetaList  = await readFileSequence('term_meta_bank_?.json',  convertTermMetaBankEntry,  dataBankSchemas[1]); -        const kanjiList     = await readFileSequence('kanji_bank_?.json',      convertKanjiBankEntry,     dataBankSchemas[2]); -        const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]); -        const tagList       = await readFileSequence('tag_bank_?.json',        convertTagBankEntry,       dataBankSchemas[4]); - -        // Old tags -        const indexTagMeta = index.tagMeta; -        if (typeof indexTagMeta === 'object' && indexTagMeta !== null) { -            for (const name of Object.keys(indexTagMeta)) { -                const {category, order, notes, score} = indexTagMeta[name]; -                tagList.push({name, category, order, notes, score}); +            if (start + count > items.length) { +                count = items.length - start;              } -        } -        // Prefix wildcard support -        const prefixWildcardsSupported = !!details.prefixWildcardsSupported; -        if (prefixWildcardsSupported) { -            for (const entry of termList) { -                entry.expressionReverse = stringReverse(entry.expression); -                entry.readingReverse = stringReverse(entry.reading); +            if (count <= 0) { +                resolve(); +                return;              } -        } - -        // Add dictionary -        const summary = { -            title: dictionaryTitle, -            revision: index.revision, -            sequenced: index.sequenced, -            version, -            prefixWildcardsSupported -        }; - -        { -            const transaction = db.transaction(['dictionaries'], 'readwrite'); -            const objectStore = transaction.objectStore('dictionaries'); -            await Database._bulkAdd(objectStore, [summary], 0, 1); -        } - -        // Add data -        const errors = []; -        const total = ( -            termList.length + -            termMetaList.length + -            kanjiList.length + -            kanjiMetaList.length + -            tagList.length -        ); -        let loadedCount = 0; -        const maxTransactionLength = 1000; - -        const bulkAdd = async (objectStoreName, entries) => { -            const ii = entries.length; -            for (let i = 0; i < ii; i += maxTransactionLength) { -                const count = Math.min(maxTransactionLength, ii - i); -                try { -                    const transaction = db.transaction([objectStoreName], 'readwrite'); -                    const objectStore = transaction.objectStore(objectStoreName); -                    await Database._bulkAdd(objectStore, entries, i, count); -                } catch (e) { -                    errors.push(e); +            const end = start + count; +            let completedCount = 0; +            const onError = (e) => reject(e); +            const onSuccess = () => { +                if (++completedCount >= count) { +                    resolve();                  } +            }; -                loadedCount += count; -                if (hasOnProgress) { -                    onProgress(total, loadedCount); -                } +            for (let i = start; i < end; ++i) { +                const request = objectStore.add(items[i]); +                request.onerror = onError; +                request.onsuccess = onSuccess;              } -        }; - -        await bulkAdd('terms', termList); -        await bulkAdd('termMeta', termMetaList); -        await bulkAdd('kanji', kanjiList); -        await bulkAdd('kanjiMeta', kanjiMetaList); -        await bulkAdd('tagMeta', tagList); - -        return {result: summary, errors}; +        });      }      // Private @@ -503,80 +374,6 @@ class Database {          }      } -    async _getSchema(fileName) { -        let schemaPromise = this._schemas.get(fileName); -        if (typeof schemaPromise !== 'undefined') { -            return schemaPromise; -        } - -        schemaPromise = requestJson(chrome.runtime.getURL(fileName), 'GET'); -        this._schemas.set(fileName, schemaPromise); -        return schemaPromise; -    } - -    static _validateJsonSchema(value, schema, fileName) { -        try { -            JsonSchema.validate(value, schema); -        } catch (e) { -            throw Database._formatSchemaError(e, fileName); -        } -    } - -    static _formatSchemaError(e, fileName) { -        const valuePathString = Database._getSchemaErrorPathString(e.info.valuePath, 'dictionary'); -        const schemaPathString = Database._getSchemaErrorPathString(e.info.schemaPath, 'schema'); - -        const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); -        e2.data = e; - -        return e2; -    } - -    static _getSchemaErrorPathString(infoList, base='') { -        let result = base; -        for (const [part] of infoList) { -            switch (typeof part) { -                case 'string': -                    if (result.length > 0) { -                        result += '.'; -                    } -                    result += part; -                    break; -                case 'number': -                    result += `[${part}]`; -                    break; -            } -        } -        return result; -    } - -    static _getDataBankSchemaPaths(version) { -        const termBank = ( -            version === 1 ? -            '/bg/data/dictionary-term-bank-v1-schema.json' : -            '/bg/data/dictionary-term-bank-v3-schema.json' -        ); -        const termMetaBank = '/bg/data/dictionary-term-meta-bank-v3-schema.json'; -        const kanjiBank = ( -            version === 1 ? -            '/bg/data/dictionary-kanji-bank-v1-schema.json' : -            '/bg/data/dictionary-kanji-bank-v3-schema.json' -        ); -        const kanjiMetaBank = '/bg/data/dictionary-kanji-meta-bank-v3-schema.json'; -        const tagBank = '/bg/data/dictionary-tag-bank-v3-schema.json'; - -        return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; -    } - -    async _dictionaryExists(title) { -        const db = this.db; -        const dbCountTransaction = db.transaction(['dictionaries'], 'readonly'); -        const dbIndex = dbCountTransaction.objectStore('dictionaries').index('title'); -        const only = IDBKeyRange.only(title); -        const count = await Database._getCount(dbIndex, only); -        return count > 0; -    } -      async _findGenericBulk(tableName, indexName, indexValueList, dictionaries, createResult) {          this._validate(); @@ -760,34 +557,6 @@ class Database {          });      } -    static _bulkAdd(objectStore, items, start, count) { -        return new Promise((resolve, reject) => { -            if (start + count > items.length) { -                count = items.length - start; -            } - -            if (count <= 0) { -                resolve(); -                return; -            } - -            const end = start + count; -            let completedCount = 0; -            const onError = (e) => reject(e); -            const onSuccess = () => { -                if (++completedCount >= count) { -                    resolve(); -                } -            }; - -            for (let i = start; i < end; ++i) { -                const request = objectStore.add(items[i]); -                request.onerror = onError; -                request.onsuccess = onSuccess; -            } -        }); -    } -      static _open(name, version, onUpgradeNeeded) {          return new Promise((resolve, reject) => {              const request = window.indexedDB.open(name, version * 10); diff --git a/ext/bg/js/dictionary-importer.js b/ext/bg/js/dictionary-importer.js new file mode 100644 index 00000000..589e7656 --- /dev/null +++ b/ext/bg/js/dictionary-importer.js @@ -0,0 +1,266 @@ +/* + * Copyright (C) 2020  Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +/* global + * JSZip + * JsonSchema + * requestJson + */ + +class DictionaryImporter { +    constructor() { +        this._schemas = new Map(); +    } + +    async import(database, archiveSource, onProgress, details) { +        if (!database) { +            throw new Error('Invalid database'); +        } +        if (!database.isPrepared()) { +            throw new Error('Database is not ready'); +        } + +        const hasOnProgress = (typeof onProgress === 'function'); + +        // Read archive +        const archive = await JSZip.loadAsync(archiveSource); + +        // Read and validate index +        const indexFileName = 'index.json'; +        const indexFile = archive.files[indexFileName]; +        if (!indexFile) { +            throw new Error('No dictionary index found in archive'); +        } + +        const index = JSON.parse(await indexFile.async('string')); + +        const indexSchema = await this._getSchema('/bg/data/dictionary-index-schema.json'); +        this._validateJsonSchema(index, indexSchema, indexFileName); + +        const dictionaryTitle = index.title; +        const version = index.format || index.version; + +        if (!dictionaryTitle || !index.revision) { +            throw new Error('Unrecognized dictionary format'); +        } + +        // Verify database is not already imported +        if (await database.dictionaryExists(dictionaryTitle)) { +            throw new Error('Dictionary is already imported'); +        } + +        // Data format converters +        const convertTermBankEntry = (entry) => { +            if (version === 1) { +                const [expression, reading, definitionTags, rules, score, ...glossary] = entry; +                return {expression, reading, definitionTags, rules, score, glossary}; +            } else { +                const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; +                return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags}; +            } +        }; + +        const convertTermMetaBankEntry = (entry) => { +            const [expression, mode, data] = entry; +            return {expression, mode, data}; +        }; + +        const convertKanjiBankEntry = (entry) => { +            if (version === 1) { +                const [character, onyomi, kunyomi, tags, ...meanings] = entry; +                return {character, onyomi, kunyomi, tags, meanings}; +            } else { +                const [character, onyomi, kunyomi, tags, meanings, stats] = entry; +                return {character, onyomi, kunyomi, tags, meanings, stats}; +            } +        }; + +        const convertKanjiMetaBankEntry = (entry) => { +            const [character, mode, data] = entry; +            return {character, mode, data}; +        }; + +        const convertTagBankEntry = (entry) => { +            const [name, category, order, notes, score] = entry; +            return {name, category, order, notes, score}; +        }; + +        // Archive file reading +        const readFileSequence = async (fileNameFormat, convertEntry, schema) => { +            const results = []; +            for (let i = 1; true; ++i) { +                const fileName = fileNameFormat.replace(/\?/, `${i}`); +                const file = archive.files[fileName]; +                if (!file) { break; } + +                const entries = JSON.parse(await file.async('string')); +                this._validateJsonSchema(entries, schema, fileName); + +                for (let entry of entries) { +                    entry = convertEntry(entry); +                    entry.dictionary = dictionaryTitle; +                    results.push(entry); +                } +            } +            return results; +        }; + +        // Load schemas +        const dataBankSchemaPaths = this._getDataBankSchemaPaths(version); +        const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); + +        // Load data +        const termList      = await readFileSequence('term_bank_?.json',       convertTermBankEntry,      dataBankSchemas[0]); +        const termMetaList  = await readFileSequence('term_meta_bank_?.json',  convertTermMetaBankEntry,  dataBankSchemas[1]); +        const kanjiList     = await readFileSequence('kanji_bank_?.json',      convertKanjiBankEntry,     dataBankSchemas[2]); +        const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]); +        const tagList       = await readFileSequence('tag_bank_?.json',        convertTagBankEntry,       dataBankSchemas[4]); + +        // Old tags +        const indexTagMeta = index.tagMeta; +        if (typeof indexTagMeta === 'object' && indexTagMeta !== null) { +            for (const name of Object.keys(indexTagMeta)) { +                const {category, order, notes, score} = indexTagMeta[name]; +                tagList.push({name, category, order, notes, score}); +            } +        } + +        // Prefix wildcard support +        const prefixWildcardsSupported = !!details.prefixWildcardsSupported; +        if (prefixWildcardsSupported) { +            for (const entry of termList) { +                entry.expressionReverse = stringReverse(entry.expression); +                entry.readingReverse = stringReverse(entry.reading); +            } +        } + +        // Add dictionary +        const summary = { +            title: dictionaryTitle, +            revision: index.revision, +            sequenced: index.sequenced, +            version, +            prefixWildcardsSupported +        }; + +        database.bulkAdd('dictionaries', [summary], 0, 1); + +        // Add data +        const errors = []; +        const total = ( +            termList.length + +            termMetaList.length + +            kanjiList.length + +            kanjiMetaList.length + +            tagList.length +        ); +        let loadedCount = 0; +        const maxTransactionLength = 1000; + +        const bulkAdd = async (objectStoreName, entries) => { +            const ii = entries.length; +            for (let i = 0; i < ii; i += maxTransactionLength) { +                const count = Math.min(maxTransactionLength, ii - i); + +                try { +                    await database.bulkAdd(objectStoreName, entries, i, count); +                } catch (e) { +                    errors.push(e); +                } + +                loadedCount += count; +                if (hasOnProgress) { +                    onProgress(total, loadedCount); +                } +            } +        }; + +        await bulkAdd('terms', termList); +        await bulkAdd('termMeta', termMetaList); +        await bulkAdd('kanji', kanjiList); +        await bulkAdd('kanjiMeta', kanjiMetaList); +        await bulkAdd('tagMeta', tagList); + +        return {result: summary, errors}; +    } + +    async _getSchema(fileName) { +        let schemaPromise = this._schemas.get(fileName); +        if (typeof schemaPromise !== 'undefined') { +            return schemaPromise; +        } + +        schemaPromise = requestJson(chrome.runtime.getURL(fileName), 'GET'); +        this._schemas.set(fileName, schemaPromise); +        return schemaPromise; +    } + +    _validateJsonSchema(value, schema, fileName) { +        try { +            JsonSchema.validate(value, schema); +        } catch (e) { +            throw this._formatSchemaError(e, fileName); +        } +    } + +    _formatSchemaError(e, fileName) { +        const valuePathString = this._getSchemaErrorPathString(e.info.valuePath, 'dictionary'); +        const schemaPathString = this._getSchemaErrorPathString(e.info.schemaPath, 'schema'); + +        const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); +        e2.data = e; + +        return e2; +    } + +    _getSchemaErrorPathString(infoList, base='') { +        let result = base; +        for (const [part] of infoList) { +            switch (typeof part) { +                case 'string': +                    if (result.length > 0) { +                        result += '.'; +                    } +                    result += part; +                    break; +                case 'number': +                    result += `[${part}]`; +                    break; +            } +        } +        return result; +    } + +    _getDataBankSchemaPaths(version) { +        const termBank = ( +            version === 1 ? +            '/bg/data/dictionary-term-bank-v1-schema.json' : +            '/bg/data/dictionary-term-bank-v3-schema.json' +        ); +        const termMetaBank = '/bg/data/dictionary-term-meta-bank-v3-schema.json'; +        const kanjiBank = ( +            version === 1 ? +            '/bg/data/dictionary-kanji-bank-v1-schema.json' : +            '/bg/data/dictionary-kanji-bank-v3-schema.json' +        ); +        const kanjiMetaBank = '/bg/data/dictionary-kanji-meta-bank-v3-schema.json'; +        const tagBank = '/bg/data/dictionary-tag-bank-v3-schema.json'; + +        return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; +    } +} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index cd991efa..27f91c05 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -17,7 +17,6 @@   */  /* global - * Database   * Deinflector   * TextSourceMap   * dictEnabledSet @@ -35,23 +34,16 @@   */  class Translator { -    constructor() { -        this.database = null; +    constructor(database) { +        this.database = database;          this.deinflector = null;          this.tagCache = new Map();      }      async prepare() { -        if (!this.database) { -            this.database = new Database(); -            await this.database.prepare(); -        } - -        if (!this.deinflector) { -            const url = chrome.runtime.getURL('/bg/lang/deinflect.json'); -            const reasons = await requestJson(url, 'GET'); -            this.deinflector = new Deinflector(reasons); -        } +        const url = chrome.runtime.getURL('/bg/lang/deinflect.json'); +        const reasons = await requestJson(url, 'GET'); +        this.deinflector = new Deinflector(reasons);      }      async purgeDatabase() { diff --git a/ext/bg/js/util.js b/ext/bg/js/util.js index 79c6af06..a7ed4a34 100644 --- a/ext/bg/js/util.js +++ b/ext/bg/js/util.js @@ -118,7 +118,7 @@ async function utilDatabaseDeleteDictionary(dictionaryName, onProgress) {  async function utilDatabaseImport(data, onProgress, details) {      data = await utilReadFile(data); -    return utilIsolate(await utilBackend().translator.database.importDictionary( +    return utilIsolate(await utilBackend().importDictionary(          utilBackgroundIsolate(data),          utilBackgroundFunctionIsolate(onProgress),          utilBackgroundIsolate(details) |