diff options
Diffstat (limited to 'ext/js/dictionary')
| -rw-r--r-- | ext/js/dictionary/dictionary-data-util.js | 391 | ||||
| -rw-r--r-- | ext/js/dictionary/dictionary-database.js | 661 | ||||
| -rw-r--r-- | ext/js/dictionary/dictionary-importer-media-loader.js | 50 | ||||
| -rw-r--r-- | ext/js/dictionary/dictionary-importer.js | 796 | ||||
| -rw-r--r-- | ext/js/dictionary/dictionary-worker-handler.js | 137 | ||||
| -rw-r--r-- | ext/js/dictionary/dictionary-worker-main.js | 32 | ||||
| -rw-r--r-- | ext/js/dictionary/dictionary-worker-media-loader.js | 64 | ||||
| -rw-r--r-- | ext/js/dictionary/dictionary-worker.js | 206 | 
8 files changed, 2337 insertions, 0 deletions
// Patch section: ext/js/dictionary/dictionary-data-util.js
// (new file mode 100644, index 00000000..a54b043b, hunk @@ -0,0 +1,391 @@)

/*
 * Copyright (C) 2023  Yomitan Authors
 * Copyright (C) 2020-2022  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/**
 * Collection of static helpers that regroup the flat lists found on dictionary
 * entries (tags, frequencies, pronunciations) into nested structures keyed by
 * dictionary / headword. The class holds no state; every method is static.
 */
export class DictionaryDataUtil {
    /**
     * Collects the tags of every headword into a single list. When the entry has
     * more than one headword, tags with the same name, category, content and
     * dictionaries are merged into one item that records every headword index
     * the tag was seen on.
     * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
     * @returns {import('dictionary-data-util').TagGroup[]}
     */
    static groupTermTags(dictionaryEntry) {
        const {headwords} = dictionaryEntry;
        const headwordCount = headwords.length;
        // Merging is only attempted when there is more than one headword.
        const uniqueCheck = (headwordCount > 1);
        // Maps a serialized tag key to the position of its item in `results`.
        const resultsIndexMap = new Map();
        const results = [];
        for (let i = 0; i < headwordCount; ++i) {
            const {tags} = headwords[i];
            for (const tag of tags) {
                if (uniqueCheck) {
                    const {name, category, content, dictionaries} = tag;
                    const key = this._createMapKey([name, category, content, dictionaries]);
                    const index = resultsIndexMap.get(key);
                    if (typeof index !== 'undefined') {
                        // Already emitted: just record the additional headword.
                        const existingItem = results[index];
                        existingItem.headwordIndices.push(i);
                        continue;
                    }
                    resultsIndexMap.set(key, results.length);
                }

                const item = {tag, headwordIndices: [i]};
                results.push(item);
            }
        }
        return results;
    }

    /**
     * Groups term frequency records first by dictionary and then by
     * (term, reading) pair. Records that share the same frequency and
     * displayValue within a pair are collapsed into one value.
     * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
     * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').TermFrequency>[]}
     */
    static groupTermFrequencies(dictionaryEntry) {
        const {headwords, frequencies: sourceFrequencies} = dictionaryEntry;

        /** @type {import('dictionary-data-util').TermFrequenciesMap1} */
        const map1 = new Map();
        for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of sourceFrequencies) {
            const {term, reading} = headwords[headwordIndex];

            let map2 = map1.get(dictionary);
            if (typeof map2 === 'undefined') {
                map2 = new Map();
                map1.set(dictionary, map2);
            }

            // Records flagged as having no reading are grouped under a null reading.
            const readingKey = hasReading ? reading : null;
            const key = this._createMapKey([term, readingKey]);
            let frequencyData = map2.get(key);
            if (typeof frequencyData === 'undefined') {
                frequencyData = {term, reading: readingKey, values: new Map()};
                map2.set(key, frequencyData);
            }

            // Keyed by the serialized pair so identical values overwrite each other.
            frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue});
        }

        const results = [];
        for (const [dictionary, map2] of map1.entries()) {
            const frequencies = [];
            for (const {term, reading, values} of map2.values()) {
                frequencies.push({
                    term,
                    reading,
                    values: [...values.values()]
                });
            }
            results.push({dictionary, frequencies});
        }
        return results;
    }

    /**
     * Groups kanji frequency records first by dictionary and then by character.
     * Records that share the same frequency and displayValue for a character
     * are collapsed into one value.
     * @param {import('dictionary').KanjiFrequency[]} sourceFrequencies
     * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').KanjiFrequency>[]}
     */
    static groupKanjiFrequencies(sourceFrequencies) {
        /** @type {import('dictionary-data-util').KanjiFrequenciesMap1} */
        const map1 = new Map();
        for (const {dictionary, character, frequency, displayValue} of sourceFrequencies) {
            let map2 = map1.get(dictionary);
            if (typeof map2 === 'undefined') {
                map2 = new Map();
                map1.set(dictionary, map2);
            }

            let frequencyData = map2.get(character);
            if (typeof frequencyData === 'undefined') {
                frequencyData = {character, values: new Map()};
                map2.set(character, frequencyData);
            }

            // Keyed by the serialized pair so identical values overwrite each other.
            frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue});
        }

        const results = [];
        for (const [dictionary, map2] of map1.entries()) {
            const frequencies = [];
            for (const {character, values} of map2.values()) {
                frequencies.push({
                    character,
                    values: [...values.values()]
                });
            }
            results.push({dictionary, frequencies});
        }
        return results;
    }

    /**
     * Groups the pitches of an entry's pronunciations by dictionary, merging
     * pitches that agree on reading, position, nasal/devoice positions and tags
     * so that each merged item lists every term it applies to.
     * `exclusiveTerms` is filled only when the item does not cover all of the
     * entry's terms; `exclusiveReadings` is filled only when the entry's
     * headwords have more than one distinct reading.
     * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
     * @returns {import('dictionary-data-util').DictionaryGroupedPronunciations[]}
     */
    static getGroupedPronunciations(dictionaryEntry) {
        const {headwords, pronunciations} = dictionaryEntry;

        const allTerms = new Set();
        const allReadings = new Set();
        for (const {term, reading} of headwords) {
            allTerms.add(term);
            allReadings.add(reading);
        }

        /** @type {Map<string, import('dictionary-data-util').GroupedPronunciationInternal[]>} */
        const groupedPronunciationsMap = new Map();
        for (const {headwordIndex, dictionary, pitches} of pronunciations) {
            const {term, reading} = headwords[headwordIndex];
            let dictionaryGroupedPronunciationList = groupedPronunciationsMap.get(dictionary);
            if (typeof dictionaryGroupedPronunciationList === 'undefined') {
                dictionaryGroupedPronunciationList = [];
                groupedPronunciationsMap.set(dictionary, dictionaryGroupedPronunciationList);
            }
            for (const {position, nasalPositions, devoicePositions, tags} of pitches) {
                let groupedPronunciation = this._findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, dictionaryGroupedPronunciationList);
                if (groupedPronunciation === null) {
                    groupedPronunciation = {
                        terms: new Set(),
                        reading,
                        position,
                        nasalPositions,
                        devoicePositions,
                        tags
                    };
                    dictionaryGroupedPronunciationList.push(groupedPronunciation);
                }
                groupedPronunciation.terms.add(term);
            }
        }

        /** @type {import('dictionary-data-util').DictionaryGroupedPronunciations[]} */
        const results2 = [];
        const multipleReadings = (allReadings.size > 1);
        for (const [dictionary, dictionaryGroupedPronunciationList] of groupedPronunciationsMap.entries()) {
            /** @type {import('dictionary-data-util').GroupedPronunciation[]} */
            const pronunciations2 = [];
            for (const groupedPronunciation of dictionaryGroupedPronunciationList) {
                const {terms, reading, position, nasalPositions, devoicePositions, tags} = groupedPronunciation;
                const exclusiveTerms = !this._areSetsEqual(terms, allTerms) ? this._getSetIntersection(terms, allTerms) : [];
                const exclusiveReadings = [];
                if (multipleReadings) {
                    exclusiveReadings.push(reading);
                }
                pronunciations2.push({
                    terms: [...terms],
                    reading,
                    position,
                    nasalPositions,
                    devoicePositions,
                    tags,
                    exclusiveTerms,
                    exclusiveReadings
                });
            }
            results2.push({dictionary, pronunciations: pronunciations2});
        }
        return results2;
    }

    /**
     * Classifies a term by the sum of its tags' scores: a positive total is
     * 'popular', a negative total is 'rare', anything else is 'normal'.
     * @param {import('dictionary').Tag[]|import('anki-templates').Tag[]} termTags
     * @returns {import('dictionary-data-util').TermFrequencyType}
     */
    static getTermFrequency(termTags) {
        let totalScore = 0;
        for (const {score} of termTags) {
            totalScore += score;
        }
        if (totalScore > 0) {
            return 'popular';
        } else if (totalScore < 0) {
            return 'rare';
        } else {
            return 'normal';
        }
    }

    /**
     * Returns the terms and/or readings of the selected headwords, but only for
     * the dimension(s) in which the selection differs from the full set.
     * Returns an empty array when there is at most one term and one reading overall.
     * @param {import('dictionary').TermHeadword[]} headwords
     * @param {number[]} headwordIndices
     * @param {Set<string>} allTermsSet
     * @param {Set<string>} allReadingsSet
     * @returns {string[]}
     */
    static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) {
        if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; }

        /** @type {Set<string>} */
        const terms = new Set();
        /** @type {Set<string>} */
        const readings = new Set();
        for (const headwordIndex of headwordIndices) {
            const {term, reading} = headwords[headwordIndex];
            terms.add(term);
            readings.add(reading);
        }

        /** @type {string[]} */
        const disambiguations = [];
        const addTerms = !this._areSetsEqual(terms, allTermsSet);
        const addReadings = !this._areSetsEqual(readings, allReadingsSet);
        if (addTerms) {
            disambiguations.push(...this._getSetIntersection(terms, allTermsSet));
        }
        if (addReadings) {
            if (addTerms) {
                // A reading identical to an already-listed term is not repeated.
                for (const term of terms) {
                    readings.delete(term);
                }
            }
            disambiguations.push(...this._getSetIntersection(readings, allReadingsSet));
        }
        return disambiguations;
    }

    /**
     * Returns true when the list contains at least one of the word classes
     * 'v1', 'v5', 'vk', 'vz', 'adj-i' or 'vs', unless the list contains both
     * 'vs' and 'n', in which case the result is false regardless of the other
     * classes present.
     * @param {string[]} wordClasses
     * @returns {boolean}
     */
    static isNonNounVerbOrAdjective(wordClasses) {
        let isVerbOrAdjective = false;
        let isSuruVerb = false;
        let isNoun = false;
        for (const wordClass of wordClasses) {
            switch (wordClass) {
                case 'v1':
                case 'v5':
                case 'vk':
                case 'vz':
                case 'adj-i':
                    isVerbOrAdjective = true;
                    break;
                case 'vs':
                    isVerbOrAdjective = true;
                    isSuruVerb = true;
                    break;
                case 'n':
                    isNoun = true;
                    break;
            }
        }
        return isVerbOrAdjective && !(isSuruVerb && isNoun);
    }

    // Private

    /**
     * Linear search for an already-grouped pronunciation that matches on
     * reading, position, nasal positions, devoice positions and tags.
     * @param {string} reading
     * @param {number} position
     * @param {number[]} nasalPositions
     * @param {number[]} devoicePositions
     * @param {import('dictionary').Tag[]} tags
     * @param {import('dictionary-data-util').GroupedPronunciationInternal[]} groupedPronunciationList
     * @returns {?import('dictionary-data-util').GroupedPronunciationInternal} The matching item, or null if none matches.
     */
    static _findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, groupedPronunciationList) {
        for (const pitchInfo of groupedPronunciationList) {
            if (
                pitchInfo.reading === reading &&
                pitchInfo.position === position &&
                this._areArraysEqual(pitchInfo.nasalPositions, nasalPositions) &&
                this._areArraysEqual(pitchInfo.devoicePositions, devoicePositions) &&
                this._areTagListsEqual(pitchInfo.tags, tags)
            ) {
                return pitchInfo;
            }
        }
        return null;
    }

    /**
     * Element-wise strict (`===`) comparison of two arrays, order-sensitive.
     * @template [T=unknown]
     * @param {T[]} array1
     * @param {T[]} array2
     * @returns {boolean}
     */
    static _areArraysEqual(array1, array2) {
        const ii = array1.length;
        if (ii !== array2.length) { return false; }
        for (let i = 0; i < ii; ++i) {
            if (array1[i] !== array2[i]) { return false; }
        }
        return true;
    }

    /**
     * Compares two tag lists position by position. Two tags are considered
     * equal when their `name` and `dictionaries` match; other tag fields are
     * not compared.
     * @param {import('dictionary').Tag[]} tagList1
     * @param {import('dictionary').Tag[]} tagList2
     * @returns {boolean}
     */
    static _areTagListsEqual(tagList1, tagList2) {
        const ii = tagList1.length;
        if (tagList2.length !== ii) { return false; }

        for (let i = 0; i < ii; ++i) {
            const tag1 = tagList1[i];
            const tag2 = tagList2[i];
            if (tag1.name !== tag2.name || !this._areArraysEqual(tag1.dictionaries, tag2.dictionaries)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Returns true when both sets have the same size and every value of
     * `set1` is present in `set2`.
     * @template [T=unknown]
     * @param {Set<T>} set1
     * @param {Set<T>} set2
     * @returns {boolean}
     */
    static _areSetsEqual(set1, set2) {
        if (set1.size !== set2.size) {
            return false;
        }

        for (const value of set1) {
            if (!set2.has(value)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Returns the values of `set1` that are also in `set2`, in `set1`'s
     * iteration order.
     * @template [T=unknown]
     * @param {Set<T>} set1
     * @param {Set<T>} set2
     * @returns {T[]}
     */
    static _getSetIntersection(set1, set2) {
        const result = [];
        for (const value of set1) {
            if (set2.has(value)) {
                result.push(value);
            }
        }
        return result;
    }

    /**
     * Serializes an array to a JSON string so it can be used as a Map key.
     * @param {unknown[]} array
     * @returns {string}
     */
    static _createMapKey(array) {
        return JSON.stringify(array);
    }
}

// Next patch section begins here: ext/js/dictionary/dictionary-database.js
// (new file mode 100644, index 00000000..45c5c6fd, hunk @@ -0,0 +1,661 @@)
+ */ + +import {log, stringReverse} from '../core.js'; +import {Database} from '../data/database.js'; + +export class DictionaryDatabase { +    constructor() { +        /** @type {Database<import('dictionary-database').ObjectStoreName>} */ +        this._db = new Database(); +        /** @type {string} */ +        this._dbName = 'dict'; +        /** @type {import('dictionary-database').CreateQuery<string>} */ +        this._createOnlyQuery1 = (item) => IDBKeyRange.only(item); +        /** @type {import('dictionary-database').CreateQuery<import('dictionary-database').DictionaryAndQueryRequest>} */ +        this._createOnlyQuery2 = (item) => IDBKeyRange.only(item.query); +        /** @type {import('dictionary-database').CreateQuery<import('dictionary-database').TermExactRequest>} */ +        this._createOnlyQuery3 = (item) => IDBKeyRange.only(item.term); +        /** @type {import('dictionary-database').CreateQuery<import('dictionary-database').MediaRequest>} */ +        this._createOnlyQuery4 = (item) => IDBKeyRange.only(item.path); +        /** @type {import('dictionary-database').CreateQuery<string>} */ +        this._createBoundQuery1 = (item) => IDBKeyRange.bound(item, `${item}\uffff`, false, false); +        /** @type {import('dictionary-database').CreateQuery<string>} */ +        this._createBoundQuery2 = (item) => { item = stringReverse(item); return IDBKeyRange.bound(item, `${item}\uffff`, false, false); }; +        /** @type {import('dictionary-database').CreateResult<import('dictionary-database').TermExactRequest, import('dictionary-database').DatabaseTermEntryWithId, import('dictionary-database').TermEntry>} */ +        this._createTermBind1 = this._createTermExact.bind(this); +        /** @type {import('dictionary-database').CreateResult<import('dictionary-database').DictionaryAndQueryRequest, import('dictionary-database').DatabaseTermEntryWithId, import('dictionary-database').TermEntry>} */ +        this._createTermBind2 = 
this._createTermSequenceExact.bind(this); +        /** @type {import('dictionary-database').CreateResult<string, import('dictionary-database').DatabaseTermMeta, import('dictionary-database').TermMeta>} */ +        this._createTermMetaBind = this._createTermMeta.bind(this); +        /** @type {import('dictionary-database').CreateResult<string, import('dictionary-database').DatabaseKanjiEntry, import('dictionary-database').KanjiEntry>} */ +        this._createKanjiBind = this._createKanji.bind(this); +        /** @type {import('dictionary-database').CreateResult<string, import('dictionary-database').DatabaseKanjiMeta, import('dictionary-database').KanjiMeta>} */ +        this._createKanjiMetaBind = this._createKanjiMeta.bind(this); +        /** @type {import('dictionary-database').CreateResult<import('dictionary-database').MediaRequest, import('dictionary-database').MediaDataArrayBufferContent, import('dictionary-database').Media>} */ +        this._createMediaBind = this._createMedia.bind(this); +    } + +    /** */ +    async prepare() { +        await this._db.open( +            this._dbName, +            60, +            /** @type {import('database').StructureDefinition<import('dictionary-database').ObjectStoreName>[]} */ +            ([ +                /** @type {import('database').StructureDefinition<import('dictionary-database').ObjectStoreName>} */ +                ({ +                    version: 20, +                    stores: { +                        terms: { +                            primaryKey: {keyPath: 'id', autoIncrement: true}, +                            indices: ['dictionary', 'expression', 'reading'] +                        }, +                        kanji: { +                            primaryKey: {autoIncrement: true}, +                            indices: ['dictionary', 'character'] +                        }, +                        tagMeta: { +                            primaryKey: {autoIncrement: true}, +                         
   indices: ['dictionary'] +                        }, +                        dictionaries: { +                            primaryKey: {autoIncrement: true}, +                            indices: ['title', 'version'] +                        } +                    } +                }), +                { +                    version: 30, +                    stores: { +                        termMeta: { +                            primaryKey: {autoIncrement: true}, +                            indices: ['dictionary', 'expression'] +                        }, +                        kanjiMeta: { +                            primaryKey: {autoIncrement: true}, +                            indices: ['dictionary', 'character'] +                        }, +                        tagMeta: { +                            primaryKey: {autoIncrement: true}, +                            indices: ['dictionary', 'name'] +                        } +                    } +                }, +                { +                    version: 40, +                    stores: { +                        terms: { +                            primaryKey: {keyPath: 'id', autoIncrement: true}, +                            indices: ['dictionary', 'expression', 'reading', 'sequence'] +                        } +                    } +                }, +                { +                    version: 50, +                    stores: { +                        terms: { +                            primaryKey: {keyPath: 'id', autoIncrement: true}, +                            indices: ['dictionary', 'expression', 'reading', 'sequence', 'expressionReverse', 'readingReverse'] +                        } +                    } +                }, +                { +                    version: 60, +                    stores: { +                        media: { +                            primaryKey: {keyPath: 'id', autoIncrement: true}, +                            indices: ['dictionary', 
'path'] +                        } +                    } +                } +            ]) +        ); +    } + +    /** */ +    async close() { +        this._db.close(); +    } + +    /** +     * @returns {boolean} +     */ +    isPrepared() { +        return this._db.isOpen(); +    } + +    /** +     * @returns {Promise<boolean>} +     */ +    async purge() { +        if (this._db.isOpening()) { +            throw new Error('Cannot purge database while opening'); +        } +        if (this._db.isOpen()) { +            this._db.close(); +        } +        let result = false; +        try { +            await Database.deleteDatabase(this._dbName); +            result = true; +        } catch (e) { +            log.error(e); +        } +        await this.prepare(); +        return result; +    } + +    /** +     * @param {string} dictionaryName +     * @param {number} progressRate +     * @param {import('dictionary-database').DeleteDictionaryProgressCallback} onProgress +     */ +    async deleteDictionary(dictionaryName, progressRate, onProgress) { +        /** @type {[objectStoreName: import('dictionary-database').ObjectStoreName, key: string][][]} */ +        const targetGroups = [ +            [ +                ['kanji', 'dictionary'], +                ['kanjiMeta', 'dictionary'], +                ['terms', 'dictionary'], +                ['termMeta', 'dictionary'], +                ['tagMeta', 'dictionary'], +                ['media', 'dictionary'] +            ], +            [ +                ['dictionaries', 'title'] +            ] +        ]; + +        let storeCount = 0; +        for (const targets of targetGroups) { +            storeCount += targets.length; +        } + +        /** @type {import('dictionary-database').DeleteDictionaryProgressData} */ +        const progressData = { +            count: 0, +            processed: 0, +            storeCount, +            storesProcesed: 0 +        }; + +        /** +         * @param 
{IDBValidKey[]} keys +         * @returns {IDBValidKey[]} +         */ +        const filterKeys = (keys) => { +            ++progressData.storesProcesed; +            progressData.count += keys.length; +            onProgress(progressData); +            return keys; +        }; +        const onProgressWrapper = () => { +            const processed = progressData.processed + 1; +            progressData.processed = processed; +            if ((processed % progressRate) === 0 || processed === progressData.count) { +                onProgress(progressData); +            } +        }; + +        for (const targets of targetGroups) { +            const promises = []; +            for (const [objectStoreName, indexName] of targets) { +                const query = IDBKeyRange.only(dictionaryName); +                const promise = this._db.bulkDelete(objectStoreName, indexName, query, filterKeys, onProgressWrapper); +                promises.push(promise); +            } +            await Promise.all(promises); +        } +    } + +    /** +     * @param {string[]} termList +     * @param {import('dictionary-database').DictionarySet} dictionaries +     * @param {import('dictionary-database').MatchType} matchType +     * @returns {Promise<import('dictionary-database').TermEntry[]>} +     */ +    findTermsBulk(termList, dictionaries, matchType) { +        const visited = new Set(); +        /** @type {import('dictionary-database').FindPredicate<string, import('dictionary-database').DatabaseTermEntryWithId>} */ +        const predicate = (row) => { +            if (!dictionaries.has(row.dictionary)) { return false; } +            const {id} = row; +            if (visited.has(id)) { return false; } +            visited.add(id); +            return true; +        }; + +        const indexNames = (matchType === 'suffix') ? 
['expressionReverse', 'readingReverse'] : ['expression', 'reading']; + +        let createQuery = this._createOnlyQuery1; +        switch (matchType) { +            case 'prefix': +                createQuery = this._createBoundQuery1; +                break; +            case 'suffix': +                createQuery = this._createBoundQuery2; +                break; +        } + +        const createResult = this._createTermGeneric.bind(this, matchType); + +        return this._findMultiBulk('terms', indexNames, termList, createQuery, predicate, createResult); +    } + +    /** +     * @param {import('dictionary-database').TermExactRequest[]} termList +     * @param {import('dictionary-database').DictionarySet} dictionaries +     * @returns {Promise<import('dictionary-database').TermEntry[]>} +     */ +    findTermsExactBulk(termList, dictionaries) { +        /** @type {import('dictionary-database').FindPredicate<import('dictionary-database').TermExactRequest, import('dictionary-database').DatabaseTermEntry>} */ +        const predicate = (row, item) => (row.reading === item.reading && dictionaries.has(row.dictionary)); +        return this._findMultiBulk('terms', ['expression'], termList, this._createOnlyQuery3, predicate, this._createTermBind1); +    } + +    /** +     * @param {import('dictionary-database').DictionaryAndQueryRequest[]} items +     * @returns {Promise<import('dictionary-database').TermEntry[]>} +     */ +    findTermsBySequenceBulk(items) { +        /** @type {import('dictionary-database').FindPredicate<import('dictionary-database').DictionaryAndQueryRequest, import('dictionary-database').DatabaseTermEntry>} */ +        const predicate = (row, item) => (row.dictionary === item.dictionary); +        return this._findMultiBulk('terms', ['sequence'], items, this._createOnlyQuery2, predicate, this._createTermBind2); +    } + +    /** +     * @param {string[]} termList +     * @param {import('dictionary-database').DictionarySet} dictionaries +     * 
@returns {Promise<import('dictionary-database').TermMeta[]>} +     */ +    findTermMetaBulk(termList, dictionaries) { +        /** @type {import('dictionary-database').FindPredicate<string, import('dictionary-database').DatabaseTermMeta>} */ +        const predicate = (row) => dictionaries.has(row.dictionary); +        return this._findMultiBulk('termMeta', ['expression'], termList, this._createOnlyQuery1, predicate, this._createTermMetaBind); +    } + +    /** +     * @param {string[]} kanjiList +     * @param {import('dictionary-database').DictionarySet} dictionaries +     * @returns {Promise<import('dictionary-database').KanjiEntry[]>} +     */ +    findKanjiBulk(kanjiList, dictionaries) { +        /** @type {import('dictionary-database').FindPredicate<string, import('dictionary-database').DatabaseKanjiEntry>} */ +        const predicate = (row) => dictionaries.has(row.dictionary); +        return this._findMultiBulk('kanji', ['character'], kanjiList, this._createOnlyQuery1, predicate, this._createKanjiBind); +    } + +    /** +     * @param {string[]} kanjiList +     * @param {import('dictionary-database').DictionarySet} dictionaries +     * @returns {Promise<import('dictionary-database').KanjiMeta[]>} +     */ +    findKanjiMetaBulk(kanjiList, dictionaries) { +        /** @type {import('dictionary-database').FindPredicate<string, import('dictionary-database').DatabaseKanjiMeta>} */ +        const predicate = (row) => dictionaries.has(row.dictionary); +        return this._findMultiBulk('kanjiMeta', ['character'], kanjiList, this._createOnlyQuery1, predicate, this._createKanjiMetaBind); +    } + +    /** +     * @param {import('dictionary-database').DictionaryAndQueryRequest[]} items +     * @returns {Promise<(import('dictionary-database').Tag|undefined)[]>} +     */ +    findTagMetaBulk(items) { +        /** @type {import('dictionary-database').FindPredicate<import('dictionary-database').DictionaryAndQueryRequest, import('dictionary-database').Tag>} */ +       
 const predicate = (row, item) => (row.dictionary === item.dictionary); +        return this._findFirstBulk('tagMeta', 'name', items, this._createOnlyQuery2, predicate); +    } + +    /** +     * @param {string} name +     * @param {string} dictionary +     * @returns {Promise<?import('dictionary-database').Tag>} +     */ +    findTagForTitle(name, dictionary) { +        const query = IDBKeyRange.only(name); +        return this._db.find('tagMeta', 'name', query, (row) => (/** @type {import('dictionary-database').Tag} */ (row).dictionary === dictionary), null, null); +    } + +    /** +     * @param {import('dictionary-database').MediaRequest[]} items +     * @returns {Promise<import('dictionary-database').Media[]>} +     */ +    getMedia(items) { +        /** @type {import('dictionary-database').FindPredicate<import('dictionary-database').MediaRequest, import('dictionary-database').MediaDataArrayBufferContent>} */ +        const predicate = (row, item) => (row.dictionary === item.dictionary); +        return this._findMultiBulk('media', ['path'], items, this._createOnlyQuery4, predicate, this._createMediaBind); +    } + +    /** +     * @returns {Promise<import('dictionary-importer').Summary[]>} +     */ +    getDictionaryInfo() { +        return new Promise((resolve, reject) => { +            const transaction = this._db.transaction(['dictionaries'], 'readonly'); +            const objectStore = transaction.objectStore('dictionaries'); +            this._db.getAll(objectStore, null, resolve, reject, null); +        }); +    } + +    /** +     * @param {string[]} dictionaryNames +     * @param {boolean} getTotal +     * @returns {Promise<import('dictionary-database').DictionaryCounts>} +     */ +    getDictionaryCounts(dictionaryNames, getTotal) { +        return new Promise((resolve, reject) => { +            const targets = [ +                ['kanji', 'dictionary'], +                ['kanjiMeta', 'dictionary'], +                ['terms', 'dictionary'], +        
        ['termMeta', 'dictionary'], +                ['tagMeta', 'dictionary'], +                ['media', 'dictionary'] +            ]; +            const objectStoreNames = targets.map(([objectStoreName]) => objectStoreName); +            const transaction = this._db.transaction(objectStoreNames, 'readonly'); +            const databaseTargets = targets.map(([objectStoreName, indexName]) => { +                const objectStore = transaction.objectStore(objectStoreName); +                const index = objectStore.index(indexName); +                return {objectStore, index}; +            }); + +            /** @type {import('database').CountTarget[]} */ +            const countTargets = []; +            if (getTotal) { +                for (const {objectStore} of databaseTargets) { +                    countTargets.push([objectStore, void 0]); +                } +            } +            for (const dictionaryName of dictionaryNames) { +                const query = IDBKeyRange.only(dictionaryName); +                for (const {index} of databaseTargets) { +                    countTargets.push([index, query]); +                } +            } + +            /** +             * @param {number[]} results +             */ +            const onCountComplete = (results) => { +                const resultCount = results.length; +                const targetCount = targets.length; +                /** @type {import('dictionary-database').DictionaryCountGroup[]} */ +                const counts = []; +                for (let i = 0; i < resultCount; i += targetCount) { +                    /** @type {import('dictionary-database').DictionaryCountGroup} */ +                    const countGroup = {}; +                    for (let j = 0; j < targetCount; ++j) { +                        countGroup[targets[j][0]] = results[i + j]; +                    } +                    counts.push(countGroup); +                } +                const total = getTotal ? 
/** @type {import('dictionary-database').DictionaryCountGroup} */ (counts.shift()) : null; +                resolve({total, counts}); +            }; + +            this._db.bulkCount(countTargets, onCountComplete, reject); +        }); +    } + +    /** +     * @param {string} title +     * @returns {Promise<boolean>} +     */ +    async dictionaryExists(title) { +        const query = IDBKeyRange.only(title); +        const result = await this._db.find('dictionaries', 'title', query, null, null, void 0); +        return typeof result !== 'undefined'; +    } + +    /** +     * @template {import('dictionary-database').ObjectStoreName} T +     * @param {T} objectStoreName +     * @param {import('dictionary-database').ObjectStoreData<T>[]} items +     * @param {number} start +     * @param {number} count +     * @returns {Promise<void>} +     */ +    bulkAdd(objectStoreName, items, start, count) { +        return this._db.bulkAdd(objectStoreName, items, start, count); +    } + +    // Private + +    /** +     * @template [TRow=unknown] +     * @template [TItem=unknown] +     * @template [TResult=unknown] +     * @param {import('dictionary-database').ObjectStoreName} objectStoreName +     * @param {string[]} indexNames +     * @param {TItem[]} items +     * @param {import('dictionary-database').CreateQuery<TItem>} createQuery +     * @param {import('dictionary-database').FindPredicate<TItem, TRow>} predicate +     * @param {import('dictionary-database').CreateResult<TItem, TRow, TResult>} createResult +     * @returns {Promise<TResult[]>} +     */ +    _findMultiBulk(objectStoreName, indexNames, items, createQuery, predicate, createResult) { +        return new Promise((resolve, reject) => { +            const itemCount = items.length; +            const indexCount = indexNames.length; +            /** @type {TResult[]} */ +            const results = []; +            if (itemCount === 0 || indexCount === 0) { +                resolve(results); +                
return; +            } + +            const transaction = this._db.transaction([objectStoreName], 'readonly'); +            const objectStore = transaction.objectStore(objectStoreName); +            const indexList = []; +            for (const indexName of indexNames) { +                indexList.push(objectStore.index(indexName)); +            } +            let completeCount = 0; +            const requiredCompleteCount = itemCount * indexCount; +            /** +             * @param {TRow[]} rows +             * @param {import('dictionary-database').FindMultiBulkData<TItem>} data +             */ +            const onGetAll = (rows, data) => { +                for (const row of rows) { +                    if (predicate(row, data.item)) { +                        results.push(createResult(row, data)); +                    } +                } +                if (++completeCount >= requiredCompleteCount) { +                    resolve(results); +                } +            }; +            for (let i = 0; i < itemCount; ++i) { +                const item = items[i]; +                const query = createQuery(item); +                for (let j = 0; j < indexCount; ++j) { +                    /** @type {import('dictionary-database').FindMultiBulkData<TItem>} */ +                    const data = {item, itemIndex: i, indexIndex: j}; +                    this._db.getAll(indexList[j], query, onGetAll, reject, data); +                } +            } +        }); +    } + +    /** +     * @template [TRow=unknown] +     * @template [TItem=unknown] +     * @param {import('dictionary-database').ObjectStoreName} objectStoreName +     * @param {string} indexName +     * @param {TItem[]} items +     * @param {import('dictionary-database').CreateQuery<TItem>} createQuery +     * @param {import('dictionary-database').FindPredicate<TItem, TRow>} predicate +     * @returns {Promise<(TRow|undefined)[]>} +     */ +    _findFirstBulk(objectStoreName, indexName, items, 
createQuery, predicate) { +        return new Promise((resolve, reject) => { +            const itemCount = items.length; +            /** @type {(TRow|undefined)[]} */ +            const results = new Array(itemCount); +            if (itemCount === 0) { +                resolve(results); +                return; +            } + +            const transaction = this._db.transaction([objectStoreName], 'readonly'); +            const objectStore = transaction.objectStore(objectStoreName); +            const index = objectStore.index(indexName); +            let completeCount = 0; +            /** +             * @param {TRow|undefined} row +             * @param {number} itemIndex +             */ +            const onFind = (row, itemIndex) => { +                results[itemIndex] = row; +                if (++completeCount >= itemCount) { +                    resolve(results); +                } +            }; +            for (let i = 0; i < itemCount; ++i) { +                const item = items[i]; +                const query = createQuery(item); +                this._db.findFirst(index, query, onFind, reject, i, predicate, item, void 0); +            } +        }); +    } + +    /** +     * @param {import('dictionary-database').MatchType} matchType +     * @param {import('dictionary-database').DatabaseTermEntryWithId} row +     * @param {import('dictionary-database').FindMultiBulkData<string>} data +     * @returns {import('dictionary-database').TermEntry} +     */ +    _createTermGeneric(matchType, row, data) { +        const matchSourceIsTerm = (data.indexIndex === 0); +        const matchSource = (matchSourceIsTerm ? 'term' : 'reading'); +        if ((matchSourceIsTerm ? 
row.expression : row.reading) === data.item) { +            matchType = 'exact'; +        } +        return this._createTerm(matchSource, matchType, row, data.itemIndex); +    } + +    /** +     * @param {import('dictionary-database').DatabaseTermEntryWithId} row +     * @param {import('dictionary-database').FindMultiBulkData<import('dictionary-database').TermExactRequest>} data +     * @returns {import('dictionary-database').TermEntry} +     */ +    _createTermExact(row, data) { +        return this._createTerm('term', 'exact', row, data.itemIndex); +    } + +    /** +     * @param {import('dictionary-database').DatabaseTermEntryWithId} row +     * @param {import('dictionary-database').FindMultiBulkData<import('dictionary-database').DictionaryAndQueryRequest>} data +     * @returns {import('dictionary-database').TermEntry} +     */ +    _createTermSequenceExact(row, data) { +        return this._createTerm('sequence', 'exact', row, data.itemIndex); +    } + +    /** +     * @param {import('dictionary-database').MatchSource} matchSource +     * @param {import('dictionary-database').MatchType} matchType +     * @param {import('dictionary-database').DatabaseTermEntryWithId} row +     * @param {number} index +     * @returns {import('dictionary-database').TermEntry} +     */ +    _createTerm(matchSource, matchType, row, index) { +        const {sequence} = row; +        return { +            index, +            matchType, +            matchSource, +            term: row.expression, +            reading: row.reading, +            definitionTags: this._splitField(row.definitionTags || row.tags), +            termTags: this._splitField(row.termTags), +            rules: this._splitField(row.rules), +            definitions: row.glossary, +            score: row.score, +            dictionary: row.dictionary, +            id: row.id, +            sequence: typeof sequence === 'number' ? 
sequence : -1 +        }; +    } + +    /** +     * @param {import('dictionary-database').DatabaseKanjiEntry} row +     * @param {import('dictionary-database').FindMultiBulkData<string>} data +     * @returns {import('dictionary-database').KanjiEntry} +     */ +    _createKanji(row, {itemIndex: index}) { +        const {stats} = row; +        return { +            index, +            character: row.character, +            onyomi: this._splitField(row.onyomi), +            kunyomi: this._splitField(row.kunyomi), +            tags: this._splitField(row.tags), +            definitions: row.meanings, +            stats: typeof stats === 'object' && stats !== null ? stats : {}, +            dictionary: row.dictionary +        }; +    } + +    /** +     * @param {import('dictionary-database').DatabaseTermMeta} row +     * @param {import('dictionary-database').FindMultiBulkData<string>} data +     * @returns {import('dictionary-database').TermMeta} +     * @throws {Error} +     */ +    _createTermMeta({expression: term, mode, data, dictionary}, {itemIndex: index}) { +        switch (mode) { +            case 'freq': +                return {index, term, mode, data, dictionary}; +            case 'pitch': +                return {index, term, mode, data, dictionary}; +            default: +                throw new Error(`Unknown mode: ${mode}`); +        } +    } + +    /** +     * @param {import('dictionary-database').DatabaseKanjiMeta} row +     * @param {import('dictionary-database').FindMultiBulkData<string>} data +     * @returns {import('dictionary-database').KanjiMeta} +     */ +    _createKanjiMeta({character, mode, data, dictionary}, {itemIndex: index}) { +        return {index, character, mode, data, dictionary}; +    } + +    /** +     * @param {import('dictionary-database').MediaDataArrayBufferContent} row +     * @param {import('dictionary-database').FindMultiBulkData<import('dictionary-database').MediaRequest>} data +     * @returns 
{import('dictionary-database').Media} +     */ +    _createMedia(row, {itemIndex: index}) { +        const {dictionary, path, mediaType, width, height, content} = row; +        return {index, dictionary, path, mediaType, width, height, content}; +    } + +    /** +     * @param {unknown} field +     * @returns {string[]} +     */ +    _splitField(field) { +        return typeof field === 'string' && field.length > 0 ? field.split(' ') : []; +    } +} diff --git a/ext/js/dictionary/dictionary-importer-media-loader.js b/ext/js/dictionary/dictionary-importer-media-loader.js new file mode 100644 index 00000000..a5857dce --- /dev/null +++ b/ext/js/dictionary/dictionary-importer-media-loader.js @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2023  Yomitan Authors + * Copyright (C) 2021-2022  Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {EventListenerCollection} from '../core.js'; + +/** + * Class used for loading and validating media during the dictionary import process. 
/**
 * Class used for loading and validating media during the dictionary import process.
 */
export class DictionaryImporterMediaLoader {
    /**
     * Decodes `content` through an `Image` element to obtain its natural
     * dimensions. Rejects with an error when the image fails to load. On
     * success, `content` is also appended to `transfer` when `transfer` is
     * an array.
     * @type {import('dictionary-importer-media-loader').GetImageDetailsFunction}
     */
    getImageDetails(content, mediaType, transfer) {
        return new Promise((resolve, reject) => {
            const image = new Image();
            const eventListeners = new EventListenerCollection();

            // Releases the object URL and detaches the listeners; only invoked
            // from the load/error handlers, after the URL has been assigned.
            const cleanup = () => {
                image.removeAttribute('src');
                URL.revokeObjectURL(url);
                eventListeners.removeAllEventListeners();
            };
            const onLoad = () => {
                const width = image.naturalWidth;
                const height = image.naturalHeight;
                if (Array.isArray(transfer)) { transfer.push(content); }
                cleanup();
                resolve({content, width, height});
            };
            const onError = () => {
                cleanup();
                reject(new Error('Image failed to load'));
            };

            eventListeners.addEventListener(image, 'load', onLoad, false);
            eventListeners.addEventListener(image, 'error', onError, false);

            const url = URL.createObjectURL(new Blob([content], {type: mediaType}));
            image.src = url;
        });
    }
}
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import * as ajvSchemas0 from '../../lib/validate-schemas.js'; +import { +    BlobWriter as BlobWriter0, +    TextWriter as TextWriter0, +    Uint8ArrayReader as Uint8ArrayReader0, +    ZipReader as ZipReader0, +    configure +} from '../../lib/zip.js'; +import {stringReverse} from '../core.js'; +import {ExtensionError} from '../core/extension-error.js'; +import {parseJson} from '../core/json.js'; +import {MediaUtil} from '../media/media-util.js'; + +const ajvSchemas = /** @type {import('dictionary-importer').CompiledSchemaValidators} */ (/** @type {unknown} */ (ajvSchemas0)); +const BlobWriter = /** @type {typeof import('@zip.js/zip.js').BlobWriter} */ (/** @type {unknown} */ (BlobWriter0)); +const TextWriter = /** @type {typeof import('@zip.js/zip.js').TextWriter} */ (/** @type {unknown} */ (TextWriter0)); +const Uint8ArrayReader = /** @type {typeof import('@zip.js/zip.js').Uint8ArrayReader} */ (/** @type {unknown} */ (Uint8ArrayReader0)); +const ZipReader = /** @type {typeof import('@zip.js/zip.js').ZipReader} */ (/** @type {unknown} */ (ZipReader0)); + +export class DictionaryImporter { +    /** +     * @param {import('dictionary-importer-media-loader').GenericMediaLoader} mediaLoader +     * @param {import('dictionary-importer').OnProgressCallback} [onProgress] +     */ +    constructor(mediaLoader, onProgress) { +        /** @type {import('dictionary-importer-media-loader').GenericMediaLoader} */ +        this._mediaLoader = mediaLoader; +        /** @type {import('dictionary-importer').OnProgressCallback} */ +        this._onProgress = typeof onProgress === 'function' ? 
onProgress : () => {}; +        /** @type {import('dictionary-importer').ProgressData} */ +        this._progressData = this._createProgressData(); +    } + +    /** +     * @param {import('./dictionary-database.js').DictionaryDatabase} dictionaryDatabase +     * @param {ArrayBuffer} archiveContent +     * @param {import('dictionary-importer').ImportDetails} details +     * @returns {Promise<import('dictionary-importer').ImportResult>} +     */ +    async importDictionary(dictionaryDatabase, archiveContent, details) { +        if (!dictionaryDatabase) { +            throw new Error('Invalid database'); +        } +        if (!dictionaryDatabase.isPrepared()) { +            throw new Error('Database is not ready'); +        } + +        this._progressReset(); + +        configure({ +            workerScripts: { +                deflate: ['../../lib/z-worker.js'], +                inflate: ['../../lib/z-worker.js'] +            } +        }); + +        // Read archive +        const zipFileReader = new Uint8ArrayReader(new Uint8Array(archiveContent)); +        const zipReader = new ZipReader(zipFileReader); +        const zipEntries = await zipReader.getEntries(); +        /** @type {import('dictionary-importer').ArchiveFileMap} */ +        const fileMap = new Map(); +        for (const entry of zipEntries) { +            fileMap.set(entry.filename, entry); +        } +        // Read and validate index +        const indexFileName = 'index.json'; +        const indexFile = fileMap.get(indexFileName); +        if (typeof indexFile === 'undefined') { +            throw new Error('No dictionary index found in archive'); +        } +        const indexFile2 = /** @type {import('@zip.js/zip.js').Entry} */ (indexFile); + +        const indexContent = await this._getData(indexFile2, new TextWriter()); +        const index = /** @type {import('dictionary-data').Index} */ (parseJson(indexContent)); + +        if (!ajvSchemas.dictionaryIndex(index)) { +            throw 
this._formatAjvSchemaError(ajvSchemas.dictionaryIndex, indexFileName); +        } + +        const dictionaryTitle = index.title; +        const version = typeof index.format === 'number' ? index.format : index.version; + +        if (typeof version !== 'number' || !dictionaryTitle || !index.revision) { +            throw new Error('Unrecognized dictionary format'); +        } + +        // Verify database is not already imported +        if (await dictionaryDatabase.dictionaryExists(dictionaryTitle)) { +            throw new Error('Dictionary is already imported'); +        } + +        // Load schemas +        this._progressNextStep(0); +        const dataBankSchemas = this._getDataBankSchemas(version); + +        // Files +        /** @type {import('dictionary-importer').QueryDetails} */ +        const queryDetails = new Map([ +            ['termFiles', /^term_bank_(\d+)\.json$/], +            ['termMetaFiles', /^term_meta_bank_(\d+)\.json$/], +            ['kanjiFiles', /^kanji_bank_(\d+)\.json$/], +            ['kanjiMetaFiles', /^kanji_meta_bank_(\d+)\.json$/], +            ['tagFiles', /^tag_bank_(\d+)\.json$/] +        ]); +        const {termFiles, termMetaFiles, kanjiFiles, kanjiMetaFiles, tagFiles} = Object.fromEntries(this._getArchiveFiles(fileMap, queryDetails)); + +        // Load data +        this._progressNextStep(termFiles.length + termMetaFiles.length + kanjiFiles.length + kanjiMetaFiles.length + tagFiles.length); +        const termList = await ( +            version === 1 ? 
+            this._readFileSequence(termFiles, this._convertTermBankEntryV1.bind(this), dataBankSchemas[0], dictionaryTitle) : +            this._readFileSequence(termFiles, this._convertTermBankEntryV3.bind(this), dataBankSchemas[0], dictionaryTitle) +        ); +        const termMetaList = await this._readFileSequence(termMetaFiles, this._convertTermMetaBankEntry.bind(this), dataBankSchemas[1], dictionaryTitle); +        const kanjiList = await ( +            version === 1 ? +            this._readFileSequence(kanjiFiles, this._convertKanjiBankEntryV1.bind(this), dataBankSchemas[2], dictionaryTitle) : +            this._readFileSequence(kanjiFiles, this._convertKanjiBankEntryV3.bind(this), dataBankSchemas[2], dictionaryTitle) +        ); +        const kanjiMetaList = await this._readFileSequence(kanjiMetaFiles, this._convertKanjiMetaBankEntry.bind(this), dataBankSchemas[3], dictionaryTitle); +        const tagList = await this._readFileSequence(tagFiles, this._convertTagBankEntry.bind(this), dataBankSchemas[4], dictionaryTitle); +        this._addOldIndexTags(index, tagList, dictionaryTitle); + +        // Prefix wildcard support +        const prefixWildcardsSupported = !!details.prefixWildcardsSupported; +        if (prefixWildcardsSupported) { +            for (const entry of termList) { +                entry.expressionReverse = stringReverse(entry.expression); +                entry.readingReverse = stringReverse(entry.reading); +            } +        } + +        // Extended data support +        this._progressNextStep(termList.length); +        const formatProgressInterval = 1000; +        /** @type {import('dictionary-importer').ImportRequirement[]} */ +        const requirements = []; +        for (let i = 0, ii = termList.length; i < ii; ++i) { +            const entry = termList[i]; +            const glossaryList = entry.glossary; +            for (let j = 0, jj = glossaryList.length; j < jj; ++j) { +                const glossary = 
glossaryList[j]; +                if (typeof glossary !== 'object' || glossary === null) { continue; } +                glossaryList[j] = this._formatDictionaryTermGlossaryObject(glossary, entry, requirements); +            } +            if ((i % formatProgressInterval) === 0) { +                this._progressData.index = i; +                this._progress(); +            } +        } +        this._progress(); + +        // Async requirements +        this._progressNextStep(requirements.length); +        const {media} = await this._resolveAsyncRequirements(requirements, fileMap); + +        // Add dictionary descriptor +        this._progressNextStep(termList.length + termMetaList.length + kanjiList.length + kanjiMetaList.length + tagList.length + media.length); + +        /** @type {import('dictionary-importer').SummaryCounts} */ +        const counts = { +            terms: {total: termList.length}, +            termMeta: this._getMetaCounts(termMetaList), +            kanji: {total: kanjiList.length}, +            kanjiMeta: this._getMetaCounts(kanjiMetaList), +            tagMeta: {total: tagList.length}, +            media: {total: media.length} +        }; +        const summary = this._createSummary(dictionaryTitle, version, index, {prefixWildcardsSupported, counts}); +        dictionaryDatabase.bulkAdd('dictionaries', [summary], 0, 1); + +        // Add data +        /** @type {Error[]} */ +        const errors = []; +        const maxTransactionLength = 1000; + +        /** +         * @template {import('dictionary-database').ObjectStoreName} T +         * @param {T} objectStoreName +         * @param {import('dictionary-database').ObjectStoreData<T>[]} entries +         */ +        const bulkAdd = async (objectStoreName, entries) => { +            const ii = entries.length; +            for (let i = 0; i < ii; i += maxTransactionLength) { +                const count = Math.min(maxTransactionLength, ii - i); + +                try { +                    
await dictionaryDatabase.bulkAdd(objectStoreName, entries, i, count); +                } catch (e) { +                    errors.push(e instanceof Error ? e : new Error(`${e}`)); +                } + +                this._progressData.index += count; +                this._progress(); +            } +        }; + +        await bulkAdd('terms', termList); +        await bulkAdd('termMeta', termMetaList); +        await bulkAdd('kanji', kanjiList); +        await bulkAdd('kanjiMeta', kanjiMetaList); +        await bulkAdd('tagMeta', tagList); +        await bulkAdd('media', media); + +        this._progress(); + +        return {result: summary, errors}; +    } + +    /** +     * @returns {import('dictionary-importer').ProgressData} +     */ +    _createProgressData() { +        return { +            stepIndex: 0, +            stepCount: 6, +            index: 0, +            count: 0 +        }; +    } + +    /** */ +    _progressReset() { +        this._progressData = this._createProgressData(); +        this._progress(); +    } + +    /** +     * @param {number} count +     */ +    _progressNextStep(count) { +        ++this._progressData.stepIndex; +        this._progressData.index = 0; +        this._progressData.count = count; +        this._progress(); +    } + +    /** */ +    _progress() { +        this._onProgress(this._progressData); +    } + +    /** +     * @param {string} dictionaryTitle +     * @param {number} version +     * @param {import('dictionary-data').Index} index +     * @param {{prefixWildcardsSupported: boolean, counts: import('dictionary-importer').SummaryCounts}} details +     * @returns {import('dictionary-importer').Summary} +     */ +    _createSummary(dictionaryTitle, version, index, details) { +        const indexSequenced = index.sequenced; +        const {prefixWildcardsSupported, counts} = details; + +        /** @type {import('dictionary-importer').Summary} */ +        const summary = { +            title: dictionaryTitle, +      
      revision: index.revision, +            sequenced: typeof indexSequenced === 'boolean' && indexSequenced, +            version, +            importDate: Date.now(), +            prefixWildcardsSupported, +            counts +        }; + +        const {author, url, description, attribution, frequencyMode} = index; +        if (typeof author === 'string') { summary.author = author; } +        if (typeof url === 'string') { summary.url = url; } +        if (typeof description === 'string') { summary.description = description; } +        if (typeof attribution === 'string') { summary.attribution = attribution; } +        if (typeof frequencyMode === 'string') { summary.frequencyMode = frequencyMode; } + +        return summary; +    } + +    /** +     * @param {import('ajv').ValidateFunction} schema +     * @param {string} fileName +     * @returns {ExtensionError} +     */ +    _formatAjvSchemaError(schema, fileName) { +        const e2 = new ExtensionError(`Dictionary has invalid data in '${fileName}'`); +        e2.data = schema.errors; + +        return e2; +    } + +    /** +     * @param {number} version +     * @returns {import('dictionary-importer').CompiledSchemaNameArray} +     */ +    _getDataBankSchemas(version) { +        const termBank = ( +            version === 1 ? +            'dictionaryTermBankV1' : +            'dictionaryTermBankV3' +        ); +        const termMetaBank = 'dictionaryTermMetaBankV3'; +        const kanjiBank = ( +            version === 1 ? 
+            'dictionaryKanjiBankV1' : +            'dictionaryKanjiBankV3' +        ); +        const kanjiMetaBank = 'dictionaryKanjiMetaBankV3'; +        const tagBank = 'dictionaryTagBankV3'; + +        return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; +    } + +    /** +     * @param {import('dictionary-data').TermGlossaryText|import('dictionary-data').TermGlossaryImage|import('dictionary-data').TermGlossaryStructuredContent} data +     * @param {import('dictionary-database').DatabaseTermEntry} entry +     * @param {import('dictionary-importer').ImportRequirement[]} requirements +     * @returns {import('dictionary-data').TermGlossary} +     * @throws {Error} +     */ +    _formatDictionaryTermGlossaryObject(data, entry, requirements) { +        switch (data.type) { +            case 'text': +                return data.text; +            case 'image': +                return this._formatDictionaryTermGlossaryImage(data, entry, requirements); +            case 'structured-content': +                return this._formatStructuredContent(data, entry, requirements); +            default: +                throw new Error(`Unhandled data type: ${/** @type {import('core').SerializableObject} */ (data).type}`); +        } +    } + +    /** +     * @param {import('dictionary-data').TermGlossaryImage} data +     * @param {import('dictionary-database').DatabaseTermEntry} entry +     * @param {import('dictionary-importer').ImportRequirement[]} requirements +     * @returns {import('dictionary-data').TermGlossaryImage} +     */ +    _formatDictionaryTermGlossaryImage(data, entry, requirements) { +        /** @type {import('dictionary-data').TermGlossaryImage} */ +        const target = { +            type: 'image', +            path: '' // Will be populated during requirement resolution +        }; +        requirements.push({type: 'image', target, source: data, entry}); +        return target; +    } + +    /** +     * @param 
{import('dictionary-data').TermGlossaryStructuredContent} data +     * @param {import('dictionary-database').DatabaseTermEntry} entry +     * @param {import('dictionary-importer').ImportRequirement[]} requirements +     * @returns {import('dictionary-data').TermGlossaryStructuredContent} +     */ +    _formatStructuredContent(data, entry, requirements) { +        const content = this._prepareStructuredContent(data.content, entry, requirements); +        return { +            type: 'structured-content', +            content +        }; +    } + +    /** +     * @param {import('structured-content').Content} content +     * @param {import('dictionary-database').DatabaseTermEntry} entry +     * @param {import('dictionary-importer').ImportRequirement[]} requirements +     * @returns {import('structured-content').Content} +     */ +    _prepareStructuredContent(content, entry, requirements) { +        if (typeof content === 'string' || !(typeof content === 'object' && content !== null)) { +            return content; +        } +        if (Array.isArray(content)) { +            for (let i = 0, ii = content.length; i < ii; ++i) { +                content[i] = this._prepareStructuredContent(content[i], entry, requirements); +            } +            return content; +        } +        const {tag} = content; +        switch (tag) { +            case 'img': +                return this._prepareStructuredContentImage(content, entry, requirements); +        } +        const childContent = content.content; +        if (typeof childContent !== 'undefined') { +            content.content = this._prepareStructuredContent(childContent, entry, requirements); +        } +        return content; +    } + +    /** +     * @param {import('structured-content').ImageElement} content +     * @param {import('dictionary-database').DatabaseTermEntry} entry +     * @param {import('dictionary-importer').ImportRequirement[]} requirements +     * @returns 
{import('structured-content').ImageElement} +     */ +    _prepareStructuredContentImage(content, entry, requirements) { +        /** @type {import('structured-content').ImageElement} */ +        const target = { +            tag: 'img', +            path: '' // Will be populated during requirement resolution +        }; +        requirements.push({type: 'structured-content-image', target, source: content, entry}); +        return target; +    } + +    /** +     * @param {import('dictionary-importer').ImportRequirement[]} requirements +     * @param {import('dictionary-importer').ArchiveFileMap} fileMap +     * @returns {Promise<{media: import('dictionary-database').MediaDataArrayBufferContent[]}>} +     */ +    async _resolveAsyncRequirements(requirements, fileMap) { +        /** @type {Map<string, import('dictionary-database').MediaDataArrayBufferContent>} */ +        const media = new Map(); +        /** @type {import('dictionary-importer').ImportRequirementContext} */ +        const context = {fileMap, media}; + +        for (const requirement of requirements) { +            await this._resolveAsyncRequirement(context, requirement); +        } + +        return { +            media: [...media.values()] +        }; +    } + +    /** +     * @param {import('dictionary-importer').ImportRequirementContext} context +     * @param {import('dictionary-importer').ImportRequirement} requirement +     */ +    async _resolveAsyncRequirement(context, requirement) { +        switch (requirement.type) { +            case 'image': +                await this._resolveDictionaryTermGlossaryImage( +                    context, +                    requirement.target, +                    requirement.source, +                    requirement.entry +                ); +                break; +            case 'structured-content-image': +                await this._resolveStructuredContentImage( +                    context, +                    requirement.target, +                
    requirement.source, +                    requirement.entry +                ); +                break; +            default: +                return; +        } +        ++this._progressData.index; +        this._progress(); +    } + +    /** +     * @param {import('dictionary-importer').ImportRequirementContext} context +     * @param {import('dictionary-data').TermGlossaryImage} target +     * @param {import('dictionary-data').TermGlossaryImage} source +     * @param {import('dictionary-database').DatabaseTermEntry} entry +     */ +    async _resolveDictionaryTermGlossaryImage(context, target, source, entry) { +        await this._createImageData(context, target, source, entry); +    } + +    /** +     * @param {import('dictionary-importer').ImportRequirementContext} context +     * @param {import('structured-content').ImageElement} target +     * @param {import('structured-content').ImageElement} source +     * @param {import('dictionary-database').DatabaseTermEntry} entry +     */ +    async _resolveStructuredContentImage(context, target, source, entry) { +        const {verticalAlign, sizeUnits} = source; +        await this._createImageData(context, target, source, entry); +        if (typeof verticalAlign === 'string') { target.verticalAlign = verticalAlign; } +        if (typeof sizeUnits === 'string') { target.sizeUnits = sizeUnits; } +    } + +    /** +     * @param {import('dictionary-importer').ImportRequirementContext} context +     * @param {import('structured-content').ImageElementBase} target +     * @param {import('structured-content').ImageElementBase} source +     * @param {import('dictionary-database').DatabaseTermEntry} entry +     */ +    async _createImageData(context, target, source, entry) { +        const { +            path, +            width: preferredWidth, +            height: preferredHeight, +            title, +            alt, +            description, +            pixelated, +            imageRendering, +            
appearance, +            background, +            collapsed, +            collapsible +        } = source; +        const {width, height} = await this._getImageMedia(context, path, entry); +        target.path = path; +        target.width = width; +        target.height = height; +        if (typeof preferredWidth === 'number') { target.preferredWidth = preferredWidth; } +        if (typeof preferredHeight === 'number') { target.preferredHeight = preferredHeight; } +        if (typeof title === 'string') { target.title = title; } +        if (typeof alt === 'string') { target.alt = alt; } +        if (typeof description === 'string') { target.description = description; } +        if (typeof pixelated === 'boolean') { target.pixelated = pixelated; } +        if (typeof imageRendering === 'string') { target.imageRendering = imageRendering; } +        if (typeof appearance === 'string') { target.appearance = appearance; } +        if (typeof background === 'boolean') { target.background = background; } +        if (typeof collapsed === 'boolean') { target.collapsed = collapsed; } +        if (typeof collapsible === 'boolean') { target.collapsible = collapsible; } +    } + +    /** +     * @param {import('dictionary-importer').ImportRequirementContext} context +     * @param {string} path +     * @param {import('dictionary-database').DatabaseTermEntry} entry +     * @returns {Promise<import('dictionary-database').MediaDataArrayBufferContent>} +     */ +    async _getImageMedia(context, path, entry) { +        const {media} = context; +        const {dictionary} = entry; + +        /** +         * @param {string} message +         * @returns {Error} +         */ +        const createError = (message) => { +            const {expression, reading} = entry; +            const readingSource = reading.length > 0 ? 
` (${reading})` : ''; +            return new Error(`${message} at path ${JSON.stringify(path)} for ${expression}${readingSource} in ${dictionary}`); +        }; + +        // Check if already added +        let mediaData = media.get(path); +        if (typeof mediaData !== 'undefined') { +            if (MediaUtil.getFileExtensionFromImageMediaType(mediaData.mediaType) === null) { +                throw createError('Media file is not a valid image'); +            } +            return mediaData; +        } + +        // Find file in archive +        const file = context.fileMap.get(path); +        if (typeof file === 'undefined') { +            throw createError('Could not find image'); +        } + +        // Load file content +        let content = await (await this._getData(file, new BlobWriter())).arrayBuffer(); + +        const mediaType = MediaUtil.getImageMediaTypeFromFileName(path); +        if (mediaType === null) { +            throw createError('Could not determine media type for image'); +        } + +        // Load image data +        let width; +        let height; +        try { +            ({content, width, height} = await this._mediaLoader.getImageDetails(content, mediaType)); +        } catch (e) { +            throw createError('Could not load image'); +        } + +        // Create image data +        mediaData = { +            dictionary, +            path, +            mediaType, +            width, +            height, +            content +        }; +        media.set(path, mediaData); + +        return mediaData; +    } + +    /** +     * @param {import('dictionary-data').TermV1} entry +     * @param {string} dictionary +     * @returns {import('dictionary-database').DatabaseTermEntry} +     */ +    _convertTermBankEntryV1(entry, dictionary) { +        let [expression, reading, definitionTags, rules, score, ...glossary] = entry; +        expression = this._normalizeTermOrReading(expression); +        reading = 
this._normalizeTermOrReading(reading.length > 0 ? reading : expression); +        return {expression, reading, definitionTags, rules, score, glossary, dictionary}; +    } + +    /** +     * @param {import('dictionary-data').TermV3} entry +     * @param {string} dictionary +     * @returns {import('dictionary-database').DatabaseTermEntry} +     */ +    _convertTermBankEntryV3(entry, dictionary) { +        let [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; +        expression = this._normalizeTermOrReading(expression); +        reading = this._normalizeTermOrReading(reading.length > 0 ? reading : expression); +        return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags, dictionary}; +    } + +    /** +     * @param {import('dictionary-data').TermMeta} entry +     * @param {string} dictionary +     * @returns {import('dictionary-database').DatabaseTermMeta} +     */ +    _convertTermMetaBankEntry(entry, dictionary) { +        const [expression, mode, data] = entry; +        return /** @type {import('dictionary-database').DatabaseTermMeta} */ ({expression, mode, data, dictionary}); +    } + +    /** +     * @param {import('dictionary-data').KanjiV1} entry +     * @param {string} dictionary +     * @returns {import('dictionary-database').DatabaseKanjiEntry} +     */ +    _convertKanjiBankEntryV1(entry, dictionary) { +        const [character, onyomi, kunyomi, tags, ...meanings] = entry; +        return {character, onyomi, kunyomi, tags, meanings, dictionary}; +    } + +    /** +     * @param {import('dictionary-data').KanjiV3} entry +     * @param {string} dictionary +     * @returns {import('dictionary-database').DatabaseKanjiEntry} +     */ +    _convertKanjiBankEntryV3(entry, dictionary) { +        const [character, onyomi, kunyomi, tags, meanings, stats] = entry; +        return {character, onyomi, kunyomi, tags, meanings, stats, dictionary}; +    } + +    /** +     * @param 
{import('dictionary-data').KanjiMeta} entry +     * @param {string} dictionary +     * @returns {import('dictionary-database').DatabaseKanjiMeta} +     */ +    _convertKanjiMetaBankEntry(entry, dictionary) { +        const [character, mode, data] = entry; +        return {character, mode, data, dictionary}; +    } + +    /** +     * @param {import('dictionary-data').Tag} entry +     * @param {string} dictionary +     * @returns {import('dictionary-database').Tag} +     */ +    _convertTagBankEntry(entry, dictionary) { +        const [name, category, order, notes, score] = entry; +        return {name, category, order, notes, score, dictionary}; +    } + +    /** +     * @param {import('dictionary-data').Index} index +     * @param {import('dictionary-database').Tag[]} results +     * @param {string} dictionary +     */ +    _addOldIndexTags(index, results, dictionary) { +        const {tagMeta} = index; +        if (typeof tagMeta !== 'object' || tagMeta === null) { return; } +        for (const [name, value] of Object.entries(tagMeta)) { +            const {category, order, notes, score} = value; +            results.push({name, category, order, notes, score, dictionary}); +        } +    } + +    /** +     * @param {import('dictionary-importer').ArchiveFileMap} fileMap +     * @param {import('dictionary-importer').QueryDetails} queryDetails +     * @returns {import('dictionary-importer').QueryResult} +     */ +    _getArchiveFiles(fileMap, queryDetails) { +        /** @type {import('dictionary-importer').QueryResult} */ +        const results = new Map(); +        for (const [name, value] of fileMap.entries()) { +            for (const [fileType, fileNameFormat] of queryDetails.entries()) { +                let entries = results.get(fileType); +                if (typeof entries === 'undefined') { +                    entries = []; +                    results.set(fileType, entries); +                } + +                if (fileNameFormat.test(name)) { +         
           entries.push(value); +                    break; +                } +            } +        } +        return results; +    } + +    /** +     * @template [TEntry=unknown] +     * @template [TResult=unknown] +     * @param {import('@zip.js/zip.js').Entry[]} files +     * @param {(entry: TEntry, dictionaryTitle: string) => TResult} convertEntry +     * @param {import('dictionary-importer').CompiledSchemaName} schemaName +     * @param {string} dictionaryTitle +     * @returns {Promise<TResult[]>} +     */ +    async _readFileSequence(files, convertEntry, schemaName, dictionaryTitle) { +        const progressData = this._progressData; +        let startIndex = 0; + +        const results = []; +        for (const file of files) { +            const content = await this._getData(file, new TextWriter()); +            const entries = /** @type {unknown} */ (parseJson(content)); + +            startIndex = progressData.index; +            this._progress(); + +            const schema = ajvSchemas[schemaName]; +            if (!schema(entries)) { +                throw this._formatAjvSchemaError(schema, file.filename); +            } + +            progressData.index = startIndex + 1; +            this._progress(); + +            if (Array.isArray(entries)) { +                for (const entry of entries) { +                    results.push(convertEntry(/** @type {TEntry} */ (entry), dictionaryTitle)); +                } +            } +        } +        return results; +    } + +    /** +     * @param {import('dictionary-database').DatabaseTermMeta[]|import('dictionary-database').DatabaseKanjiMeta[]} metaList +     * @returns {import('dictionary-importer').SummaryMetaCount} +     */ +    _getMetaCounts(metaList) { +        /** @type {Map<string, number>} */ +        const countsMap = new Map(); +        for (const {mode} of metaList) { +            let count = countsMap.get(mode); +            count = typeof count !== 'undefined' ? 
count + 1 : 1; +            countsMap.set(mode, count); +        } +        /** @type {import('dictionary-importer').SummaryMetaCount} */ +        const counts = {total: metaList.length}; +        for (const [key, value] of countsMap.entries()) { +            if (Object.prototype.hasOwnProperty.call(counts, key)) { continue; } +            counts[key] = value; +        } +        return counts; +    } + +    /** +     * @param {string} text +     * @returns {string} +     */ +    _normalizeTermOrReading(text) { +        // Note: this function should not perform String.normalize on the text, +        // as it will normalize characters in an undesirable way. +        // Thus, this function is currently a no-op. +        // Example: +        // - '\u9038'.normalize('NFC') => '\u9038' (逸) +        // - '\ufa67'.normalize('NFC') => '\u9038' (逸 => 逸) +        return text; +    } + +    /** +     * @template [T=unknown] +     * @param {import('@zip.js/zip.js').Entry} entry +     * @param {import('@zip.js/zip.js').Writer<T>|import('@zip.js/zip.js').WritableWriter} writer +     * @returns {Promise<T>} +     */ +    async _getData(entry, writer) { +        if (typeof entry.getData === 'undefined') { +            throw new Error(`Cannot read ${entry.filename}`); +        } +        return await entry.getData(writer); +    } +} diff --git a/ext/js/dictionary/dictionary-worker-handler.js b/ext/js/dictionary/dictionary-worker-handler.js new file mode 100644 index 00000000..9a724386 --- /dev/null +++ b/ext/js/dictionary/dictionary-worker-handler.js @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2023  Yomitan Authors + * Copyright (C) 2021-2022  Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {ExtensionError} from '../core/extension-error.js'; +import {DictionaryDatabase} from './dictionary-database.js'; +import {DictionaryImporter} from './dictionary-importer.js'; +import {DictionaryWorkerMediaLoader} from './dictionary-worker-media-loader.js'; + +export class DictionaryWorkerHandler { +    constructor() { +        /** @type {DictionaryWorkerMediaLoader} */ +        this._mediaLoader = new DictionaryWorkerMediaLoader(); +    } + +    /** */ +    prepare() { +        self.addEventListener('message', this._onMessage.bind(this), false); +    } + +    // Private + +    /** +     * @param {MessageEvent<import('dictionary-worker-handler').Message>} event +     */ +    _onMessage(event) { +        const {action, params} = event.data; +        switch (action) { +            case 'importDictionary': +                this._onMessageWithProgress(params, this._importDictionary.bind(this)); +                break; +            case 'deleteDictionary': +                this._onMessageWithProgress(params, this._deleteDictionary.bind(this)); +                break; +            case 'getDictionaryCounts': +                this._onMessageWithProgress(params, this._getDictionaryCounts.bind(this)); +                break; +            case 'getImageDetails.response': +                this._mediaLoader.handleMessage(params); +                break; +        } +    } + +    /** +     * @template [T=unknown] +     * @param {T} params +     * @param {(details: T, onProgress: import('dictionary-worker-handler').OnProgressCallback) => Promise<unknown>} handler +     
*/ +    async _onMessageWithProgress(params, handler) { +        /** +         * @param {...unknown} args +         */ +        const onProgress = (...args) => { +            self.postMessage({ +                action: 'progress', +                params: {args} +            }); +        }; +        let response; +        try { +            const result = await handler(params, onProgress); +            response = {result}; +        } catch (e) { +            response = {error: ExtensionError.serialize(e)}; +        } +        self.postMessage({action: 'complete', params: response}); +    } + +    /** +     * @param {import('dictionary-worker-handler').ImportDictionaryMessageParams} details +     * @param {import('dictionary-worker-handler').OnProgressCallback} onProgress +     * @returns {Promise<import('dictionary-worker').MessageCompleteResultSerialized>} +     */ +    async _importDictionary({details, archiveContent}, onProgress) { +        const dictionaryDatabase = await this._getPreparedDictionaryDatabase(); +        try { +            const dictionaryImporter = new DictionaryImporter(this._mediaLoader, onProgress); +            const {result, errors} = await dictionaryImporter.importDictionary(dictionaryDatabase, archiveContent, details); +            return { +                result, +                errors: errors.map((error) => ExtensionError.serialize(error)) +            }; +        } finally { +            dictionaryDatabase.close(); +        } +    } + +    /** +     * @param {import('dictionary-worker-handler').DeleteDictionaryMessageParams} details +     * @param {import('dictionary-database').DeleteDictionaryProgressCallback} onProgress +     * @returns {Promise<void>} +     */ +    async _deleteDictionary({dictionaryTitle}, onProgress) { +        const dictionaryDatabase = await this._getPreparedDictionaryDatabase(); +        try { +            return await dictionaryDatabase.deleteDictionary(dictionaryTitle, 1000, onProgress); +        } finally 
{ +            dictionaryDatabase.close(); +        } +    } + +    /** +     * @param {import('dictionary-worker-handler').GetDictionaryCountsMessageParams} details +     * @returns {Promise<import('dictionary-database').DictionaryCounts>} +     */ +    async _getDictionaryCounts({dictionaryNames, getTotal}) { +        const dictionaryDatabase = await this._getPreparedDictionaryDatabase(); +        try { +            return await dictionaryDatabase.getDictionaryCounts(dictionaryNames, getTotal); +        } finally { +            dictionaryDatabase.close(); +        } +    } + +    /** +     * @returns {Promise<DictionaryDatabase>} +     */ +    async _getPreparedDictionaryDatabase() { +        const dictionaryDatabase = new DictionaryDatabase(); +        await dictionaryDatabase.prepare(); +        return dictionaryDatabase; +    } +} diff --git a/ext/js/dictionary/dictionary-worker-main.js b/ext/js/dictionary/dictionary-worker-main.js new file mode 100644 index 00000000..8ae283b8 --- /dev/null +++ b/ext/js/dictionary/dictionary-worker-main.js @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2023  Yomitan Authors + * Copyright (C) 2021-2022  Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {log} from '../core.js'; +import {DictionaryWorkerHandler} from './dictionary-worker-handler.js'; + +/** Entry point. 
*/ +function main() { +    try { +        const dictionaryWorkerHandler = new DictionaryWorkerHandler(); +        dictionaryWorkerHandler.prepare(); +    } catch (e) { +        log.error(e); +    } +} + +main(); diff --git a/ext/js/dictionary/dictionary-worker-media-loader.js b/ext/js/dictionary/dictionary-worker-media-loader.js new file mode 100644 index 00000000..e19a13d3 --- /dev/null +++ b/ext/js/dictionary/dictionary-worker-media-loader.js @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2023  Yomitan Authors + * Copyright (C) 2021-2022  Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {generateId} from '../core.js'; +import {ExtensionError} from '../core/extension-error.js'; + +/** + * Class used for loading and validating media from a worker thread + * during the dictionary import process. + */ +export class DictionaryWorkerMediaLoader { +    /** +     * Creates a new instance of the media loader. +     */ +    constructor() { +        /** @type {Map<string, {resolve: (result: import('dictionary-worker-media-loader').ImageDetails) => void, reject: (reason?: import('core').RejectionReason) => void}>} */ +        this._requests = new Map(); +    } + +    /** +     * Handles a response message posted to the worker thread. +     * @param {import('dictionary-worker-media-loader').HandleMessageParams} params Details of the response. 
+     */ +    handleMessage(params) { +        const {id} = params; +        const request = this._requests.get(id); +        if (typeof request === 'undefined') { return; } +        this._requests.delete(id); +        const {error} = params; +        if (typeof error !== 'undefined') { +            request.reject(ExtensionError.deserialize(error)); +        } else { +            request.resolve(params.result); +        } +    } + +    /** @type {import('dictionary-importer-media-loader').GetImageDetailsFunction} */ +    getImageDetails(content, mediaType) { +        return new Promise((resolve, reject) => { +            const id = generateId(16); +            this._requests.set(id, {resolve, reject}); +            // This is executed in a Worker context, so the self needs to be force cast +            /** @type {Worker} */ (/** @type {unknown} */ (self)).postMessage({ +                action: 'getImageDetails', +                params: {id, content, mediaType} +            }, [content]); +        }); +    } +} diff --git a/ext/js/dictionary/dictionary-worker.js b/ext/js/dictionary/dictionary-worker.js new file mode 100644 index 00000000..669c65ac --- /dev/null +++ b/ext/js/dictionary/dictionary-worker.js @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2023  Yomitan Authors + * Copyright (C) 2021-2022  Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. 
+ */ + +import {ExtensionError} from '../core/extension-error.js'; +import {DictionaryImporterMediaLoader} from './dictionary-importer-media-loader.js'; + +export class DictionaryWorker { +    constructor() { +        /** @type {DictionaryImporterMediaLoader} */ +        this._dictionaryImporterMediaLoader = new DictionaryImporterMediaLoader(); +    } + +    /** +     * @param {ArrayBuffer} archiveContent +     * @param {import('dictionary-importer').ImportDetails} details +     * @param {?import('dictionary-worker').ImportProgressCallback} onProgress +     * @returns {Promise<import('dictionary-importer').ImportResult>} +     */ +    importDictionary(archiveContent, details, onProgress) { +        return this._invoke( +            'importDictionary', +            {details, archiveContent}, +            [archiveContent], +            onProgress, +            this._formatImportDictionaryResult.bind(this) +        ); +    } + +    /** +     * @param {string} dictionaryTitle +     * @param {?import('dictionary-worker').DeleteProgressCallback} onProgress +     * @returns {Promise<void>} +     */ +    deleteDictionary(dictionaryTitle, onProgress) { +        return this._invoke('deleteDictionary', {dictionaryTitle}, [], onProgress, null); +    } + +    /** +     * @param {string[]} dictionaryNames +     * @param {boolean} getTotal +     * @returns {Promise<import('dictionary-database').DictionaryCounts>} +     */ +    getDictionaryCounts(dictionaryNames, getTotal) { +        return this._invoke('getDictionaryCounts', {dictionaryNames, getTotal}, [], null, null); +    } + +    // Private + +    /** +     * @template [TParams=import('core').SerializableObject] +     * @template [TResponseRaw=unknown] +     * @template [TResponse=unknown] +     * @param {string} action +     * @param {TParams} params +     * @param {Transferable[]} transfer +     * @param {?(arg: import('core').SafeAny) => void} onProgress +     * @param {?(result: TResponseRaw) => TResponse} formatResult + 
    */ +    _invoke(action, params, transfer, onProgress, formatResult) { +        return new Promise((resolve, reject) => { +            const worker = new Worker('/js/dictionary/dictionary-worker-main.js', {type: 'module'}); +            /** @type {import('dictionary-worker').InvokeDetails<TResponseRaw, TResponse>} */ +            const details = { +                complete: false, +                worker, +                resolve, +                reject, +                onMessage: null, +                onProgress, +                formatResult +            }; +            // Ugly typecast below due to not being able to explicitly state the template types +            /** @type {(event: MessageEvent<import('dictionary-worker').MessageData<TResponseRaw>>) => void} */ +            const onMessage = /** @type {(details: import('dictionary-worker').InvokeDetails<TResponseRaw, TResponse>, event: MessageEvent<import('dictionary-worker').MessageData<TResponseRaw>>) => void} */ (this._onMessage).bind(this, details); +            details.onMessage = onMessage; +            worker.addEventListener('message', onMessage); +            worker.postMessage({action, params}, transfer); +        }); +    } + +    /** +     * @template [TResponseRaw=unknown] +     * @template [TResponse=unknown] +     * @param {import('dictionary-worker').InvokeDetails<TResponseRaw, TResponse>} details +     * @param {MessageEvent<import('dictionary-worker').MessageData<TResponseRaw>>} event +     */ +    _onMessage(details, event) { +        if (details.complete) { return; } +        const {action, params} = event.data; +        switch (action) { +            case 'complete': +                { +                    const {worker, resolve, reject, onMessage, formatResult} = details; +                    if (worker === null || onMessage === null || resolve === null || reject === null) { return; } +                    details.complete = true; +                    details.worker = null; +          
          details.resolve = null; +                    details.reject = null; +                    details.onMessage = null; +                    details.onProgress = null; +                    details.formatResult = null; +                    worker.removeEventListener('message', onMessage); +                    worker.terminate(); +                    this._onMessageComplete(params, resolve, reject, formatResult); +                } +                break; +            case 'progress': +                this._onMessageProgress(params, details.onProgress); +                break; +            case 'getImageDetails': +                { +                    const {worker} = details; +                    if (worker === null) { return; } +                    this._onMessageGetImageDetails(params, worker); +                } +                break; +        } +    } + +    /** +     * @template [TResponseRaw=unknown] +     * @template [TResponse=unknown] +     * @param {import('dictionary-worker').MessageCompleteParams<TResponseRaw>} params +     * @param {(result: TResponse) => void} resolve +     * @param {(reason?: import('core').RejectionReason) => void} reject +     * @param {?(result: TResponseRaw) => TResponse} formatResult +     */ +    _onMessageComplete(params, resolve, reject, formatResult) { +        const {error} = params; +        if (typeof error !== 'undefined') { +            reject(ExtensionError.deserialize(error)); +        } else { +            const {result} = params; +            if (typeof formatResult === 'function') { +                let result2; +                try { +                    result2 = formatResult(result); +                } catch (e) { +                    reject(e); +                    return; +                } +                resolve(result2); +            } else { +                // If formatResult is not provided, the response is assumed to be the same type +                // For some reason, eslint thinks the 
TResponse type is undefined +                // eslint-disable-next-line jsdoc/no-undefined-types +                resolve(/** @type {TResponse} */ (/** @type {unknown} */ (result))); +            } +        } +    } + +    /** +     * @param {import('dictionary-worker').MessageProgressParams} params +     * @param {?(...args: unknown[]) => void} onProgress +     */ +    _onMessageProgress(params, onProgress) { +        if (typeof onProgress !== 'function') { return; } +        const {args} = params; +        onProgress(...args); +    } + +    /** +     * @param {import('dictionary-worker').MessageGetImageDetailsParams} params +     * @param {Worker} worker +     */ +    async _onMessageGetImageDetails(params, worker) { +        const {id, content, mediaType} = params; +        /** @type {Transferable[]} */ +        const transfer = []; +        let response; +        try { +            const result = await this._dictionaryImporterMediaLoader.getImageDetails(content, mediaType, transfer); +            response = {id, result}; +        } catch (e) { +            response = {id, error: ExtensionError.serialize(e)}; +        } +        worker.postMessage({action: 'getImageDetails.response', params: response}, transfer); +    } + +    /** +     * @param {import('dictionary-worker').MessageCompleteResultSerialized} response +     * @returns {import('dictionary-worker').MessageCompleteResult} +     */ +    _formatImportDictionaryResult(response) { +        const {result, errors} = response; +        return { +            result, +            errors: errors.map((error) => ExtensionError.deserialize(error)) +        }; +    } +} |