From 6550495282780abaaa4c1aa4d33e3907fedf2927 Mon Sep 17 00:00:00 2001 From: StefanVukovic99 Date: Sat, 23 Dec 2023 11:29:48 +0100 Subject: move dictionary files to dictionary folder (#429) --- .eslintrc.json | 10 +- dev/jsconfig.json | 4 +- ext/js/background/backend.js | 2 +- ext/js/background/offscreen.js | 2 +- ext/js/data/sandbox/anki-note-data-creator.js | 2 +- ext/js/dictionary/dictionary-data-util.js | 391 ++++++++++ ext/js/dictionary/dictionary-database.js | 661 +++++++++++++++++ .../dictionary/dictionary-importer-media-loader.js | 50 ++ ext/js/dictionary/dictionary-importer.js | 796 +++++++++++++++++++++ ext/js/dictionary/dictionary-worker-handler.js | 137 ++++ ext/js/dictionary/dictionary-worker-main.js | 32 + .../dictionary/dictionary-worker-media-loader.js | 64 ++ ext/js/dictionary/dictionary-worker.js | 206 ++++++ ext/js/display/display-generator.js | 2 +- ext/js/language/dictionary-database.js | 661 ----------------- .../language/dictionary-importer-media-loader.js | 50 -- ext/js/language/dictionary-importer.js | 796 --------------------- ext/js/language/dictionary-worker-handler.js | 137 ---- ext/js/language/dictionary-worker-main.js | 32 - ext/js/language/dictionary-worker-media-loader.js | 64 -- ext/js/language/dictionary-worker.js | 206 ------ ext/js/language/sandbox/dictionary-data-util.js | 391 ---------- ext/js/language/translator.js | 2 +- ext/js/pages/settings/dictionary-controller.js | 2 +- .../pages/settings/dictionary-import-controller.js | 2 +- ext/js/templates/sandbox/anki-template-renderer.js | 2 +- test/database.test.js | 4 +- test/fixtures/translator-test.js | 4 +- types/ext/translator.d.ts | 2 +- 29 files changed, 2357 insertions(+), 2357 deletions(-) create mode 100644 ext/js/dictionary/dictionary-data-util.js create mode 100644 ext/js/dictionary/dictionary-database.js create mode 100644 ext/js/dictionary/dictionary-importer-media-loader.js create mode 100644 ext/js/dictionary/dictionary-importer.js create mode 100644 ext/js/dictionary/dictionary-worker-handler.js create mode 100644 ext/js/dictionary/dictionary-worker-main.js create mode 100644 ext/js/dictionary/dictionary-worker-media-loader.js create mode 100644 ext/js/dictionary/dictionary-worker.js delete mode 100644 ext/js/language/dictionary-database.js delete mode 100644 ext/js/language/dictionary-importer-media-loader.js delete mode 100644 ext/js/language/dictionary-importer.js delete mode 100644 ext/js/language/dictionary-worker-handler.js delete mode 100644 ext/js/language/dictionary-worker-main.js delete mode 100644 ext/js/language/dictionary-worker-media-loader.js delete mode 100644 ext/js/language/dictionary-worker.js delete mode 100644 ext/js/language/sandbox/dictionary-data-util.js diff --git a/.eslintrc.json b/.eslintrc.json index 87309296..bd90f4fc 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -651,7 +651,7 @@ "ext/js/general/regex-util.js", "ext/js/general/text-source-map.js", "ext/js/language/deinflector.js", - "ext/js/language/dictionary-database.js", + "ext/js/dictionary/dictionary-database.js", "ext/js/language/sandbox/dictionary-data-util.js", "ext/js/language/sandbox/japanese-util.js", "ext/js/language/translator.js", @@ -679,10 +679,10 @@ "ext/js/data/database.js", "ext/js/data/json-schema.js", "ext/js/general/cache-map.js", - "ext/js/language/dictionary-database.js", - "ext/js/language/dictionary-importer.js", - "ext/js/language/dictionary-worker-handler.js", - "ext/js/language/dictionary-worker-media-loader.js", + "ext/js/dictionary/dictionary-database.js", + "ext/js/dictionary/dictionary-importer.js", + "ext/js/dictionary/dictionary-worker-handler.js", + "ext/js/dictionary/dictionary-worker-media-loader.js", "ext/js/media/media-util.js" ], "env": { diff --git a/dev/jsconfig.json b/dev/jsconfig.json index d613e88d..a7540068 100644 --- a/dev/jsconfig.json +++ b/dev/jsconfig.json @@ -64,8 +64,8 @@ "../ext/js/general/regex-util.js", "../ext/js/general/text-source-map.js", "../ext/js/language/deinflector.js", - "../ext/js/language/dictionary-importer.js", - "../ext/js/language/dictionary-database.js", + "../ext/js/dictionary/dictionary-importer.js", + "../ext/js/dictionary/dictionary-database.js", "../ext/js/language/sandbox/dictionary-data-util.js", "../ext/js/language/sandbox/japanese-util.js", "../ext/js/language/translator.js", diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index f7cad1e6..df4b9777 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -30,9 +30,9 @@ import {AnkiUtil} from '../data/anki-util.js'; import {OptionsUtil} from '../data/options-util.js'; import {PermissionsUtil} from '../data/permissions-util.js'; import {ArrayBufferUtil} from '../data/sandbox/array-buffer-util.js'; +import {DictionaryDatabase} from '../dictionary/dictionary-database.js'; import {Environment} from '../extension/environment.js'; import {ObjectPropertyAccessor} from '../general/object-property-accessor.js'; -import {DictionaryDatabase} from '../language/dictionary-database.js'; import {JapaneseUtil} from '../language/sandbox/japanese-util.js'; import {Translator} from '../language/translator.js'; import {AudioDownloader} from '../media/audio-downloader.js'; diff --git a/ext/js/background/offscreen.js b/ext/js/background/offscreen.js index 7e3576ba..05c655df 100644 --- a/ext/js/background/offscreen.js +++ b/ext/js/background/offscreen.js @@ -20,7 +20,7 @@ import * as wanakana from '../../lib/wanakana.js'; import {ClipboardReader} from '../comm/clipboard-reader.js'; import {createApiMap, invokeApiMapHandler} from '../core/api-map.js'; import {ArrayBufferUtil} from '../data/sandbox/array-buffer-util.js'; -import {DictionaryDatabase} from '../language/dictionary-database.js'; +import {DictionaryDatabase} from '../dictionary/dictionary-database.js'; import {JapaneseUtil} from '../language/sandbox/japanese-util.js'; import {Translator} from '../language/translator.js'; diff --git a/ext/js/data/sandbox/anki-note-data-creator.js b/ext/js/data/sandbox/anki-note-data-creator.js index dce71938..9d93b497 100644 --- a/ext/js/data/sandbox/anki-note-data-creator.js +++ b/ext/js/data/sandbox/anki-note-data-creator.js @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -import {DictionaryDataUtil} from '../../language/sandbox/dictionary-data-util.js'; +import {DictionaryDataUtil} from '../../dictionary/dictionary-data-util.js'; /** * This class is used to convert the internal dictionary entry format to the diff --git a/ext/js/dictionary/dictionary-data-util.js b/ext/js/dictionary/dictionary-data-util.js new file mode 100644 index 00000000..a54b043b --- /dev/null +++ b/ext/js/dictionary/dictionary-data-util.js @@ -0,0 +1,391 @@ +/* + * Copyright (C) 2023 Yomitan Authors + * Copyright (C) 2020-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +export class DictionaryDataUtil { + /** + * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry + * @returns {import('dictionary-data-util').TagGroup[]} + */ + static groupTermTags(dictionaryEntry) { + const {headwords} = dictionaryEntry; + const headwordCount = headwords.length; + const uniqueCheck = (headwordCount > 1); + const resultsIndexMap = new Map(); + const results = []; + for (let i = 0; i < headwordCount; ++i) { + const {tags} = headwords[i]; + for (const tag of tags) { + if (uniqueCheck) { + const {name, category, content, dictionaries} = tag; + const key = this._createMapKey([name, category, content, dictionaries]); + const index = resultsIndexMap.get(key); + if (typeof index !== 'undefined') { + const existingItem = results[index]; + existingItem.headwordIndices.push(i); + continue; + } + resultsIndexMap.set(key, results.length); + } + + const item = {tag, headwordIndices: [i]}; + results.push(item); + } + } + return results; + } + + /** + * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry + * @returns {import('dictionary-data-util').DictionaryFrequency[]} + */ + static groupTermFrequencies(dictionaryEntry) { + const {headwords, frequencies: sourceFrequencies} = dictionaryEntry; + + /** @type {import('dictionary-data-util').TermFrequenciesMap1} */ + const map1 = new Map(); + for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of sourceFrequencies) { + const {term, reading} = headwords[headwordIndex]; + + let map2 = map1.get(dictionary); + if (typeof map2 === 'undefined') { + map2 = new Map(); + map1.set(dictionary, map2); + } + + const readingKey = hasReading ? reading : null; + const key = this._createMapKey([term, readingKey]); + let frequencyData = map2.get(key); + if (typeof frequencyData === 'undefined') { + frequencyData = {term, reading: readingKey, values: new Map()}; + map2.set(key, frequencyData); + } + + frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); + } + + const results = []; + for (const [dictionary, map2] of map1.entries()) { + const frequencies = []; + for (const {term, reading, values} of map2.values()) { + frequencies.push({ + term, + reading, + values: [...values.values()] + }); + } + results.push({dictionary, frequencies}); + } + return results; + } + + /** + * @param {import('dictionary').KanjiFrequency[]} sourceFrequencies + * @returns {import('dictionary-data-util').DictionaryFrequency[]} + */ + static groupKanjiFrequencies(sourceFrequencies) { + /** @type {import('dictionary-data-util').KanjiFrequenciesMap1} */ + const map1 = new Map(); + for (const {dictionary, character, frequency, displayValue} of sourceFrequencies) { + let map2 = map1.get(dictionary); + if (typeof map2 === 'undefined') { + map2 = new Map(); + map1.set(dictionary, map2); + } + + let frequencyData = map2.get(character); + if (typeof frequencyData === 'undefined') { + frequencyData = {character, values: new Map()}; + map2.set(character, frequencyData); + } + + frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); + } + + const results = []; + for (const [dictionary, map2] of map1.entries()) { + const frequencies = []; + for (const {character, values} of map2.values()) { + frequencies.push({ + character, + values: [...values.values()] + }); + } + results.push({dictionary, frequencies}); + } + return results; + } + + /** + * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry + * @returns {import('dictionary-data-util').DictionaryGroupedPronunciations[]} + */ + static getGroupedPronunciations(dictionaryEntry) { + const {headwords, pronunciations} = dictionaryEntry; + + const allTerms = new Set(); + const allReadings = new Set(); + for (const {term, reading} of headwords) { + allTerms.add(term); + allReadings.add(reading); + } + + /** @type {Map} */ + const groupedPronunciationsMap = new Map(); + for (const {headwordIndex, dictionary, pitches} of pronunciations) { + const {term, reading} = headwords[headwordIndex]; + let dictionaryGroupedPronunciationList = groupedPronunciationsMap.get(dictionary); + if (typeof dictionaryGroupedPronunciationList === 'undefined') { + dictionaryGroupedPronunciationList = []; + groupedPronunciationsMap.set(dictionary, dictionaryGroupedPronunciationList); + } + for (const {position, nasalPositions, devoicePositions, tags} of pitches) { + let groupedPronunciation = this._findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, dictionaryGroupedPronunciationList); + if (groupedPronunciation === null) { + groupedPronunciation = { + terms: new Set(), + reading, + position, + nasalPositions, + devoicePositions, + tags + }; + dictionaryGroupedPronunciationList.push(groupedPronunciation); + } + groupedPronunciation.terms.add(term); + } + } + + /** @type {import('dictionary-data-util').DictionaryGroupedPronunciations[]} */ + const results2 = []; + const multipleReadings = (allReadings.size > 1); + for (const [dictionary, dictionaryGroupedPronunciationList] of groupedPronunciationsMap.entries()) { + /** @type {import('dictionary-data-util').GroupedPronunciation[]} */ + const pronunciations2 = []; + for (const groupedPronunciation of dictionaryGroupedPronunciationList) { + const {terms, reading, position, nasalPositions, devoicePositions, tags} = groupedPronunciation; + const exclusiveTerms = !this._areSetsEqual(terms, allTerms) ? this._getSetIntersection(terms, allTerms) : []; + const exclusiveReadings = []; + if (multipleReadings) { + exclusiveReadings.push(reading); + } + pronunciations2.push({ + terms: [...terms], + reading, + position, + nasalPositions, + devoicePositions, + tags, + exclusiveTerms, + exclusiveReadings + }); + } + results2.push({dictionary, pronunciations: pronunciations2}); + } + return results2; + } + + /** + * @param {import('dictionary').Tag[]|import('anki-templates').Tag[]} termTags + * @returns {import('dictionary-data-util').TermFrequencyType} + */ + static getTermFrequency(termTags) { + let totalScore = 0; + for (const {score} of termTags) { + totalScore += score; + } + if (totalScore > 0) { + return 'popular'; + } else if (totalScore < 0) { + return 'rare'; + } else { + return 'normal'; + } + } + + /** + * @param {import('dictionary').TermHeadword[]} headwords + * @param {number[]} headwordIndices + * @param {Set} allTermsSet + * @param {Set} allReadingsSet + * @returns {string[]} + */ + static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) { + if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; } + + /** @type {Set} */ + const terms = new Set(); + /** @type {Set} */ + const readings = new Set(); + for (const headwordIndex of headwordIndices) { + const {term, reading} = headwords[headwordIndex]; + terms.add(term); + readings.add(reading); + } + + /** @type {string[]} */ + const disambiguations = []; + const addTerms = !this._areSetsEqual(terms, allTermsSet); + const addReadings = !this._areSetsEqual(readings, allReadingsSet); + if (addTerms) { + disambiguations.push(...this._getSetIntersection(terms, allTermsSet)); + } + if (addReadings) { + if (addTerms) { + for (const term of terms) { + readings.delete(term); + } + } + disambiguations.push(...this._getSetIntersection(readings, allReadingsSet)); + } + return disambiguations; + } + + /** + * @param {string[]} wordClasses + * @returns {boolean} + */ + static isNonNounVerbOrAdjective(wordClasses) { + let isVerbOrAdjective = false; + let isSuruVerb = false; + let isNoun = false; + for (const wordClass of wordClasses) { + switch (wordClass) { + case 'v1': + case 'v5': + case 'vk': + case 'vz': + case 'adj-i': + isVerbOrAdjective = true; + break; + case 'vs': + isVerbOrAdjective = true; + isSuruVerb = true; + break; + case 'n': + isNoun = true; + break; + } + } + return isVerbOrAdjective && !(isSuruVerb && isNoun); + } + + // Private + + /** + * @param {string} reading + * @param {number} position + * @param {number[]} nasalPositions + * @param {number[]} devoicePositions + * @param {import('dictionary').Tag[]} tags + * @param {import('dictionary-data-util').GroupedPronunciationInternal[]} groupedPronunciationList + * @returns {?import('dictionary-data-util').GroupedPronunciationInternal} + */ + static _findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, groupedPronunciationList) { + for (const pitchInfo of groupedPronunciationList) { + if ( + pitchInfo.reading === reading && + pitchInfo.position === position && + this._areArraysEqual(pitchInfo.nasalPositions, nasalPositions) && + this._areArraysEqual(pitchInfo.devoicePositions, devoicePositions) && + this._areTagListsEqual(pitchInfo.tags, tags) + ) { + return pitchInfo; + } + } + return null; + } + + /** + * @template [T=unknown] + * @param {T[]} array1 + * @param {T[]} array2 + * @returns {boolean} + */ + static _areArraysEqual(array1, array2) { + const ii = array1.length; + if (ii !== array2.length) { return false; } + for (let i = 0; i < ii; ++i) { + if (array1[i] !== array2[i]) { return false; } + } + return true; + } + + /** + * @param {import('dictionary').Tag[]} tagList1 + * @param {import('dictionary').Tag[]} tagList2 + * @returns {boolean} + */ + static _areTagListsEqual(tagList1, tagList2) { + const ii = tagList1.length; + if (tagList2.length !== ii) { return false; } + + for (let i = 0; i < ii; ++i) { + const tag1 = tagList1[i]; + const tag2 = tagList2[i]; + if (tag1.name !== tag2.name || !this._areArraysEqual(tag1.dictionaries, tag2.dictionaries)) { + return false; + } + } + + return true; + } + + /** + * @template [T=unknown] + * @param {Set} set1 + * @param {Set} set2 + * @returns {boolean} + */ + static _areSetsEqual(set1, set2) { + if (set1.size !== set2.size) { + return false; + } + + for (const value of set1) { + if (!set2.has(value)) { + return false; + } + } + + return true; + } + + /** + * @template [T=unknown] + * @param {Set} set1 + * @param {Set} set2 + * @returns {T[]} + */ + static _getSetIntersection(set1, set2) { + const result = []; + for (const value of set1) { + if (set2.has(value)) { + result.push(value); + } + } + return result; + } + + /** + * @param {unknown[]} array + * @returns {string} + */ + static _createMapKey(array) { + return JSON.stringify(array); + } +} diff --git a/ext/js/dictionary/dictionary-database.js b/ext/js/dictionary/dictionary-database.js new file mode 100644 index 00000000..45c5c6fd --- /dev/null +++ b/ext/js/dictionary/dictionary-database.js @@ -0,0 +1,661 @@ +/* + * Copyright (C) 2023 Yomitan Authors + * Copyright (C) 2016-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {log, stringReverse} from '../core.js'; +import {Database} from '../data/database.js'; + +export class DictionaryDatabase { + constructor() { + /** @type {Database} */ + this._db = new Database(); + /** @type {string} */ + this._dbName = 'dict'; + /** @type {import('dictionary-database').CreateQuery} */ + this._createOnlyQuery1 = (item) => IDBKeyRange.only(item); + /** @type {import('dictionary-database').CreateQuery} */ + this._createOnlyQuery2 = (item) => IDBKeyRange.only(item.query); + /** @type {import('dictionary-database').CreateQuery} */ + this._createOnlyQuery3 = (item) => IDBKeyRange.only(item.term); + /** @type {import('dictionary-database').CreateQuery} */ + this._createOnlyQuery4 = (item) => IDBKeyRange.only(item.path); + /** @type {import('dictionary-database').CreateQuery} */ + this._createBoundQuery1 = (item) => IDBKeyRange.bound(item, `${item}\uffff`, false, false); + /** @type {import('dictionary-database').CreateQuery} */ + this._createBoundQuery2 = (item) => { item = stringReverse(item); return IDBKeyRange.bound(item, `${item}\uffff`, false, false); }; + /** @type {import('dictionary-database').CreateResult} */ + this._createTermBind1 = this._createTermExact.bind(this); + /** @type {import('dictionary-database').CreateResult} */ + this._createTermBind2 = this._createTermSequenceExact.bind(this); + /** @type {import('dictionary-database').CreateResult} */ + this._createTermMetaBind = this._createTermMeta.bind(this); + /** @type {import('dictionary-database').CreateResult} */ + this._createKanjiBind = this._createKanji.bind(this); + /** @type {import('dictionary-database').CreateResult} */ + this._createKanjiMetaBind = this._createKanjiMeta.bind(this); + /** @type {import('dictionary-database').CreateResult} */ + this._createMediaBind = this._createMedia.bind(this); + } + + /** */ + async prepare() { + await this._db.open( + this._dbName, + 60, + /** @type {import('database').StructureDefinition[]} */ + ([ + /** @type {import('database').StructureDefinition} */ + ({ + version: 20, + stores: { + terms: { + primaryKey: {keyPath: 'id', autoIncrement: true}, + indices: ['dictionary', 'expression', 'reading'] + }, + kanji: { + primaryKey: {autoIncrement: true}, + indices: ['dictionary', 'character'] + }, + tagMeta: { + primaryKey: {autoIncrement: true}, + indices: ['dictionary'] + }, + dictionaries: { + primaryKey: {autoIncrement: true}, + indices: ['title', 'version'] + } + } + }), + { + version: 30, + stores: { + termMeta: { + primaryKey: {autoIncrement: true}, + indices: ['dictionary', 'expression'] + }, + kanjiMeta: { + primaryKey: {autoIncrement: true}, + indices: ['dictionary', 'character'] + }, + tagMeta: { + primaryKey: {autoIncrement: true}, + indices: ['dictionary', 'name'] + } + } + }, + { + version: 40, + stores: { + terms: { + primaryKey: {keyPath: 'id', autoIncrement: true}, + indices: ['dictionary', 'expression', 'reading', 'sequence'] + } + } + }, + { + version: 50, + stores: { + terms: { + primaryKey: {keyPath: 'id', autoIncrement: true}, + indices: ['dictionary', 'expression', 'reading', 'sequence', 'expressionReverse', 'readingReverse'] + } + } + }, + { + version: 60, + stores: { + media: { + primaryKey: {keyPath: 'id', autoIncrement: true}, + indices: ['dictionary', 'path'] + } + } + } + ]) + ); + } + + /** */ + async close() { + this._db.close(); + } + + /** + * @returns {boolean} + */ + isPrepared() { + return this._db.isOpen(); + } + + /** + * @returns {Promise} + */ + async purge() { + if (this._db.isOpening()) { + throw new Error('Cannot purge database while opening'); + } + if (this._db.isOpen()) { + this._db.close(); + } + let result = false; + try { + await Database.deleteDatabase(this._dbName); + result = true; + } catch (e) { + log.error(e); + } + await this.prepare(); + return result; + } + + /** + * @param {string} dictionaryName + * @param {number} progressRate + * @param {import('dictionary-database').DeleteDictionaryProgressCallback} onProgress + */ + async deleteDictionary(dictionaryName, progressRate, onProgress) { + /** @type {[objectStoreName: import('dictionary-database').ObjectStoreName, key: string][][]} */ + const targetGroups = [ + [ + ['kanji', 'dictionary'], + ['kanjiMeta', 'dictionary'], + ['terms', 'dictionary'], + ['termMeta', 'dictionary'], + ['tagMeta', 'dictionary'], + ['media', 'dictionary'] + ], + [ + ['dictionaries', 'title'] + ] + ]; + + let storeCount = 0; + for (const targets of targetGroups) { + storeCount += targets.length; + } + + /** @type {import('dictionary-database').DeleteDictionaryProgressData} */ + const progressData = { + count: 0, + processed: 0, + storeCount, + storesProcesed: 0 + }; + + /** + * @param {IDBValidKey[]} keys + * @returns {IDBValidKey[]} + */ + const filterKeys = (keys) => { + ++progressData.storesProcesed; + progressData.count += keys.length; + onProgress(progressData); + return keys; + }; + const onProgressWrapper = () => { + const processed = progressData.processed + 1; + progressData.processed = processed; + if ((processed % progressRate) === 0 || processed === progressData.count) { + onProgress(progressData); + } + }; + + for (const targets of targetGroups) { + const promises = []; + for (const [objectStoreName, indexName] of targets) { + const query = IDBKeyRange.only(dictionaryName); + const promise = this._db.bulkDelete(objectStoreName, indexName, query, filterKeys, onProgressWrapper); + promises.push(promise); + } + await Promise.all(promises); + } + } + + /** + * @param {string[]} termList + * @param {import('dictionary-database').DictionarySet} dictionaries + * @param {import('dictionary-database').MatchType} matchType + * @returns {Promise} + */ + findTermsBulk(termList, dictionaries, matchType) { + const visited = new Set(); + /** @type {import('dictionary-database').FindPredicate} */ + const predicate = (row) => { + if (!dictionaries.has(row.dictionary)) { return false; } + const {id} = row; + if (visited.has(id)) { return false; } + visited.add(id); + return true; + }; + + const indexNames = (matchType === 'suffix') ? ['expressionReverse', 'readingReverse'] : ['expression', 'reading']; + + let createQuery = this._createOnlyQuery1; + switch (matchType) { + case 'prefix': + createQuery = this._createBoundQuery1; + break; + case 'suffix': + createQuery = this._createBoundQuery2; + break; + } + + const createResult = this._createTermGeneric.bind(this, matchType); + + return this._findMultiBulk('terms', indexNames, termList, createQuery, predicate, createResult); + } + + /** + * @param {import('dictionary-database').TermExactRequest[]} termList + * @param {import('dictionary-database').DictionarySet} dictionaries + * @returns {Promise} + */ + findTermsExactBulk(termList, dictionaries) { + /** @type {import('dictionary-database').FindPredicate} */ + const predicate = (row, item) => (row.reading === item.reading && dictionaries.has(row.dictionary)); + return this._findMultiBulk('terms', ['expression'], termList, this._createOnlyQuery3, predicate, this._createTermBind1); + } + + /** + * @param {import('dictionary-database').DictionaryAndQueryRequest[]} items + * @returns {Promise} + */ + findTermsBySequenceBulk(items) { + /** @type {import('dictionary-database').FindPredicate} */ + const predicate = (row, item) => (row.dictionary === item.dictionary); + return this._findMultiBulk('terms', ['sequence'], items, this._createOnlyQuery2, predicate, this._createTermBind2); + } + + /** + * @param {string[]} termList + * @param {import('dictionary-database').DictionarySet} dictionaries + * @returns {Promise} + */ + findTermMetaBulk(termList, dictionaries) { + /** @type {import('dictionary-database').FindPredicate} */ + const predicate = (row) => dictionaries.has(row.dictionary); + return this._findMultiBulk('termMeta', ['expression'], termList, this._createOnlyQuery1, predicate, this._createTermMetaBind); + } + + /** + * @param {string[]} kanjiList + * @param {import('dictionary-database').DictionarySet} dictionaries + * @returns {Promise} + */ + findKanjiBulk(kanjiList, dictionaries) { + /** @type {import('dictionary-database').FindPredicate} */ + const predicate = (row) => dictionaries.has(row.dictionary); + return this._findMultiBulk('kanji', ['character'], kanjiList, this._createOnlyQuery1, predicate, this._createKanjiBind); + } + + /** + * @param {string[]} kanjiList + * @param {import('dictionary-database').DictionarySet} dictionaries + * @returns {Promise} + */ + findKanjiMetaBulk(kanjiList, dictionaries) { + /** @type {import('dictionary-database').FindPredicate} */ + const predicate = (row) => dictionaries.has(row.dictionary); + return this._findMultiBulk('kanjiMeta', ['character'], kanjiList, this._createOnlyQuery1, predicate, this._createKanjiMetaBind); + } + + /** + * @param {import('dictionary-database').DictionaryAndQueryRequest[]} items + * @returns {Promise<(import('dictionary-database').Tag|undefined)[]>} + */ + findTagMetaBulk(items) { + /** @type {import('dictionary-database').FindPredicate} */ + const predicate = (row, item) => (row.dictionary === item.dictionary); + return this._findFirstBulk('tagMeta', 'name', items, this._createOnlyQuery2, predicate); + } + + /** + * @param {string} name + * @param {string} dictionary + * @returns {Promise} + */ + findTagForTitle(name, dictionary) { + const query = IDBKeyRange.only(name); + return this._db.find('tagMeta', 'name', query, (row) => (/** @type {import('dictionary-database').Tag} */ (row).dictionary === dictionary), null, null); + } + + /** + * @param {import('dictionary-database').MediaRequest[]} items + * @returns {Promise} + */ + getMedia(items) { + /** @type {import('dictionary-database').FindPredicate} */ + const predicate = (row, item) => (row.dictionary === item.dictionary); + return this._findMultiBulk('media', ['path'], items, this._createOnlyQuery4, predicate, this._createMediaBind); + } + + /** + * @returns {Promise} + */ + getDictionaryInfo() { + return new Promise((resolve, reject) => { + const transaction = this._db.transaction(['dictionaries'], 'readonly'); + const objectStore = transaction.objectStore('dictionaries'); + this._db.getAll(objectStore, null, resolve, reject, null); + }); + } + + /** + * @param {string[]} dictionaryNames + * @param {boolean} getTotal + * @returns {Promise} + */ + getDictionaryCounts(dictionaryNames, getTotal) { + return new Promise((resolve, reject) => { + const targets = [ + ['kanji', 'dictionary'], + ['kanjiMeta', 'dictionary'], + ['terms', 'dictionary'], + ['termMeta', 'dictionary'], + ['tagMeta', 'dictionary'], + ['media', 'dictionary'] + ]; + const objectStoreNames = targets.map(([objectStoreName]) => objectStoreName); + const transaction = this._db.transaction(objectStoreNames, 'readonly'); + const databaseTargets = targets.map(([objectStoreName, indexName]) => { + const objectStore = transaction.objectStore(objectStoreName); + const index = objectStore.index(indexName); + return {objectStore, index}; + }); + + /** @type {import('database').CountTarget[]} */ + const countTargets = []; + if (getTotal) { + for (const {objectStore} of databaseTargets) { + countTargets.push([objectStore, void 0]); + } + } + for (const dictionaryName of dictionaryNames) { + const query = IDBKeyRange.only(dictionaryName); + for (const {index} of databaseTargets) { + countTargets.push([index, query]); + } + } + + /** + * @param {number[]} results + */ + const onCountComplete = (results) => { + const resultCount = results.length; + const targetCount = targets.length; + /** @type {import('dictionary-database').DictionaryCountGroup[]} */ + const counts = []; + for (let i = 0; i < resultCount; i += targetCount) { + /** @type {import('dictionary-database').DictionaryCountGroup} */ + const countGroup = {}; + for (let j = 0; j < targetCount; ++j) { + countGroup[targets[j][0]] = results[i + j]; + } + counts.push(countGroup); + } + const total = getTotal ? /** @type {import('dictionary-database').DictionaryCountGroup} */ (counts.shift()) : null; + resolve({total, counts}); + }; + + this._db.bulkCount(countTargets, onCountComplete, reject); + }); + } + + /** + * @param {string} title + * @returns {Promise} + */ + async dictionaryExists(title) { + const query = IDBKeyRange.only(title); + const result = await this._db.find('dictionaries', 'title', query, null, null, void 0); + return typeof result !== 'undefined'; + } + + /** + * @template {import('dictionary-database').ObjectStoreName} T + * @param {T} objectStoreName + * @param {import('dictionary-database').ObjectStoreData[]} items + * @param {number} start + * @param {number} count + * @returns {Promise} + */ + bulkAdd(objectStoreName, items, start, count) { + return this._db.bulkAdd(objectStoreName, items, start, count); + } + + // Private + + /** + * @template [TRow=unknown] + * @template [TItem=unknown] + * @template [TResult=unknown] + * @param {import('dictionary-database').ObjectStoreName} objectStoreName + * @param {string[]} indexNames + * @param {TItem[]} items + * @param {import('dictionary-database').CreateQuery} createQuery + * @param {import('dictionary-database').FindPredicate} predicate + * @param {import('dictionary-database').CreateResult} createResult + * @returns {Promise} + */ + _findMultiBulk(objectStoreName, indexNames, items, createQuery, predicate, createResult) { + return new Promise((resolve, reject) => { + const itemCount = items.length; + const indexCount = indexNames.length; + /** @type {TResult[]} */ + const results = []; + if (itemCount === 0 || indexCount === 0) { + resolve(results); + return; + } + + const transaction = this._db.transaction([objectStoreName], 'readonly'); + const objectStore = transaction.objectStore(objectStoreName); + const indexList = []; + for (const indexName of indexNames) { + indexList.push(objectStore.index(indexName)); + } + let completeCount = 0; + const requiredCompleteCount = itemCount * indexCount; + /** + * @param {TRow[]} rows + * @param {import('dictionary-database').FindMultiBulkData} data + */ + const onGetAll = (rows, data) => { + for (const row of rows) { + if (predicate(row, data.item)) { + results.push(createResult(row, data)); + } + } + if (++completeCount >= requiredCompleteCount) { + resolve(results); + } + }; + for (let i = 0; i < itemCount; ++i) { + const item = items[i]; + const query = createQuery(item); + for (let j = 0; j < indexCount; ++j) { + /** @type {import('dictionary-database').FindMultiBulkData} */ + const data = {item, itemIndex: i, indexIndex: j}; + this._db.getAll(indexList[j], query, onGetAll, reject, data); + } + } + }); + } + + /** + * @template [TRow=unknown] + * @template [TItem=unknown] + * @param {import('dictionary-database').ObjectStoreName} objectStoreName + * @param {string} indexName + * @param {TItem[]} items + * @param {import('dictionary-database').CreateQuery} createQuery + * @param {import('dictionary-database').FindPredicate} predicate + * @returns {Promise<(TRow|undefined)[]>} + */ + _findFirstBulk(objectStoreName, indexName, items, createQuery, predicate) { + return new Promise((resolve, reject) => { + const itemCount = items.length; + /** @type {(TRow|undefined)[]} */ + const results = new Array(itemCount); + if (itemCount === 0) { + resolve(results); + return; + } + + const transaction = this._db.transaction([objectStoreName], 'readonly'); + const objectStore = transaction.objectStore(objectStoreName); + const index = objectStore.index(indexName); + let completeCount = 0; + /** + * @param {TRow|undefined} row + * @param {number} itemIndex + */ + const onFind = (row, itemIndex) => { + results[itemIndex] = row; + if (++completeCount >= itemCount) { + resolve(results); + } + }; + for (let i = 0; i < itemCount; ++i) { + const item = items[i]; + const query = createQuery(item); + this._db.findFirst(index, query, onFind, reject, i, predicate, item, void 0); + } + }); + } + + /** + * @param {import('dictionary-database').MatchType} matchType + * @param {import('dictionary-database').DatabaseTermEntryWithId} row + * @param {import('dictionary-database').FindMultiBulkData} data + * @returns {import('dictionary-database').TermEntry} + */ + _createTermGeneric(matchType, row, data) { + const matchSourceIsTerm = (data.indexIndex === 0); + const matchSource = (matchSourceIsTerm ? 'term' : 'reading'); + if ((matchSourceIsTerm ? row.expression : row.reading) === data.item) { + matchType = 'exact'; + } + return this._createTerm(matchSource, matchType, row, data.itemIndex); + } + + /** + * @param {import('dictionary-database').DatabaseTermEntryWithId} row + * @param {import('dictionary-database').FindMultiBulkData} data + * @returns {import('dictionary-database').TermEntry} + */ + _createTermExact(row, data) { + return this._createTerm('term', 'exact', row, data.itemIndex); + } + + /** + * @param {import('dictionary-database').DatabaseTermEntryWithId} row + * @param {import('dictionary-database').FindMultiBulkData} data + * @returns {import('dictionary-database').TermEntry} + */ + _createTermSequenceExact(row, data) { + return this._createTerm('sequence', 'exact', row, data.itemIndex); + } + + /** + * @param {import('dictionary-database').MatchSource} matchSource + * @param {import('dictionary-database').MatchType} matchType + * @param {import('dictionary-database').DatabaseTermEntryWithId} row + * @param {number} index + * @returns {import('dictionary-database').TermEntry} + */ + _createTerm(matchSource, matchType, row, index) { + const {sequence} = row; + return { + index, + matchType, + matchSource, + term: row.expression, + reading: row.reading, + definitionTags: this._splitField(row.definitionTags || row.tags), + termTags: this._splitField(row.termTags), + rules: this._splitField(row.rules), + definitions: row.glossary, + score: row.score, + dictionary: row.dictionary, + id: row.id, + sequence: typeof sequence === 'number' ? sequence : -1 + }; + } + + /** + * @param {import('dictionary-database').DatabaseKanjiEntry} row + * @param {import('dictionary-database').FindMultiBulkData} data + * @returns {import('dictionary-database').KanjiEntry} + */ + _createKanji(row, {itemIndex: index}) { + const {stats} = row; + return { + index, + character: row.character, + onyomi: this._splitField(row.onyomi), + kunyomi: this._splitField(row.kunyomi), + tags: this._splitField(row.tags), + definitions: row.meanings, + stats: typeof stats === 'object' && stats !== null ? stats : {}, + dictionary: row.dictionary + }; + } + + /** + * @param {import('dictionary-database').DatabaseTermMeta} row + * @param {import('dictionary-database').FindMultiBulkData} data + * @returns {import('dictionary-database').TermMeta} + * @throws {Error} + */ + _createTermMeta({expression: term, mode, data, dictionary}, {itemIndex: index}) { + switch (mode) { + case 'freq': + return {index, term, mode, data, dictionary}; + case 'pitch': + return {index, term, mode, data, dictionary}; + default: + throw new Error(`Unknown mode: ${mode}`); + } + } + + /** + * @param {import('dictionary-database').DatabaseKanjiMeta} row + * @param {import('dictionary-database').FindMultiBulkData} data + * @returns {import('dictionary-database').KanjiMeta} + */ + _createKanjiMeta({character, mode, data, dictionary}, {itemIndex: index}) { + return {index, character, mode, data, dictionary}; + } + + /** + * @param {import('dictionary-database').MediaDataArrayBufferContent} row + * @param {import('dictionary-database').FindMultiBulkData} data + * @returns {import('dictionary-database').Media} + */ + _createMedia(row, {itemIndex: index}) { + const {dictionary, path, mediaType, width, height, content} = row; + return {index, dictionary, path, mediaType, width, height, content}; + } + + /** + * @param {unknown} field + * @returns {string[]} + */ + _splitField(field) { + return typeof field === 'string' && field.length > 0 ? field.split(' ') : []; + } +} diff --git a/ext/js/dictionary/dictionary-importer-media-loader.js b/ext/js/dictionary/dictionary-importer-media-loader.js new file mode 100644 index 00000000..a5857dce --- /dev/null +++ b/ext/js/dictionary/dictionary-importer-media-loader.js @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2023 Yomitan Authors + * Copyright (C) 2021-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {EventListenerCollection} from '../core.js'; + +/** + * Class used for loading and validating media during the dictionary import process. + */ +export class DictionaryImporterMediaLoader { + /** @type {import('dictionary-importer-media-loader').GetImageDetailsFunction} */ + getImageDetails(content, mediaType, transfer) { + return new Promise((resolve, reject) => { + const image = new Image(); + const eventListeners = new EventListenerCollection(); + const cleanup = () => { + image.removeAttribute('src'); + URL.revokeObjectURL(url); + eventListeners.removeAllEventListeners(); + }; + eventListeners.addEventListener(image, 'load', () => { + const {naturalWidth: width, naturalHeight: height} = image; + if (Array.isArray(transfer)) { transfer.push(content); } + cleanup(); + resolve({content, width, height}); + }, false); + eventListeners.addEventListener(image, 'error', () => { + cleanup(); + reject(new Error('Image failed to load')); + }, false); + const blob = new Blob([content], {type: mediaType}); + const url = URL.createObjectURL(blob); + image.src = url; + }); + } +} diff --git a/ext/js/dictionary/dictionary-importer.js b/ext/js/dictionary/dictionary-importer.js new file mode 100644 index 00000000..2c0c7e9c --- /dev/null +++ b/ext/js/dictionary/dictionary-importer.js @@ -0,0 +1,796 @@ +/* + * Copyright (C) 2023 Yomitan Authors + * Copyright (C) 2020-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import * as ajvSchemas0 from '../../lib/validate-schemas.js'; +import { + BlobWriter as BlobWriter0, + TextWriter as TextWriter0, + Uint8ArrayReader as Uint8ArrayReader0, + ZipReader as ZipReader0, + configure +} from '../../lib/zip.js'; +import {stringReverse} from '../core.js'; +import {ExtensionError} from '../core/extension-error.js'; +import {parseJson} from '../core/json.js'; +import {MediaUtil} from '../media/media-util.js'; + +const ajvSchemas = /** @type {import('dictionary-importer').CompiledSchemaValidators} */ (/** @type {unknown} */ (ajvSchemas0)); +const BlobWriter = /** @type {typeof import('@zip.js/zip.js').BlobWriter} */ (/** @type {unknown} */ (BlobWriter0)); +const TextWriter = /** @type {typeof import('@zip.js/zip.js').TextWriter} */ (/** @type {unknown} */ (TextWriter0)); +const Uint8ArrayReader = /** @type {typeof import('@zip.js/zip.js').Uint8ArrayReader} */ (/** @type {unknown} */ (Uint8ArrayReader0)); +const ZipReader = /** @type {typeof import('@zip.js/zip.js').ZipReader} */ (/** @type {unknown} */ (ZipReader0)); + +export class DictionaryImporter { + /** + * @param {import('dictionary-importer-media-loader').GenericMediaLoader} mediaLoader + * @param {import('dictionary-importer').OnProgressCallback} [onProgress] + */ + constructor(mediaLoader, onProgress) { + /** @type {import('dictionary-importer-media-loader').GenericMediaLoader} */ + this._mediaLoader = mediaLoader; + /** @type {import('dictionary-importer').OnProgressCallback} */ + this._onProgress = typeof onProgress === 'function' ? onProgress : () => {}; + /** @type {import('dictionary-importer').ProgressData} */ + this._progressData = this._createProgressData(); + } + + /** + * @param {import('./dictionary-database.js').DictionaryDatabase} dictionaryDatabase + * @param {ArrayBuffer} archiveContent + * @param {import('dictionary-importer').ImportDetails} details + * @returns {Promise} + */ + async importDictionary(dictionaryDatabase, archiveContent, details) { + if (!dictionaryDatabase) { + throw new Error('Invalid database'); + } + if (!dictionaryDatabase.isPrepared()) { + throw new Error('Database is not ready'); + } + + this._progressReset(); + + configure({ + workerScripts: { + deflate: ['../../lib/z-worker.js'], + inflate: ['../../lib/z-worker.js'] + } + }); + + // Read archive + const zipFileReader = new Uint8ArrayReader(new Uint8Array(archiveContent)); + const zipReader = new ZipReader(zipFileReader); + const zipEntries = await zipReader.getEntries(); + /** @type {import('dictionary-importer').ArchiveFileMap} */ + const fileMap = new Map(); + for (const entry of zipEntries) { + fileMap.set(entry.filename, entry); + } + // Read and validate index + const indexFileName = 'index.json'; + const indexFile = fileMap.get(indexFileName); + if (typeof indexFile === 'undefined') { + throw new Error('No dictionary index found in archive'); + } + const indexFile2 = /** @type {import('@zip.js/zip.js').Entry} */ (indexFile); + + const indexContent = await this._getData(indexFile2, new TextWriter()); + const index = /** @type {import('dictionary-data').Index} */ (parseJson(indexContent)); + + if (!ajvSchemas.dictionaryIndex(index)) { + throw this._formatAjvSchemaError(ajvSchemas.dictionaryIndex, indexFileName); + } + + const dictionaryTitle = index.title; + const version = typeof index.format === 'number' ? index.format : index.version; + + if (typeof version !== 'number' || !dictionaryTitle || !index.revision) { + throw new Error('Unrecognized dictionary format'); + } + + // Verify database is not already imported + if (await dictionaryDatabase.dictionaryExists(dictionaryTitle)) { + throw new Error('Dictionary is already imported'); + } + + // Load schemas + this._progressNextStep(0); + const dataBankSchemas = this._getDataBankSchemas(version); + + // Files + /** @type {import('dictionary-importer').QueryDetails} */ + const queryDetails = new Map([ + ['termFiles', /^term_bank_(\d+)\.json$/], + ['termMetaFiles', /^term_meta_bank_(\d+)\.json$/], + ['kanjiFiles', /^kanji_bank_(\d+)\.json$/], + ['kanjiMetaFiles', /^kanji_meta_bank_(\d+)\.json$/], + ['tagFiles', /^tag_bank_(\d+)\.json$/] + ]); + const {termFiles, termMetaFiles, kanjiFiles, kanjiMetaFiles, tagFiles} = Object.fromEntries(this._getArchiveFiles(fileMap, queryDetails)); + + // Load data + this._progressNextStep(termFiles.length + termMetaFiles.length + kanjiFiles.length + kanjiMetaFiles.length + tagFiles.length); + const termList = await ( + version === 1 ? + this._readFileSequence(termFiles, this._convertTermBankEntryV1.bind(this), dataBankSchemas[0], dictionaryTitle) : + this._readFileSequence(termFiles, this._convertTermBankEntryV3.bind(this), dataBankSchemas[0], dictionaryTitle) + ); + const termMetaList = await this._readFileSequence(termMetaFiles, this._convertTermMetaBankEntry.bind(this), dataBankSchemas[1], dictionaryTitle); + const kanjiList = await ( + version === 1 ? + this._readFileSequence(kanjiFiles, this._convertKanjiBankEntryV1.bind(this), dataBankSchemas[2], dictionaryTitle) : + this._readFileSequence(kanjiFiles, this._convertKanjiBankEntryV3.bind(this), dataBankSchemas[2], dictionaryTitle) + ); + const kanjiMetaList = await this._readFileSequence(kanjiMetaFiles, this._convertKanjiMetaBankEntry.bind(this), dataBankSchemas[3], dictionaryTitle); + const tagList = await this._readFileSequence(tagFiles, this._convertTagBankEntry.bind(this), dataBankSchemas[4], dictionaryTitle); + this._addOldIndexTags(index, tagList, dictionaryTitle); + + // Prefix wildcard support + const prefixWildcardsSupported = !!details.prefixWildcardsSupported; + if (prefixWildcardsSupported) { + for (const entry of termList) { + entry.expressionReverse = stringReverse(entry.expression); + entry.readingReverse = stringReverse(entry.reading); + } + } + + // Extended data support + this._progressNextStep(termList.length); + const formatProgressInterval = 1000; + /** @type {import('dictionary-importer').ImportRequirement[]} */ + const requirements = []; + for (let i = 0, ii = termList.length; i < ii; ++i) { + const entry = termList[i]; + const glossaryList = entry.glossary; + for (let j = 0, jj = glossaryList.length; j < jj; ++j) { + const glossary = glossaryList[j]; + if (typeof glossary !== 'object' || glossary === null) { continue; } + glossaryList[j] = this._formatDictionaryTermGlossaryObject(glossary, entry, requirements); + } + if ((i % formatProgressInterval) === 0) { + this._progressData.index = i; + this._progress(); + } + } + this._progress(); + + // Async requirements + this._progressNextStep(requirements.length); + const {media} = await this._resolveAsyncRequirements(requirements, fileMap); + + // Add dictionary descriptor + this._progressNextStep(termList.length + termMetaList.length + kanjiList.length + kanjiMetaList.length + tagList.length + media.length); + + /** @type {import('dictionary-importer').SummaryCounts} */ + const counts = { + terms: {total: termList.length}, + termMeta: this._getMetaCounts(termMetaList), + kanji: {total: kanjiList.length}, + kanjiMeta: this._getMetaCounts(kanjiMetaList), + tagMeta: {total: tagList.length}, + media: {total: media.length} + }; + const summary = this._createSummary(dictionaryTitle, version, index, {prefixWildcardsSupported, counts}); + dictionaryDatabase.bulkAdd('dictionaries', [summary], 0, 1); + + // Add data + /** @type {Error[]} */ + const errors = []; + const maxTransactionLength = 1000; + + /** + * @template {import('dictionary-database').ObjectStoreName} T + * @param {T} objectStoreName + * @param {import('dictionary-database').ObjectStoreData[]} entries + */ + const bulkAdd = async (objectStoreName, entries) => { + const ii = entries.length; + for (let i = 0; i < ii; i += maxTransactionLength) { + const count = Math.min(maxTransactionLength, ii - i); + + try { + await dictionaryDatabase.bulkAdd(objectStoreName, entries, i, count); + } catch (e) { + errors.push(e instanceof Error ? e : new Error(`${e}`)); + } + + this._progressData.index += count; + this._progress(); + } + }; + + await bulkAdd('terms', termList); + await bulkAdd('termMeta', termMetaList); + await bulkAdd('kanji', kanjiList); + await bulkAdd('kanjiMeta', kanjiMetaList); + await bulkAdd('tagMeta', tagList); + await bulkAdd('media', media); + + this._progress(); + + return {result: summary, errors}; + } + + /** + * @returns {import('dictionary-importer').ProgressData} + */ + _createProgressData() { + return { + stepIndex: 0, + stepCount: 6, + index: 0, + count: 0 + }; + } + + /** */ + _progressReset() { + this._progressData = this._createProgressData(); + this._progress(); + } + + /** + * @param {number} count + */ + _progressNextStep(count) { + ++this._progressData.stepIndex; + this._progressData.index = 0; + this._progressData.count = count; + this._progress(); + } + + /** */ + _progress() { + this._onProgress(this._progressData); + } + + /** + * @param {string} dictionaryTitle + * @param {number} version + * @param {import('dictionary-data').Index} index + * @param {{prefixWildcardsSupported: boolean, counts: import('dictionary-importer').SummaryCounts}} details + * @returns {import('dictionary-importer').Summary} + */ + _createSummary(dictionaryTitle, version, index, details) { + const indexSequenced = index.sequenced; + const {prefixWildcardsSupported, counts} = details; + + /** @type {import('dictionary-importer').Summary} */ + const summary = { + title: dictionaryTitle, + revision: index.revision, + sequenced: typeof indexSequenced === 'boolean' && indexSequenced, + version, + importDate: Date.now(), + prefixWildcardsSupported, + counts + }; + + const {author, url, description, attribution, frequencyMode} = index; + if (typeof author === 'string') { summary.author = author; } + if (typeof url === 'string') { summary.url = url; } + if (typeof description === 'string') { summary.description = description; } + if (typeof attribution === 'string') { summary.attribution = attribution; } + if (typeof frequencyMode === 'string') { summary.frequencyMode = frequencyMode; } + + return summary; + } + + /** + * @param {import('ajv').ValidateFunction} schema + * @param {string} fileName + * @returns {ExtensionError} + */ + _formatAjvSchemaError(schema, fileName) { + const e2 = new ExtensionError(`Dictionary has invalid data in '${fileName}'`); + e2.data = schema.errors; + + return e2; + } + + /** + * @param {number} version + * @returns {import('dictionary-importer').CompiledSchemaNameArray} + */ + _getDataBankSchemas(version) { + const termBank = ( + version === 1 ? + 'dictionaryTermBankV1' : + 'dictionaryTermBankV3' + ); + const termMetaBank = 'dictionaryTermMetaBankV3'; + const kanjiBank = ( + version === 1 ? + 'dictionaryKanjiBankV1' : + 'dictionaryKanjiBankV3' + ); + const kanjiMetaBank = 'dictionaryKanjiMetaBankV3'; + const tagBank = 'dictionaryTagBankV3'; + + return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; + } + + /** + * @param {import('dictionary-data').TermGlossaryText|import('dictionary-data').TermGlossaryImage|import('dictionary-data').TermGlossaryStructuredContent} data + * @param {import('dictionary-database').DatabaseTermEntry} entry + * @param {import('dictionary-importer').ImportRequirement[]} requirements + * @returns {import('dictionary-data').TermGlossary} + * @throws {Error} + */ + _formatDictionaryTermGlossaryObject(data, entry, requirements) { + switch (data.type) { + case 'text': + return data.text; + case 'image': + return this._formatDictionaryTermGlossaryImage(data, entry, requirements); + case 'structured-content': + return this._formatStructuredContent(data, entry, requirements); + default: + throw new Error(`Unhandled data type: ${/** @type {import('core').SerializableObject} */ (data).type}`); + } + } + + /** + * @param {import('dictionary-data').TermGlossaryImage} data + * @param {import('dictionary-database').DatabaseTermEntry} entry + * @param {import('dictionary-importer').ImportRequirement[]} requirements + * @returns {import('dictionary-data').TermGlossaryImage} + */ + _formatDictionaryTermGlossaryImage(data, entry, requirements) { + /** @type {import('dictionary-data').TermGlossaryImage} */ + const target = { + type: 'image', + path: '' // Will be populated during requirement resolution + }; + requirements.push({type: 'image', target, source: data, entry}); + return target; + } + + /** + * @param {import('dictionary-data').TermGlossaryStructuredContent} data + * @param {import('dictionary-database').DatabaseTermEntry} entry + * @param {import('dictionary-importer').ImportRequirement[]} requirements + * @returns {import('dictionary-data').TermGlossaryStructuredContent} + */ + _formatStructuredContent(data, entry, requirements) { + const content = this._prepareStructuredContent(data.content, entry, requirements); + return { + type: 'structured-content', + content + }; + } + + /** + * @param {import('structured-content').Content} content + * @param {import('dictionary-database').DatabaseTermEntry} entry + * @param {import('dictionary-importer').ImportRequirement[]} requirements + * @returns {import('structured-content').Content} + */ + _prepareStructuredContent(content, entry, requirements) { + if (typeof content === 'string' || !(typeof content === 'object' && content !== null)) { + return content; + } + if (Array.isArray(content)) { + for (let i = 0, ii = content.length; i < ii; ++i) { + content[i] = this._prepareStructuredContent(content[i], entry, requirements); + } + return content; + } + const {tag} = content; + switch (tag) { + case 'img': + return this._prepareStructuredContentImage(content, entry, requirements); + } + const childContent = content.content; + if (typeof childContent !== 'undefined') { + content.content = this._prepareStructuredContent(childContent, entry, requirements); + } + return content; + } + + /** + * @param {import('structured-content').ImageElement} content + * @param {import('dictionary-database').DatabaseTermEntry} entry + * @param {import('dictionary-importer').ImportRequirement[]} requirements + * @returns {import('structured-content').ImageElement} + */ + _prepareStructuredContentImage(content, entry, requirements) { + /** @type {import('structured-content').ImageElement} */ + const target = { + tag: 'img', + path: '' // Will be populated during requirement resolution + }; + requirements.push({type: 'structured-content-image', target, source: content, entry}); + return target; + } + + /** + * @param {import('dictionary-importer').ImportRequirement[]} requirements + * @param {import('dictionary-importer').ArchiveFileMap} fileMap + * @returns {Promise<{media: import('dictionary-database').MediaDataArrayBufferContent[]}>} + */ + async _resolveAsyncRequirements(requirements, fileMap) { + /** @type {Map} */ + const media = new Map(); + /** @type {import('dictionary-importer').ImportRequirementContext} */ + const context = {fileMap, media}; + + for (const requirement of requirements) { + await this._resolveAsyncRequirement(context, requirement); + } + + return { + media: [...media.values()] + }; + } + + /** + * @param {import('dictionary-importer').ImportRequirementContext} context + * @param {import('dictionary-importer').ImportRequirement} requirement + */ + async _resolveAsyncRequirement(context, requirement) { + switch (requirement.type) { + case 'image': + await this._resolveDictionaryTermGlossaryImage( + context, + requirement.target, + requirement.source, + requirement.entry + ); + break; + case 'structured-content-image': + await this._resolveStructuredContentImage( + context, + requirement.target, + requirement.source, + requirement.entry + ); + break; + default: + return; + } + ++this._progressData.index; + this._progress(); + } + + /** + * @param {import('dictionary-importer').ImportRequirementContext} context + * @param {import('dictionary-data').TermGlossaryImage} target + * @param {import('dictionary-data').TermGlossaryImage} source + * @param {import('dictionary-database').DatabaseTermEntry} entry + */ + async _resolveDictionaryTermGlossaryImage(context, target, source, entry) { + await this._createImageData(context, target, source, entry); + } + + /** + * @param {import('dictionary-importer').ImportRequirementContext} context + * @param {import('structured-content').ImageElement} target + * @param {import('structured-content').ImageElement} source + * @param {import('dictionary-database').DatabaseTermEntry} entry + */ + async _resolveStructuredContentImage(context, target, source, entry) { + const {verticalAlign, sizeUnits} = source; + await this._createImageData(context, target, source, entry); + if (typeof verticalAlign === 'string') { target.verticalAlign = verticalAlign; } + if (typeof sizeUnits === 'string') { target.sizeUnits = sizeUnits; } + } + + /** + * @param {import('dictionary-importer').ImportRequirementContext} context + * @param {import('structured-content').ImageElementBase} target + * @param {import('structured-content').ImageElementBase} source + * @param {import('dictionary-database').DatabaseTermEntry} entry + */ + async _createImageData(context, target, source, entry) { + const { + path, + width: preferredWidth, + height: preferredHeight, + title, + alt, + description, + pixelated, + imageRendering, + appearance, + background, + collapsed, + collapsible + } = source; + const {width, height} = await this._getImageMedia(context, path, entry); + target.path = path; + target.width = width; + target.height = height; + if (typeof preferredWidth === 'number') { target.preferredWidth = preferredWidth; } + if (typeof preferredHeight === 'number') { target.preferredHeight = preferredHeight; } + if (typeof title === 'string') { target.title = title; } + if (typeof alt === 'string') { target.alt = alt; } + if (typeof description === 'string') { target.description = description; } + if (typeof pixelated === 'boolean') { target.pixelated = pixelated; } + if (typeof imageRendering === 'string') { target.imageRendering = imageRendering; } + if (typeof appearance === 'string') { target.appearance = appearance; } + if (typeof background === 'boolean') { target.background = background; } + if (typeof collapsed === 'boolean') { target.collapsed = collapsed; } + if (typeof collapsible === 'boolean') { target.collapsible = collapsible; } + } + + /** + * @param {import('dictionary-importer').ImportRequirementContext} context + * @param {string} path + * @param {import('dictionary-database').DatabaseTermEntry} entry + * @returns {Promise} + */ + async _getImageMedia(context, path, entry) { + const {media} = context; + const {dictionary} = entry; + + /** + * @param {string} message + * @returns {Error} + */ + const createError = (message) => { + const {expression, reading} = entry; + const readingSource = reading.length > 0 ? ` (${reading})` : ''; + return new Error(`${message} at path ${JSON.stringify(path)} for ${expression}${readingSource} in ${dictionary}`); + }; + + // Check if already added + let mediaData = media.get(path); + if (typeof mediaData !== 'undefined') { + if (MediaUtil.getFileExtensionFromImageMediaType(mediaData.mediaType) === null) { + throw createError('Media file is not a valid image'); + } + return mediaData; + } + + // Find file in archive + const file = context.fileMap.get(path); + if (typeof file === 'undefined') { + throw createError('Could not find image'); + } + + // Load file content + let content = await (await this._getData(file, new BlobWriter())).arrayBuffer(); + + const mediaType = MediaUtil.getImageMediaTypeFromFileName(path); + if (mediaType === null) { + throw createError('Could not determine media type for image'); + } + + // Load image data + let width; + let height; + try { + ({content, width, height} = await this._mediaLoader.getImageDetails(content, mediaType)); + } catch (e) { + throw createError('Could not load image'); + } + + // Create image data + mediaData = { + dictionary, + path, + mediaType, + width, + height, + content + }; + media.set(path, mediaData); + + return mediaData; + } + + /** + * @param {import('dictionary-data').TermV1} entry + * @param {string} dictionary + * @returns {import('dictionary-database').DatabaseTermEntry} + */ + _convertTermBankEntryV1(entry, dictionary) { + let [expression, reading, definitionTags, rules, score, ...glossary] = entry; + expression = this._normalizeTermOrReading(expression); + reading = this._normalizeTermOrReading(reading.length > 0 ? reading : expression); + return {expression, reading, definitionTags, rules, score, glossary, dictionary}; + } + + /** + * @param {import('dictionary-data').TermV3} entry + * @param {string} dictionary + * @returns {import('dictionary-database').DatabaseTermEntry} + */ + _convertTermBankEntryV3(entry, dictionary) { + let [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; + expression = this._normalizeTermOrReading(expression); + reading = this._normalizeTermOrReading(reading.length > 0 ? reading : expression); + return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags, dictionary}; + } + + /** + * @param {import('dictionary-data').TermMeta} entry + * @param {string} dictionary + * @returns {import('dictionary-database').DatabaseTermMeta} + */ + _convertTermMetaBankEntry(entry, dictionary) { + const [expression, mode, data] = entry; + return /** @type {import('dictionary-database').DatabaseTermMeta} */ ({expression, mode, data, dictionary}); + } + + /** + * @param {import('dictionary-data').KanjiV1} entry + * @param {string} dictionary + * @returns {import('dictionary-database').DatabaseKanjiEntry} + */ + _convertKanjiBankEntryV1(entry, dictionary) { + const [character, onyomi, kunyomi, tags, ...meanings] = entry; + return {character, onyomi, kunyomi, tags, meanings, dictionary}; + } + + /** + * @param {import('dictionary-data').KanjiV3} entry + * @param {string} dictionary + * @returns {import('dictionary-database').DatabaseKanjiEntry} + */ + _convertKanjiBankEntryV3(entry, dictionary) { + const [character, onyomi, kunyomi, tags, meanings, stats] = entry; + return {character, onyomi, kunyomi, tags, meanings, stats, dictionary}; + } + + /** + * @param {import('dictionary-data').KanjiMeta} entry + * @param {string} dictionary + * @returns {import('dictionary-database').DatabaseKanjiMeta} + */ + _convertKanjiMetaBankEntry(entry, dictionary) { + const [character, mode, data] = entry; + return {character, mode, data, dictionary}; + } + + /** + * @param {import('dictionary-data').Tag} entry + * @param {string} dictionary + * @returns {import('dictionary-database').Tag} + */ + _convertTagBankEntry(entry, dictionary) { + const [name, category, order, notes, score] = entry; + return {name, category, order, notes, score, dictionary}; + } + + /** + * @param {import('dictionary-data').Index} index + * @param {import('dictionary-database').Tag[]} results + * @param {string} dictionary + */ + _addOldIndexTags(index, results, dictionary) { + const {tagMeta} = index; + if (typeof tagMeta !== 'object' || tagMeta === null) { return; } + for (const [name, value] of Object.entries(tagMeta)) { + const {category, order, notes, score} = value; + results.push({name, category, order, notes, score, dictionary}); + } + } + + /** + * @param {import('dictionary-importer').ArchiveFileMap} fileMap + * @param {import('dictionary-importer').QueryDetails} queryDetails + * @returns {import('dictionary-importer').QueryResult} + */ + _getArchiveFiles(fileMap, queryDetails) { + /** @type {import('dictionary-importer').QueryResult} */ + const results = new Map(); + for (const [name, value] of fileMap.entries()) { + for (const [fileType, fileNameFormat] of queryDetails.entries()) { + let entries = results.get(fileType); + if (typeof entries === 'undefined') { + entries = []; + results.set(fileType, entries); + } + + if (fileNameFormat.test(name)) { + entries.push(value); + break; + } + } + } + return results; + } + + /** + * @template [TEntry=unknown] + * @template [TResult=unknown] + * @param {import('@zip.js/zip.js').Entry[]} files + * @param {(entry: TEntry, dictionaryTitle: string) => TResult} convertEntry + * @param {import('dictionary-importer').CompiledSchemaName} schemaName + * @param {string} dictionaryTitle + * @returns {Promise} + */ + async _readFileSequence(files, convertEntry, schemaName, dictionaryTitle) { + const progressData = this._progressData; + let startIndex = 0; + + const results = []; + for (const file of files) { + const content = await this._getData(file, new TextWriter()); + const entries = /** @type {unknown} */ (parseJson(content)); + + startIndex = progressData.index; + this._progress(); + + const schema = ajvSchemas[schemaName]; + if (!schema(entries)) { + throw this._formatAjvSchemaError(schema, file.filename); + } + + progressData.index = startIndex + 1; + this._progress(); + + if (Array.isArray(entries)) { + for (const entry of entries) { + results.push(convertEntry(/** @type {TEntry} */ (entry), dictionaryTitle)); + } + } + } + return results; + } + + /** + * @param {import('dictionary-database').DatabaseTermMeta[]|import('dictionary-database').DatabaseKanjiMeta[]} metaList + * @returns {import('dictionary-importer').SummaryMetaCount} + */ + _getMetaCounts(metaList) { + /** @type {Map} */ + const countsMap = new Map(); + for (const {mode} of metaList) { + let count = countsMap.get(mode); + count = typeof count !== 'undefined' ? count + 1 : 1; + countsMap.set(mode, count); + } + /** @type {import('dictionary-importer').SummaryMetaCount} */ + const counts = {total: metaList.length}; + for (const [key, value] of countsMap.entries()) { + if (Object.prototype.hasOwnProperty.call(counts, key)) { continue; } + counts[key] = value; + } + return counts; + } + + /** + * @param {string} text + * @returns {string} + */ + _normalizeTermOrReading(text) { + // Note: this function should not perform String.normalize on the text, + // as it will normalize characters in an undesirable way. + // Thus, this function is currently a no-op. + // Example: + // - '\u9038'.normalize('NFC') => '\u9038' (逸) + // - '\ufa67'.normalize('NFC') => '\u9038' (逸 => 逸) + return text; + } + + /** + * @template [T=unknown] + * @param {import('@zip.js/zip.js').Entry} entry + * @param {import('@zip.js/zip.js').Writer|import('@zip.js/zip.js').WritableWriter} writer + * @returns {Promise} + */ + async _getData(entry, writer) { + if (typeof entry.getData === 'undefined') { + throw new Error(`Cannot read ${entry.filename}`); + } + return await entry.getData(writer); + } +} diff --git a/ext/js/dictionary/dictionary-worker-handler.js b/ext/js/dictionary/dictionary-worker-handler.js new file mode 100644 index 00000000..9a724386 --- /dev/null +++ b/ext/js/dictionary/dictionary-worker-handler.js @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2023 Yomitan Authors + * Copyright (C) 2021-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {ExtensionError} from '../core/extension-error.js'; +import {DictionaryDatabase} from './dictionary-database.js'; +import {DictionaryImporter} from './dictionary-importer.js'; +import {DictionaryWorkerMediaLoader} from './dictionary-worker-media-loader.js'; + +export class DictionaryWorkerHandler { + constructor() { + /** @type {DictionaryWorkerMediaLoader} */ + this._mediaLoader = new DictionaryWorkerMediaLoader(); + } + + /** */ + prepare() { + self.addEventListener('message', this._onMessage.bind(this), false); + } + + // Private + + /** + * @param {MessageEvent} event + */ + _onMessage(event) { + const {action, params} = event.data; + switch (action) { + case 'importDictionary': + this._onMessageWithProgress(params, this._importDictionary.bind(this)); + break; + case 'deleteDictionary': + this._onMessageWithProgress(params, this._deleteDictionary.bind(this)); + break; + case 'getDictionaryCounts': + this._onMessageWithProgress(params, this._getDictionaryCounts.bind(this)); + break; + case 'getImageDetails.response': + this._mediaLoader.handleMessage(params); + break; + } + } + + /** + * @template [T=unknown] + * @param {T} params + * @param {(details: T, onProgress: import('dictionary-worker-handler').OnProgressCallback) => Promise} handler + */ + async _onMessageWithProgress(params, handler) { + /** + * @param {...unknown} args + */ + const onProgress = (...args) => { + self.postMessage({ + action: 'progress', + params: {args} + }); + }; + let response; + try { + const result = await handler(params, onProgress); + response = {result}; + } catch (e) { + response = {error: ExtensionError.serialize(e)}; + } + self.postMessage({action: 'complete', params: response}); + } + + /** + * @param {import('dictionary-worker-handler').ImportDictionaryMessageParams} details + * @param {import('dictionary-worker-handler').OnProgressCallback} onProgress + * @returns {Promise} + */ + async _importDictionary({details, archiveContent}, onProgress) { + const dictionaryDatabase = await this._getPreparedDictionaryDatabase(); + try { + const dictionaryImporter = new DictionaryImporter(this._mediaLoader, onProgress); + const {result, errors} = await dictionaryImporter.importDictionary(dictionaryDatabase, archiveContent, details); + return { + result, + errors: errors.map((error) => ExtensionError.serialize(error)) + }; + } finally { + dictionaryDatabase.close(); + } + } + + /** + * @param {import('dictionary-worker-handler').DeleteDictionaryMessageParams} details + * @param {import('dictionary-database').DeleteDictionaryProgressCallback} onProgress + * @returns {Promise} + */ + async _deleteDictionary({dictionaryTitle}, onProgress) { + const dictionaryDatabase = await this._getPreparedDictionaryDatabase(); + try { + return await dictionaryDatabase.deleteDictionary(dictionaryTitle, 1000, onProgress); + } finally { + dictionaryDatabase.close(); + } + } + + /** + * @param {import('dictionary-worker-handler').GetDictionaryCountsMessageParams} details + * @returns {Promise} + */ + async _getDictionaryCounts({dictionaryNames, getTotal}) { + const dictionaryDatabase = await this._getPreparedDictionaryDatabase(); + try { + return await dictionaryDatabase.getDictionaryCounts(dictionaryNames, getTotal); + } finally { + dictionaryDatabase.close(); + } + } + + /** + * @returns {Promise} + */ + async _getPreparedDictionaryDatabase() { + const dictionaryDatabase = new DictionaryDatabase(); + await dictionaryDatabase.prepare(); + return dictionaryDatabase; + } +} diff --git a/ext/js/dictionary/dictionary-worker-main.js b/ext/js/dictionary/dictionary-worker-main.js new file mode 100644 index 00000000..8ae283b8 --- /dev/null +++ b/ext/js/dictionary/dictionary-worker-main.js @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2023 Yomitan Authors + * Copyright (C) 2021-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {log} from '../core.js'; +import {DictionaryWorkerHandler} from './dictionary-worker-handler.js'; + +/** Entry point. */ +function main() { + try { + const dictionaryWorkerHandler = new DictionaryWorkerHandler(); + dictionaryWorkerHandler.prepare(); + } catch (e) { + log.error(e); + } +} + +main(); diff --git a/ext/js/dictionary/dictionary-worker-media-loader.js b/ext/js/dictionary/dictionary-worker-media-loader.js new file mode 100644 index 00000000..e19a13d3 --- /dev/null +++ b/ext/js/dictionary/dictionary-worker-media-loader.js @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2023 Yomitan Authors + * Copyright (C) 2021-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {generateId} from '../core.js'; +import {ExtensionError} from '../core/extension-error.js'; + +/** + * Class used for loading and validating media from a worker thread + * during the dictionary import process. + */ +export class DictionaryWorkerMediaLoader { + /** + * Creates a new instance of the media loader. + */ + constructor() { + /** @type {Map void, reject: (reason?: import('core').RejectionReason) => void}>} */ + this._requests = new Map(); + } + + /** + * Handles a response message posted to the worker thread. + * @param {import('dictionary-worker-media-loader').HandleMessageParams} params Details of the response. + */ + handleMessage(params) { + const {id} = params; + const request = this._requests.get(id); + if (typeof request === 'undefined') { return; } + this._requests.delete(id); + const {error} = params; + if (typeof error !== 'undefined') { + request.reject(ExtensionError.deserialize(error)); + } else { + request.resolve(params.result); + } + } + + /** @type {import('dictionary-importer-media-loader').GetImageDetailsFunction} */ + getImageDetails(content, mediaType) { + return new Promise((resolve, reject) => { + const id = generateId(16); + this._requests.set(id, {resolve, reject}); + // This is executed in a Worker context, so the self needs to be force cast + /** @type {Worker} */ (/** @type {unknown} */ (self)).postMessage({ + action: 'getImageDetails', + params: {id, content, mediaType} + }, [content]); + }); + } +} diff --git a/ext/js/dictionary/dictionary-worker.js b/ext/js/dictionary/dictionary-worker.js new file mode 100644 index 00000000..669c65ac --- /dev/null +++ b/ext/js/dictionary/dictionary-worker.js @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2023 Yomitan Authors + * Copyright (C) 2021-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {ExtensionError} from '../core/extension-error.js'; +import {DictionaryImporterMediaLoader} from './dictionary-importer-media-loader.js'; + +export class DictionaryWorker { + constructor() { + /** @type {DictionaryImporterMediaLoader} */ + this._dictionaryImporterMediaLoader = new DictionaryImporterMediaLoader(); + } + + /** + * @param {ArrayBuffer} archiveContent + * @param {import('dictionary-importer').ImportDetails} details + * @param {?import('dictionary-worker').ImportProgressCallback} onProgress + * @returns {Promise} + */ + importDictionary(archiveContent, details, onProgress) { + return this._invoke( + 'importDictionary', + {details, archiveContent}, + [archiveContent], + onProgress, + this._formatImportDictionaryResult.bind(this) + ); + } + + /** + * @param {string} dictionaryTitle + * @param {?import('dictionary-worker').DeleteProgressCallback} onProgress + * @returns {Promise} + */ + deleteDictionary(dictionaryTitle, onProgress) { + return this._invoke('deleteDictionary', {dictionaryTitle}, [], onProgress, null); + } + + /** + * @param {string[]} dictionaryNames + * @param {boolean} getTotal + * @returns {Promise} + */ + getDictionaryCounts(dictionaryNames, getTotal) { + return this._invoke('getDictionaryCounts', {dictionaryNames, getTotal}, [], null, null); + } + + // Private + + /** + * @template [TParams=import('core').SerializableObject] + * @template [TResponseRaw=unknown] + * @template [TResponse=unknown] + * @param {string} action + * @param {TParams} params + * @param {Transferable[]} transfer + * @param {?(arg: import('core').SafeAny) => void} onProgress + * @param {?(result: TResponseRaw) => TResponse} formatResult + */ + _invoke(action, params, transfer, onProgress, formatResult) { + return new Promise((resolve, reject) => { + const worker = new Worker('/js/dictionary/dictionary-worker-main.js', {type: 'module'}); + /** @type {import('dictionary-worker').InvokeDetails} */ + const details = { + complete: false, + worker, + resolve, + reject, + onMessage: null, + onProgress, + formatResult + }; + // Ugly typecast below due to not being able to explicitly state the template types + /** @type {(event: MessageEvent>) => void} */ + const onMessage = /** @type {(details: import('dictionary-worker').InvokeDetails, event: MessageEvent>) => void} */ (this._onMessage).bind(this, details); + details.onMessage = onMessage; + worker.addEventListener('message', onMessage); + worker.postMessage({action, params}, transfer); + }); + } + + /** + * @template [TResponseRaw=unknown] + * @template [TResponse=unknown] + * @param {import('dictionary-worker').InvokeDetails} details + * @param {MessageEvent>} event + */ + _onMessage(details, event) { + if (details.complete) { return; } + const {action, params} = event.data; + switch (action) { + case 'complete': + { + const {worker, resolve, reject, onMessage, formatResult} = details; + if (worker === null || onMessage === null || resolve === null || reject === null) { return; } + details.complete = true; + details.worker = null; + details.resolve = null; + details.reject = null; + details.onMessage = null; + details.onProgress = null; + details.formatResult = null; + worker.removeEventListener('message', onMessage); + worker.terminate(); + this._onMessageComplete(params, resolve, reject, formatResult); + } + break; + case 'progress': + this._onMessageProgress(params, details.onProgress); + break; + case 'getImageDetails': + { + const {worker} = details; + if (worker === null) { return; } + this._onMessageGetImageDetails(params, worker); + } + break; + } + } + + /** + * @template [TResponseRaw=unknown] + * @template [TResponse=unknown] + * @param {import('dictionary-worker').MessageCompleteParams} params + * @param {(result: TResponse) => void} resolve + * @param {(reason?: import('core').RejectionReason) => void} reject + * @param {?(result: TResponseRaw) => TResponse} formatResult + */ + _onMessageComplete(params, resolve, reject, formatResult) { + const {error} = params; + if (typeof error !== 'undefined') { + reject(ExtensionError.deserialize(error)); + } else { + const {result} = params; + if (typeof formatResult === 'function') { + let result2; + try { + result2 = formatResult(result); + } catch (e) { + reject(e); + return; + } + resolve(result2); + } else { + // If formatResult is not provided, the response is assumed to be the same type + // For some reason, eslint thinks the TResponse type is undefined + // eslint-disable-next-line jsdoc/no-undefined-types + resolve(/** @type {TResponse} */ (/** @type {unknown} */ (result))); + } + } + } + + /** + * @param {import('dictionary-worker').MessageProgressParams} params + * @param {?(...args: unknown[]) => void} onProgress + */ + _onMessageProgress(params, onProgress) { + if (typeof onProgress !== 'function') { return; } + const {args} = params; + onProgress(...args); + } + + /** + * @param {import('dictionary-worker').MessageGetImageDetailsParams} params + * @param {Worker} worker + */ + async _onMessageGetImageDetails(params, worker) { + const {id, content, mediaType} = params; + /** @type {Transferable[]} */ + const transfer = []; + let response; + try { + const result = await this._dictionaryImporterMediaLoader.getImageDetails(content, mediaType, transfer); + response = {id, result}; + } catch (e) { + response = {id, error: ExtensionError.serialize(e)}; + } + worker.postMessage({action: 'getImageDetails.response', params: response}, transfer); + } + + /** + * @param {import('dictionary-worker').MessageCompleteResultSerialized} response + * @returns {import('dictionary-worker').MessageCompleteResult} + */ + _formatImportDictionaryResult(response) { + const {result, errors} = response; + return { + result, + errors: errors.map((error) => ExtensionError.deserialize(error)) + }; + } +} diff --git a/ext/js/display/display-generator.js b/ext/js/display/display-generator.js index 87b02f8a..920d2c94 100644 --- a/ext/js/display/display-generator.js +++ b/ext/js/display/display-generator.js @@ -18,8 +18,8 @@ import {isObject} from '../core.js'; import {ExtensionError} from '../core/extension-error.js'; +import {DictionaryDataUtil} from '../dictionary/dictionary-data-util.js'; import {HtmlTemplateCollection} from '../dom/html-template-collection.js'; -import {DictionaryDataUtil} from '../language/sandbox/dictionary-data-util.js'; import {yomitan} from '../yomitan.js'; import {PronunciationGenerator} from './sandbox/pronunciation-generator.js'; import {StructuredContentGenerator} from './sandbox/structured-content-generator.js'; diff --git a/ext/js/language/dictionary-database.js b/ext/js/language/dictionary-database.js deleted file mode 100644 index 45c5c6fd..00000000 --- a/ext/js/language/dictionary-database.js +++ /dev/null @@ -1,661 +0,0 @@ -/* - * Copyright (C) 2023 Yomitan Authors - * Copyright (C) 2016-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -import {log, stringReverse} from '../core.js'; -import {Database} from '../data/database.js'; - -export class DictionaryDatabase { - constructor() { - /** @type {Database} */ - this._db = new Database(); - /** @type {string} */ - this._dbName = 'dict'; - /** @type {import('dictionary-database').CreateQuery} */ - this._createOnlyQuery1 = (item) => IDBKeyRange.only(item); - /** @type {import('dictionary-database').CreateQuery} */ - this._createOnlyQuery2 = (item) => IDBKeyRange.only(item.query); - /** @type {import('dictionary-database').CreateQuery} */ - this._createOnlyQuery3 = (item) => IDBKeyRange.only(item.term); - /** @type {import('dictionary-database').CreateQuery} */ - this._createOnlyQuery4 = (item) => IDBKeyRange.only(item.path); - /** @type {import('dictionary-database').CreateQuery} */ - this._createBoundQuery1 = (item) => IDBKeyRange.bound(item, `${item}\uffff`, false, false); - /** @type {import('dictionary-database').CreateQuery} */ - this._createBoundQuery2 = (item) => { item = stringReverse(item); return IDBKeyRange.bound(item, `${item}\uffff`, false, false); }; - /** @type {import('dictionary-database').CreateResult} */ - this._createTermBind1 = this._createTermExact.bind(this); - /** @type {import('dictionary-database').CreateResult} */ - this._createTermBind2 = this._createTermSequenceExact.bind(this); - /** @type {import('dictionary-database').CreateResult} */ - this._createTermMetaBind = this._createTermMeta.bind(this); - /** @type {import('dictionary-database').CreateResult} */ - this._createKanjiBind = this._createKanji.bind(this); - /** @type {import('dictionary-database').CreateResult} */ - this._createKanjiMetaBind = this._createKanjiMeta.bind(this); - /** @type {import('dictionary-database').CreateResult} */ - this._createMediaBind = this._createMedia.bind(this); - } - - /** */ - async prepare() { - await this._db.open( - this._dbName, - 60, - /** @type {import('database').StructureDefinition[]} */ - ([ - /** @type {import('database').StructureDefinition} */ - ({ - version: 20, - stores: { - terms: { - primaryKey: {keyPath: 'id', autoIncrement: true}, - indices: ['dictionary', 'expression', 'reading'] - }, - kanji: { - primaryKey: {autoIncrement: true}, - indices: ['dictionary', 'character'] - }, - tagMeta: { - primaryKey: {autoIncrement: true}, - indices: ['dictionary'] - }, - dictionaries: { - primaryKey: {autoIncrement: true}, - indices: ['title', 'version'] - } - } - }), - { - version: 30, - stores: { - termMeta: { - primaryKey: {autoIncrement: true}, - indices: ['dictionary', 'expression'] - }, - kanjiMeta: { - primaryKey: {autoIncrement: true}, - indices: ['dictionary', 'character'] - }, - tagMeta: { - primaryKey: {autoIncrement: true}, - indices: ['dictionary', 'name'] - } - } - }, - { - version: 40, - stores: { - terms: { - primaryKey: {keyPath: 'id', autoIncrement: true}, - indices: ['dictionary', 'expression', 'reading', 'sequence'] - } - } - }, - { - version: 50, - stores: { - terms: { - primaryKey: {keyPath: 'id', autoIncrement: true}, - indices: ['dictionary', 'expression', 'reading', 'sequence', 'expressionReverse', 'readingReverse'] - } - } - }, - { - version: 60, - stores: { - media: { - primaryKey: {keyPath: 'id', autoIncrement: true}, - indices: ['dictionary', 'path'] - } - } - } - ]) - ); - } - - /** */ - async close() { - this._db.close(); - } - - /** - * @returns {boolean} - */ - isPrepared() { - return this._db.isOpen(); - } - - /** - * @returns {Promise} - */ - async purge() { - if (this._db.isOpening()) { - throw new Error('Cannot purge database while opening'); - } - if (this._db.isOpen()) { - this._db.close(); - } - let result = false; - try { - await Database.deleteDatabase(this._dbName); - result = true; - } catch (e) { - log.error(e); - } - await this.prepare(); - return result; - } - - /** - * @param {string} dictionaryName - * @param {number} progressRate - * @param {import('dictionary-database').DeleteDictionaryProgressCallback} onProgress - */ - async deleteDictionary(dictionaryName, progressRate, onProgress) { - /** @type {[objectStoreName: import('dictionary-database').ObjectStoreName, key: string][][]} */ - const targetGroups = [ - [ - ['kanji', 'dictionary'], - ['kanjiMeta', 'dictionary'], - ['terms', 'dictionary'], - ['termMeta', 'dictionary'], - ['tagMeta', 'dictionary'], - ['media', 'dictionary'] - ], - [ - ['dictionaries', 'title'] - ] - ]; - - let storeCount = 0; - for (const targets of targetGroups) { - storeCount += targets.length; - } - - /** @type {import('dictionary-database').DeleteDictionaryProgressData} */ - const progressData = { - count: 0, - processed: 0, - storeCount, - storesProcesed: 0 - }; - - /** - * @param {IDBValidKey[]} keys - * @returns {IDBValidKey[]} - */ - const filterKeys = (keys) => { - ++progressData.storesProcesed; - progressData.count += keys.length; - onProgress(progressData); - return keys; - }; - const onProgressWrapper = () => { - const processed = progressData.processed + 1; - progressData.processed = processed; - if ((processed % progressRate) === 0 || processed === progressData.count) { - onProgress(progressData); - } - }; - - for (const targets of targetGroups) { - const promises = []; - for (const [objectStoreName, indexName] of targets) { - const query = IDBKeyRange.only(dictionaryName); - const promise = this._db.bulkDelete(objectStoreName, indexName, query, filterKeys, onProgressWrapper); - promises.push(promise); - } - await Promise.all(promises); - } - } - - /** - * @param {string[]} termList - * @param {import('dictionary-database').DictionarySet} dictionaries - * @param {import('dictionary-database').MatchType} matchType - * @returns {Promise} - */ - findTermsBulk(termList, dictionaries, matchType) { - const visited = new Set(); - /** @type {import('dictionary-database').FindPredicate} */ - const predicate = (row) => { - if (!dictionaries.has(row.dictionary)) { return false; } - const {id} = row; - if (visited.has(id)) { return false; } - visited.add(id); - return true; - }; - - const indexNames = (matchType === 'suffix') ? ['expressionReverse', 'readingReverse'] : ['expression', 'reading']; - - let createQuery = this._createOnlyQuery1; - switch (matchType) { - case 'prefix': - createQuery = this._createBoundQuery1; - break; - case 'suffix': - createQuery = this._createBoundQuery2; - break; - } - - const createResult = this._createTermGeneric.bind(this, matchType); - - return this._findMultiBulk('terms', indexNames, termList, createQuery, predicate, createResult); - } - - /** - * @param {import('dictionary-database').TermExactRequest[]} termList - * @param {import('dictionary-database').DictionarySet} dictionaries - * @returns {Promise} - */ - findTermsExactBulk(termList, dictionaries) { - /** @type {import('dictionary-database').FindPredicate} */ - const predicate = (row, item) => (row.reading === item.reading && dictionaries.has(row.dictionary)); - return this._findMultiBulk('terms', ['expression'], termList, this._createOnlyQuery3, predicate, this._createTermBind1); - } - - /** - * @param {import('dictionary-database').DictionaryAndQueryRequest[]} items - * @returns {Promise} - */ - findTermsBySequenceBulk(items) { - /** @type {import('dictionary-database').FindPredicate} */ - const predicate = (row, item) => (row.dictionary === item.dictionary); - return this._findMultiBulk('terms', ['sequence'], items, this._createOnlyQuery2, predicate, this._createTermBind2); - } - - /** - * @param {string[]} termList - * @param {import('dictionary-database').DictionarySet} dictionaries - * @returns {Promise} - */ - findTermMetaBulk(termList, dictionaries) { - /** @type {import('dictionary-database').FindPredicate} */ - const predicate = (row) => dictionaries.has(row.dictionary); - return this._findMultiBulk('termMeta', ['expression'], termList, this._createOnlyQuery1, predicate, this._createTermMetaBind); - } - - /** - * @param {string[]} kanjiList - * @param {import('dictionary-database').DictionarySet} dictionaries - * @returns {Promise} - */ - findKanjiBulk(kanjiList, dictionaries) { - /** @type {import('dictionary-database').FindPredicate} */ - const predicate = (row) => dictionaries.has(row.dictionary); - return this._findMultiBulk('kanji', ['character'], kanjiList, this._createOnlyQuery1, predicate, this._createKanjiBind); - } - - /** - * @param {string[]} kanjiList - * @param {import('dictionary-database').DictionarySet} dictionaries - * @returns {Promise} - */ - findKanjiMetaBulk(kanjiList, dictionaries) { - /** @type {import('dictionary-database').FindPredicate} */ - const predicate = (row) => dictionaries.has(row.dictionary); - return this._findMultiBulk('kanjiMeta', ['character'], kanjiList, this._createOnlyQuery1, predicate, this._createKanjiMetaBind); - } - - /** - * @param {import('dictionary-database').DictionaryAndQueryRequest[]} items - * @returns {Promise<(import('dictionary-database').Tag|undefined)[]>} - */ - findTagMetaBulk(items) { - /** @type {import('dictionary-database').FindPredicate} */ - const predicate = (row, item) => (row.dictionary === item.dictionary); - return this._findFirstBulk('tagMeta', 'name', items, this._createOnlyQuery2, predicate); - } - - /** - * @param {string} name - * @param {string} dictionary - * @returns {Promise} - */ - findTagForTitle(name, dictionary) { - const query = IDBKeyRange.only(name); - return this._db.find('tagMeta', 'name', query, (row) => (/** @type {import('dictionary-database').Tag} */ (row).dictionary === dictionary), null, null); - } - - /** - * @param {import('dictionary-database').MediaRequest[]} items - * @returns {Promise} - */ - getMedia(items) { - /** @type {import('dictionary-database').FindPredicate} */ - const predicate = (row, item) => (row.dictionary === item.dictionary); - return this._findMultiBulk('media', ['path'], items, this._createOnlyQuery4, predicate, this._createMediaBind); - } - - /** - * @returns {Promise} - */ - getDictionaryInfo() { - return new Promise((resolve, reject) => { - const transaction = this._db.transaction(['dictionaries'], 'readonly'); - const objectStore = transaction.objectStore('dictionaries'); - this._db.getAll(objectStore, null, resolve, reject, null); - }); - } - - /** - * @param {string[]} dictionaryNames - * @param {boolean} getTotal - * @returns {Promise} - */ - getDictionaryCounts(dictionaryNames, getTotal) { - return new Promise((resolve, reject) => { - const targets = [ - ['kanji', 'dictionary'], - ['kanjiMeta', 'dictionary'], - ['terms', 'dictionary'], - ['termMeta', 'dictionary'], - ['tagMeta', 'dictionary'], - ['media', 'dictionary'] - ]; - const objectStoreNames = targets.map(([objectStoreName]) => objectStoreName); - const transaction = this._db.transaction(objectStoreNames, 'readonly'); - const databaseTargets = targets.map(([objectStoreName, indexName]) => { - const objectStore = transaction.objectStore(objectStoreName); - const index = objectStore.index(indexName); - return {objectStore, index}; - }); - - /** @type {import('database').CountTarget[]} */ - const countTargets = []; - if (getTotal) { - for (const {objectStore} of databaseTargets) { - countTargets.push([objectStore, void 0]); - } - } - for (const dictionaryName of dictionaryNames) { - const query = IDBKeyRange.only(dictionaryName); - for (const {index} of databaseTargets) { - countTargets.push([index, query]); - } - } - - /** - * @param {number[]} results - */ - const onCountComplete = (results) => { - const resultCount = results.length; - const targetCount = targets.length; - /** @type {import('dictionary-database').DictionaryCountGroup[]} */ - const counts = []; - for (let i = 0; i < resultCount; i += targetCount) { - /** @type {import('dictionary-database').DictionaryCountGroup} */ - const countGroup = {}; - for (let j = 0; j < targetCount; ++j) { - countGroup[targets[j][0]] = results[i + j]; - } - counts.push(countGroup); - } - const total = getTotal ? /** @type {import('dictionary-database').DictionaryCountGroup} */ (counts.shift()) : null; - resolve({total, counts}); - }; - - this._db.bulkCount(countTargets, onCountComplete, reject); - }); - } - - /** - * @param {string} title - * @returns {Promise} - */ - async dictionaryExists(title) { - const query = IDBKeyRange.only(title); - const result = await this._db.find('dictionaries', 'title', query, null, null, void 0); - return typeof result !== 'undefined'; - } - - /** - * @template {import('dictionary-database').ObjectStoreName} T - * @param {T} objectStoreName - * @param {import('dictionary-database').ObjectStoreData[]} items - * @param {number} start - * @param {number} count - * @returns {Promise} - */ - bulkAdd(objectStoreName, items, start, count) { - return this._db.bulkAdd(objectStoreName, items, start, count); - } - - // Private - - /** - * @template [TRow=unknown] - * @template [TItem=unknown] - * @template [TResult=unknown] - * @param {import('dictionary-database').ObjectStoreName} objectStoreName - * @param {string[]} indexNames - * @param {TItem[]} items - * @param {import('dictionary-database').CreateQuery} createQuery - * @param {import('dictionary-database').FindPredicate} predicate - * @param {import('dictionary-database').CreateResult} createResult - * @returns {Promise} - */ - _findMultiBulk(objectStoreName, indexNames, items, createQuery, predicate, createResult) { - return new Promise((resolve, reject) => { - const itemCount = items.length; - const indexCount = indexNames.length; - /** @type {TResult[]} */ - const results = []; - if (itemCount === 0 || indexCount === 0) { - resolve(results); - return; - } - - const transaction = this._db.transaction([objectStoreName], 'readonly'); - const objectStore = transaction.objectStore(objectStoreName); - const indexList = []; - for (const indexName of indexNames) { - indexList.push(objectStore.index(indexName)); - } - let completeCount = 0; - const requiredCompleteCount = itemCount * indexCount; - /** - * @param {TRow[]} rows - * @param {import('dictionary-database').FindMultiBulkData} data - */ - const onGetAll = (rows, data) => { - for (const row of rows) { - if (predicate(row, data.item)) { - results.push(createResult(row, data)); - } - } - if (++completeCount >= requiredCompleteCount) { - resolve(results); - } - }; - for (let i = 0; i < itemCount; ++i) { - const item = items[i]; - const query = createQuery(item); - for (let j = 0; j < indexCount; ++j) { - /** @type {import('dictionary-database').FindMultiBulkData} */ - const data = {item, itemIndex: i, indexIndex: j}; - this._db.getAll(indexList[j], query, onGetAll, reject, data); - } - } - }); - } - - /** - * @template [TRow=unknown] - * @template [TItem=unknown] - * @param {import('dictionary-database').ObjectStoreName} objectStoreName - * @param {string} indexName - * @param {TItem[]} items - * @param {import('dictionary-database').CreateQuery} createQuery - * @param {import('dictionary-database').FindPredicate} predicate - * @returns {Promise<(TRow|undefined)[]>} - */ - _findFirstBulk(objectStoreName, indexName, items, createQuery, predicate) { - return new Promise((resolve, reject) => { - const itemCount = items.length; - /** @type {(TRow|undefined)[]} */ - const results = new Array(itemCount); - if (itemCount === 0) { - resolve(results); - return; - } - - const transaction = this._db.transaction([objectStoreName], 'readonly'); - const objectStore = transaction.objectStore(objectStoreName); - const index = objectStore.index(indexName); - let completeCount = 0; - /** - * @param {TRow|undefined} row - * @param {number} itemIndex - */ - const onFind = (row, itemIndex) => { - results[itemIndex] = row; - if (++completeCount >= itemCount) { - resolve(results); - } - }; - for (let i = 0; i < itemCount; ++i) { - const item = items[i]; - const query = createQuery(item); - this._db.findFirst(index, query, onFind, reject, i, predicate, item, void 0); - } - }); - } - - /** - * @param {import('dictionary-database').MatchType} matchType - * @param {import('dictionary-database').DatabaseTermEntryWithId} row - * @param {import('dictionary-database').FindMultiBulkData} data - * @returns {import('dictionary-database').TermEntry} - */ - _createTermGeneric(matchType, row, data) { - const matchSourceIsTerm = (data.indexIndex === 0); - const matchSource = (matchSourceIsTerm ? 'term' : 'reading'); - if ((matchSourceIsTerm ? row.expression : row.reading) === data.item) { - matchType = 'exact'; - } - return this._createTerm(matchSource, matchType, row, data.itemIndex); - } - - /** - * @param {import('dictionary-database').DatabaseTermEntryWithId} row - * @param {import('dictionary-database').FindMultiBulkData} data - * @returns {import('dictionary-database').TermEntry} - */ - _createTermExact(row, data) { - return this._createTerm('term', 'exact', row, data.itemIndex); - } - - /** - * @param {import('dictionary-database').DatabaseTermEntryWithId} row - * @param {import('dictionary-database').FindMultiBulkData} data - * @returns {import('dictionary-database').TermEntry} - */ - _createTermSequenceExact(row, data) { - return this._createTerm('sequence', 'exact', row, data.itemIndex); - } - - /** - * @param {import('dictionary-database').MatchSource} matchSource - * @param {import('dictionary-database').MatchType} matchType - * @param {import('dictionary-database').DatabaseTermEntryWithId} row - * @param {number} index - * @returns {import('dictionary-database').TermEntry} - */ - _createTerm(matchSource, matchType, row, index) { - const {sequence} = row; - return { - index, - matchType, - matchSource, - term: row.expression, - reading: row.reading, - definitionTags: this._splitField(row.definitionTags || row.tags), - termTags: this._splitField(row.termTags), - rules: this._splitField(row.rules), - definitions: row.glossary, - score: row.score, - dictionary: row.dictionary, - id: row.id, - sequence: typeof sequence === 'number' ? sequence : -1 - }; - } - - /** - * @param {import('dictionary-database').DatabaseKanjiEntry} row - * @param {import('dictionary-database').FindMultiBulkData} data - * @returns {import('dictionary-database').KanjiEntry} - */ - _createKanji(row, {itemIndex: index}) { - const {stats} = row; - return { - index, - character: row.character, - onyomi: this._splitField(row.onyomi), - kunyomi: this._splitField(row.kunyomi), - tags: this._splitField(row.tags), - definitions: row.meanings, - stats: typeof stats === 'object' && stats !== null ? stats : {}, - dictionary: row.dictionary - }; - } - - /** - * @param {import('dictionary-database').DatabaseTermMeta} row - * @param {import('dictionary-database').FindMultiBulkData} data - * @returns {import('dictionary-database').TermMeta} - * @throws {Error} - */ - _createTermMeta({expression: term, mode, data, dictionary}, {itemIndex: index}) { - switch (mode) { - case 'freq': - return {index, term, mode, data, dictionary}; - case 'pitch': - return {index, term, mode, data, dictionary}; - default: - throw new Error(`Unknown mode: ${mode}`); - } - } - - /** - * @param {import('dictionary-database').DatabaseKanjiMeta} row - * @param {import('dictionary-database').FindMultiBulkData} data - * @returns {import('dictionary-database').KanjiMeta} - */ - _createKanjiMeta({character, mode, data, dictionary}, {itemIndex: index}) { - return {index, character, mode, data, dictionary}; - } - - /** - * @param {import('dictionary-database').MediaDataArrayBufferContent} row - * @param {import('dictionary-database').FindMultiBulkData} data - * @returns {import('dictionary-database').Media} - */ - _createMedia(row, {itemIndex: index}) { - const {dictionary, path, mediaType, width, height, content} = row; - return {index, dictionary, path, mediaType, width, height, content}; - } - - /** - * @param {unknown} field - * @returns {string[]} - */ - _splitField(field) { - return typeof field === 'string' && field.length > 0 ? field.split(' ') : []; - } -} diff --git a/ext/js/language/dictionary-importer-media-loader.js b/ext/js/language/dictionary-importer-media-loader.js deleted file mode 100644 index a5857dce..00000000 --- a/ext/js/language/dictionary-importer-media-loader.js +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2023 Yomitan Authors - * Copyright (C) 2021-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -import {EventListenerCollection} from '../core.js'; - -/** - * Class used for loading and validating media during the dictionary import process. - */ -export class DictionaryImporterMediaLoader { - /** @type {import('dictionary-importer-media-loader').GetImageDetailsFunction} */ - getImageDetails(content, mediaType, transfer) { - return new Promise((resolve, reject) => { - const image = new Image(); - const eventListeners = new EventListenerCollection(); - const cleanup = () => { - image.removeAttribute('src'); - URL.revokeObjectURL(url); - eventListeners.removeAllEventListeners(); - }; - eventListeners.addEventListener(image, 'load', () => { - const {naturalWidth: width, naturalHeight: height} = image; - if (Array.isArray(transfer)) { transfer.push(content); } - cleanup(); - resolve({content, width, height}); - }, false); - eventListeners.addEventListener(image, 'error', () => { - cleanup(); - reject(new Error('Image failed to load')); - }, false); - const blob = new Blob([content], {type: mediaType}); - const url = URL.createObjectURL(blob); - image.src = url; - }); - } -} diff --git a/ext/js/language/dictionary-importer.js b/ext/js/language/dictionary-importer.js deleted file mode 100644 index 2c0c7e9c..00000000 --- a/ext/js/language/dictionary-importer.js +++ /dev/null @@ -1,796 +0,0 @@ -/* - * Copyright (C) 2023 Yomitan Authors - * Copyright (C) 2020-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -import * as ajvSchemas0 from '../../lib/validate-schemas.js'; -import { - BlobWriter as BlobWriter0, - TextWriter as TextWriter0, - Uint8ArrayReader as Uint8ArrayReader0, - ZipReader as ZipReader0, - configure -} from '../../lib/zip.js'; -import {stringReverse} from '../core.js'; -import {ExtensionError} from '../core/extension-error.js'; -import {parseJson} from '../core/json.js'; -import {MediaUtil} from '../media/media-util.js'; - -const ajvSchemas = /** @type {import('dictionary-importer').CompiledSchemaValidators} */ (/** @type {unknown} */ (ajvSchemas0)); -const BlobWriter = /** @type {typeof import('@zip.js/zip.js').BlobWriter} */ (/** @type {unknown} */ (BlobWriter0)); -const TextWriter = /** @type {typeof import('@zip.js/zip.js').TextWriter} */ (/** @type {unknown} */ (TextWriter0)); -const Uint8ArrayReader = /** @type {typeof import('@zip.js/zip.js').Uint8ArrayReader} */ (/** @type {unknown} */ (Uint8ArrayReader0)); -const ZipReader = /** @type {typeof import('@zip.js/zip.js').ZipReader} */ (/** @type {unknown} */ (ZipReader0)); - -export class DictionaryImporter { - /** - * @param {import('dictionary-importer-media-loader').GenericMediaLoader} mediaLoader - * @param {import('dictionary-importer').OnProgressCallback} [onProgress] - */ - constructor(mediaLoader, onProgress) { - /** @type {import('dictionary-importer-media-loader').GenericMediaLoader} */ - this._mediaLoader = mediaLoader; - /** @type {import('dictionary-importer').OnProgressCallback} */ - this._onProgress = typeof onProgress === 'function' ? onProgress : () => {}; - /** @type {import('dictionary-importer').ProgressData} */ - this._progressData = this._createProgressData(); - } - - /** - * @param {import('./dictionary-database.js').DictionaryDatabase} dictionaryDatabase - * @param {ArrayBuffer} archiveContent - * @param {import('dictionary-importer').ImportDetails} details - * @returns {Promise} - */ - async importDictionary(dictionaryDatabase, archiveContent, details) { - if (!dictionaryDatabase) { - throw new Error('Invalid database'); - } - if (!dictionaryDatabase.isPrepared()) { - throw new Error('Database is not ready'); - } - - this._progressReset(); - - configure({ - workerScripts: { - deflate: ['../../lib/z-worker.js'], - inflate: ['../../lib/z-worker.js'] - } - }); - - // Read archive - const zipFileReader = new Uint8ArrayReader(new Uint8Array(archiveContent)); - const zipReader = new ZipReader(zipFileReader); - const zipEntries = await zipReader.getEntries(); - /** @type {import('dictionary-importer').ArchiveFileMap} */ - const fileMap = new Map(); - for (const entry of zipEntries) { - fileMap.set(entry.filename, entry); - } - // Read and validate index - const indexFileName = 'index.json'; - const indexFile = fileMap.get(indexFileName); - if (typeof indexFile === 'undefined') { - throw new Error('No dictionary index found in archive'); - } - const indexFile2 = /** @type {import('@zip.js/zip.js').Entry} */ (indexFile); - - const indexContent = await this._getData(indexFile2, new TextWriter()); - const index = /** @type {import('dictionary-data').Index} */ (parseJson(indexContent)); - - if (!ajvSchemas.dictionaryIndex(index)) { - throw this._formatAjvSchemaError(ajvSchemas.dictionaryIndex, indexFileName); - } - - const dictionaryTitle = index.title; - const version = typeof index.format === 'number' ? index.format : index.version; - - if (typeof version !== 'number' || !dictionaryTitle || !index.revision) { - throw new Error('Unrecognized dictionary format'); - } - - // Verify database is not already imported - if (await dictionaryDatabase.dictionaryExists(dictionaryTitle)) { - throw new Error('Dictionary is already imported'); - } - - // Load schemas - this._progressNextStep(0); - const dataBankSchemas = this._getDataBankSchemas(version); - - // Files - /** @type {import('dictionary-importer').QueryDetails} */ - const queryDetails = new Map([ - ['termFiles', /^term_bank_(\d+)\.json$/], - ['termMetaFiles', /^term_meta_bank_(\d+)\.json$/], - ['kanjiFiles', /^kanji_bank_(\d+)\.json$/], - ['kanjiMetaFiles', /^kanji_meta_bank_(\d+)\.json$/], - ['tagFiles', /^tag_bank_(\d+)\.json$/] - ]); - const {termFiles, termMetaFiles, kanjiFiles, kanjiMetaFiles, tagFiles} = Object.fromEntries(this._getArchiveFiles(fileMap, queryDetails)); - - // Load data - this._progressNextStep(termFiles.length + termMetaFiles.length + kanjiFiles.length + kanjiMetaFiles.length + tagFiles.length); - const termList = await ( - version === 1 ? - this._readFileSequence(termFiles, this._convertTermBankEntryV1.bind(this), dataBankSchemas[0], dictionaryTitle) : - this._readFileSequence(termFiles, this._convertTermBankEntryV3.bind(this), dataBankSchemas[0], dictionaryTitle) - ); - const termMetaList = await this._readFileSequence(termMetaFiles, this._convertTermMetaBankEntry.bind(this), dataBankSchemas[1], dictionaryTitle); - const kanjiList = await ( - version === 1 ? - this._readFileSequence(kanjiFiles, this._convertKanjiBankEntryV1.bind(this), dataBankSchemas[2], dictionaryTitle) : - this._readFileSequence(kanjiFiles, this._convertKanjiBankEntryV3.bind(this), dataBankSchemas[2], dictionaryTitle) - ); - const kanjiMetaList = await this._readFileSequence(kanjiMetaFiles, this._convertKanjiMetaBankEntry.bind(this), dataBankSchemas[3], dictionaryTitle); - const tagList = await this._readFileSequence(tagFiles, this._convertTagBankEntry.bind(this), dataBankSchemas[4], dictionaryTitle); - this._addOldIndexTags(index, tagList, dictionaryTitle); - - // Prefix wildcard support - const prefixWildcardsSupported = !!details.prefixWildcardsSupported; - if (prefixWildcardsSupported) { - for (const entry of termList) { - entry.expressionReverse = stringReverse(entry.expression); - entry.readingReverse = stringReverse(entry.reading); - } - } - - // Extended data support - this._progressNextStep(termList.length); - const formatProgressInterval = 1000; - /** @type {import('dictionary-importer').ImportRequirement[]} */ - const requirements = []; - for (let i = 0, ii = termList.length; i < ii; ++i) { - const entry = termList[i]; - const glossaryList = entry.glossary; - for (let j = 0, jj = glossaryList.length; j < jj; ++j) { - const glossary = glossaryList[j]; - if (typeof glossary !== 'object' || glossary === null) { continue; } - glossaryList[j] = this._formatDictionaryTermGlossaryObject(glossary, entry, requirements); - } - if ((i % formatProgressInterval) === 0) { - this._progressData.index = i; - this._progress(); - } - } - this._progress(); - - // Async requirements - this._progressNextStep(requirements.length); - const {media} = await this._resolveAsyncRequirements(requirements, fileMap); - - // Add dictionary descriptor - this._progressNextStep(termList.length + termMetaList.length + kanjiList.length + kanjiMetaList.length + tagList.length + media.length); - - /** @type {import('dictionary-importer').SummaryCounts} */ - const counts = { - terms: {total: termList.length}, - termMeta: this._getMetaCounts(termMetaList), - kanji: {total: kanjiList.length}, - kanjiMeta: this._getMetaCounts(kanjiMetaList), - tagMeta: {total: tagList.length}, - media: {total: media.length} - }; - const summary = this._createSummary(dictionaryTitle, version, index, {prefixWildcardsSupported, counts}); - dictionaryDatabase.bulkAdd('dictionaries', [summary], 0, 1); - - // Add data - /** @type {Error[]} */ - const errors = []; - const maxTransactionLength = 1000; - - /** - * @template {import('dictionary-database').ObjectStoreName} T - * @param {T} objectStoreName - * @param {import('dictionary-database').ObjectStoreData[]} entries - */ - const bulkAdd = async (objectStoreName, entries) => { - const ii = entries.length; - for (let i = 0; i < ii; i += maxTransactionLength) { - const count = Math.min(maxTransactionLength, ii - i); - - try { - await dictionaryDatabase.bulkAdd(objectStoreName, entries, i, count); - } catch (e) { - errors.push(e instanceof Error ? e : new Error(`${e}`)); - } - - this._progressData.index += count; - this._progress(); - } - }; - - await bulkAdd('terms', termList); - await bulkAdd('termMeta', termMetaList); - await bulkAdd('kanji', kanjiList); - await bulkAdd('kanjiMeta', kanjiMetaList); - await bulkAdd('tagMeta', tagList); - await bulkAdd('media', media); - - this._progress(); - - return {result: summary, errors}; - } - - /** - * @returns {import('dictionary-importer').ProgressData} - */ - _createProgressData() { - return { - stepIndex: 0, - stepCount: 6, - index: 0, - count: 0 - }; - } - - /** */ - _progressReset() { - this._progressData = this._createProgressData(); - this._progress(); - } - - /** - * @param {number} count - */ - _progressNextStep(count) { - ++this._progressData.stepIndex; - this._progressData.index = 0; - this._progressData.count = count; - this._progress(); - } - - /** */ - _progress() { - this._onProgress(this._progressData); - } - - /** - * @param {string} dictionaryTitle - * @param {number} version - * @param {import('dictionary-data').Index} index - * @param {{prefixWildcardsSupported: boolean, counts: import('dictionary-importer').SummaryCounts}} details - * @returns {import('dictionary-importer').Summary} - */ - _createSummary(dictionaryTitle, version, index, details) { - const indexSequenced = index.sequenced; - const {prefixWildcardsSupported, counts} = details; - - /** @type {import('dictionary-importer').Summary} */ - const summary = { - title: dictionaryTitle, - revision: index.revision, - sequenced: typeof indexSequenced === 'boolean' && indexSequenced, - version, - importDate: Date.now(), - prefixWildcardsSupported, - counts - }; - - const {author, url, description, attribution, frequencyMode} = index; - if (typeof author === 'string') { summary.author = author; } - if (typeof url === 'string') { summary.url = url; } - if (typeof description === 'string') { summary.description = description; } - if (typeof attribution === 'string') { summary.attribution = attribution; } - if (typeof frequencyMode === 'string') { summary.frequencyMode = frequencyMode; } - - return summary; - } - - /** - * @param {import('ajv').ValidateFunction} schema - * @param {string} fileName - * @returns {ExtensionError} - */ - _formatAjvSchemaError(schema, fileName) { - const e2 = new ExtensionError(`Dictionary has invalid data in '${fileName}'`); - e2.data = schema.errors; - - return e2; - } - - /** - * @param {number} version - * @returns {import('dictionary-importer').CompiledSchemaNameArray} - */ - _getDataBankSchemas(version) { - const termBank = ( - version === 1 ? - 'dictionaryTermBankV1' : - 'dictionaryTermBankV3' - ); - const termMetaBank = 'dictionaryTermMetaBankV3'; - const kanjiBank = ( - version === 1 ? - 'dictionaryKanjiBankV1' : - 'dictionaryKanjiBankV3' - ); - const kanjiMetaBank = 'dictionaryKanjiMetaBankV3'; - const tagBank = 'dictionaryTagBankV3'; - - return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; - } - - /** - * @param {import('dictionary-data').TermGlossaryText|import('dictionary-data').TermGlossaryImage|import('dictionary-data').TermGlossaryStructuredContent} data - * @param {import('dictionary-database').DatabaseTermEntry} entry - * @param {import('dictionary-importer').ImportRequirement[]} requirements - * @returns {import('dictionary-data').TermGlossary} - * @throws {Error} - */ - _formatDictionaryTermGlossaryObject(data, entry, requirements) { - switch (data.type) { - case 'text': - return data.text; - case 'image': - return this._formatDictionaryTermGlossaryImage(data, entry, requirements); - case 'structured-content': - return this._formatStructuredContent(data, entry, requirements); - default: - throw new Error(`Unhandled data type: ${/** @type {import('core').SerializableObject} */ (data).type}`); - } - } - - /** - * @param {import('dictionary-data').TermGlossaryImage} data - * @param {import('dictionary-database').DatabaseTermEntry} entry - * @param {import('dictionary-importer').ImportRequirement[]} requirements - * @returns {import('dictionary-data').TermGlossaryImage} - */ - _formatDictionaryTermGlossaryImage(data, entry, requirements) { - /** @type {import('dictionary-data').TermGlossaryImage} */ - const target = { - type: 'image', - path: '' // Will be populated during requirement resolution - }; - requirements.push({type: 'image', target, source: data, entry}); - return target; - } - - /** - * @param {import('dictionary-data').TermGlossaryStructuredContent} data - * @param {import('dictionary-database').DatabaseTermEntry} entry - * @param {import('dictionary-importer').ImportRequirement[]} requirements - * @returns {import('dictionary-data').TermGlossaryStructuredContent} - */ - _formatStructuredContent(data, entry, requirements) { - const content = this._prepareStructuredContent(data.content, entry, requirements); - return { - type: 'structured-content', - content - }; - } - - /** - * @param {import('structured-content').Content} content - * @param {import('dictionary-database').DatabaseTermEntry} entry - * @param {import('dictionary-importer').ImportRequirement[]} requirements - * @returns {import('structured-content').Content} - */ - _prepareStructuredContent(content, entry, requirements) { - if (typeof content === 'string' || !(typeof content === 'object' && content !== null)) { - return content; - } - if (Array.isArray(content)) { - for (let i = 0, ii = content.length; i < ii; ++i) { - content[i] = this._prepareStructuredContent(content[i], entry, requirements); - } - return content; - } - const {tag} = content; - switch (tag) { - case 'img': - return this._prepareStructuredContentImage(content, entry, requirements); - } - const childContent = content.content; - if (typeof childContent !== 'undefined') { - content.content = this._prepareStructuredContent(childContent, entry, requirements); - } - return content; - } - - /** - * @param {import('structured-content').ImageElement} content - * @param {import('dictionary-database').DatabaseTermEntry} entry - * @param {import('dictionary-importer').ImportRequirement[]} requirements - * @returns {import('structured-content').ImageElement} - */ - _prepareStructuredContentImage(content, entry, requirements) { - /** @type {import('structured-content').ImageElement} */ - const target = { - tag: 'img', - path: '' // Will be populated during requirement resolution - }; - requirements.push({type: 'structured-content-image', target, source: content, entry}); - return target; - } - - /** - * @param {import('dictionary-importer').ImportRequirement[]} requirements - * @param {import('dictionary-importer').ArchiveFileMap} fileMap - * @returns {Promise<{media: import('dictionary-database').MediaDataArrayBufferContent[]}>} - */ - async _resolveAsyncRequirements(requirements, fileMap) { - /** @type {Map} */ - const media = new Map(); - /** @type {import('dictionary-importer').ImportRequirementContext} */ - const context = {fileMap, media}; - - for (const requirement of requirements) { - await this._resolveAsyncRequirement(context, requirement); - } - - return { - media: [...media.values()] - }; - } - - /** - * @param {import('dictionary-importer').ImportRequirementContext} context - * @param {import('dictionary-importer').ImportRequirement} requirement - */ - async _resolveAsyncRequirement(context, requirement) { - switch (requirement.type) { - case 'image': - await this._resolveDictionaryTermGlossaryImage( - context, - requirement.target, - requirement.source, - requirement.entry - ); - break; - case 'structured-content-image': - await this._resolveStructuredContentImage( - context, - requirement.target, - requirement.source, - requirement.entry - ); - break; - default: - return; - } - ++this._progressData.index; - this._progress(); - } - - /** - * @param {import('dictionary-importer').ImportRequirementContext} context - * @param {import('dictionary-data').TermGlossaryImage} target - * @param {import('dictionary-data').TermGlossaryImage} source - * @param {import('dictionary-database').DatabaseTermEntry} entry - */ - async _resolveDictionaryTermGlossaryImage(context, target, source, entry) { - await this._createImageData(context, target, source, entry); - } - - /** - * @param {import('dictionary-importer').ImportRequirementContext} context - * @param {import('structured-content').ImageElement} target - * @param {import('structured-content').ImageElement} source - * @param {import('dictionary-database').DatabaseTermEntry} entry - */ - async _resolveStructuredContentImage(context, target, source, entry) { - const {verticalAlign, sizeUnits} = source; - await this._createImageData(context, target, source, entry); - if (typeof verticalAlign === 'string') { target.verticalAlign = verticalAlign; } - if (typeof sizeUnits === 'string') { target.sizeUnits = sizeUnits; } - } - - /** - * @param {import('dictionary-importer').ImportRequirementContext} context - * @param {import('structured-content').ImageElementBase} target - * @param {import('structured-content').ImageElementBase} source - * @param {import('dictionary-database').DatabaseTermEntry} entry - */ - async _createImageData(context, target, source, entry) { - const { - path, - width: preferredWidth, - height: preferredHeight, - title, - alt, - description, - pixelated, - imageRendering, - appearance, - background, - collapsed, - collapsible - } = source; - const {width, height} = await this._getImageMedia(context, path, entry); - target.path = path; - target.width = width; - target.height = height; - if (typeof preferredWidth === 'number') { target.preferredWidth = preferredWidth; } - if (typeof preferredHeight === 'number') { target.preferredHeight = preferredHeight; } - if (typeof title === 'string') { target.title = title; } - if (typeof alt === 'string') { target.alt = alt; } - if (typeof description === 'string') { target.description = description; } - if (typeof pixelated === 'boolean') { target.pixelated = pixelated; } - if (typeof imageRendering === 'string') { target.imageRendering = imageRendering; } - if (typeof appearance === 'string') { target.appearance = appearance; } - if (typeof background === 'boolean') { target.background = background; } - if (typeof collapsed === 'boolean') { target.collapsed = collapsed; } - if (typeof collapsible === 'boolean') { target.collapsible = collapsible; } - } - - /** - * @param {import('dictionary-importer').ImportRequirementContext} context - * @param {string} path - * @param {import('dictionary-database').DatabaseTermEntry} entry - * @returns {Promise} - */ - async _getImageMedia(context, path, entry) { - const {media} = context; - const {dictionary} = entry; - - /** - * @param {string} message - * @returns {Error} - */ - const createError = (message) => { - const {expression, reading} = entry; - const readingSource = reading.length > 0 ? ` (${reading})` : ''; - return new Error(`${message} at path ${JSON.stringify(path)} for ${expression}${readingSource} in ${dictionary}`); - }; - - // Check if already added - let mediaData = media.get(path); - if (typeof mediaData !== 'undefined') { - if (MediaUtil.getFileExtensionFromImageMediaType(mediaData.mediaType) === null) { - throw createError('Media file is not a valid image'); - } - return mediaData; - } - - // Find file in archive - const file = context.fileMap.get(path); - if (typeof file === 'undefined') { - throw createError('Could not find image'); - } - - // Load file content - let content = await (await this._getData(file, new BlobWriter())).arrayBuffer(); - - const mediaType = MediaUtil.getImageMediaTypeFromFileName(path); - if (mediaType === null) { - throw createError('Could not determine media type for image'); - } - - // Load image data - let width; - let height; - try { - ({content, width, height} = await this._mediaLoader.getImageDetails(content, mediaType)); - } catch (e) { - throw createError('Could not load image'); - } - - // Create image data - mediaData = { - dictionary, - path, - mediaType, - width, - height, - content - }; - media.set(path, mediaData); - - return mediaData; - } - - /** - * @param {import('dictionary-data').TermV1} entry - * @param {string} dictionary - * @returns {import('dictionary-database').DatabaseTermEntry} - */ - _convertTermBankEntryV1(entry, dictionary) { - let [expression, reading, definitionTags, rules, score, ...glossary] = entry; - expression = this._normalizeTermOrReading(expression); - reading = this._normalizeTermOrReading(reading.length > 0 ? reading : expression); - return {expression, reading, definitionTags, rules, score, glossary, dictionary}; - } - - /** - * @param {import('dictionary-data').TermV3} entry - * @param {string} dictionary - * @returns {import('dictionary-database').DatabaseTermEntry} - */ - _convertTermBankEntryV3(entry, dictionary) { - let [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; - expression = this._normalizeTermOrReading(expression); - reading = this._normalizeTermOrReading(reading.length > 0 ? reading : expression); - return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags, dictionary}; - } - - /** - * @param {import('dictionary-data').TermMeta} entry - * @param {string} dictionary - * @returns {import('dictionary-database').DatabaseTermMeta} - */ - _convertTermMetaBankEntry(entry, dictionary) { - const [expression, mode, data] = entry; - return /** @type {import('dictionary-database').DatabaseTermMeta} */ ({expression, mode, data, dictionary}); - } - - /** - * @param {import('dictionary-data').KanjiV1} entry - * @param {string} dictionary - * @returns {import('dictionary-database').DatabaseKanjiEntry} - */ - _convertKanjiBankEntryV1(entry, dictionary) { - const [character, onyomi, kunyomi, tags, ...meanings] = entry; - return {character, onyomi, kunyomi, tags, meanings, dictionary}; - } - - /** - * @param {import('dictionary-data').KanjiV3} entry - * @param {string} dictionary - * @returns {import('dictionary-database').DatabaseKanjiEntry} - */ - _convertKanjiBankEntryV3(entry, dictionary) { - const [character, onyomi, kunyomi, tags, meanings, stats] = entry; - return {character, onyomi, kunyomi, tags, meanings, stats, dictionary}; - } - - /** - * @param {import('dictionary-data').KanjiMeta} entry - * @param {string} dictionary - * @returns {import('dictionary-database').DatabaseKanjiMeta} - */ - _convertKanjiMetaBankEntry(entry, dictionary) { - const [character, mode, data] = entry; - return {character, mode, data, dictionary}; - } - - /** - * @param {import('dictionary-data').Tag} entry - * @param {string} dictionary - * @returns {import('dictionary-database').Tag} - */ - _convertTagBankEntry(entry, dictionary) { - const [name, category, order, notes, score] = entry; - return {name, category, order, notes, score, dictionary}; - } - - /** - * @param {import('dictionary-data').Index} index - * @param {import('dictionary-database').Tag[]} results - * @param {string} dictionary - */ - _addOldIndexTags(index, results, dictionary) { - const {tagMeta} = index; - if (typeof tagMeta !== 'object' || tagMeta === null) { return; } - for (const [name, value] of Object.entries(tagMeta)) { - const {category, order, notes, score} = value; - results.push({name, category, order, notes, score, dictionary}); - } - } - - /** - * @param {import('dictionary-importer').ArchiveFileMap} fileMap - * @param {import('dictionary-importer').QueryDetails} queryDetails - * @returns {import('dictionary-importer').QueryResult} - */ - _getArchiveFiles(fileMap, queryDetails) { - /** @type {import('dictionary-importer').QueryResult} */ - const results = new Map(); - for (const [name, value] of fileMap.entries()) { - for (const [fileType, fileNameFormat] of queryDetails.entries()) { - let entries = results.get(fileType); - if (typeof entries === 'undefined') { - entries = []; - results.set(fileType, entries); - } - - if (fileNameFormat.test(name)) { - entries.push(value); - break; - } - } - } - return results; - } - - /** - * @template [TEntry=unknown] - * @template [TResult=unknown] - * @param {import('@zip.js/zip.js').Entry[]} files - * @param {(entry: TEntry, dictionaryTitle: string) => TResult} convertEntry - * @param {import('dictionary-importer').CompiledSchemaName} schemaName - * @param {string} dictionaryTitle - * @returns {Promise} - */ - async _readFileSequence(files, convertEntry, schemaName, dictionaryTitle) { - const progressData = this._progressData; - let startIndex = 0; - - const results = []; - for (const file of files) { - const content = await this._getData(file, new TextWriter()); - const entries = /** @type {unknown} */ (parseJson(content)); - - startIndex = progressData.index; - this._progress(); - - const schema = ajvSchemas[schemaName]; - if (!schema(entries)) { - throw this._formatAjvSchemaError(schema, file.filename); - } - - progressData.index = startIndex + 1; - this._progress(); - - if (Array.isArray(entries)) { - for (const entry of entries) { - results.push(convertEntry(/** @type {TEntry} */ (entry), dictionaryTitle)); - } - } - } - return results; - } - - /** - * @param {import('dictionary-database').DatabaseTermMeta[]|import('dictionary-database').DatabaseKanjiMeta[]} metaList - * @returns {import('dictionary-importer').SummaryMetaCount} - */ - _getMetaCounts(metaList) { - /** @type {Map} */ - const countsMap = new Map(); - for (const {mode} of metaList) { - let count = countsMap.get(mode); - count = typeof count !== 'undefined' ? count + 1 : 1; - countsMap.set(mode, count); - } - /** @type {import('dictionary-importer').SummaryMetaCount} */ - const counts = {total: metaList.length}; - for (const [key, value] of countsMap.entries()) { - if (Object.prototype.hasOwnProperty.call(counts, key)) { continue; } - counts[key] = value; - } - return counts; - } - - /** - * @param {string} text - * @returns {string} - */ - _normalizeTermOrReading(text) { - // Note: this function should not perform String.normalize on the text, - // as it will normalize characters in an undesirable way. - // Thus, this function is currently a no-op. - // Example: - // - '\u9038'.normalize('NFC') => '\u9038' (逸) - // - '\ufa67'.normalize('NFC') => '\u9038' (逸 => 逸) - return text; - } - - /** - * @template [T=unknown] - * @param {import('@zip.js/zip.js').Entry} entry - * @param {import('@zip.js/zip.js').Writer|import('@zip.js/zip.js').WritableWriter} writer - * @returns {Promise} - */ - async _getData(entry, writer) { - if (typeof entry.getData === 'undefined') { - throw new Error(`Cannot read ${entry.filename}`); - } - return await entry.getData(writer); - } -} diff --git a/ext/js/language/dictionary-worker-handler.js b/ext/js/language/dictionary-worker-handler.js deleted file mode 100644 index 9a724386..00000000 --- a/ext/js/language/dictionary-worker-handler.js +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2023 Yomitan Authors - * Copyright (C) 2021-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -import {ExtensionError} from '../core/extension-error.js'; -import {DictionaryDatabase} from './dictionary-database.js'; -import {DictionaryImporter} from './dictionary-importer.js'; -import {DictionaryWorkerMediaLoader} from './dictionary-worker-media-loader.js'; - -export class DictionaryWorkerHandler { - constructor() { - /** @type {DictionaryWorkerMediaLoader} */ - this._mediaLoader = new DictionaryWorkerMediaLoader(); - } - - /** */ - prepare() { - self.addEventListener('message', this._onMessage.bind(this), false); - } - - // Private - - /** - * @param {MessageEvent} event - */ - _onMessage(event) { - const {action, params} = event.data; - switch (action) { - case 'importDictionary': - this._onMessageWithProgress(params, this._importDictionary.bind(this)); - break; - case 'deleteDictionary': - this._onMessageWithProgress(params, this._deleteDictionary.bind(this)); - break; - case 'getDictionaryCounts': - this._onMessageWithProgress(params, this._getDictionaryCounts.bind(this)); - break; - case 'getImageDetails.response': - this._mediaLoader.handleMessage(params); - break; - } - } - - /** - * @template [T=unknown] - * @param {T} params - * @param {(details: T, onProgress: import('dictionary-worker-handler').OnProgressCallback) => Promise} handler - */ - async _onMessageWithProgress(params, handler) { - /** - * @param {...unknown} args - */ - const onProgress = (...args) => { - self.postMessage({ - action: 'progress', - params: {args} - }); - }; - let response; - try { - const result = await handler(params, onProgress); - response = {result}; - } catch (e) { - response = {error: ExtensionError.serialize(e)}; - } - self.postMessage({action: 'complete', params: response}); - } - - /** - * @param {import('dictionary-worker-handler').ImportDictionaryMessageParams} details - * @param {import('dictionary-worker-handler').OnProgressCallback} onProgress - * @returns {Promise} - */ - async _importDictionary({details, archiveContent}, onProgress) { - const dictionaryDatabase = await this._getPreparedDictionaryDatabase(); - try { - const dictionaryImporter = new DictionaryImporter(this._mediaLoader, onProgress); - const {result, errors} = await dictionaryImporter.importDictionary(dictionaryDatabase, archiveContent, details); - return { - result, - errors: errors.map((error) => ExtensionError.serialize(error)) - }; - } finally { - dictionaryDatabase.close(); - } - } - - /** - * @param {import('dictionary-worker-handler').DeleteDictionaryMessageParams} details - * @param {import('dictionary-database').DeleteDictionaryProgressCallback} onProgress - * @returns {Promise} - */ - async _deleteDictionary({dictionaryTitle}, onProgress) { - const dictionaryDatabase = await this._getPreparedDictionaryDatabase(); - try { - return await dictionaryDatabase.deleteDictionary(dictionaryTitle, 1000, onProgress); - } finally { - dictionaryDatabase.close(); - } - } - - /** - * @param {import('dictionary-worker-handler').GetDictionaryCountsMessageParams} details - * @returns {Promise} - */ - async _getDictionaryCounts({dictionaryNames, getTotal}) { - const dictionaryDatabase = await this._getPreparedDictionaryDatabase(); - try { - return await dictionaryDatabase.getDictionaryCounts(dictionaryNames, getTotal); - } finally { - dictionaryDatabase.close(); - } - } - - /** - * @returns {Promise} - */ - async _getPreparedDictionaryDatabase() { - const dictionaryDatabase = new DictionaryDatabase(); - await dictionaryDatabase.prepare(); - return dictionaryDatabase; - } -} diff --git a/ext/js/language/dictionary-worker-main.js b/ext/js/language/dictionary-worker-main.js deleted file mode 100644 index 8ae283b8..00000000 --- a/ext/js/language/dictionary-worker-main.js +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2023 Yomitan Authors - * Copyright (C) 2021-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -import {log} from '../core.js'; -import {DictionaryWorkerHandler} from './dictionary-worker-handler.js'; - -/** Entry point. */ -function main() { - try { - const dictionaryWorkerHandler = new DictionaryWorkerHandler(); - dictionaryWorkerHandler.prepare(); - } catch (e) { - log.error(e); - } -} - -main(); diff --git a/ext/js/language/dictionary-worker-media-loader.js b/ext/js/language/dictionary-worker-media-loader.js deleted file mode 100644 index e19a13d3..00000000 --- a/ext/js/language/dictionary-worker-media-loader.js +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (C) 2023 Yomitan Authors - * Copyright (C) 2021-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -import {generateId} from '../core.js'; -import {ExtensionError} from '../core/extension-error.js'; - -/** - * Class used for loading and validating media from a worker thread - * during the dictionary import process. - */ -export class DictionaryWorkerMediaLoader { - /** - * Creates a new instance of the media loader. - */ - constructor() { - /** @type {Map void, reject: (reason?: import('core').RejectionReason) => void}>} */ - this._requests = new Map(); - } - - /** - * Handles a response message posted to the worker thread. - * @param {import('dictionary-worker-media-loader').HandleMessageParams} params Details of the response. - */ - handleMessage(params) { - const {id} = params; - const request = this._requests.get(id); - if (typeof request === 'undefined') { return; } - this._requests.delete(id); - const {error} = params; - if (typeof error !== 'undefined') { - request.reject(ExtensionError.deserialize(error)); - } else { - request.resolve(params.result); - } - } - - /** @type {import('dictionary-importer-media-loader').GetImageDetailsFunction} */ - getImageDetails(content, mediaType) { - return new Promise((resolve, reject) => { - const id = generateId(16); - this._requests.set(id, {resolve, reject}); - // This is executed in a Worker context, so the self needs to be force cast - /** @type {Worker} */ (/** @type {unknown} */ (self)).postMessage({ - action: 'getImageDetails', - params: {id, content, mediaType} - }, [content]); - }); - } -} diff --git a/ext/js/language/dictionary-worker.js b/ext/js/language/dictionary-worker.js deleted file mode 100644 index 3119dd7b..00000000 --- a/ext/js/language/dictionary-worker.js +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (C) 2023 Yomitan Authors - * Copyright (C) 2021-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -import {ExtensionError} from '../core/extension-error.js'; -import {DictionaryImporterMediaLoader} from './dictionary-importer-media-loader.js'; - -export class DictionaryWorker { - constructor() { - /** @type {DictionaryImporterMediaLoader} */ - this._dictionaryImporterMediaLoader = new DictionaryImporterMediaLoader(); - } - - /** - * @param {ArrayBuffer} archiveContent - * @param {import('dictionary-importer').ImportDetails} details - * @param {?import('dictionary-worker').ImportProgressCallback} onProgress - * @returns {Promise} - */ - importDictionary(archiveContent, details, onProgress) { - return this._invoke( - 'importDictionary', - {details, archiveContent}, - [archiveContent], - onProgress, - this._formatImportDictionaryResult.bind(this) - ); - } - - /** - * @param {string} dictionaryTitle - * @param {?import('dictionary-worker').DeleteProgressCallback} onProgress - * @returns {Promise} - */ - deleteDictionary(dictionaryTitle, onProgress) { - return this._invoke('deleteDictionary', {dictionaryTitle}, [], onProgress, null); - } - - /** - * @param {string[]} dictionaryNames - * @param {boolean} getTotal - * @returns {Promise} - */ - getDictionaryCounts(dictionaryNames, getTotal) { - return this._invoke('getDictionaryCounts', {dictionaryNames, getTotal}, [], null, null); - } - - // Private - - /** - * @template [TParams=import('core').SerializableObject] - * @template [TResponseRaw=unknown] - * @template [TResponse=unknown] - * @param {string} action - * @param {TParams} params - * @param {Transferable[]} transfer - * @param {?(arg: import('core').SafeAny) => void} onProgress - * @param {?(result: TResponseRaw) => TResponse} formatResult - */ - _invoke(action, params, transfer, onProgress, formatResult) { - return new Promise((resolve, reject) => { - const worker = new Worker('/js/language/dictionary-worker-main.js', {type: 'module'}); - /** @type {import('dictionary-worker').InvokeDetails} */ - const details = { - complete: false, - worker, - resolve, - reject, - onMessage: null, - onProgress, - formatResult - }; - // Ugly typecast below due to not being able to explicitly state the template types - /** @type {(event: MessageEvent>) => void} */ - const onMessage = /** @type {(details: import('dictionary-worker').InvokeDetails, event: MessageEvent>) => void} */ (this._onMessage).bind(this, details); - details.onMessage = onMessage; - worker.addEventListener('message', onMessage); - worker.postMessage({action, params}, transfer); - }); - } - - /** - * @template [TResponseRaw=unknown] - * @template [TResponse=unknown] - * @param {import('dictionary-worker').InvokeDetails} details - * @param {MessageEvent>} event - */ - _onMessage(details, event) { - if (details.complete) { return; } - const {action, params} = event.data; - switch (action) { - case 'complete': - { - const {worker, resolve, reject, onMessage, formatResult} = details; - if (worker === null || onMessage === null || resolve === null || reject === null) { return; } - details.complete = true; - details.worker = null; - details.resolve = null; - details.reject = null; - details.onMessage = null; - details.onProgress = null; - details.formatResult = null; - worker.removeEventListener('message', onMessage); - worker.terminate(); - this._onMessageComplete(params, resolve, reject, formatResult); - } - break; - case 'progress': - this._onMessageProgress(params, details.onProgress); - break; - case 'getImageDetails': - { - const {worker} = details; - if (worker === null) { return; } - this._onMessageGetImageDetails(params, worker); - } - break; - } - } - - /** - * @template [TResponseRaw=unknown] - * @template [TResponse=unknown] - * @param {import('dictionary-worker').MessageCompleteParams} params - * @param {(result: TResponse) => void} resolve - * @param {(reason?: import('core').RejectionReason) => void} reject - * @param {?(result: TResponseRaw) => TResponse} formatResult - */ - _onMessageComplete(params, resolve, reject, formatResult) { - const {error} = params; - if (typeof error !== 'undefined') { - reject(ExtensionError.deserialize(error)); - } else { - const {result} = params; - if (typeof formatResult === 'function') { - let result2; - try { - result2 = formatResult(result); - } catch (e) { - reject(e); - return; - } - resolve(result2); - } else { - // If formatResult is not provided, the response is assumed to be the same type - // For some reason, eslint thinks the TResponse type is undefined - // eslint-disable-next-line jsdoc/no-undefined-types - resolve(/** @type {TResponse} */ (/** @type {unknown} */ (result))); - } - } - } - - /** - * @param {import('dictionary-worker').MessageProgressParams} params - * @param {?(...args: unknown[]) => void} onProgress - */ - _onMessageProgress(params, onProgress) { - if (typeof onProgress !== 'function') { return; } - const {args} = params; - onProgress(...args); - } - - /** - * @param {import('dictionary-worker').MessageGetImageDetailsParams} params - * @param {Worker} worker - */ - async _onMessageGetImageDetails(params, worker) { - const {id, content, mediaType} = params; - /** @type {Transferable[]} */ - const transfer = []; - let response; - try { - const result = await this._dictionaryImporterMediaLoader.getImageDetails(content, mediaType, transfer); - response = {id, result}; - } catch (e) { - response = {id, error: ExtensionError.serialize(e)}; - } - worker.postMessage({action: 'getImageDetails.response', params: response}, transfer); - } - - /** - * @param {import('dictionary-worker').MessageCompleteResultSerialized} response - * @returns {import('dictionary-worker').MessageCompleteResult} - */ - _formatImportDictionaryResult(response) { - const {result, errors} = response; - return { - result, - errors: errors.map((error) => ExtensionError.deserialize(error)) - }; - } -} diff --git a/ext/js/language/sandbox/dictionary-data-util.js b/ext/js/language/sandbox/dictionary-data-util.js deleted file mode 100644 index a54b043b..00000000 --- a/ext/js/language/sandbox/dictionary-data-util.js +++ /dev/null @@ -1,391 +0,0 @@ -/* - * Copyright (C) 2023 Yomitan Authors - * Copyright (C) 2020-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -export class DictionaryDataUtil { - /** - * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry - * @returns {import('dictionary-data-util').TagGroup[]} - */ - static groupTermTags(dictionaryEntry) { - const {headwords} = dictionaryEntry; - const headwordCount = headwords.length; - const uniqueCheck = (headwordCount > 1); - const resultsIndexMap = new Map(); - const results = []; - for (let i = 0; i < headwordCount; ++i) { - const {tags} = headwords[i]; - for (const tag of tags) { - if (uniqueCheck) { - const {name, category, content, dictionaries} = tag; - const key = this._createMapKey([name, category, content, dictionaries]); - const index = resultsIndexMap.get(key); - if (typeof index !== 'undefined') { - const existingItem = results[index]; - existingItem.headwordIndices.push(i); - continue; - } - resultsIndexMap.set(key, results.length); - } - - const item = {tag, headwordIndices: [i]}; - results.push(item); - } - } - return results; - } - - /** - * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry - * @returns {import('dictionary-data-util').DictionaryFrequency[]} - */ - static groupTermFrequencies(dictionaryEntry) { - const {headwords, frequencies: sourceFrequencies} = dictionaryEntry; - - /** @type {import('dictionary-data-util').TermFrequenciesMap1} */ - const map1 = new Map(); - for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of sourceFrequencies) { - const {term, reading} = headwords[headwordIndex]; - - let map2 = map1.get(dictionary); - if (typeof map2 === 'undefined') { - map2 = new Map(); - map1.set(dictionary, map2); - } - - const readingKey = hasReading ? reading : null; - const key = this._createMapKey([term, readingKey]); - let frequencyData = map2.get(key); - if (typeof frequencyData === 'undefined') { - frequencyData = {term, reading: readingKey, values: new Map()}; - map2.set(key, frequencyData); - } - - frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); - } - - const results = []; - for (const [dictionary, map2] of map1.entries()) { - const frequencies = []; - for (const {term, reading, values} of map2.values()) { - frequencies.push({ - term, - reading, - values: [...values.values()] - }); - } - results.push({dictionary, frequencies}); - } - return results; - } - - /** - * @param {import('dictionary').KanjiFrequency[]} sourceFrequencies - * @returns {import('dictionary-data-util').DictionaryFrequency[]} - */ - static groupKanjiFrequencies(sourceFrequencies) { - /** @type {import('dictionary-data-util').KanjiFrequenciesMap1} */ - const map1 = new Map(); - for (const {dictionary, character, frequency, displayValue} of sourceFrequencies) { - let map2 = map1.get(dictionary); - if (typeof map2 === 'undefined') { - map2 = new Map(); - map1.set(dictionary, map2); - } - - let frequencyData = map2.get(character); - if (typeof frequencyData === 'undefined') { - frequencyData = {character, values: new Map()}; - map2.set(character, frequencyData); - } - - frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); - } - - const results = []; - for (const [dictionary, map2] of map1.entries()) { - const frequencies = []; - for (const {character, values} of map2.values()) { - frequencies.push({ - character, - values: [...values.values()] - }); - } - results.push({dictionary, frequencies}); - } - return results; - } - - /** - * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry - * @returns {import('dictionary-data-util').DictionaryGroupedPronunciations[]} - */ - static getGroupedPronunciations(dictionaryEntry) { - const {headwords, pronunciations} = dictionaryEntry; - - const allTerms = new Set(); - const allReadings = new Set(); - for (const {term, reading} of headwords) { - allTerms.add(term); - allReadings.add(reading); - } - - /** @type {Map} */ - const groupedPronunciationsMap = new Map(); - for (const {headwordIndex, dictionary, pitches} of pronunciations) { - const {term, reading} = headwords[headwordIndex]; - let dictionaryGroupedPronunciationList = groupedPronunciationsMap.get(dictionary); - if (typeof dictionaryGroupedPronunciationList === 'undefined') { - dictionaryGroupedPronunciationList = []; - groupedPronunciationsMap.set(dictionary, dictionaryGroupedPronunciationList); - } - for (const {position, nasalPositions, devoicePositions, tags} of pitches) { - let groupedPronunciation = this._findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, dictionaryGroupedPronunciationList); - if (groupedPronunciation === null) { - groupedPronunciation = { - terms: new Set(), - reading, - position, - nasalPositions, - devoicePositions, - tags - }; - dictionaryGroupedPronunciationList.push(groupedPronunciation); - } - groupedPronunciation.terms.add(term); - } - } - - /** @type {import('dictionary-data-util').DictionaryGroupedPronunciations[]} */ - const results2 = []; - const multipleReadings = (allReadings.size > 1); - for (const [dictionary, dictionaryGroupedPronunciationList] of groupedPronunciationsMap.entries()) { - /** @type {import('dictionary-data-util').GroupedPronunciation[]} */ - const pronunciations2 = []; - for (const groupedPronunciation of dictionaryGroupedPronunciationList) { - const {terms, reading, position, nasalPositions, devoicePositions, tags} = groupedPronunciation; - const exclusiveTerms = !this._areSetsEqual(terms, allTerms) ? this._getSetIntersection(terms, allTerms) : []; - const exclusiveReadings = []; - if (multipleReadings) { - exclusiveReadings.push(reading); - } - pronunciations2.push({ - terms: [...terms], - reading, - position, - nasalPositions, - devoicePositions, - tags, - exclusiveTerms, - exclusiveReadings - }); - } - results2.push({dictionary, pronunciations: pronunciations2}); - } - return results2; - } - - /** - * @param {import('dictionary').Tag[]|import('anki-templates').Tag[]} termTags - * @returns {import('dictionary-data-util').TermFrequencyType} - */ - static getTermFrequency(termTags) { - let totalScore = 0; - for (const {score} of termTags) { - totalScore += score; - } - if (totalScore > 0) { - return 'popular'; - } else if (totalScore < 0) { - return 'rare'; - } else { - return 'normal'; - } - } - - /** - * @param {import('dictionary').TermHeadword[]} headwords - * @param {number[]} headwordIndices - * @param {Set} allTermsSet - * @param {Set} allReadingsSet - * @returns {string[]} - */ - static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) { - if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; } - - /** @type {Set} */ - const terms = new Set(); - /** @type {Set} */ - const readings = new Set(); - for (const headwordIndex of headwordIndices) { - const {term, reading} = headwords[headwordIndex]; - terms.add(term); - readings.add(reading); - } - - /** @type {string[]} */ - const disambiguations = []; - const addTerms = !this._areSetsEqual(terms, allTermsSet); - const addReadings = !this._areSetsEqual(readings, allReadingsSet); - if (addTerms) { - disambiguations.push(...this._getSetIntersection(terms, allTermsSet)); - } - if (addReadings) { - if (addTerms) { - for (const term of terms) { - readings.delete(term); - } - } - disambiguations.push(...this._getSetIntersection(readings, allReadingsSet)); - } - return disambiguations; - } - - /** - * @param {string[]} wordClasses - * @returns {boolean} - */ - static isNonNounVerbOrAdjective(wordClasses) { - let isVerbOrAdjective = false; - let isSuruVerb = false; - let isNoun = false; - for (const wordClass of wordClasses) { - switch (wordClass) { - case 'v1': - case 'v5': - case 'vk': - case 'vz': - case 'adj-i': - isVerbOrAdjective = true; - break; - case 'vs': - isVerbOrAdjective = true; - isSuruVerb = true; - break; - case 'n': - isNoun = true; - break; - } - } - return isVerbOrAdjective && !(isSuruVerb && isNoun); - } - - // Private - - /** - * @param {string} reading - * @param {number} position - * @param {number[]} nasalPositions - * @param {number[]} devoicePositions - * @param {import('dictionary').Tag[]} tags - * @param {import('dictionary-data-util').GroupedPronunciationInternal[]} groupedPronunciationList - * @returns {?import('dictionary-data-util').GroupedPronunciationInternal} - */ - static _findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, groupedPronunciationList) { - for (const pitchInfo of groupedPronunciationList) { - if ( - pitchInfo.reading === reading && - pitchInfo.position === position && - this._areArraysEqual(pitchInfo.nasalPositions, nasalPositions) && - this._areArraysEqual(pitchInfo.devoicePositions, devoicePositions) && - this._areTagListsEqual(pitchInfo.tags, tags) - ) { - return pitchInfo; - } - } - return null; - } - - /** - * @template [T=unknown] - * @param {T[]} array1 - * @param {T[]} array2 - * @returns {boolean} - */ - static _areArraysEqual(array1, array2) { - const ii = array1.length; - if (ii !== array2.length) { return false; } - for (let i = 0; i < ii; ++i) { - if (array1[i] !== array2[i]) { return false; } - } - return true; - } - - /** - * @param {import('dictionary').Tag[]} tagList1 - * @param {import('dictionary').Tag[]} tagList2 - * @returns {boolean} - */ - static _areTagListsEqual(tagList1, tagList2) { - const ii = tagList1.length; - if (tagList2.length !== ii) { return false; } - - for (let i = 0; i < ii; ++i) { - const tag1 = tagList1[i]; - const tag2 = tagList2[i]; - if (tag1.name !== tag2.name || !this._areArraysEqual(tag1.dictionaries, tag2.dictionaries)) { - return false; - } - } - - return true; - } - - /** - * @template [T=unknown] - * @param {Set} set1 - * @param {Set} set2 - * @returns {boolean} - */ - static _areSetsEqual(set1, set2) { - if (set1.size !== set2.size) { - return false; - } - - for (const value of set1) { - if (!set2.has(value)) { - return false; - } - } - - return true; - } - - /** - * @template [T=unknown] - * @param {Set} set1 - * @param {Set} set2 - * @returns {T[]} - */ - static _getSetIntersection(set1, set2) { - const result = []; - for (const value of set1) { - if (set2.has(value)) { - result.push(value); - } - } - return result; - } - - /** - * @param {unknown[]} array - * @returns {string} - */ - static _createMapKey(array) { - return JSON.stringify(array); - } -} diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index ddbd8578..45909940 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -31,7 +31,7 @@ export class Translator { constructor({japaneseUtil, database}) { /** @type {import('./sandbox/japanese-util.js').JapaneseUtil} */ this._japaneseUtil = japaneseUtil; - /** @type {import('./dictionary-database.js').DictionaryDatabase} */ + /** @type {import('../dictionary/dictionary-database.js').DictionaryDatabase} */ this._database = database; /** @type {?Deinflector} */ this._deinflector = null; diff --git a/ext/js/pages/settings/dictionary-controller.js b/ext/js/pages/settings/dictionary-controller.js index 6e9522db..db6a73d4 100644 --- a/ext/js/pages/settings/dictionary-controller.js +++ b/ext/js/pages/settings/dictionary-controller.js @@ -17,8 +17,8 @@ */ import {EventListenerCollection, log} from '../../core.js'; +import {DictionaryWorker} from '../../dictionary/dictionary-worker.js'; import {querySelectorNotNull} from '../../dom/query-selector.js'; -import {DictionaryWorker} from '../../language/dictionary-worker.js'; import {yomitan} from '../../yomitan.js'; class DictionaryEntry { diff --git a/ext/js/pages/settings/dictionary-import-controller.js b/ext/js/pages/settings/dictionary-import-controller.js index 35b7c461..c478b265 100644 --- a/ext/js/pages/settings/dictionary-import-controller.js +++ b/ext/js/pages/settings/dictionary-import-controller.js @@ -18,8 +18,8 @@ import {log} from '../../core.js'; import {ExtensionError} from '../../core/extension-error.js'; +import {DictionaryWorker} from '../../dictionary/dictionary-worker.js'; import {querySelectorNotNull} from '../../dom/query-selector.js'; -import {DictionaryWorker} from '../../language/dictionary-worker.js'; import {yomitan} from '../../yomitan.js'; import {DictionaryController} from './dictionary-controller.js'; diff --git a/ext/js/templates/sandbox/anki-template-renderer.js b/ext/js/templates/sandbox/anki-template-renderer.js index 515e85da..d69c7b47 100644 --- a/ext/js/templates/sandbox/anki-template-renderer.js +++ b/ext/js/templates/sandbox/anki-template-renderer.js @@ -18,10 +18,10 @@ import {Handlebars} from '../../../lib/handlebars.js'; import {AnkiNoteDataCreator} from '../../data/sandbox/anki-note-data-creator.js'; +import {DictionaryDataUtil} from '../../dictionary/dictionary-data-util.js'; import {PronunciationGenerator} from '../../display/sandbox/pronunciation-generator.js'; import {StructuredContentGenerator} from '../../display/sandbox/structured-content-generator.js'; import {CssStyleApplier} from '../../dom/sandbox/css-style-applier.js'; -import {DictionaryDataUtil} from '../../language/sandbox/dictionary-data-util.js'; import {JapaneseUtil} from '../../language/sandbox/japanese-util.js'; import {AnkiTemplateRendererContentManager} from './anki-template-renderer-content-manager.js'; import {TemplateRendererMediaProvider} from './template-renderer-media-provider.js'; diff --git a/test/database.test.js b/test/database.test.js index 86c69a41..4a52f18d 100644 --- a/test/database.test.js +++ b/test/database.test.js @@ -22,8 +22,8 @@ import path from 'path'; import {beforeEach, describe, expect, test, vi} from 'vitest'; import {parseJson} from '../dev/json.js'; import {createDictionaryArchive} from '../dev/util.js'; -import {DictionaryDatabase} from '../ext/js/language/dictionary-database.js'; -import {DictionaryImporter} from '../ext/js/language/dictionary-importer.js'; +import {DictionaryDatabase} from '../ext/js/dictionary/dictionary-database.js'; +import {DictionaryImporter} from '../ext/js/dictionary/dictionary-importer.js'; import {DictionaryImporterMediaLoader} from './mocks/dictionary-importer-media-loader.js'; const dirname = path.dirname(fileURLToPath(import.meta.url)); diff --git a/test/fixtures/translator-test.js b/test/fixtures/translator-test.js index 0afbe1f0..83644513 100644 --- a/test/fixtures/translator-test.js +++ b/test/fixtures/translator-test.js @@ -24,8 +24,8 @@ import {expect, vi} from 'vitest'; import {parseJson} from '../../dev/json.js'; import {createDictionaryArchive} from '../../dev/util.js'; import {AnkiNoteDataCreator} from '../../ext/js/data/sandbox/anki-note-data-creator.js'; -import {DictionaryDatabase} from '../../ext/js/language/dictionary-database.js'; -import {DictionaryImporter} from '../../ext/js/language/dictionary-importer.js'; +import {DictionaryDatabase} from '../../ext/js/dictionary/dictionary-database.js'; +import {DictionaryImporter} from '../../ext/js/dictionary/dictionary-importer.js'; import {JapaneseUtil} from '../../ext/js/language/sandbox/japanese-util.js'; import {Translator} from '../../ext/js/language/translator.js'; import {chrome, fetch} from '../mocks/common.js'; diff --git a/types/ext/translator.d.ts b/types/ext/translator.d.ts index 39d4be81..f1eb6bcc 100644 --- a/types/ext/translator.d.ts +++ b/types/ext/translator.d.ts @@ -15,7 +15,7 @@ * along with this program. If not, see . */ -import type {DictionaryDatabase} from '../../ext/js/language/dictionary-database'; +import type {DictionaryDatabase} from '../../ext/js/dictionary/dictionary-database'; import type {JapaneseUtil} from '../../ext/js/language/sandbox/japanese-util'; import type * as Dictionary from './dictionary'; import type * as DictionaryDatabaseTypes from './dictionary-database'; -- cgit v1.2.3