aboutsummaryrefslogtreecommitdiff
path: root/ext/bg/js/dictionary-importer.js
diff options
context:
space:
mode:
Diffstat (limited to 'ext/bg/js/dictionary-importer.js')
-rw-r--r--ext/bg/js/dictionary-importer.js407
1 files changed, 0 insertions, 407 deletions
diff --git a/ext/bg/js/dictionary-importer.js b/ext/bg/js/dictionary-importer.js
deleted file mode 100644
index 4cb608db..00000000
--- a/ext/bg/js/dictionary-importer.js
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Copyright (C) 2020-2021 Yomichan Authors
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-/* global
- * JSZip
- * JsonSchemaValidator
- * MediaUtility
- */
-
/**
 * Imports a dictionary archive (a zip file containing `index.json` plus
 * numbered term/kanji/tag bank files) into a dictionary database.
 *
 * Relies on the globals `JSZip`, `JsonSchemaValidator`, `MediaUtility`,
 * `stringReverse` and `EventListenerCollection` being provided by the
 * surrounding environment.
 */
class DictionaryImporter {
    constructor() {
        // Cache of schema path -> Promise resolving to the parsed schema JSON.
        this._schemas = new Map();
        this._jsonSchemaValidator = new JsonSchemaValidator();
        this._mediaUtility = new MediaUtility();
    }

    /**
     * Reads, validates and stores a dictionary archive.
     * @param dictionaryDatabase Database object; must provide `isPrepared()`,
     *   `dictionaryExists(title)` and `bulkAdd(storeName, entries, start, count)`.
     * @param archiveSource Archive content, passed directly to `JSZip.loadAsync`.
     * @param details Optional import options. Only `prefixWildcardsSupported`
     *   is read; when truthy, reversed expression/reading fields are added to
     *   every term entry.
     * @param onProgress Optional callback invoked as `onProgress(total, loadedCount)`
     *   after each stored batch.
     * @returns `{result, errors}` where `result` is the stored dictionary
     *   summary and `errors` lists the errors of batches that failed to store.
     * @throws Error if the database is missing or not ready, the archive has no
     *   valid index, the dictionary is already imported, a bank file fails
     *   schema validation, or a referenced image cannot be loaded.
     */
    async importDictionary(dictionaryDatabase, archiveSource, details, onProgress) {
        if (!dictionaryDatabase) {
            throw new Error('Invalid database');
        }
        if (!dictionaryDatabase.isPrepared()) {
            throw new Error('Database is not ready');
        }

        const hasOnProgress = (typeof onProgress === 'function');

        // Read archive
        const archive = await JSZip.loadAsync(archiveSource);

        // Read and validate index
        const indexFileName = 'index.json';
        const indexFile = archive.files[indexFileName];
        if (!indexFile) {
            throw new Error('No dictionary index found in archive');
        }

        const index = JSON.parse(await indexFile.async('string'));

        const indexSchema = await this._getSchema('/data/schemas/dictionary-index-schema.json');
        this._validateJsonSchema(index, indexSchema, indexFileName);

        const dictionaryTitle = index.title;
        const version = index.format || index.version;

        if (!dictionaryTitle || !index.revision) {
            throw new Error('Unrecognized dictionary format');
        }

        // Verify database is not already imported
        if (await dictionaryDatabase.dictionaryExists(dictionaryTitle)) {
            throw new Error('Dictionary is already imported');
        }

        // Data format converters: each turns one positional bank row into a
        // named-field object. Version 1 banks store glossary/meanings as the
        // trailing elements of the row instead of as a nested array.
        const convertTermBankEntry = (entry) => {
            if (version === 1) {
                const [expression, reading, definitionTags, rules, score, ...glossary] = entry;
                return {expression, reading, definitionTags, rules, score, glossary};
            } else {
                const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry;
                return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags};
            }
        };

        const convertTermMetaBankEntry = (entry) => {
            const [expression, mode, data] = entry;
            return {expression, mode, data};
        };

        const convertKanjiBankEntry = (entry) => {
            if (version === 1) {
                const [character, onyomi, kunyomi, tags, ...meanings] = entry;
                return {character, onyomi, kunyomi, tags, meanings};
            } else {
                const [character, onyomi, kunyomi, tags, meanings, stats] = entry;
                return {character, onyomi, kunyomi, tags, meanings, stats};
            }
        };

        const convertKanjiMetaBankEntry = (entry) => {
            const [character, mode, data] = entry;
            return {character, mode, data};
        };

        const convertTagBankEntry = (entry) => {
            const [name, category, order, notes, score] = entry;
            return {name, category, order, notes, score};
        };

        // Archive file reading: banks are numbered from 1 and read until the
        // first missing number.
        const readFileSequence = async (fileNameFormat, convertEntry, schema) => {
            const results = [];
            for (let i = 1; true; ++i) {
                const fileName = fileNameFormat.replace(/\?/, `${i}`);
                const file = archive.files[fileName];
                if (!file) { break; }

                const entries = JSON.parse(await file.async('string'));
                this._validateJsonSchema(entries, schema, fileName);

                for (const rawEntry of entries) {
                    const entry = convertEntry(rawEntry);
                    entry.dictionary = dictionaryTitle;
                    results.push(entry);
                }
            }
            return results;
        };

        // Load schemas
        const dataBankSchemaPaths = this._getDataBankSchemaPaths(version);
        const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path)));

        // Load data
        const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]);
        const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]);
        const kanjiList = await readFileSequence('kanji_bank_?.json', convertKanjiBankEntry, dataBankSchemas[2]);
        const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]);
        const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]);

        // Old tags: tag metadata stored inline in the index rather than in tag banks.
        const indexTagMeta = index.tagMeta;
        if (typeof indexTagMeta === 'object' && indexTagMeta !== null) {
            for (const name of Object.keys(indexTagMeta)) {
                const {category, order, notes, score} = indexTagMeta[name];
                tagList.push({name, category, order, notes, score});
            }
        }

        // Prefix wildcard support. A missing `details` argument is treated as
        // "no options" instead of raising a TypeError.
        const prefixWildcardsSupported = !!(
            typeof details === 'object' &&
            details !== null &&
            details.prefixWildcardsSupported
        );
        if (prefixWildcardsSupported) {
            for (const entry of termList) {
                entry.expressionReverse = stringReverse(entry.expression);
                entry.readingReverse = stringReverse(entry.reading);
            }
        }

        // Extended data support: object glossary items are replaced in place
        // by their formatted form; referenced media is collected by path.
        const extendedDataContext = {
            archive,
            media: new Map()
        };
        for (const entry of termList) {
            const glossaryList = entry.glossary;
            for (let i = 0, ii = glossaryList.length; i < ii; ++i) {
                const glossary = glossaryList[i];
                if (typeof glossary !== 'object' || glossary === null) { continue; }
                glossaryList[i] = await this._formatDictionaryTermGlossaryObject(glossary, extendedDataContext, entry);
            }
        }

        const media = [...extendedDataContext.media.values()];

        // Add dictionary. Awaited so the summary is stored before any data
        // and so a failure rejects this call instead of going unhandled.
        const summary = this._createSummary(dictionaryTitle, version, index, {prefixWildcardsSupported});

        await dictionaryDatabase.bulkAdd('dictionaries', [summary], 0, 1);

        // Add data
        const errors = [];
        // Media is stored through the same batching path below, so it must be
        // counted here or the reported loadedCount would exceed total.
        const total = (
            termList.length +
            termMetaList.length +
            kanjiList.length +
            kanjiMetaList.length +
            tagList.length +
            media.length
        );
        let loadedCount = 0;
        const maxTransactionLength = 1000;

        const bulkAdd = async (objectStoreName, entries) => {
            const ii = entries.length;
            for (let i = 0; i < ii; i += maxTransactionLength) {
                const count = Math.min(maxTransactionLength, ii - i);

                try {
                    await dictionaryDatabase.bulkAdd(objectStoreName, entries, i, count);
                } catch (e) {
                    // Best effort: a failed batch is reported, not fatal.
                    errors.push(e);
                }

                loadedCount += count;
                if (hasOnProgress) {
                    onProgress(total, loadedCount);
                }
            }
        };

        await bulkAdd('terms', termList);
        await bulkAdd('termMeta', termMetaList);
        await bulkAdd('kanji', kanjiList);
        await bulkAdd('kanjiMeta', kanjiMetaList);
        await bulkAdd('tagMeta', tagList);
        await bulkAdd('media', media);

        return {result: summary, errors};
    }

    /**
     * Builds the summary record stored in the 'dictionaries' object store.
     * @param dictionaryTitle Title taken from the index.
     * @param version Dictionary format version.
     * @param index Parsed `index.json` content.
     * @param details Extra properties copied onto the summary last, so they
     *   override same-named fields.
     * @returns The summary object.
     */
    _createSummary(dictionaryTitle, version, index, details) {
        const summary = {
            title: dictionaryTitle,
            revision: index.revision,
            sequenced: index.sequenced,
            version
        };

        // Optional descriptive fields are copied only when they are strings.
        const {author, url, description, attribution} = index;
        if (typeof author === 'string') { summary.author = author; }
        if (typeof url === 'string') { summary.url = url; }
        if (typeof description === 'string') { summary.description = description; }
        if (typeof attribution === 'string') { summary.attribution = attribution; }

        Object.assign(summary, details);

        return summary;
    }

    /**
     * Returns the schema at the given asset path, fetching it at most once.
     * The pending promise itself is cached, so concurrent requests share it.
     * @param fileName Asset path of the schema JSON file.
     * @returns A Promise resolving to the parsed schema.
     */
    async _getSchema(fileName) {
        let schemaPromise = this._schemas.get(fileName);
        if (typeof schemaPromise !== 'undefined') {
            return schemaPromise;
        }

        schemaPromise = this._fetchJsonAsset(fileName);
        this._schemas.set(fileName, schemaPromise);
        return schemaPromise;
    }

    /**
     * Validates a value against a schema, rethrowing validation failures with
     * a message that names the offending archive file.
     * @param value The parsed JSON value to validate.
     * @param schema The schema to validate against.
     * @param fileName Archive file name used in the error message.
     * @throws Error describing the validation failure.
     */
    _validateJsonSchema(value, schema, fileName) {
        try {
            this._jsonSchemaValidator.validate(value, schema);
        } catch (e) {
            throw this._formatSchemaError(e, fileName);
        }
    }

    /**
     * Wraps a validator error in an Error with a descriptive message.
     * @param e The error thrown by the validator; its `info.valuePath` and
     *   `info.schemaPath` are used when present.
     * @param fileName Archive file name used in the message.
     * @returns A new Error whose `data` property holds the original error.
     */
    _formatSchemaError(e, fileName) {
        // Tolerate errors that carry no path info so the original message is
        // still reported rather than being masked by a TypeError.
        const info = (typeof e.info === 'object' && e.info !== null) ? e.info : {};
        const valuePathString = this._getSchemaErrorPathString(info.valuePath, 'dictionary');
        const schemaPathString = this._getSchemaErrorPathString(info.schemaPath, 'schema');

        const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`);
        e2.data = e;

        return e2;
    }

    /**
     * Formats a path list as a dotted/indexed string, e.g. `base.key[0].name`.
     * @param infoList Iterable of arrays whose first element is a path part;
     *   string parts are joined with '.', number parts are written as `[n]`,
     *   and parts of any other type are skipped.
     * @param base Prefix for the resulting string.
     * @returns The formatted path; just `base` when `infoList` is null or undefined.
     */
    _getSchemaErrorPathString(infoList, base='') {
        let result = base;
        if (infoList === null || typeof infoList === 'undefined') {
            return result;
        }
        for (const [part] of infoList) {
            switch (typeof part) {
                case 'string':
                    if (result.length > 0) {
                        result += '.';
                    }
                    result += part;
                    break;
                case 'number':
                    result += `[${part}]`;
                    break;
            }
        }
        return result;
    }

    /**
     * Returns the schema paths for the data banks of the given format version.
     * @param version Dictionary format version; only version 1 selects the v1
     *   term and kanji bank schemas.
     * @returns `[termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]`.
     */
    _getDataBankSchemaPaths(version) {
        const termBank = (
            version === 1 ?
            '/data/schemas/dictionary-term-bank-v1-schema.json' :
            '/data/schemas/dictionary-term-bank-v3-schema.json'
        );
        const termMetaBank = '/data/schemas/dictionary-term-meta-bank-v3-schema.json';
        const kanjiBank = (
            version === 1 ?
            '/data/schemas/dictionary-kanji-bank-v1-schema.json' :
            '/data/schemas/dictionary-kanji-bank-v3-schema.json'
        );
        const kanjiMetaBank = '/data/schemas/dictionary-kanji-meta-bank-v3-schema.json';
        const tagBank = '/data/schemas/dictionary-tag-bank-v3-schema.json';

        return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank];
    }

    /**
     * Converts an object glossary item into its stored form.
     * @param data The glossary object; `data.type` selects the handling.
     * @param context `{archive, media}` shared across the import.
     * @param entry The term entry that owns the glossary item.
     * @returns The text for 'text' items, or formatted image data for 'image' items.
     * @throws Error for any other type.
     */
    async _formatDictionaryTermGlossaryObject(data, context, entry) {
        switch (data.type) {
            case 'text':
                return data.text;
            case 'image':
                return await this._formatDictionaryTermGlossaryImage(data, context, entry);
            default:
                throw new Error(`Unhandled data type: ${data.type}`);
        }
    }

    /**
     * Formats an image glossary item and registers its media content.
     * Every reference to the same path yields the same shape: `width`/`height`
     * are the image's natural size and the sizes requested by the dictionary
     * are exposed as `preferredWidth`/`preferredHeight`. The image is read
     * from the archive only on the first reference.
     * @param data The image glossary object from the dictionary.
     * @param context `{archive, media}`; `media` maps path -> media record.
     * @param entry The term entry that owns the glossary item.
     * @returns The formatted image data.
     * @throws Error if the image is missing, has an unknown type, or fails to load.
     */
    async _formatDictionaryTermGlossaryImage(data, context, entry) {
        const {path, width: preferredWidth, height: preferredHeight, title, description, pixelated} = data;

        // Reuse the media record of an earlier reference so repeated images
        // are formatted identically instead of being returned unprocessed.
        let mediaData = context.media.get(path);
        if (typeof mediaData === 'undefined') {
            mediaData = await this._createDictionaryImageMedia(path, context, entry);
            context.media.set(path, mediaData);
        }

        // Create new data
        const newData = {
            type: 'image',
            path,
            width: mediaData.width,
            height: mediaData.height
        };
        if (typeof preferredWidth === 'number') { newData.preferredWidth = preferredWidth; }
        if (typeof preferredHeight === 'number') { newData.preferredHeight = preferredHeight; }
        if (typeof title === 'string') { newData.title = title; }
        if (typeof description === 'string') { newData.description = description; }
        if (typeof pixelated === 'boolean') { newData.pixelated = pixelated; }

        return newData;
    }

    /**
     * Reads an image from the archive and builds its media record.
     * @param path Path of the image inside the archive.
     * @param context `{archive, media}` shared across the import.
     * @param entry The term entry referencing the image; used for the
     *   dictionary name and for error messages.
     * @returns `{dictionary, path, mediaType, width, height, content}` where
     *   `content` is base64 encoded.
     * @throws Error if the image is missing, has an unknown type, or fails to load.
     */
    async _createDictionaryImageMedia(path, context, entry) {
        let errorSource = entry.expression;
        if (entry.reading.length > 0) {
            errorSource += ` (${entry.reading})`;
        }

        const file = context.archive.file(path);
        if (file === null) {
            throw new Error(`Could not find image at path ${JSON.stringify(path)} for ${errorSource}`);
        }

        const content = await file.async('base64');
        const mediaType = this._mediaUtility.getImageMediaTypeFromFileName(path);
        if (mediaType === null) {
            throw new Error(`Could not determine media type for image at path ${JSON.stringify(path)} for ${errorSource}`);
        }

        let image;
        try {
            image = await this._loadImageBase64(mediaType, content);
        } catch (e) {
            const error = new Error(`Could not load image at path ${JSON.stringify(path)} for ${errorSource}`);
            // Keep the underlying failure available to callers.
            error.data = e;
            throw error;
        }

        return {
            dictionary: entry.dictionary,
            path,
            mediaType,
            width: image.naturalWidth,
            height: image.naturalHeight,
            content
        };
    }

    /**
     * Fetches and parses a JSON asset through `chrome.runtime.getURL`.
     * @param url Asset path.
     * @returns A Promise resolving to the parsed JSON.
     * @throws Error if the response is not ok.
     */
    async _fetchJsonAsset(url) {
        const response = await fetch(chrome.runtime.getURL(url), {
            method: 'GET',
            mode: 'no-cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });
        if (!response.ok) {
            throw new Error(`Failed to fetch ${url}: ${response.status}`);
        }
        return await response.json();
    }

    /**
     * Attempts to load an image using a base64 encoded content and a media type.
     * @param mediaType The media type for the image content.
     * @param content The binary content for the image, encoded in base64.
     * @returns A Promise which resolves with an HTMLImageElement instance on
     *   successful load, otherwise an error is thrown.
     */
    _loadImageBase64(mediaType, content) {
        return new Promise((resolve, reject) => {
            const image = new Image();
            const eventListeners = new EventListenerCollection();
            eventListeners.addEventListener(image, 'load', () => {
                eventListeners.removeAllEventListeners();
                resolve(image);
            }, false);
            eventListeners.addEventListener(image, 'error', () => {
                eventListeners.removeAllEventListeners();
                reject(new Error('Image failed to load'));
            }, false);
            image.src = `data:${mediaType};base64,${content}`;
        });
    }
}