diff options
Diffstat (limited to 'ext/bg/js')
-rw-r--r-- | ext/bg/js/anki-note-builder.js | 76 | ||||
-rw-r--r-- | ext/bg/js/audio-uri-builder.js | 4 | ||||
-rw-r--r-- | ext/bg/js/backend.js | 128 | ||||
-rw-r--r-- | ext/bg/js/clipboard-monitor.js | 4 | ||||
-rw-r--r-- | ext/bg/js/database.js | 297 | ||||
-rw-r--r-- | ext/bg/js/dictionary-importer.js | 279 | ||||
-rw-r--r-- | ext/bg/js/dictionary.js | 32 | ||||
-rw-r--r-- | ext/bg/js/handlebars.js | 9 | ||||
-rw-r--r-- | ext/bg/js/japanese.js | 705 | ||||
-rw-r--r-- | ext/bg/js/options.js | 15 | ||||
-rw-r--r-- | ext/bg/js/search-frontend.js | 8 | ||||
-rw-r--r-- | ext/bg/js/search-query-parser.js | 48 | ||||
-rw-r--r-- | ext/bg/js/search.js | 27 | ||||
-rw-r--r-- | ext/bg/js/settings/anki-templates.js | 7 | ||||
-rw-r--r-- | ext/bg/js/settings/anki.js | 2 | ||||
-rw-r--r-- | ext/bg/js/settings/dictionaries.js | 58 | ||||
-rw-r--r-- | ext/bg/js/settings/main.js | 8 | ||||
-rw-r--r-- | ext/bg/js/text-source-map.js | 115 | ||||
-rw-r--r-- | ext/bg/js/translator.js | 90 | ||||
-rw-r--r-- | ext/bg/js/util.js | 2 |
20 files changed, 1026 insertions, 888 deletions
diff --git a/ext/bg/js/anki-note-builder.js b/ext/bg/js/anki-note-builder.js index d0ff8205..244aaab8 100644 --- a/ext/bg/js/anki-note-builder.js +++ b/ext/bg/js/anki-note-builder.js @@ -17,11 +17,12 @@ */ class AnkiNoteBuilder { - constructor({renderTemplate}) { + constructor({audioSystem, renderTemplate}) { + this._audioSystem = audioSystem; this._renderTemplate = renderTemplate; } - async createNote(definition, mode, options, templates) { + async createNote(definition, mode, context, options, templates) { const isKanji = (mode === 'kanji'); const tags = options.anki.tags; const modeOptions = isKanji ? options.anki.kanji : options.anki.terms; @@ -35,7 +36,7 @@ class AnkiNoteBuilder { }; for (const [fieldName, fieldValue] of modeOptionsFieldEntries) { - note.fields[fieldName] = await this.formatField(fieldValue, definition, mode, options, templates, null); + note.fields[fieldName] = await this.formatField(fieldValue, definition, mode, context, options, templates, null); } if (!isKanji && definition.audio) { @@ -60,7 +61,7 @@ class AnkiNoteBuilder { return note; } - async formatField(field, definition, mode, options, templates, errors=null) { + async formatField(field, definition, mode, context, options, templates, errors=null) { const data = { marker: null, definition, @@ -69,7 +70,8 @@ class AnkiNoteBuilder { modeTermKanji: mode === 'term-kanji', modeTermKana: mode === 'term-kana', modeKanji: mode === 'kanji', - compactGlossaries: options.general.compactGlossaries + compactGlossaries: options.general.compactGlossaries, + context }; const pattern = /\{([\w-]+)\}/g; return await AnkiNoteBuilder.stringReplaceAsync(field, pattern, async (g0, marker) => { @@ -83,6 +85,70 @@ class AnkiNoteBuilder { }); } + async injectAudio(definition, fields, sources, optionsContext) { + if (!this._containsMarker(fields, 'audio')) { return; } + + try { + const expressions = definition.expressions; + const audioSourceDefinition = Array.isArray(expressions) ? expressions[0] : definition; + + const {uri} = await this._audioSystem.getDefinitionAudio(audioSourceDefinition, sources, {tts: false, optionsContext}); + const filename = this._createInjectedAudioFileName(audioSourceDefinition); + if (filename !== null) { + definition.audio = {url: uri, filename}; + } + } catch (e) { + // NOP + } + } + + async injectScreenshot(definition, fields, screenshot, anki) { + if (!this._containsMarker(fields, 'screenshot')) { return; } + + const now = new Date(Date.now()); + const filename = `yomichan_browser_screenshot_${definition.reading}_${this._dateToString(now)}.${screenshot.format}`; + const data = screenshot.dataUrl.replace(/^data:[\w\W]*?,/, ''); + + try { + await anki.storeMediaFile(filename, data); + } catch (e) { + return; + } + + definition.screenshotFileName = filename; + } + + _createInjectedAudioFileName(definition) { + const {reading, expression} = definition; + if (!reading && !expression) { return null; } + + let filename = 'yomichan'; + if (reading) { filename += `_${reading}`; } + if (expression) { filename += `_${expression}`; } + filename += '.mp3'; + return filename; + } + + _dateToString(date) { + const year = date.getUTCFullYear(); + const month = date.getUTCMonth().toString().padStart(2, '0'); + const day = date.getUTCDate().toString().padStart(2, '0'); + const hours = date.getUTCHours().toString().padStart(2, '0'); + const minutes = date.getUTCMinutes().toString().padStart(2, '0'); + const seconds = date.getUTCSeconds().toString().padStart(2, '0'); + return `${year}-${month}-${day}-${hours}-${minutes}-${seconds}`; + } + + _containsMarker(fields, marker) { + marker = `{${marker}}`; + for (const fieldValue of Object.values(fields)) { + if (fieldValue.includes(marker)) { + return true; + } + } + return false; + } + static stringReplaceAsync(str, regex, replacer) { let match; let index = 0; diff --git a/ext/bg/js/audio-uri-builder.js b/ext/bg/js/audio-uri-builder.js index 499c3441..158006bb 100644 --- a/ext/bg/js/audio-uri-builder.js +++ b/ext/bg/js/audio-uri-builder.js @@ -17,7 +17,7 @@ */ /* global - * jpIsStringEntirelyKana + * jp */ class AudioUriBuilder { @@ -66,7 +66,7 @@ class AudioUriBuilder { let kana = definition.reading; let kanji = definition.expression; - if (!kana && jpIsStringEntirelyKana(kanji)) { + if (!kana && jp.isStringEntirelyKana(kanji)) { kana = kanji; kanji = null; } diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index 978c5a4a..1fa7ede1 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -24,6 +24,8 @@ * AudioUriBuilder * BackendApiForwarder * ClipboardMonitor + * Database + * DictionaryImporter * JsonSchema * Mecab * Translator @@ -32,9 +34,7 @@ * dictEnabledSet * dictTermsSort * handlebarsRenderDynamic - * jpConvertReading - * jpDistributeFuriganaInflected - * jpKatakanaToHiragana + * jp * optionsLoad * optionsSave * profileConditionsDescriptor @@ -45,16 +45,22 @@ class Backend { constructor() { - this.translator = new Translator(); + this.database = new Database(); + this.dictionaryImporter = new DictionaryImporter(); + this.translator = new Translator(this.database); this.anki = new AnkiNull(); this.mecab = new Mecab(); this.clipboardMonitor = new ClipboardMonitor({getClipboard: this._onApiClipboardGet.bind(this)}); - this.ankiNoteBuilder = new AnkiNoteBuilder({renderTemplate: this._renderTemplate.bind(this)}); this.options = null; this.optionsSchema = null; this.defaultAnkiFieldTemplates = null; this.audioSystem = new AudioSystem({getAudioUri: this._getAudioUri.bind(this)}); this.audioUriBuilder = new AudioUriBuilder(); + this.ankiNoteBuilder = new AnkiNoteBuilder({ + audioSystem: this.audioSystem, + renderTemplate: this._renderTemplate.bind(this) + }); + this.optionsContext = { depth: 0, url: window.location.href @@ -109,6 +115,7 @@ class Backend { } async prepare() { + await this.database.prepare(); await this.translator.prepare(); this.optionsSchema = await requestJson(chrome.runtime.getURL('/bg/data/options-schema.json'), 'GET'); @@ -298,6 +305,10 @@ class Backend { return true; } + async importDictionary(archiveSource, onProgress, details) { + return await this.dictionaryImporter.import(this.database, archiveSource, onProgress, details); + } + // Message handlers _onApiYomichanCoreReady(_params, sender) { @@ -402,13 +413,13 @@ class Backend { dictTermsSort(definitions); const {expression, reading} = definitions[0]; const source = text.substring(0, sourceLength); - for (const {text: text2, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) { - const reading2 = jpConvertReading(text2, furigana, options.parsing.readingMode); + for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); term.push({text: text2, reading: reading2}); } text = text.substring(source.length); } else { - const reading = jpConvertReading(text[0], null, options.parsing.readingMode); + const reading = jp.convertReading(text[0], null, options.parsing.readingMode); term.push({text: text[0], reading}); text = text.substring(1); } @@ -427,16 +438,16 @@ class Backend { for (const {expression, reading, source} of parsedLine) { const term = []; if (expression !== null && reading !== null) { - for (const {text: text2, furigana} of jpDistributeFuriganaInflected( + for (const {text: text2, furigana} of jp.distributeFuriganaInflected( expression, - jpKatakanaToHiragana(reading), + jp.convertKatakanaToHiragana(reading), source )) { - const reading2 = jpConvertReading(text2, furigana, options.parsing.readingMode); + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); term.push({text: text2, reading: reading2}); } } else { - const reading2 = jpConvertReading(source, null, options.parsing.readingMode); + const reading2 = jp.convertReading(source, null, options.parsing.readingMode); term.push({text: source, reading: reading2}); } result.push(term); @@ -448,12 +459,12 @@ class Backend { return results; } - async _onApiDefinitionAdd({definition, mode, context, optionsContext}) { + async _onApiDefinitionAdd({definition, mode, context, details, optionsContext}) { const options = this.getOptions(optionsContext); const templates = this.defaultAnkiFieldTemplates; if (mode !== 'kanji') { - await this._audioInject( + await this.ankiNoteBuilder.injectAudio( definition, options.anki.terms.fields, options.audio.sources, @@ -461,19 +472,20 @@ class Backend { ); } - if (context && context.screenshot) { - await this._injectScreenshot( + if (details && details.screenshot) { + await this.ankiNoteBuilder.injectScreenshot( definition, options.anki.terms.fields, - context.screenshot + details.screenshot, + this.anki ); } - const note = await this.ankiNoteBuilder.createNote(definition, mode, options, templates); + const note = await this.ankiNoteBuilder.createNote(definition, mode, context, options, templates); return this.anki.addNote(note); } - async _onApiDefinitionsAddable({definitions, modes, optionsContext}) { + async _onApiDefinitionsAddable({definitions, modes, context, optionsContext}) { const options = this.getOptions(optionsContext); const templates = this.defaultAnkiFieldTemplates; const states = []; @@ -482,7 +494,7 @@ class Backend { const notes = []; for (const definition of definitions) { for (const mode of modes) { - const note = await this.ankiNoteBuilder.createNote(definition, mode, options, templates); + const note = await this.ankiNoteBuilder.createNote(definition, mode, context, options, templates); notes.push(note); } } @@ -793,86 +805,10 @@ class Backend { return await this.audioUriBuilder.getUri(definition, source, options); } - async _audioInject(definition, fields, sources, optionsContext) { - let usesAudio = false; - for (const fieldValue of Object.values(fields)) { - if (fieldValue.includes('{audio}')) { - usesAudio = true; - break; - } - } - - if (!usesAudio) { - return true; - } - - try { - const expressions = definition.expressions; - const audioSourceDefinition = Array.isArray(expressions) ? expressions[0] : definition; - - const {uri} = await this.audioSystem.getDefinitionAudio(audioSourceDefinition, sources, {tts: false, optionsContext}); - const filename = this._createInjectedAudioFileName(audioSourceDefinition); - if (filename !== null) { - definition.audio = {url: uri, filename}; - } - - return true; - } catch (e) { - return false; - } - } - - async _injectScreenshot(definition, fields, screenshot) { - let usesScreenshot = false; - for (const fieldValue of Object.values(fields)) { - if (fieldValue.includes('{screenshot}')) { - usesScreenshot = true; - break; - } - } - - if (!usesScreenshot) { - return; - } - - const dateToString = (date) => { - const year = date.getUTCFullYear(); - const month = date.getUTCMonth().toString().padStart(2, '0'); - const day = date.getUTCDate().toString().padStart(2, '0'); - const hours = date.getUTCHours().toString().padStart(2, '0'); - const minutes = date.getUTCMinutes().toString().padStart(2, '0'); - const seconds = date.getUTCSeconds().toString().padStart(2, '0'); - return `${year}-${month}-${day}-${hours}-${minutes}-${seconds}`; - }; - - const now = new Date(Date.now()); - const filename = `yomichan_browser_screenshot_${definition.reading}_${dateToString(now)}.${screenshot.format}`; - const data = screenshot.dataUrl.replace(/^data:[\w\W]*?,/, ''); - - try { - await this.anki.storeMediaFile(filename, data); - } catch (e) { - return; - } - - definition.screenshotFileName = filename; - } - async _renderTemplate(template, data) { return handlebarsRenderDynamic(template, data); } - _createInjectedAudioFileName(definition) { - const {reading, expression} = definition; - if (!reading && !expression) { return null; } - - let filename = 'yomichan'; - if (reading) { filename += `_${reading}`; } - if (expression) { filename += `_${expression}`; } - filename += '.mp3'; - return filename; - } - static _getTabUrl(tab) { return new Promise((resolve) => { chrome.tabs.sendMessage(tab.id, {action: 'getUrl'}, {frameId: 0}, (response) => { diff --git a/ext/bg/js/clipboard-monitor.js b/ext/bg/js/clipboard-monitor.js index 9a881f57..c67525fc 100644 --- a/ext/bg/js/clipboard-monitor.js +++ b/ext/bg/js/clipboard-monitor.js @@ -17,7 +17,7 @@ */ /* global - * jpIsStringPartiallyJapanese + * jp */ class ClipboardMonitor extends EventDispatcher { @@ -54,7 +54,7 @@ class ClipboardMonitor extends EventDispatcher { text !== this._previousText ) { this._previousText = text; - if (jpIsStringPartiallyJapanese(text)) { + if (jp.isStringPartiallyJapanese(text)) { this.trigger('change', {text}); } } diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js index 08a2a39f..4a677fea 100644 --- a/ext/bg/js/database.js +++ b/ext/bg/js/database.js @@ -99,7 +99,7 @@ class Database { }); return true; } catch (e) { - console.error(e); + logError(e); return false; } } @@ -110,6 +110,10 @@ class Database { this.db = null; } + isPrepared() { + return this.db !== null; + } + async purge() { this._validate(); @@ -322,177 +326,44 @@ class Database { return result; } - async importDictionary(archiveSource, onProgress, details) { + async dictionaryExists(title) { this._validate(); - const db = this.db; - const hasOnProgress = (typeof onProgress === 'function'); - - // Read archive - const archive = await JSZip.loadAsync(archiveSource); - - // Read and validate index - const indexFileName = 'index.json'; - const indexFile = archive.files[indexFileName]; - if (!indexFile) { - throw new Error('No dictionary index found in archive'); - } - - const index = JSON.parse(await indexFile.async('string')); - - const indexSchema = await this._getSchema('/bg/data/dictionary-index-schema.json'); - Database._validateJsonSchema(index, indexSchema, indexFileName); - - const dictionaryTitle = index.title; - const version = index.format || index.version; - - if (!dictionaryTitle || !index.revision) { - throw new Error('Unrecognized dictionary format'); - } - - // Verify database is not already imported - if (await this._dictionaryExists(dictionaryTitle)) { - throw new Error('Dictionary is already imported'); - } - - // Data format converters - const convertTermBankEntry = (entry) => { - if (version === 1) { - const [expression, reading, definitionTags, rules, score, ...glossary] = entry; - return {expression, reading, definitionTags, rules, score, glossary}; - } else { - const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; - return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags}; - } - }; - - const convertTermMetaBankEntry = (entry) => { - const [expression, mode, data] = entry; - return {expression, mode, data}; - }; - - const convertKanjiBankEntry = (entry) => { - if (version === 1) { - const [character, onyomi, kunyomi, tags, ...meanings] = entry; - return {character, onyomi, kunyomi, tags, meanings}; - } else { - const [character, onyomi, kunyomi, tags, meanings, stats] = entry; - return {character, onyomi, kunyomi, tags, meanings, stats}; - } - }; - - const convertKanjiMetaBankEntry = (entry) => { - const [character, mode, data] = entry; - return {character, mode, data}; - }; - - const convertTagBankEntry = (entry) => { - const [name, category, order, notes, score] = entry; - return {name, category, order, notes, score}; - }; + const transaction = this.db.transaction(['dictionaries'], 'readonly'); + const index = transaction.objectStore('dictionaries').index('title'); + const query = IDBKeyRange.only(title); + const count = await Database._getCount(index, query); + return count > 0; + } - // Archive file reading - const readFileSequence = async (fileNameFormat, convertEntry, schema) => { - const results = []; - for (let i = 1; true; ++i) { - const fileName = fileNameFormat.replace(/\?/, `${i}`); - const file = archive.files[fileName]; - if (!file) { break; } - - const entries = JSON.parse(await file.async('string')); - Database._validateJsonSchema(entries, schema, fileName); - - for (let entry of entries) { - entry = convertEntry(entry); - entry.dictionary = dictionaryTitle; - results.push(entry); - } - } - return results; - }; + bulkAdd(objectStoreName, items, start, count) { + return new Promise((resolve, reject) => { + const transaction = this.db.transaction([objectStoreName], 'readwrite'); + const objectStore = transaction.objectStore(objectStoreName); - // Load schemas - const dataBankSchemaPaths = this.constructor._getDataBankSchemaPaths(version); - const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); - - // Load data - const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]); - const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]); - const kanjiList = await readFileSequence('kanji_bank_?.json', convertKanjiBankEntry, dataBankSchemas[2]); - const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]); - const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]); - - // Old tags - const indexTagMeta = index.tagMeta; - if (typeof indexTagMeta === 'object' && indexTagMeta !== null) { - for (const name of Object.keys(indexTagMeta)) { - const {category, order, notes, score} = indexTagMeta[name]; - tagList.push({name, category, order, notes, score}); + if (start + count > items.length) { + count = items.length - start; } - } - // Prefix wildcard support - const prefixWildcardsSupported = !!details.prefixWildcardsSupported; - if (prefixWildcardsSupported) { - for (const entry of termList) { - entry.expressionReverse = stringReverse(entry.expression); - entry.readingReverse = stringReverse(entry.reading); + if (count <= 0) { + resolve(); + return; } - } - - // Add dictionary - const summary = { - title: dictionaryTitle, - revision: index.revision, - sequenced: index.sequenced, - version, - prefixWildcardsSupported - }; - - { - const transaction = db.transaction(['dictionaries'], 'readwrite'); - const objectStore = transaction.objectStore('dictionaries'); - await Database._bulkAdd(objectStore, [summary], 0, 1); - } - - // Add data - const errors = []; - const total = ( - termList.length + - termMetaList.length + - kanjiList.length + - kanjiMetaList.length + - tagList.length - ); - let loadedCount = 0; - const maxTransactionLength = 1000; - - const bulkAdd = async (objectStoreName, entries) => { - const ii = entries.length; - for (let i = 0; i < ii; i += maxTransactionLength) { - const count = Math.min(maxTransactionLength, ii - i); - try { - const transaction = db.transaction([objectStoreName], 'readwrite'); - const objectStore = transaction.objectStore(objectStoreName); - await Database._bulkAdd(objectStore, entries, i, count); - } catch (e) { - errors.push(e); + const end = start + count; + let completedCount = 0; + const onError = (e) => reject(e); + const onSuccess = () => { + if (++completedCount >= count) { + resolve(); } + }; - loadedCount += count; - if (hasOnProgress) { - onProgress(total, loadedCount); - } + for (let i = start; i < end; ++i) { + const request = objectStore.add(items[i]); + request.onerror = onError; + request.onsuccess = onSuccess; } - }; - - await bulkAdd('terms', termList); - await bulkAdd('termMeta', termMetaList); - await bulkAdd('kanji', kanjiList); - await bulkAdd('kanjiMeta', kanjiMetaList); - await bulkAdd('tagMeta', tagList); - - return {result: summary, errors}; + }); } // Private @@ -503,80 +374,6 @@ class Database { } } - async _getSchema(fileName) { - let schemaPromise = this._schemas.get(fileName); - if (typeof schemaPromise !== 'undefined') { - return schemaPromise; - } - - schemaPromise = requestJson(chrome.runtime.getURL(fileName), 'GET'); - this._schemas.set(fileName, schemaPromise); - return schemaPromise; - } - - static _validateJsonSchema(value, schema, fileName) { - try { - JsonSchema.validate(value, schema); - } catch (e) { - throw Database._formatSchemaError(e, fileName); - } - } - - static _formatSchemaError(e, fileName) { - const valuePathString = Database._getSchemaErrorPathString(e.info.valuePath, 'dictionary'); - const schemaPathString = Database._getSchemaErrorPathString(e.info.schemaPath, 'schema'); - - const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); - e2.data = e; - - return e2; - } - - static _getSchemaErrorPathString(infoList, base='') { - let result = base; - for (const [part] of infoList) { - switch (typeof part) { - case 'string': - if (result.length > 0) { - result += '.'; - } - result += part; - break; - case 'number': - result += `[${part}]`; - break; - } - } - return result; - } - - static _getDataBankSchemaPaths(version) { - const termBank = ( - version === 1 ? - '/bg/data/dictionary-term-bank-v1-schema.json' : - '/bg/data/dictionary-term-bank-v3-schema.json' - ); - const termMetaBank = '/bg/data/dictionary-term-meta-bank-v3-schema.json'; - const kanjiBank = ( - version === 1 ? - '/bg/data/dictionary-kanji-bank-v1-schema.json' : - '/bg/data/dictionary-kanji-bank-v3-schema.json' - ); - const kanjiMetaBank = '/bg/data/dictionary-kanji-meta-bank-v3-schema.json'; - const tagBank = '/bg/data/dictionary-tag-bank-v3-schema.json'; - - return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; - } - - async _dictionaryExists(title) { - const db = this.db; - const dbCountTransaction = db.transaction(['dictionaries'], 'readonly'); - const dbIndex = dbCountTransaction.objectStore('dictionaries').index('title'); - const only = IDBKeyRange.only(title); - const count = await Database._getCount(dbIndex, only); - return count > 0; - } - async _findGenericBulk(tableName, indexName, indexValueList, dictionaries, createResult) { this._validate(); @@ -760,34 +557,6 @@ class Database { }); } - static _bulkAdd(objectStore, items, start, count) { - return new Promise((resolve, reject) => { - if (start + count > items.length) { - count = items.length - start; - } - - if (count <= 0) { - resolve(); - return; - } - - const end = start + count; - let completedCount = 0; - const onError = (e) => reject(e); - const onSuccess = () => { - if (++completedCount >= count) { - resolve(); - } - }; - - for (let i = start; i < end; ++i) { - const request = objectStore.add(items[i]); - request.onerror = onError; - request.onsuccess = onSuccess; - } - }); - } - static _open(name, version, onUpgradeNeeded) { return new Promise((resolve, reject) => { const request = window.indexedDB.open(name, version * 10); diff --git a/ext/bg/js/dictionary-importer.js b/ext/bg/js/dictionary-importer.js new file mode 100644 index 00000000..254fde4f --- /dev/null +++ b/ext/bg/js/dictionary-importer.js @@ -0,0 +1,279 @@ +/* + * Copyright (C) 2020 Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/* global + * JSZip + * JsonSchema + * requestJson + */ + +class DictionaryImporter { + constructor() { + this._schemas = new Map(); + } + + async import(database, archiveSource, onProgress, details) { + if (!database) { + throw new Error('Invalid database'); + } + if (!database.isPrepared()) { + throw new Error('Database is not ready'); + } + + const hasOnProgress = (typeof onProgress === 'function'); + + // Read archive + const archive = await JSZip.loadAsync(archiveSource); + + // Read and validate index + const indexFileName = 'index.json'; + const indexFile = archive.files[indexFileName]; + if (!indexFile) { + throw new Error('No dictionary index found in archive'); + } + + const index = JSON.parse(await indexFile.async('string')); + + const indexSchema = await this._getSchema('/bg/data/dictionary-index-schema.json'); + this._validateJsonSchema(index, indexSchema, indexFileName); + + const dictionaryTitle = index.title; + const version = index.format || index.version; + + if (!dictionaryTitle || !index.revision) { + throw new Error('Unrecognized dictionary format'); + } + + // Verify database is not already imported + if (await database.dictionaryExists(dictionaryTitle)) { + throw new Error('Dictionary is already imported'); + } + + // Data format converters + const convertTermBankEntry = (entry) => { + if (version === 1) { + const [expression, reading, definitionTags, rules, score, ...glossary] = entry; + return {expression, reading, definitionTags, rules, score, glossary}; + } else { + const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry; + return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags}; + } + }; + + const convertTermMetaBankEntry = (entry) => { + const [expression, mode, data] = entry; + return {expression, mode, data}; + }; + + const convertKanjiBankEntry = (entry) => { + if (version === 1) { + const [character, onyomi, kunyomi, tags, ...meanings] = entry; + return {character, onyomi, kunyomi, tags, meanings}; + } else { + const [character, onyomi, kunyomi, tags, meanings, stats] = entry; + return {character, onyomi, kunyomi, tags, meanings, stats}; + } + }; + + const convertKanjiMetaBankEntry = (entry) => { + const [character, mode, data] = entry; + return {character, mode, data}; + }; + + const convertTagBankEntry = (entry) => { + const [name, category, order, notes, score] = entry; + return {name, category, order, notes, score}; + }; + + // Archive file reading + const readFileSequence = async (fileNameFormat, convertEntry, schema) => { + const results = []; + for (let i = 1; true; ++i) { + const fileName = fileNameFormat.replace(/\?/, `${i}`); + const file = archive.files[fileName]; + if (!file) { break; } + + const entries = JSON.parse(await file.async('string')); + this._validateJsonSchema(entries, schema, fileName); + + for (let entry of entries) { + entry = convertEntry(entry); + entry.dictionary = dictionaryTitle; + results.push(entry); + } + } + return results; + }; + + // Load schemas + const dataBankSchemaPaths = this._getDataBankSchemaPaths(version); + const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); + + // Load data + const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]); + const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]); + const kanjiList = await readFileSequence('kanji_bank_?.json', convertKanjiBankEntry, dataBankSchemas[2]); + const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]); + const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]); + + // Old tags + const indexTagMeta = index.tagMeta; + if (typeof indexTagMeta === 'object' && indexTagMeta !== null) { + for (const name of Object.keys(indexTagMeta)) { + const {category, order, notes, score} = indexTagMeta[name]; + tagList.push({name, category, order, notes, score}); + } + } + + // Prefix wildcard support + const prefixWildcardsSupported = !!details.prefixWildcardsSupported; + if (prefixWildcardsSupported) { + for (const entry of termList) { + entry.expressionReverse = stringReverse(entry.expression); + entry.readingReverse = stringReverse(entry.reading); + } + } + + // Add dictionary + const summary = this._createSummary(dictionaryTitle, version, index, {prefixWildcardsSupported}); + + database.bulkAdd('dictionaries', [summary], 0, 1); + + // Add data + const errors = []; + const total = ( + termList.length + + termMetaList.length + + kanjiList.length + + kanjiMetaList.length + + tagList.length + ); + let loadedCount = 0; + const maxTransactionLength = 1000; + + const bulkAdd = async (objectStoreName, entries) => { + const ii = entries.length; + for (let i = 0; i < ii; i += maxTransactionLength) { + const count = Math.min(maxTransactionLength, ii - i); + + try { + await database.bulkAdd(objectStoreName, entries, i, count); + } catch (e) { + errors.push(errorToJson(e)); + } + + loadedCount += count; + if (hasOnProgress) { + onProgress(total, loadedCount); + } + } + }; + + await bulkAdd('terms', termList); + await bulkAdd('termMeta', termMetaList); + await bulkAdd('kanji', kanjiList); + await bulkAdd('kanjiMeta', kanjiMetaList); + await bulkAdd('tagMeta', tagList); + + return {result: summary, errors}; + } + + _createSummary(dictionaryTitle, version, index, details) { + const summary = { + title: dictionaryTitle, + revision: index.revision, + sequenced: index.sequenced, + version + }; + + const {author, url, description, attribution} = index; + if (typeof author === 'string') { summary.author = author; } + if (typeof url === 'string') { summary.url = url; } + if (typeof description === 'string') { summary.description = description; } + if (typeof attribution === 'string') { summary.attribution = attribution; } + + Object.assign(summary, details); + + return summary; + } + + async _getSchema(fileName) { + let schemaPromise = this._schemas.get(fileName); + if (typeof schemaPromise !== 'undefined') { + return schemaPromise; + } + + schemaPromise = requestJson(chrome.runtime.getURL(fileName), 'GET'); + this._schemas.set(fileName, schemaPromise); + return schemaPromise; + } + + _validateJsonSchema(value, schema, fileName) { + try { + JsonSchema.validate(value, schema); + } catch (e) { + throw this._formatSchemaError(e, fileName); + } + } + + _formatSchemaError(e, fileName) { + const valuePathString = this._getSchemaErrorPathString(e.info.valuePath, 'dictionary'); + const schemaPathString = this._getSchemaErrorPathString(e.info.schemaPath, 'schema'); + + const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); + e2.data = e; + + return e2; + } + + _getSchemaErrorPathString(infoList, base='') { + let result = base; + for (const [part] of infoList) { + switch (typeof part) { + case 'string': + if (result.length > 0) { + result += '.'; + } + result += part; + break; + case 'number': + result += `[${part}]`; + break; + } + } + return result; + } + + _getDataBankSchemaPaths(version) { + const termBank = ( + version === 1 ? + '/bg/data/dictionary-term-bank-v1-schema.json' : + '/bg/data/dictionary-term-bank-v3-schema.json' + ); + const termMetaBank = '/bg/data/dictionary-term-meta-bank-v3-schema.json'; + const kanjiBank = ( + version === 1 ? + '/bg/data/dictionary-kanji-bank-v1-schema.json' : + '/bg/data/dictionary-kanji-bank-v3-schema.json' + ); + const kanjiMetaBank = '/bg/data/dictionary-kanji-meta-bank-v3-schema.json'; + const tagBank = '/bg/data/dictionary-tag-bank-v3-schema.json'; + + return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; + } +} diff --git a/ext/bg/js/dictionary.js b/ext/bg/js/dictionary.js index 3dd1d0c1..74bd5a64 100644 --- a/ext/bg/js/dictionary.js +++ b/ext/bg/js/dictionary.js @@ -137,30 +137,6 @@ function dictTermsGroup(definitions, dictionaries) { return dictTermsSort(results); } -function dictAreSetsEqual(set1, set2) { - if (set1.size !== set2.size) { - return false; - } - - for (const value of set1) { - if (!set2.has(value)) { - return false; - } - } - - return true; -} - -function dictGetSetIntersection(set1, set2) { - const result = []; - for (const value of set1) { - if (set2.has(value)) { - result.push(value); - } - } - return result; -} - function dictTermsMergeBySequence(definitions, mainDictionary) { const sequencedDefinitions = new Map(); const nonSequencedDefinitions = []; @@ -281,11 +257,11 @@ function dictTermsMergeByGloss(result, definitions, appendTo=null, mergedIndices const only = []; const expressionSet = definition.expression; const readingSet = definition.reading; - if (!dictAreSetsEqual(expressionSet, resultExpressionSet)) { - only.push(...dictGetSetIntersection(expressionSet, resultExpressionSet)); + if (!areSetsEqual(expressionSet, resultExpressionSet)) { + only.push(...getSetIntersection(expressionSet, resultExpressionSet)); } - if (!dictAreSetsEqual(readingSet, resultReadingSet)) { - only.push(...dictGetSetIntersection(readingSet, resultReadingSet)); + if (!areSetsEqual(readingSet, resultReadingSet)) { + only.push(...getSetIntersection(readingSet, resultReadingSet)); } definition.only = only; } diff --git a/ext/bg/js/handlebars.js b/ext/bg/js/handlebars.js index e3ce6bd0..5fda5baa 100644 --- a/ext/bg/js/handlebars.js +++ b/ext/bg/js/handlebars.js @@ -18,8 +18,7 @@ /* global * Handlebars - * jpDistributeFurigana - * jpIsCodePointKanji + * jp */ function handlebarsEscape(text) { @@ -33,7 +32,7 @@ function handlebarsDumpObject(options) { function handlebarsFurigana(options) { const definition = options.fn(this); - const segs = jpDistributeFurigana(definition.expression, definition.reading); + const segs = jp.distributeFurigana(definition.expression, definition.reading); let result = ''; for (const seg of segs) { @@ -49,7 +48,7 @@ function handlebarsFurigana(options) { function handlebarsFuriganaPlain(options) { const definition = options.fn(this); - const segs = jpDistributeFurigana(definition.expression, definition.reading); + const segs = jp.distributeFurigana(definition.expression, definition.reading); let result = ''; for (const seg of segs) { @@ -66,7 +65,7 @@ function handlebarsFuriganaPlain(options) { function handlebarsKanjiLinks(options) { let result = ''; for (const c of options.fn(this)) { - if (jpIsCodePointKanji(c.codePointAt(0))) { + if (jp.isCodePointKanji(c.codePointAt(0))) { result += `<a href="#" class="kanji-link">${c}</a>`; } else { result += c; diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 3b37754d..2a2b39fd 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -17,442 +17,373 @@ */ /* global + * jp * wanakana */ -const JP_HALFWIDTH_KATAKANA_MAPPING = new Map([ - ['ヲ', 'ヲヺ-'], - ['ァ', 'ァ--'], - ['ィ', 'ィ--'], - ['ゥ', 'ゥ--'], - ['ェ', 'ェ--'], - ['ォ', 'ォ--'], - ['ャ', 'ャ--'], - ['ュ', 'ュ--'], - ['ョ', 'ョ--'], - ['ッ', 'ッ--'], - ['ー', 'ー--'], - ['ア', 'ア--'], - ['イ', 'イ--'], - ['ウ', 'ウヴ-'], - ['エ', 'エ--'], - ['オ', 'オ--'], - ['カ', 'カガ-'], - ['キ', 'キギ-'], - ['ク', 'クグ-'], - ['ケ', 'ケゲ-'], - ['コ', 'コゴ-'], - ['サ', 'サザ-'], - ['シ', 'シジ-'], - ['ス', 'スズ-'], - ['セ', 'セゼ-'], - ['ソ', 'ソゾ-'], - ['タ', 'タダ-'], - ['チ', 'チヂ-'], - ['ツ', 'ツヅ-'], - ['テ', 'テデ-'], - ['ト', 'トド-'], - ['ナ', 'ナ--'], - ['ニ', 'ニ--'], - ['ヌ', 'ヌ--'], - ['ネ', 'ネ--'], - ['ノ', 'ノ--'], - ['ハ', 'ハバパ'], - ['ヒ', 'ヒビピ'], - ['フ', 'フブプ'], - ['ヘ', 'ヘベペ'], - ['ホ', 'ホボポ'], - ['マ', 'マ--'], - ['ミ', 'ミ--'], - ['ム', 'ム--'], - ['メ', 'メ--'], - ['モ', 'モ--'], - ['ヤ', 'ヤ--'], - ['ユ', 'ユ--'], - ['ヨ', 'ヨ--'], - ['ラ', 'ラ--'], - ['リ', 'リ--'], - ['ル', 'ル--'], - ['レ', 'レ--'], - ['ロ', 'ロ--'], - ['ワ', 'ワ--'], - ['ン', 'ン--'] -]); - -const JP_HIRAGANA_RANGE = [0x3040, 0x309f]; -const JP_KATAKANA_RANGE = [0x30a0, 0x30ff]; -const JP_KANA_RANGES = [JP_HIRAGANA_RANGE, JP_KATAKANA_RANGE]; - -const JP_CJK_COMMON_RANGE = [0x4e00, 0x9fff]; -const JP_CJK_RARE_RANGE = [0x3400, 0x4dbf]; -const JP_CJK_RANGES = [JP_CJK_COMMON_RANGE, JP_CJK_RARE_RANGE]; - -const JP_ITERATION_MARK_CHAR_CODE = 0x3005; - -// Japanese character ranges, roughly ordered in order of expected frequency -const JP_JAPANESE_RANGES = [ - JP_HIRAGANA_RANGE, - JP_KATAKANA_RANGE, - - JP_CJK_COMMON_RANGE, - JP_CJK_RARE_RANGE, - - [0xff66, 0xff9f], // Halfwidth katakana - - [0x30fb, 0x30fc], // Katakana punctuation - [0xff61, 0xff65], // Kana punctuation - [0x3000, 0x303f], // CJK punctuation - - [0xff10, 0xff19], // Fullwidth numbers - [0xff21, 0xff3a], // Fullwidth upper case Latin letters - [0xff41, 0xff5a], // Fullwidth lower case Latin letters - - [0xff01, 0xff0f], // Fullwidth punctuation 1 - [0xff1a, 0xff1f], // Fullwidth punctuation 2 - [0xff3b, 0xff3f], // Fullwidth punctuation 3 - [0xff5b, 0xff60], // Fullwidth punctuation 4 - [0xffe0, 0xffee] // Currency markers -]; - - -// Helper functions - -function _jpIsCodePointInRanges(codePoint, ranges) { - for (const [min, max] of ranges) { - if (codePoint >= min && codePoint <= max) { - return true; +(() => { + const HALFWIDTH_KATAKANA_MAPPING = new Map([ + ['ヲ', 'ヲヺ-'], + ['ァ', 'ァ--'], + ['ィ', 'ィ--'], + ['ゥ', 'ゥ--'], + ['ェ', 'ェ--'], + ['ォ', 'ォ--'], + ['ャ', 'ャ--'], + ['ュ', 'ュ--'], + ['ョ', 'ョ--'], + ['ッ', 'ッ--'], + ['ー', 'ー--'], + ['ア', 'ア--'], + ['イ', 'イ--'], + ['ウ', 'ウヴ-'], + ['エ', 'エ--'], + ['オ', 'オ--'], + ['カ', 'カガ-'], + ['キ', 'キギ-'], + ['ク', 'クグ-'], + ['ケ', 'ケゲ-'], + ['コ', 'コゴ-'], + ['サ', 'サザ-'], + ['シ', 'シジ-'], + ['ス', 'スズ-'], + ['セ', 'セゼ-'], + ['ソ', 'ソゾ-'], + ['タ', 'タダ-'], + ['チ', 'チヂ-'], + ['ツ', 'ツヅ-'], + ['テ', 'テデ-'], + ['ト', 'トド-'], + ['ナ', 'ナ--'], + ['ニ', 'ニ--'], + ['ヌ', 'ヌ--'], + ['ネ', 'ネ--'], + ['ノ', 'ノ--'], + ['ハ', 'ハバパ'], + ['ヒ', 'ヒビピ'], + ['フ', 'フブプ'], + ['ヘ', 'ヘベペ'], + ['ホ', 'ホボポ'], + ['マ', 'マ--'], + ['ミ', 'ミ--'], + ['ム', 'ム--'], + ['メ', 'メ--'], + ['モ', 'モ--'], + ['ヤ', 'ヤ--'], + ['ユ', 'ユ--'], + ['ヨ', 'ヨ--'], + ['ラ', 'ラ--'], + ['リ', 'リ--'], + ['ル', 'ル--'], + ['レ', 'レ--'], + ['ロ', 'ロ--'], + ['ワ', 'ワ--'], + ['ン', 'ン--'] + ]); + + const ITERATION_MARK_CODE_POINT = 0x3005; + + + // Existing functions + + const isCodePointKanji = jp.isCodePointKanji; + const isStringEntirelyKana = jp.isStringEntirelyKana; + + + // Conversion functions + + function convertKatakanaToHiragana(text) { + let result = ''; + for (const c of text) { + if (wanakana.isKatakana(c)) { + result += wanakana.toHiragana(c); + } else { + result += c; + } } - } - return false; -} - - -// Character code testing functions -function jpIsCodePointKanji(codePoint) { - return _jpIsCodePointInRanges(codePoint, JP_CJK_RANGES); -} - -function jpIsCodePointKana(codePoint) { - return _jpIsCodePointInRanges(codePoint, JP_KANA_RANGES); -} + return result; + } -function jpIsCodePointJapanese(codePoint) { - return _jpIsCodePointInRanges(codePoint, JP_JAPANESE_RANGES); -} + function convertHiraganaToKatakana(text) { + let result = ''; + for (const c of text) { + if (wanakana.isHiragana(c)) { + result += wanakana.toKatakana(c); + } else { + result += c; + } + } + return result; + } -// String testing functions + function convertToRomaji(text) { + return wanakana.toRomaji(text); + } -function jpIsStringEntirelyKana(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (!jpIsCodePointKana(c.codePointAt(0))) { - return false; + function convertReading(expressionFragment, readingFragment, readingMode) { + switch (readingMode) { + case 'hiragana': + return convertKatakanaToHiragana(readingFragment || ''); + case 'katakana': + return convertHiraganaToKatakana(readingFragment || ''); + case 'romaji': + if (readingFragment) { + return convertToRomaji(readingFragment); + } else { + if (isStringEntirelyKana(expressionFragment)) { + return convertToRomaji(expressionFragment); + } + } + return readingFragment; + case 'none': + return null; + default: + return readingFragment; } } - return true; -} - -function jpIsStringPartiallyJapanese(str) { - if (str.length === 0) { return false; } - for (const c of str) { - if (jpIsCodePointJapanese(c.codePointAt(0))) { - return true; + + function convertNumericToFullWidth(text) { + let result = ''; + for (const char of text) { + let c = char.codePointAt(0); + if (c >= 0x30 && c <= 0x39) { // ['0', '9'] + c += 0xff10 - 0x30; // 0xff10 = '0' full width + result += String.fromCodePoint(c); + } else { + result += char; + } } + return result; } - return false; -} + function convertHalfWidthKanaToFullWidth(text, sourceMap=null) { + let result = ''; + + // This function is safe to use charCodeAt instead of codePointAt, since all + // the relevant characters are represented with a single UTF-16 character code. + for (let i = 0, ii = text.length; i < ii; ++i) { + const c = text[i]; + const mapping = HALFWIDTH_KATAKANA_MAPPING.get(c); + if (typeof mapping !== 'string') { + result += c; + continue; + } -// Conversion functions - -function jpKatakanaToHiragana(text) { - let result = ''; - for (const c of text) { - if (wanakana.isKatakana(c)) { - result += wanakana.toHiragana(c); - } else { - result += c; - } - } + let index = 0; + switch (text.charCodeAt(i + 1)) { + case 0xff9e: // dakuten + index = 1; + break; + case 0xff9f: // handakuten + index = 2; + break; + } - return result; -} + let c2 = mapping[index]; + if (index > 0) { + if (c2 === '-') { // invalid + index = 0; + c2 = mapping[0]; + } else { + ++i; + } + } -function jpHiraganaToKatakana(text) { - let result = ''; - for (const c of text) { - if (wanakana.isHiragana(c)) { - result += wanakana.toKatakana(c); - } else { - result += c; + if (sourceMap !== null && index > 0) { + sourceMap.combine(result.length, 1); + } + result += c2; } + + return result; } - return result; -} - -function jpToRomaji(text) { - return wanakana.toRomaji(text); -} - -function jpConvertReading(expressionFragment, readingFragment, readingMode) { - switch (readingMode) { - case 'hiragana': - return jpKatakanaToHiragana(readingFragment || ''); - case 'katakana': - return jpHiraganaToKatakana(readingFragment || ''); - case 'romaji': - if (readingFragment) { - return jpToRomaji(readingFragment); + function convertAlphabeticToKana(text, sourceMap=null) { + let part = ''; + let result = ''; + + for (const char of text) { + // Note: 0x61 is the character code for 'a' + let c = char.codePointAt(0); + if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z'] + c += (0x61 - 0x41); + } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z'] + // NOP; c += (0x61 - 0x61); + } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth + c += (0x61 - 0xff21); + } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth + c += (0x61 - 0xff41); + } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash + c = 0x2d; // '-' } else { - if (jpIsStringEntirelyKana(expressionFragment)) { - return jpToRomaji(expressionFragment); + if (part.length > 0) { + result += convertAlphabeticPartToKana(part, sourceMap, result.length); + part = ''; } + result += char; + continue; } - return readingFragment; - case 'none': - return null; - default: - return readingFragment; - } -} - -function jpDistributeFurigana(expression, reading) { - const fallback = [{furigana: reading, text: expression}]; - if (!reading) { - return fallback; - } + part += String.fromCodePoint(c); + } - let isAmbiguous = false; - const segmentize = (reading2, groups) => { - if (groups.length === 0 || isAmbiguous) { - return []; + if (part.length > 0) { + result += convertAlphabeticPartToKana(part, sourceMap, result.length); } + return result; + } - const group = groups[0]; - if (group.mode === 'kana') { - if (jpKatakanaToHiragana(reading2).startsWith(jpKatakanaToHiragana(group.text))) { - const readingLeft = reading2.substring(group.text.length); - const segs = segmentize(readingLeft, groups.splice(1)); - if (segs) { - return [{text: group.text}].concat(segs); - } - } - } else { - let foundSegments = null; - for (let i = reading2.length; i >= group.text.length; --i) { - const readingUsed = reading2.substring(0, i); - const readingLeft = reading2.substring(i); - const segs = segmentize(readingLeft, groups.slice(1)); - if (segs) { - if (foundSegments !== null) { - // more than one way to segmentize the tail, mark as ambiguous - isAmbiguous = true; - return null; + function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) { + const result = wanakana.toHiragana(text); + + // Generate source mapping + if (sourceMap !== null) { + let i = 0; + let resultPos = 0; + const ii = text.length; + while (i < ii) { + // Find smallest matching substring + let iNext = i + 1; + let resultPosNext = result.length; + while (iNext < ii) { + const t = wanakana.toHiragana(text.substring(0, iNext)); + if (t === result.substring(0, t.length)) { + resultPosNext = t.length; + break; } - foundSegments = [{text: group.text, furigana: readingUsed}].concat(segs); + ++iNext; } - // there is only one way to segmentize the last non-kana group - if (groups.length === 1) { - break; + + // Merge characters + const removals = iNext - i - 1; + if (removals > 0) { + sourceMap.combine(sourceMapStart, removals); + } + ++sourceMapStart; + + // Empty elements + const additions = resultPosNext - resultPos - 1; + for (let j = 0; j < additions; ++j) { + sourceMap.insert(sourceMapStart, 0); + ++sourceMapStart; } + + i = iNext; + resultPos = resultPosNext; } - return foundSegments; - } - }; - - const groups = []; - let modePrev = null; - for (const c of expression) { - const codePoint = c.codePointAt(0); - const modeCurr = jpIsCodePointKanji(codePoint) || codePoint === JP_ITERATION_MARK_CHAR_CODE ? 'kanji' : 'kana'; - if (modeCurr === modePrev) { - groups[groups.length - 1].text += c; - } else { - groups.push({mode: modeCurr, text: c}); - modePrev = modeCurr; } - } - const segments = segmentize(reading, groups); - if (segments && !isAmbiguous) { - return segments; - } - return fallback; -} - -function jpDistributeFuriganaInflected(expression, reading, source) { - const output = []; - - let stemLength = 0; - const shortest = Math.min(source.length, expression.length); - const sourceHiragana = jpKatakanaToHiragana(source); - const expressionHiragana = jpKatakanaToHiragana(expression); - while (stemLength < shortest && sourceHiragana[stemLength] === expressionHiragana[stemLength]) { - ++stemLength; - } - const offset = source.length - stemLength; - - const stemExpression = source.substring(0, source.length - offset); - const stemReading = reading.substring( - 0, - offset === 0 ? reading.length : reading.length - expression.length + stemLength - ); - for (const segment of jpDistributeFurigana(stemExpression, stemReading)) { - output.push(segment); + return result; } - if (stemLength !== source.length) { - output.push({text: source.substring(stemLength)}); - } - return output; -} - -function jpConvertHalfWidthKanaToFullWidth(text, sourceMapping) { - let result = ''; - const hasSourceMapping = Array.isArray(sourceMapping); - - // This function is safe to use charCodeAt instead of codePointAt, since all - // the relevant characters are represented with a single UTF-16 character code. - for (let i = 0, ii = text.length; i < ii; ++i) { - const c = text[i]; - const mapping = JP_HALFWIDTH_KATAKANA_MAPPING.get(c); - if (typeof mapping !== 'string') { - result += c; - continue; - } + // Furigana distribution - let index = 0; - switch (text.charCodeAt(i + 1)) { - case 0xff9e: // dakuten - index = 1; - break; - case 0xff9f: // handakuten - index = 2; - break; + function distributeFurigana(expression, reading) { + const fallback = [{furigana: reading, text: expression}]; + if (!reading) { + return fallback; } - let c2 = mapping[index]; - if (index > 0) { - if (c2 === '-') { // invalid - index = 0; - c2 = mapping[0]; - } else { - ++i; + let isAmbiguous = false; + const segmentize = (reading2, groups) => { + if (groups.length === 0 || isAmbiguous) { + return []; } - } - if (hasSourceMapping && index > 0) { - index = result.length; - const v = sourceMapping.splice(index + 1, 1)[0]; - sourceMapping[index] += v; + const group = groups[0]; + if (group.mode === 'kana') { + if (convertKatakanaToHiragana(reading2).startsWith(convertKatakanaToHiragana(group.text))) { + const readingLeft = reading2.substring(group.text.length); + const segs = segmentize(readingLeft, groups.splice(1)); + if (segs) { + return [{text: group.text}].concat(segs); + } + } + } else { + let foundSegments = null; + for (let i = reading2.length; i >= group.text.length; --i) { + const readingUsed = reading2.substring(0, i); + const readingLeft = reading2.substring(i); + const segs = segmentize(readingLeft, groups.slice(1)); + if (segs) { + if (foundSegments !== null) { + // more than one way to segmentize the tail, mark as ambiguous + isAmbiguous = true; + return null; + } + foundSegments = [{text: group.text, furigana: readingUsed}].concat(segs); + } + // there is only one way to segmentize the last non-kana group + if (groups.length === 1) { + break; + } + } + return foundSegments; + } + }; + + const groups = []; + let modePrev = null; + for (const c of expression) { + const codePoint = c.codePointAt(0); + const modeCurr = isCodePointKanji(codePoint) || codePoint === ITERATION_MARK_CODE_POINT ? 'kanji' : 'kana'; + if (modeCurr === modePrev) { + groups[groups.length - 1].text += c; + } else { + groups.push({mode: modeCurr, text: c}); + modePrev = modeCurr; + } } - result += c2; - } - return result; -} - -function jpConvertNumericTofullWidth(text) { - let result = ''; - for (const char of text) { - let c = char.codePointAt(0); - if (c >= 0x30 && c <= 0x39) { // ['0', '9'] - c += 0xff10 - 0x30; // 0xff10 = '0' full width - result += String.fromCodePoint(c); - } else { - result += char; + const segments = segmentize(reading, groups); + if (segments && !isAmbiguous) { + return segments; } + return fallback; } - return result; -} -function jpConvertAlphabeticToKana(text, sourceMapping) { - let part = ''; - let result = ''; - const ii = text.length; + function distributeFuriganaInflected(expression, reading, source) { + const output = []; - if (sourceMapping.length === ii) { - sourceMapping.length = ii; - sourceMapping.fill(1); - } + let stemLength = 0; + const shortest = Math.min(source.length, expression.length); + const sourceHiragana = convertKatakanaToHiragana(source); + const expressionHiragana = convertKatakanaToHiragana(expression); + while (stemLength < shortest && sourceHiragana[stemLength] === expressionHiragana[stemLength]) { + ++stemLength; + } + const offset = source.length - stemLength; + + const stemExpression = source.substring(0, source.length - offset); + const stemReading = reading.substring( + 0, + offset === 0 ? reading.length : reading.length - expression.length + stemLength + ); + for (const segment of distributeFurigana(stemExpression, stemReading)) { + output.push(segment); + } - for (const char of text) { - // Note: 0x61 is the character code for 'a' - let c = char.codePointAt(0); - if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z'] - c += (0x61 - 0x41); - } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z'] - // NOP; c += (0x61 - 0x61); - } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth - c += (0x61 - 0xff21); - } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth - c += (0x61 - 0xff41); - } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash - c = 0x2d; // '-' - } else { - if (part.length > 0) { - result += jpToHiragana(part, sourceMapping, result.length); - part = ''; - } - result += char; - continue; + if (stemLength !== source.length) { + output.push({text: source.substring(stemLength)}); } - part += String.fromCodePoint(c); - } - if (part.length > 0) { - result += jpToHiragana(part, sourceMapping, result.length); + return output; } - return result; -} - -function jpToHiragana(text, sourceMapping, sourceMappingStart) { - const result = wanakana.toHiragana(text); - - // Generate source mapping - if (Array.isArray(sourceMapping)) { - if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; } - let i = 0; - let resultPos = 0; - const ii = text.length; - while (i < ii) { - // Find smallest matching substring - let iNext = i + 1; - let resultPosNext = result.length; - while (iNext < ii) { - const t = wanakana.toHiragana(text.substring(0, iNext)); - if (t === result.substring(0, t.length)) { - resultPosNext = t.length; - break; - } - ++iNext; - } - - // Merge characters - const removals = iNext - i - 1; - if (removals > 0) { - let sum = 0; - const vs = sourceMapping.splice(sourceMappingStart + 1, removals); - for (const v of vs) { sum += v; } - sourceMapping[sourceMappingStart] += sum; - } - ++sourceMappingStart; - // Empty elements - const additions = resultPosNext - resultPos - 1; - for (let j = 0; j < additions; ++j) { - sourceMapping.splice(sourceMappingStart, 0, 0); - ++sourceMappingStart; - } - - i = iNext; - resultPos = resultPosNext; - } - } - return result; -} + // Exports + + Object.assign(jp, { + convertKatakanaToHiragana, + convertHiraganaToKatakana, + convertToRomaji, + convertReading, + convertNumericToFullWidth, + convertHalfWidthKanaToFullWidth, + convertAlphabeticToKana, + distributeFurigana, + distributeFuriganaInflected + }); +})(); diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index bd0bbe0e..abb054d4 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -91,6 +91,15 @@ const profileOptionsVersionUpdates = [ if (utilStringHashCode(options.anki.fieldTemplates) === 1444379824) { options.anki.fieldTemplates = null; } + }, + (options) => { + // Version 13 changes: + // Default anki field tempaltes updated to include {document-title}. + let fieldTemplates = options.anki.fieldTemplates; + if (typeof fieldTemplates === 'string') { + fieldTemplates += '\n\n{{#*inline "document-title"}}\n {{~context.document.title~}}\n{{/inline}}'; + options.anki.fieldTemplates = fieldTemplates; + } } ]; @@ -124,7 +133,11 @@ function profileOptionsCreateDefaults() { customPopupCss: '', customPopupOuterCss: '', enableWanakana: true, - enableClipboardMonitor: false + enableClipboardMonitor: false, + showPitchAccentDownstepNotation: true, + showPitchAccentPositionNotation: true, + showPitchAccentGraph: false, + showIframePopupsInRootFrame: false }, audio: { diff --git a/ext/bg/js/search-frontend.js b/ext/bg/js/search-frontend.js index a470e873..f130a6fa 100644 --- a/ext/bg/js/search-frontend.js +++ b/ext/bg/js/search-frontend.js @@ -30,16 +30,12 @@ async function searchFrontendSetup() { const options = await apiOptionsGet(optionsContext); if (!options.scanning.enableOnSearchPage) { return; } - const ignoreNodes = ['.scan-disable', '.scan-disable *']; - if (!options.scanning.enableOnPopupExpressions) { - ignoreNodes.push('.source-text', '.source-text *'); - } - - window.frontendInitializationData = {depth: 1, ignoreNodes, proxy: false}; + window.frontendInitializationData = {depth: 1, proxy: false}; const scriptSrcs = [ '/mixed/js/text-scanner.js', '/fg/js/frontend-api-receiver.js', + '/fg/js/frame-offset-forwarder.js', '/fg/js/popup.js', '/fg/js/popup-proxy-host.js', '/fg/js/frontend.js', diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 06316ce2..9f59f2e5 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -27,12 +27,14 @@ */ class QueryParser extends TextScanner { - constructor(search) { - super(document.querySelector('#query-parser-content'), [], [], []); - this.search = search; + constructor({getOptionsContext, setContent, setSpinnerVisible}) { + super(document.querySelector('#query-parser-content'), [], []); + + this.getOptionsContext = getOptionsContext; + this.setContent = setContent; + this.setSpinnerVisible = setSpinnerVisible; this.parseResults = []; - this.selectedParser = null; this.queryParser = document.querySelector('#query-parser-content'); this.queryParserSelect = document.querySelector('#query-parser-select-container'); @@ -56,18 +58,18 @@ class QueryParser extends TextScanner { async onSearchSource(textSource, cause) { if (textSource === null) { return null; } - this.setTextSourceScanLength(textSource, this.search.options.scanning.length); + this.setTextSourceScanLength(textSource, this.options.scanning.length); const searchText = textSource.text(); if (searchText.length === 0) { return; } - const {definitions, length} = await apiTermsFind(searchText, {}, this.search.getOptionsContext()); + const {definitions, length} = await apiTermsFind(searchText, {}, this.getOptionsContext()); if (definitions.length === 0) { return null; } - const sentence = docSentenceExtract(textSource, this.search.options.anki.sentenceExt); + const sentence = docSentenceExtract(textSource, this.options.anki.sentenceExt); textSource.setEndOffset(length); - this.search.setContent('terms', {definitions, context: { + this.setContent('terms', {definitions, context: { focus: false, disableHistory: cause === 'mouse', sentence, @@ -79,9 +81,7 @@ class QueryParser extends TextScanner { onParserChange(e) { const selectedParser = e.target.value; - this.selectedParser = selectedParser; - apiOptionsSet({parsing: {selectedParser}}, this.search.getOptionsContext()); - this.renderParseResult(); + apiOptionsSet({parsing: {selectedParser}}, this.getOptionsContext()); } getMouseEventListeners() { @@ -112,23 +112,20 @@ class QueryParser extends TextScanner { refreshSelectedParser() { if (this.parseResults.length > 0) { - if (this.selectedParser === null) { - this.selectedParser = this.search.options.parsing.selectedParser; - } - if (this.selectedParser === null || !this.getParseResult()) { + if (!this.getParseResult()) { const selectedParser = this.parseResults[0].id; - this.selectedParser = selectedParser; - apiOptionsSet({parsing: {selectedParser}}, this.search.getOptionsContext()); + apiOptionsSet({parsing: {selectedParser}}, this.getOptionsContext()); } } } getParseResult() { - return this.parseResults.find((r) => r.id === this.selectedParser); + const {selectedParser} = this.options.parsing; + return this.parseResults.find((r) => r.id === selectedParser); } async setText(text) { - this.search.setSpinnerVisible(true); + this.setSpinnerVisible(true); this.setPreview(text); @@ -138,20 +135,20 @@ class QueryParser extends TextScanner { this.renderParserSelect(); this.renderParseResult(); - this.search.setSpinnerVisible(false); + this.setSpinnerVisible(false); } async parseText(text) { const results = []; - if (this.search.options.parsing.enableScanningParser) { + if (this.options.parsing.enableScanningParser) { results.push({ name: 'Scanning parser', id: 'scan', - parsedText: await apiTextParse(text, this.search.getOptionsContext()) + parsedText: await apiTextParse(text, this.getOptionsContext()) }); } - if (this.search.options.parsing.enableMecabParser) { - const mecabResults = await apiTextParseMecab(text, this.search.getOptionsContext()); + if (this.options.parsing.enableMecabParser) { + const mecabResults = await apiTextParseMecab(text, this.getOptionsContext()); for (const [mecabDictName, mecabDictResults] of mecabResults) { results.push({ name: `MeCab: ${mecabDictName}`, @@ -176,7 +173,8 @@ class QueryParser extends TextScanner { renderParserSelect() { this.queryParserSelect.textContent = ''; if (this.parseResults.length > 1) { - const select = this.queryParserGenerator.createParserSelect(this.parseResults, this.selectedParser); + const {selectedParser} = this.options.parsing; + const select = this.queryParserGenerator.createParserSelect(this.parseResults, selectedParser); select.addEventListener('change', this.onParserChange.bind(this)); this.queryParserSelect.appendChild(select); } diff --git a/ext/bg/js/search.js b/ext/bg/js/search.js index e2bdff73..9250fdde 100644 --- a/ext/bg/js/search.js +++ b/ext/bg/js/search.js @@ -29,12 +29,18 @@ class DisplaySearch extends Display { constructor() { super(document.querySelector('#spinner'), document.querySelector('#content')); + this._isPrepared = false; + this.optionsContext = { depth: 0, url: window.location.href }; - this.queryParser = new QueryParser(this); + this.queryParser = new QueryParser({ + getOptionsContext: this.getOptionsContext.bind(this), + setContent: this.setContent.bind(this), + setSpinnerVisible: this.setSpinnerVisible.bind(this) + }); this.search = document.querySelector('#search'); this.query = document.querySelector('#query'); @@ -112,6 +118,8 @@ class DisplaySearch extends Display { this.clipboardMonitor.on('change', this.onExternalSearchUpdate.bind(this)); this.updateSearchButton(); + + this._isPrepared = true; } catch (e) { this.onError(e); } @@ -247,15 +255,12 @@ class DisplaySearch extends Display { } onWanakanaEnableChange(e) { - const {queryParams: {query=''}} = parseUrl(window.location.href); const enableWanakana = e.target.checked; if (enableWanakana) { window.wanakana.bind(this.query); } else { window.wanakana.unbind(this.query); } - this.setQuery(query); - this.onSearchQueryUpdated(this.query.value, false); apiOptionsSet({general: {enableWanakana}}, this.getOptionsContext()); } @@ -278,19 +283,21 @@ class DisplaySearch extends Display { } } - async updateOptions(options) { - await super.updateOptions(options); + async updateOptions() { + await super.updateOptions(); this.queryParser.setOptions(this.options); + if (!this._isPrepared) { return; } + const query = this.query.value; + if (query) { + this.setQuery(query); + this.onSearchQueryUpdated(query, false); + } } isWanakanaEnabled() { return this.wanakanaEnable !== null && this.wanakanaEnable.checked; } - getOptionsContext() { - return this.optionsContext; - } - setQuery(query) { const interpretedQuery = this.isWanakanaEnabled() ? window.wanakana.toKana(query) : query; this.query.value = interpretedQuery; diff --git a/ext/bg/js/settings/anki-templates.js b/ext/bg/js/settings/anki-templates.js index c5222d30..e3852eb4 100644 --- a/ext/bg/js/settings/anki-templates.js +++ b/ext/bg/js/settings/anki-templates.js @@ -99,10 +99,15 @@ async function ankiTemplatesValidate(infoNode, field, mode, showSuccessResult, i const definition = await ankiTemplatesValidateGetDefinition(text, optionsContext); if (definition !== null) { const options = await apiOptionsGet(optionsContext); + const context = { + document: { + title: document.title + } + }; let templates = options.anki.fieldTemplates; if (typeof templates !== 'string') { templates = await apiGetDefaultAnkiFieldTemplates(); } const ankiNoteBuilder = new AnkiNoteBuilder({renderTemplate: apiTemplateRender}); - result = await ankiNoteBuilder.formatField(field, definition, mode, options, templates, exceptions); + result = await ankiNoteBuilder.formatField(field, definition, mode, context, options, templates, exceptions); } } catch (e) { exceptions.push(e); diff --git a/ext/bg/js/settings/anki.js b/ext/bg/js/settings/anki.js index b706cd1b..f2e1ca76 100644 --- a/ext/bg/js/settings/anki.js +++ b/ext/bg/js/settings/anki.js @@ -243,6 +243,7 @@ function ankiGetFieldMarkers(type) { 'cloze-prefix', 'cloze-suffix', 'dictionary', + 'document-title', 'expression', 'furigana', 'furigana-plain', @@ -258,6 +259,7 @@ function ankiGetFieldMarkers(type) { return [ 'character', 'dictionary', + 'document-title', 'glossary', 'kunyomi', 'onyomi', diff --git a/ext/bg/js/settings/dictionaries.js b/ext/bg/js/settings/dictionaries.js index 5e59cc3d..33ced3b9 100644 --- a/ext/bg/js/settings/dictionaries.js +++ b/ext/bg/js/settings/dictionaries.js @@ -199,11 +199,16 @@ class SettingsDictionaryEntryUI { this.allowSecondarySearchesCheckbox = this.content.querySelector('.dict-allow-secondary-searches'); this.priorityInput = this.content.querySelector('.dict-priority'); this.deleteButton = this.content.querySelector('.dict-delete-button'); + this.detailsToggleLink = this.content.querySelector('.dict-details-toggle-link'); + this.detailsContainer = this.content.querySelector('.dict-details'); + this.detailsTable = this.content.querySelector('.dict-details-table'); if (this.dictionaryInfo.version < 3) { this.content.querySelector('.dict-outdated').hidden = false; } + this.setupDetails(dictionaryInfo); + this.content.querySelector('.dict-title').textContent = this.dictionaryInfo.title; this.content.querySelector('.dict-revision').textContent = `rev.${this.dictionaryInfo.revision}`; this.content.querySelector('.dict-prefix-wildcard-searches-supported').checked = !!this.dictionaryInfo.prefixWildcardsSupported; @@ -214,6 +219,45 @@ class SettingsDictionaryEntryUI { this.eventListeners.addEventListener(this.allowSecondarySearchesCheckbox, 'change', this.onAllowSecondarySearchesChanged.bind(this), false); this.eventListeners.addEventListener(this.priorityInput, 'change', this.onPriorityChanged.bind(this), false); this.eventListeners.addEventListener(this.deleteButton, 'click', this.onDeleteButtonClicked.bind(this), false); + this.eventListeners.addEventListener(this.detailsToggleLink, 'click', this.onDetailsToggleLinkClicked.bind(this), false); + } + + setupDetails(dictionaryInfo) { + const targets = [ + ['Author', 'author'], + ['URL', 'url'], + ['Description', 'description'], + ['Attribution', 'attribution'] + ]; + + let count = 0; + for (const [label, key] of targets) { + const info = dictionaryInfo[key]; + if (typeof info !== 'string') { continue; } + + const n1 = document.createElement('div'); + n1.className = 'dict-details-entry'; + n1.dataset.type = key; + + const n2 = document.createElement('span'); + n2.className = 'dict-details-entry-label'; + n2.textContent = `${label}:`; + n1.appendChild(n2); + + const n3 = document.createElement('span'); + n3.className = 'dict-details-entry-info'; + n3.textContent = info; + n1.appendChild(n3); + + this.detailsTable.appendChild(n1); + + ++count; + } + + if (count === 0) { + this.detailsContainer.hidden = true; + this.detailsToggleLink.hidden = true; + } } cleanup() { @@ -318,6 +362,12 @@ class SettingsDictionaryEntryUI { document.querySelector('#dict-remove-modal-dict-name').textContent = title; $(n).modal('show'); } + + onDetailsToggleLinkClicked(e) { + e.preventDefault(); + + this.detailsContainer.hidden = !this.detailsContainer.hidden; + } } class SettingsDictionaryExtraUI { @@ -505,7 +555,7 @@ function dictionaryErrorsShow(errors) { if (errors !== null && errors.length > 0) { const uniqueErrors = new Map(); for (let e of errors) { - console.error(e); + logError(e); e = dictionaryErrorToString(e); let count = uniqueErrors.get(e); if (typeof count === 'undefined') { @@ -643,9 +693,9 @@ async function onDictionaryImport(e) { await settingsSaveOptions(); if (errors.length > 0) { - errors.push(...errors); - errors.push(`Dictionary may not have been imported properly: ${errors.length} error${errors.length === 1 ? '' : 's'} reported.`); - dictionaryErrorsShow(errors); + const errors2 = errors.map((error) => jsonToError(error)); + errors2.push(`Dictionary may not have been imported properly: ${errors2.length} error${errors2.length === 1 ? '' : 's'} reported.`); + dictionaryErrorsShow(errors2); } onDatabaseUpdated(); diff --git a/ext/bg/js/settings/main.js b/ext/bg/js/settings/main.js index ebc443df..1653ee35 100644 --- a/ext/bg/js/settings/main.js +++ b/ext/bg/js/settings/main.js @@ -84,6 +84,10 @@ async function formRead(options) { options.general.popupScalingFactor = parseFloat($('#popup-scaling-factor').val()); options.general.popupScaleRelativeToPageZoom = $('#popup-scale-relative-to-page-zoom').prop('checked'); options.general.popupScaleRelativeToVisualViewport = $('#popup-scale-relative-to-visual-viewport').prop('checked'); + options.general.showPitchAccentDownstepNotation = $('#show-pitch-accent-downstep-notation').prop('checked'); + options.general.showPitchAccentPositionNotation = $('#show-pitch-accent-position-notation').prop('checked'); + options.general.showPitchAccentGraph = $('#show-pitch-accent-graph').prop('checked'); + options.general.showIframePopupsInRootFrame = $('#show-iframe-popups-in-root-frame').prop('checked'); options.general.popupTheme = $('#popup-theme').val(); options.general.popupOuterTheme = $('#popup-outer-theme').val(); options.general.customPopupCss = $('#custom-popup-css').val(); @@ -161,6 +165,10 @@ async function formWrite(options) { $('#popup-scaling-factor').val(options.general.popupScalingFactor); $('#popup-scale-relative-to-page-zoom').prop('checked', options.general.popupScaleRelativeToPageZoom); $('#popup-scale-relative-to-visual-viewport').prop('checked', options.general.popupScaleRelativeToVisualViewport); + $('#show-pitch-accent-downstep-notation').prop('checked', options.general.showPitchAccentDownstepNotation); + $('#show-pitch-accent-position-notation').prop('checked', options.general.showPitchAccentPositionNotation); + $('#show-pitch-accent-graph').prop('checked', options.general.showPitchAccentGraph); + $('#show-iframe-popups-in-root-frame').prop('checked', options.general.showIframePopupsInRootFrame); $('#popup-theme').val(options.general.popupTheme); $('#popup-outer-theme').val(options.general.popupOuterTheme); $('#custom-popup-css').val(options.general.customPopupCss); diff --git a/ext/bg/js/text-source-map.js b/ext/bg/js/text-source-map.js new file mode 100644 index 00000000..24970978 --- /dev/null +++ b/ext/bg/js/text-source-map.js @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2020 Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +class TextSourceMap { + constructor(source, mapping=null) { + this._source = source; + this._mapping = (Array.isArray(mapping) ? TextSourceMap._normalizeMapping(mapping) : null); + } + + get source() { + return this._source; + } + + equals(other) { + if (this === other) { + return true; + } + + const source = this._source; + if (!(other instanceof TextSourceMap && source === other._source)) { + return false; + } + + let mapping = this._mapping; + let otherMapping = other._mapping; + if (mapping === null) { + if (otherMapping === null) { + return true; + } + mapping = TextSourceMap._createMapping(source); + } else if (otherMapping === null) { + otherMapping = TextSourceMap._createMapping(source); + } + + const mappingLength = mapping.length; + if (mappingLength !== otherMapping.length) { + return false; + } + + for (let i = 0; i < mappingLength; ++i) { + if (mapping[i] !== otherMapping[i]) { + return false; + } + } + + return true; + } + + getSourceLength(finalLength) { + const mapping = this._mapping; + if (mapping === null) { + return finalLength; + } + + let sourceLength = 0; + for (let i = 0; i < finalLength; ++i) { + sourceLength += mapping[i]; + } + return sourceLength; + } + + combine(index, count) { + if (count <= 0) { return; } + + if (this._mapping === null) { + this._mapping = TextSourceMap._createMapping(this._source); + } + + let sum = this._mapping[index]; + const parts = this._mapping.splice(index + 1, count); + for (const part of parts) { + sum += part; + } + this._mapping[index] = sum; + } + + insert(index, ...items) { + if (this._mapping === null) { + this._mapping = TextSourceMap._createMapping(this._source); + } + + this._mapping.splice(index, 0, ...items); + } + + static _createMapping(text) { + return new Array(text.length).fill(1); + } + + static _normalizeMapping(mapping) { + const result = []; + for (const value of mapping) { + result.push( + (typeof value === 'number' && Number.isFinite(value)) ? + Math.floor(value) : + 0 + ); + } + return result; + } +} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 25da9bf0..27f91c05 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -17,8 +17,8 @@ */ /* global - * Database * Deinflector + * TextSourceMap * dictEnabledSet * dictTagBuildSource * dictTagSanitize @@ -29,34 +29,21 @@ * dictTermsMergeBySequence * dictTermsSort * dictTermsUndupe - * jpConvertAlphabeticToKana - * jpConvertHalfWidthKanaToFullWidth - * jpConvertNumericTofullWidth - * jpDistributeFurigana - * jpHiraganaToKatakana - * jpIsCodePointJapanese - * jpKatakanaToHiragana + * jp * requestJson */ class Translator { - constructor() { - this.database = null; + constructor(database) { + this.database = database; this.deinflector = null; this.tagCache = new Map(); } async prepare() { - if (!this.database) { - this.database = new Database(); - await this.database.prepare(); - } - - if (!this.deinflector) { - const url = chrome.runtime.getURL('/bg/lang/deinflect.json'); - const reasons = await requestJson(url, 'GET'); - this.deinflector = new Deinflector(reasons); - } + const url = chrome.runtime.getURL('/bg/lang/deinflect.json'); + const reasons = await requestJson(url, 'GET'); + this.deinflector = new Deinflector(reasons); } async purgeDatabase() { @@ -275,7 +262,7 @@ class Translator { const termTags = await this.expandTags(definition.termTags, definition.dictionary); const {expression, reading} = definition; - const furiganaSegments = jpDistributeFurigana(expression, reading); + const furiganaSegments = jp.distributeFurigana(expression, reading); definitions.push({ source: deinflection.source, @@ -373,23 +360,21 @@ class Translator { const used = new Set(); for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { let text2 = text; - let sourceMapping = null; + const sourceMap = new TextSourceMap(text2); if (halfWidth) { - if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jpConvertHalfWidthKanaToFullWidth(text2, sourceMapping); + text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap); } if (numeric) { - text2 = jpConvertNumericTofullWidth(text2); + text2 = jp.convertNumericToFullWidth(text2); } if (alphabetic) { - if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jpConvertAlphabeticToKana(text2, sourceMapping); + text2 = jp.convertAlphabeticToKana(text2, sourceMap); } if (katakana) { - text2 = jpHiraganaToKatakana(text2); + text2 = jp.convertHiraganaToKatakana(text2); } if (hiragana) { - text2 = jpKatakanaToHiragana(text2); + text2 = jp.convertKatakanaToHiragana(text2); } for (let i = text2.length; i > 0; --i) { @@ -397,7 +382,7 @@ class Translator { if (used.has(text2Substring)) { break; } used.add(text2Substring); for (const deinflection of this.deinflector.deinflect(text2Substring)) { - deinflection.rawSource = Translator.getDeinflectionRawSource(text, i, sourceMapping); + deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); deinflections.push(deinflection); } } @@ -413,25 +398,6 @@ class Translator { } } - static getDeinflectionRawSource(source, length, sourceMapping) { - if (sourceMapping === null) { - return source.substring(0, length); - } - - let result = ''; - let index = 0; - for (let i = 0; i < length; ++i) { - const c = sourceMapping[i]; - result += source.substring(index, index + c); - index += c; - } - return result; - } - - static createTextSourceMapping(text) { - return new Array(text.length).fill(1); - } - async findKanji(text, options) { const dictionaries = dictEnabledSet(options); const kanjiUnique = new Set(); @@ -496,6 +462,7 @@ class Translator { // New data term.frequencies = []; + term.pitches = []; } const metas = await this.database.findTermMetaBulk(expressionsUnique, dictionaries); @@ -506,6 +473,13 @@ class Translator { term.frequencies.push({expression, frequency: data, dictionary}); } break; + case 'pitch': + for (const term of termsUnique[index]) { + const pitchData = await this.getPitchData(expression, data, dictionary, term); + if (pitchData === null) { continue; } + term.pitches.push(pitchData); + } + break; } } } @@ -589,8 +563,22 @@ class Translator { return tagMetaList; } + async getPitchData(expression, data, dictionary, term) { + const reading = data.reading; + const termReading = term.reading || expression; + if (reading !== termReading) { return null; } + + const pitches = []; + for (let {position, tags} of data.pitches) { + tags = Array.isArray(tags) ? await this.getTagMetaList(tags, dictionary) : []; + pitches.push({position, tags}); + } + + return {reading, pitches, dictionary}; + } + static createExpression(expression, reading, termTags=null, termFrequency=null) { - const furiganaSegments = jpDistributeFurigana(expression, reading); + const furiganaSegments = jp.distributeFurigana(expression, reading); return { expression, reading, @@ -639,7 +627,7 @@ class Translator { if (!options.scanning.alphanumeric) { let newText = ''; for (const c of text) { - if (!jpIsCodePointJapanese(c.codePointAt(0))) { + if (!jp.isCodePointJapanese(c.codePointAt(0))) { break; } newText += c; diff --git a/ext/bg/js/util.js b/ext/bg/js/util.js index 79c6af06..a7ed4a34 100644 --- a/ext/bg/js/util.js +++ b/ext/bg/js/util.js @@ -118,7 +118,7 @@ async function utilDatabaseDeleteDictionary(dictionaryName, onProgress) { async function utilDatabaseImport(data, onProgress, details) { data = await utilReadFile(data); - return utilIsolate(await utilBackend().translator.database.importDictionary( + return utilIsolate(await utilBackend().importDictionary( utilBackgroundIsolate(data), utilBackgroundFunctionIsolate(onProgress), utilBackgroundIsolate(details) |