diff options
author | Alex Yatskov <FooSoft@users.noreply.github.com> | 2019-10-20 11:04:17 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-20 11:04:17 -0700 |
commit | 65923238556212fef2d7ed7a156373c88382ffd2 (patch) | |
tree | 54272c663f1992e4a1117b5e5a62dc3f9f911dcc /ext/bg | |
parent | 03c72e94290fdb1e77f1ca247efb526a0dc9f44e (diff) | |
parent | eea95eb130e8c444b93353f68f417ad2390795d7 (diff) |
Merge pull request #259 from toasted-nutbread/database-optimizations2
Database optimizations part 2
Diffstat (limited to 'ext/bg')
-rw-r--r-- | ext/bg/js/database.js | 167 | ||||
-rw-r--r-- | ext/bg/js/translator.js | 306 | ||||
-rw-r--r-- | ext/bg/js/util.js | 2 |
3 files changed, 257 insertions, 218 deletions
diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js index 771a71c9..9f477b24 100644 --- a/ext/bg/js/database.js +++ b/ext/bg/js/database.js @@ -20,7 +20,6 @@ class Database { constructor() { this.db = null; - this.tagCache = {}; } async prepare() { @@ -53,33 +52,20 @@ class Database { this.db.close(); await this.db.delete(); this.db = null; - this.tagCache = {}; await this.prepare(); } - async findTerms(term, titles) { + async findTermsBulk(termList, titles) { this.validate(); - const results = []; - await this.db.terms.where('expression').equals(term).or('reading').equals(term).each(row => { - if (titles.includes(row.dictionary)) { - results.push(Database.createTerm(row)); - } - }); - - return results; - } - - async findTermsBulk(terms, titles) { const promises = []; const visited = {}; const results = []; - const createResult = Database.createTerm; const processRow = (row, index) => { if (titles.includes(row.dictionary) && !visited.hasOwnProperty(row.id)) { visited[row.id] = true; - results.push(createResult(row, index)); + results.push(Database.createTerm(row, index)); } }; @@ -89,8 +75,8 @@ class Database { const dbIndex1 = dbTerms.index('expression'); const dbIndex2 = dbTerms.index('reading'); - for (let i = 0; i < terms.length; ++i) { - const only = IDBKeyRange.only(terms[i]); + for (let i = 0; i < termList.length; ++i) { + const only = IDBKeyRange.only(termList[i]); promises.push( Database.getAll(dbIndex1, only, i, processRow), Database.getAll(dbIndex2, only, i, processRow) @@ -102,66 +88,50 @@ class Database { return results; } - async findTermsExact(term, reading, titles) { + async findTermsExactBulk(termList, readingList, titles) { this.validate(); + const promises = []; const results = []; - await this.db.terms.where('expression').equals(term).each(row => { - if (row.reading === reading && titles.includes(row.dictionary)) { - results.push(Database.createTerm(row)); + const processRow = (row, index) => { + if (row.reading === readingList[index] && titles.includes(row.dictionary)) { + results.push(Database.createTerm(row, index)); } - }); + }; - return results; - } + const db = this.db.backendDB(); + const dbTransaction = db.transaction(['terms'], 'readonly'); + const dbTerms = dbTransaction.objectStore('terms'); + const dbIndex = dbTerms.index('expression'); - async findTermsBySequence(sequence, mainDictionary) { - this.validate(); + for (let i = 0; i < termList.length; ++i) { + const only = IDBKeyRange.only(termList[i]); + promises.push(Database.getAll(dbIndex, only, i, processRow)); + } - const results = []; - await this.db.terms.where('sequence').equals(sequence).each(row => { - if (row.dictionary === mainDictionary) { - results.push(Database.createTerm(row)); - } - }); + await Promise.all(promises); return results; } - async findTermMeta(term, titles) { + async findTermsBySequenceBulk(sequenceList, mainDictionary) { this.validate(); - const results = []; - await this.db.termMeta.where('expression').equals(term).each(row => { - if (titles.includes(row.dictionary)) { - results.push({ - mode: row.mode, - data: row.data, - dictionary: row.dictionary - }); - } - }); - - return results; - } - - async findTermMetaBulk(terms, titles) { const promises = []; const results = []; - const createResult = Database.createTermMeta; const processRow = (row, index) => { - if (titles.includes(row.dictionary)) { - results.push(createResult(row, index)); + if (row.dictionary === mainDictionary) { + results.push(Database.createTerm(row, index)); } }; const db = this.db.backendDB(); - const dbTransaction = db.transaction(['termMeta'], 'readonly'); - const dbTerms = dbTransaction.objectStore('termMeta'); - const dbIndex = dbTerms.index('expression'); + const dbTransaction = db.transaction(['terms'], 'readonly'); + const dbTerms = dbTransaction.objectStore('terms'); + const dbIndex = dbTerms.index('sequence'); - for (let i = 0; i < terms.length; ++i) { - const only = IDBKeyRange.only(terms[i]); + for (let i = 0; i < sequenceList.length; ++i) { + const only = IDBKeyRange.only(sequenceList[i]); promises.push(Database.getAll(dbIndex, only, i, processRow)); } @@ -170,67 +140,59 @@ class Database { return results; } - async findKanji(kanji, titles) { - this.validate(); + async findTermMetaBulk(termList, titles) { + return this.findGenericBulk('termMeta', 'expression', termList, titles, Database.createMeta); + } - const results = []; - await this.db.kanji.where('character').equals(kanji).each(row => { - if (titles.includes(row.dictionary)) { - results.push({ - character: row.character, - onyomi: dictFieldSplit(row.onyomi), - kunyomi: dictFieldSplit(row.kunyomi), - tags: dictFieldSplit(row.tags), - glossary: row.meanings, - stats: row.stats, - dictionary: row.dictionary - }); - } - }); + async findKanjiBulk(kanjiList, titles) { + return this.findGenericBulk('kanji', 'character', kanjiList, titles, Database.createKanji); + } - return results; + async findKanjiMetaBulk(kanjiList, titles) { + return this.findGenericBulk('kanjiMeta', 'character', kanjiList, titles, Database.createMeta); } - async findKanjiMeta(kanji, titles) { + async findGenericBulk(tableName, indexName, indexValueList, titles, createResult) { this.validate(); + const promises = []; const results = []; - await this.db.kanjiMeta.where('character').equals(kanji).each(row => { + const processRow = (row, index) => { if (titles.includes(row.dictionary)) { - results.push({ - mode: row.mode, - data: row.data, - dictionary: row.dictionary - }); + results.push(createResult(row, index)); } - }); + }; - return results; - } + const db = this.db.backendDB(); + const dbTransaction = db.transaction([tableName], 'readonly'); + const dbTerms = dbTransaction.objectStore(tableName); + const dbIndex = dbTerms.index(indexName); - findTagForTitleCached(name, title) { - if (this.tagCache.hasOwnProperty(title)) { - const cache = this.tagCache[title]; - if (cache.hasOwnProperty(name)) { - return cache[name]; - } + for (let i = 0; i < indexValueList.length; ++i) { + const only = IDBKeyRange.only(indexValueList[i]); + promises.push(Database.getAll(dbIndex, only, i, processRow)); } + + await Promise.all(promises); + + return results; } async findTagForTitle(name, title) { this.validate(); - const cache = (this.tagCache.hasOwnProperty(title) ? this.tagCache[title] : (this.tagCache[title] = {})); - let result = null; - await this.db.tagMeta.where('name').equals(name).each(row => { + const db = this.db.backendDB(); + const dbTransaction = db.transaction(['tagMeta'], 'readonly'); + const dbTerms = dbTransaction.objectStore('tagMeta'); + const dbIndex = dbTerms.index('name'); + const only = IDBKeyRange.only(name); + await Database.getAll(dbIndex, only, null, row => { if (title === row.dictionary) { result = row; } }); - cache[name] = result; - return result; } @@ -522,7 +484,20 @@ class Database { }; } - static createTermMeta(row, index) { + static createKanji(row, index) { + return { + index, + character: row.character, + onyomi: dictFieldSplit(row.onyomi), + kunyomi: dictFieldSplit(row.kunyomi), + tags: dictFieldSplit(row.tags), + glossary: row.meanings, + stats: row.stats, + dictionary: row.dictionary + }; + } + + static createMeta(row, index) { return { index, mode: row.mode, diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 601ee30c..ee012d96 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -21,6 +21,7 @@ class Translator { constructor() { this.database = null; this.deinflector = null; + this.tagCache = {}; } async prepare() { @@ -36,6 +37,11 @@ class Translator { } } + async purgeDatabase() { + this.tagCache = {}; + await this.database.purge(); + } + async findTermsGrouped(text, dictionaries, alphanumeric, options) { const titles = Object.keys(dictionaries); const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); @@ -52,94 +58,121 @@ class Translator { return {length, definitions: definitionsGrouped}; } - async findTermsMerged(text, dictionaries, alphanumeric, options) { - const secondarySearchTitles = Object.keys(options.dictionaries).filter(dict => options.dictionaries[dict].allowSecondarySearches); - const titles = Object.keys(dictionaries); - const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); + async getSequencedDefinitions(definitions, mainDictionary) { + const definitionsBySequence = dictTermsMergeBySequence(definitions, mainDictionary); + const defaultDefinitions = definitionsBySequence['-1']; - const definitionsBySequence = dictTermsMergeBySequence(definitions, options.general.mainDictionary); + const sequenceList = Object.keys(definitionsBySequence).map(v => Number(v)).filter(v => v >= 0); + const sequencedDefinitions = sequenceList.map((key) => ({ + definitions: definitionsBySequence[key], + rawDefinitions: [] + })); - const definitionsMerged = []; - const mergedByTermIndices = new Set(); - for (const sequence in definitionsBySequence) { - if (sequence < 0) { - continue; - } + for (const definition of await this.database.findTermsBySequenceBulk(sequenceList, mainDictionary)) { + sequencedDefinitions[definition.index].rawDefinitions.push(definition); + } - const result = definitionsBySequence[sequence]; + return {sequencedDefinitions, defaultDefinitions}; + } - const rawDefinitionsBySequence = await this.database.findTermsBySequence(Number(sequence), options.general.mainDictionary); + async getMergedSecondarySearchResults(text, expressionsMap, secondarySearchTitles) { + if (secondarySearchTitles.length === 0) { + return []; + } - for (const definition of rawDefinitionsBySequence) { - const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); - definitionTags.push(dictTagBuildSource(definition.dictionary)); - definition.definitionTags = definitionTags; - const termTags = await this.expandTags(definition.termTags, definition.dictionary); - definition.termTags = termTags; + const expressionList = []; + const readingList = []; + for (const expression of expressionsMap.keys()) { + if (expression === text) { continue; } + for (const reading of expressionsMap.get(expression).keys()) { + expressionList.push(expression); + readingList.push(reading); } + } - const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence); - - const secondarySearchResults = []; - if (secondarySearchTitles.length > 0) { - for (const expression of result.expressions.keys()) { - if (expression === text) { - continue; - } - - for (const reading of result.expressions.get(expression).keys()) { - for (const definition of await this.database.findTermsExact(expression, reading, secondarySearchTitles)) { - const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); - definitionTags.push(dictTagBuildSource(definition.dictionary)); - definition.definitionTags = definitionTags; - const termTags = await this.expandTags(definition.termTags, definition.dictionary); - definition.termTags = termTags; - secondarySearchResults.push(definition); - } - } - } - } + const definitions = await this.database.findTermsExactBulk(expressionList, readingList, secondarySearchTitles); + for (const definition of definitions) { + const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); + definitionTags.push(dictTagBuildSource(definition.dictionary)); + definition.definitionTags = definitionTags; + const termTags = await this.expandTags(definition.termTags, definition.dictionary); + definition.termTags = termTags; + } - dictTermsMergeByGloss(result, definitionsBySequence['-1'].concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices); + if (definitions.length > 1) { + definitions.sort((a, b) => a.index - b.index); + } - for (const gloss in definitionsByGloss) { - const definition = definitionsByGloss[gloss]; - dictTagsSort(definition.definitionTags); - result.definitions.push(definition); - } + return definitions; + } - dictTermsSort(result.definitions, dictionaries); - - const expressions = []; - for (const expression of result.expressions.keys()) { - for (const reading of result.expressions.get(expression).keys()) { - const termTags = result.expressions.get(expression).get(reading); - expressions.push({ - expression: expression, - reading: reading, - termTags: dictTagsSort(termTags), - termFrequency: (score => { - if (score > 0) { - return 'popular'; - } else if (score < 0) { - return 'rare'; - } else { - return 'normal'; - } - })(termTags.map(tag => tag.score).reduce((p, v) => p + v, 0)) - }); - } + async getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchTitles, mergedByTermIndices) { + const result = sequencedDefinition.definitions; + const rawDefinitionsBySequence = sequencedDefinition.rawDefinitions; + + for (const definition of rawDefinitionsBySequence) { + const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); + definitionTags.push(dictTagBuildSource(definition.dictionary)); + definition.definitionTags = definitionTags; + const termTags = await this.expandTags(definition.termTags, definition.dictionary); + definition.termTags = termTags; + } + + const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence); + const secondarySearchResults = await this.getMergedSecondarySearchResults(text, result.expressions, secondarySearchTitles); + + dictTermsMergeByGloss(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices); + + for (const gloss in definitionsByGloss) { + const definition = definitionsByGloss[gloss]; + dictTagsSort(definition.definitionTags); + result.definitions.push(definition); + } + + dictTermsSort(result.definitions, dictionaries); + + const expressions = []; + for (const expression of result.expressions.keys()) { + for (const reading of result.expressions.get(expression).keys()) { + const termTags = result.expressions.get(expression).get(reading); + const score = termTags.map(tag => tag.score).reduce((p, v) => p + v, 0); + expressions.push({ + expression: expression, + reading: reading, + termTags: dictTagsSort(termTags), + termFrequency: Translator.scoreToTermFrequency(score) + }); } + } - result.expressions = expressions; + result.expressions = expressions; + result.expression = Array.from(result.expression); + result.reading = Array.from(result.reading); - result.expression = Array.from(result.expression); - result.reading = Array.from(result.reading); + return result; + } + + async findTermsMerged(text, dictionaries, alphanumeric, options) { + const secondarySearchTitles = Object.keys(options.dictionaries).filter(dict => options.dictionaries[dict].allowSecondarySearches); + const titles = Object.keys(dictionaries); + const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); + const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary); + const definitionsMerged = []; + const mergedByTermIndices = new Set(); + for (const sequencedDefinition of sequencedDefinitions) { + const result = await this.getMergedDefinition( + text, + dictionaries, + sequencedDefinition, + defaultDefinitions, + secondarySearchTitles, + mergedByTermIndices + ); definitionsMerged.push(result); } - const strayDefinitions = definitionsBySequence['-1'].filter((definition, index) => !mergedByTermIndices.has(index)); + const strayDefinitions = defaultDefinitions.filter((definition, index) => !mergedByTermIndices.has(index)); for (const groupedDefinition of dictTermsGroup(strayDefinitions, dictionaries)) { groupedDefinition.expressions = [{expression: groupedDefinition.expression, reading: groupedDefinition.reading}]; definitionsMerged.push(groupedDefinition); @@ -277,33 +310,44 @@ class Translator { } async findKanji(text, dictionaries) { - let definitions = []; - const processed = {}; const titles = Object.keys(dictionaries); + const kanjiUnique = {}; + const kanjiList = []; for (const c of text) { - if (!processed[c]) { - definitions.push(...await this.database.findKanji(c, titles)); - processed[c] = true; + if (!kanjiUnique.hasOwnProperty(c)) { + kanjiList.push(c); + kanjiUnique[c] = true; } } + const definitions = await this.database.findKanjiBulk(kanjiList, titles); + if (definitions.length === 0) { + return definitions; + } + + if (definitions.length > 1) { + definitions.sort((a, b) => a.index - b.index); + } + + const kanjiList2 = []; for (const definition of definitions) { + kanjiList2.push(definition.character); + const tags = await this.expandTags(definition.tags, definition.dictionary); tags.push(dictTagBuildSource(definition.dictionary)); definition.tags = dictTagsSort(tags); definition.stats = await this.expandStats(definition.stats, definition.dictionary); - definition.frequencies = []; - for (const meta of await this.database.findKanjiMeta(definition.character, titles)) { - if (meta.mode === 'freq') { - definition.frequencies.push({ - character: meta.character, - frequency: meta.data, - dictionary: meta.dictionary - }); - } - } + } + + for (const meta of await this.database.findKanjiMetaBulk(kanjiList2, titles)) { + if (meta.mode !== 'freq') { continue; } + definitions[meta.index].frequencies.push({ + character: meta.character, + frequency: meta.data, + dictionary: meta.dictionary + }); } return definitions; @@ -359,56 +403,76 @@ class Translator { } async expandTags(names, title) { - const tags = []; - for (const name of names) { - const base = Translator.getNameBase(name); - let meta = this.database.findTagForTitleCached(base, title); - if (typeof meta === 'undefined') { - meta = await this.database.findTagForTitle(base, title); - } - - const tag = Object.assign({}, meta !== null ? meta : {}, {name}); - - tags.push(dictTagSanitize(tag)); - } - - return tags; + const tagMetaList = await this.getTagMetaList(names, title); + return tagMetaList.map((meta, index) => { + const name = names[index]; + const tag = dictTagSanitize(Object.assign({}, meta !== null ? meta : {}, {name})); + return dictTagSanitize(tag); + }); } async expandStats(items, title) { - const stats = {}; - for (const name in items) { - const base = Translator.getNameBase(name); - let meta = this.database.findTagForTitleCached(base, title); - if (typeof meta === 'undefined') { - meta = await this.database.findTagForTitle(base, title); - if (meta === null) { - continue; - } - } + const names = Object.keys(items); + const tagMetaList = await this.getTagMetaList(names, title); - const group = stats[meta.category] = stats[meta.category] || []; + const stats = {}; + for (let i = 0; i < names.length; ++i) { + const name = names[i]; + const meta = tagMetaList[i]; + if (meta === null) { continue; } + + const category = meta.category; + const group = ( + stats.hasOwnProperty(category) ? + stats[category] : + (stats[category] = []) + ); const stat = Object.assign({}, meta, {name, value: items[name]}); - group.push(dictTagSanitize(stat)); } + const sortCompare = (a, b) => a.notes - b.notes; for (const category in stats) { - stats[category].sort((a, b) => { - if (a.notes < b.notes) { - return -1; - } else if (a.notes > b.notes) { - return 1; - } else { - return 0; - } - }); + stats[category].sort(sortCompare); } return stats; } + async getTagMetaList(names, title) { + const tagMetaList = []; + const cache = ( + this.tagCache.hasOwnProperty(title) ? + this.tagCache[title] : + (this.tagCache[title] = {}) + ); + + for (const name of names) { + const base = Translator.getNameBase(name); + + if (cache.hasOwnProperty(base)) { + tagMetaList.push(cache[base]); + } else { + const tagMeta = await this.database.findTagForTitle(base, title); + cache[base] = tagMeta; + tagMetaList.push(tagMeta); + } + } + + return tagMetaList; + } + + static scoreToTermFrequency(score) { + if (score > 0) { + return 'popular'; + } else if (score < 0) { + return 'rare'; + } else { + return 'normal'; + } + } + static getNameBase(name) { const pos = name.indexOf(':'); return (pos >= 0 ? name.substr(0, pos) : name); diff --git a/ext/bg/js/util.js b/ext/bg/js/util.js index 73a8396f..1ca0833b 100644 --- a/ext/bg/js/util.js +++ b/ext/bg/js/util.js @@ -89,7 +89,7 @@ function utilAnkiGetModelFieldNames(modelName) { } function utilDatabasePurge() { - return utilBackend().translator.database.purge(); + return utilBackend().translator.purgeDatabase(); } async function utilDatabaseImport(data, progress, exceptions) { |