summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author: Alex Yatskov <FooSoft@users.noreply.github.com> 2019-10-20 11:04:17 -0700
committer: GitHub <noreply@github.com> 2019-10-20 11:04:17 -0700
commit: 65923238556212fef2d7ed7a156373c88382ffd2 (patch)
tree: 54272c663f1992e4a1117b5e5a62dc3f9f911dcc /ext
parent: 03c72e94290fdb1e77f1ca247efb526a0dc9f44e (diff)
parent: eea95eb130e8c444b93353f68f417ad2390795d7 (diff)
Merge pull request #259 from toasted-nutbread/database-optimizations2
Database optimizations part 2
Diffstat (limited to 'ext')
-rw-r--r-- ext/bg/js/database.js 167
-rw-r--r-- ext/bg/js/translator.js 306
-rw-r--r-- ext/bg/js/util.js 2
3 files changed, 257 insertions, 218 deletions
diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js
index 771a71c9..9f477b24 100644
--- a/ext/bg/js/database.js
+++ b/ext/bg/js/database.js
@@ -20,7 +20,6 @@
class Database {
constructor() {
this.db = null;
- this.tagCache = {};
}
async prepare() {
@@ -53,33 +52,20 @@ class Database {
this.db.close();
await this.db.delete();
this.db = null;
- this.tagCache = {};
await this.prepare();
}
- async findTerms(term, titles) {
+ async findTermsBulk(termList, titles) {
this.validate();
- const results = [];
- await this.db.terms.where('expression').equals(term).or('reading').equals(term).each(row => {
- if (titles.includes(row.dictionary)) {
- results.push(Database.createTerm(row));
- }
- });
-
- return results;
- }
-
- async findTermsBulk(terms, titles) {
const promises = [];
const visited = {};
const results = [];
- const createResult = Database.createTerm;
const processRow = (row, index) => {
if (titles.includes(row.dictionary) && !visited.hasOwnProperty(row.id)) {
visited[row.id] = true;
- results.push(createResult(row, index));
+ results.push(Database.createTerm(row, index));
}
};
@@ -89,8 +75,8 @@ class Database {
const dbIndex1 = dbTerms.index('expression');
const dbIndex2 = dbTerms.index('reading');
- for (let i = 0; i < terms.length; ++i) {
- const only = IDBKeyRange.only(terms[i]);
+ for (let i = 0; i < termList.length; ++i) {
+ const only = IDBKeyRange.only(termList[i]);
promises.push(
Database.getAll(dbIndex1, only, i, processRow),
Database.getAll(dbIndex2, only, i, processRow)
@@ -102,66 +88,50 @@ class Database {
return results;
}
- async findTermsExact(term, reading, titles) {
+ async findTermsExactBulk(termList, readingList, titles) {
this.validate();
+ const promises = [];
const results = [];
- await this.db.terms.where('expression').equals(term).each(row => {
- if (row.reading === reading && titles.includes(row.dictionary)) {
- results.push(Database.createTerm(row));
+ const processRow = (row, index) => {
+ if (row.reading === readingList[index] && titles.includes(row.dictionary)) {
+ results.push(Database.createTerm(row, index));
}
- });
+ };
- return results;
- }
+ const db = this.db.backendDB();
+ const dbTransaction = db.transaction(['terms'], 'readonly');
+ const dbTerms = dbTransaction.objectStore('terms');
+ const dbIndex = dbTerms.index('expression');
- async findTermsBySequence(sequence, mainDictionary) {
- this.validate();
+ for (let i = 0; i < termList.length; ++i) {
+ const only = IDBKeyRange.only(termList[i]);
+ promises.push(Database.getAll(dbIndex, only, i, processRow));
+ }
- const results = [];
- await this.db.terms.where('sequence').equals(sequence).each(row => {
- if (row.dictionary === mainDictionary) {
- results.push(Database.createTerm(row));
- }
- });
+ await Promise.all(promises);
return results;
}
- async findTermMeta(term, titles) {
+ async findTermsBySequenceBulk(sequenceList, mainDictionary) {
this.validate();
- const results = [];
- await this.db.termMeta.where('expression').equals(term).each(row => {
- if (titles.includes(row.dictionary)) {
- results.push({
- mode: row.mode,
- data: row.data,
- dictionary: row.dictionary
- });
- }
- });
-
- return results;
- }
-
- async findTermMetaBulk(terms, titles) {
const promises = [];
const results = [];
- const createResult = Database.createTermMeta;
const processRow = (row, index) => {
- if (titles.includes(row.dictionary)) {
- results.push(createResult(row, index));
+ if (row.dictionary === mainDictionary) {
+ results.push(Database.createTerm(row, index));
}
};
const db = this.db.backendDB();
- const dbTransaction = db.transaction(['termMeta'], 'readonly');
- const dbTerms = dbTransaction.objectStore('termMeta');
- const dbIndex = dbTerms.index('expression');
+ const dbTransaction = db.transaction(['terms'], 'readonly');
+ const dbTerms = dbTransaction.objectStore('terms');
+ const dbIndex = dbTerms.index('sequence');
- for (let i = 0; i < terms.length; ++i) {
- const only = IDBKeyRange.only(terms[i]);
+ for (let i = 0; i < sequenceList.length; ++i) {
+ const only = IDBKeyRange.only(sequenceList[i]);
promises.push(Database.getAll(dbIndex, only, i, processRow));
}
@@ -170,67 +140,59 @@ class Database {
return results;
}
- async findKanji(kanji, titles) {
- this.validate();
+ async findTermMetaBulk(termList, titles) {
+ return this.findGenericBulk('termMeta', 'expression', termList, titles, Database.createMeta);
+ }
- const results = [];
- await this.db.kanji.where('character').equals(kanji).each(row => {
- if (titles.includes(row.dictionary)) {
- results.push({
- character: row.character,
- onyomi: dictFieldSplit(row.onyomi),
- kunyomi: dictFieldSplit(row.kunyomi),
- tags: dictFieldSplit(row.tags),
- glossary: row.meanings,
- stats: row.stats,
- dictionary: row.dictionary
- });
- }
- });
+ async findKanjiBulk(kanjiList, titles) {
+ return this.findGenericBulk('kanji', 'character', kanjiList, titles, Database.createKanji);
+ }
- return results;
+ async findKanjiMetaBulk(kanjiList, titles) {
+ return this.findGenericBulk('kanjiMeta', 'character', kanjiList, titles, Database.createMeta);
}
- async findKanjiMeta(kanji, titles) {
+ async findGenericBulk(tableName, indexName, indexValueList, titles, createResult) {
this.validate();
+ const promises = [];
const results = [];
- await this.db.kanjiMeta.where('character').equals(kanji).each(row => {
+ const processRow = (row, index) => {
if (titles.includes(row.dictionary)) {
- results.push({
- mode: row.mode,
- data: row.data,
- dictionary: row.dictionary
- });
+ results.push(createResult(row, index));
}
- });
+ };
- return results;
- }
+ const db = this.db.backendDB();
+ const dbTransaction = db.transaction([tableName], 'readonly');
+ const dbTerms = dbTransaction.objectStore(tableName);
+ const dbIndex = dbTerms.index(indexName);
- findTagForTitleCached(name, title) {
- if (this.tagCache.hasOwnProperty(title)) {
- const cache = this.tagCache[title];
- if (cache.hasOwnProperty(name)) {
- return cache[name];
- }
+ for (let i = 0; i < indexValueList.length; ++i) {
+ const only = IDBKeyRange.only(indexValueList[i]);
+ promises.push(Database.getAll(dbIndex, only, i, processRow));
}
+
+ await Promise.all(promises);
+
+ return results;
}
async findTagForTitle(name, title) {
this.validate();
- const cache = (this.tagCache.hasOwnProperty(title) ? this.tagCache[title] : (this.tagCache[title] = {}));
-
let result = null;
- await this.db.tagMeta.where('name').equals(name).each(row => {
+ const db = this.db.backendDB();
+ const dbTransaction = db.transaction(['tagMeta'], 'readonly');
+ const dbTerms = dbTransaction.objectStore('tagMeta');
+ const dbIndex = dbTerms.index('name');
+ const only = IDBKeyRange.only(name);
+ await Database.getAll(dbIndex, only, null, row => {
if (title === row.dictionary) {
result = row;
}
});
- cache[name] = result;
-
return result;
}
@@ -522,7 +484,20 @@ class Database {
};
}
- static createTermMeta(row, index) {
+ static createKanji(row, index) {
+ return {
+ index,
+ character: row.character,
+ onyomi: dictFieldSplit(row.onyomi),
+ kunyomi: dictFieldSplit(row.kunyomi),
+ tags: dictFieldSplit(row.tags),
+ glossary: row.meanings,
+ stats: row.stats,
+ dictionary: row.dictionary
+ };
+ }
+
+ static createMeta(row, index) {
return {
index,
mode: row.mode,
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 601ee30c..ee012d96 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -21,6 +21,7 @@ class Translator {
constructor() {
this.database = null;
this.deinflector = null;
+ this.tagCache = {};
}
async prepare() {
@@ -36,6 +37,11 @@ class Translator {
}
}
+ async purgeDatabase() {
+ this.tagCache = {};
+ await this.database.purge();
+ }
+
async findTermsGrouped(text, dictionaries, alphanumeric, options) {
const titles = Object.keys(dictionaries);
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
@@ -52,94 +58,121 @@ class Translator {
return {length, definitions: definitionsGrouped};
}
- async findTermsMerged(text, dictionaries, alphanumeric, options) {
- const secondarySearchTitles = Object.keys(options.dictionaries).filter(dict => options.dictionaries[dict].allowSecondarySearches);
- const titles = Object.keys(dictionaries);
- const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
+ async getSequencedDefinitions(definitions, mainDictionary) {
+ const definitionsBySequence = dictTermsMergeBySequence(definitions, mainDictionary);
+ const defaultDefinitions = definitionsBySequence['-1'];
- const definitionsBySequence = dictTermsMergeBySequence(definitions, options.general.mainDictionary);
+ const sequenceList = Object.keys(definitionsBySequence).map(v => Number(v)).filter(v => v >= 0);
+ const sequencedDefinitions = sequenceList.map((key) => ({
+ definitions: definitionsBySequence[key],
+ rawDefinitions: []
+ }));
- const definitionsMerged = [];
- const mergedByTermIndices = new Set();
- for (const sequence in definitionsBySequence) {
- if (sequence < 0) {
- continue;
- }
+ for (const definition of await this.database.findTermsBySequenceBulk(sequenceList, mainDictionary)) {
+ sequencedDefinitions[definition.index].rawDefinitions.push(definition);
+ }
- const result = definitionsBySequence[sequence];
+ return {sequencedDefinitions, defaultDefinitions};
+ }
- const rawDefinitionsBySequence = await this.database.findTermsBySequence(Number(sequence), options.general.mainDictionary);
+ async getMergedSecondarySearchResults(text, expressionsMap, secondarySearchTitles) {
+ if (secondarySearchTitles.length === 0) {
+ return [];
+ }
- for (const definition of rawDefinitionsBySequence) {
- const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
- definitionTags.push(dictTagBuildSource(definition.dictionary));
- definition.definitionTags = definitionTags;
- const termTags = await this.expandTags(definition.termTags, definition.dictionary);
- definition.termTags = termTags;
+ const expressionList = [];
+ const readingList = [];
+ for (const expression of expressionsMap.keys()) {
+ if (expression === text) { continue; }
+ for (const reading of expressionsMap.get(expression).keys()) {
+ expressionList.push(expression);
+ readingList.push(reading);
}
+ }
- const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence);
-
- const secondarySearchResults = [];
- if (secondarySearchTitles.length > 0) {
- for (const expression of result.expressions.keys()) {
- if (expression === text) {
- continue;
- }
-
- for (const reading of result.expressions.get(expression).keys()) {
- for (const definition of await this.database.findTermsExact(expression, reading, secondarySearchTitles)) {
- const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
- definitionTags.push(dictTagBuildSource(definition.dictionary));
- definition.definitionTags = definitionTags;
- const termTags = await this.expandTags(definition.termTags, definition.dictionary);
- definition.termTags = termTags;
- secondarySearchResults.push(definition);
- }
- }
- }
- }
+ const definitions = await this.database.findTermsExactBulk(expressionList, readingList, secondarySearchTitles);
+ for (const definition of definitions) {
+ const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
+ definitionTags.push(dictTagBuildSource(definition.dictionary));
+ definition.definitionTags = definitionTags;
+ const termTags = await this.expandTags(definition.termTags, definition.dictionary);
+ definition.termTags = termTags;
+ }
- dictTermsMergeByGloss(result, definitionsBySequence['-1'].concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
+ if (definitions.length > 1) {
+ definitions.sort((a, b) => a.index - b.index);
+ }
- for (const gloss in definitionsByGloss) {
- const definition = definitionsByGloss[gloss];
- dictTagsSort(definition.definitionTags);
- result.definitions.push(definition);
- }
+ return definitions;
+ }
- dictTermsSort(result.definitions, dictionaries);
-
- const expressions = [];
- for (const expression of result.expressions.keys()) {
- for (const reading of result.expressions.get(expression).keys()) {
- const termTags = result.expressions.get(expression).get(reading);
- expressions.push({
- expression: expression,
- reading: reading,
- termTags: dictTagsSort(termTags),
- termFrequency: (score => {
- if (score > 0) {
- return 'popular';
- } else if (score < 0) {
- return 'rare';
- } else {
- return 'normal';
- }
- })(termTags.map(tag => tag.score).reduce((p, v) => p + v, 0))
- });
- }
+ async getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchTitles, mergedByTermIndices) {
+ const result = sequencedDefinition.definitions;
+ const rawDefinitionsBySequence = sequencedDefinition.rawDefinitions;
+
+ for (const definition of rawDefinitionsBySequence) {
+ const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
+ definitionTags.push(dictTagBuildSource(definition.dictionary));
+ definition.definitionTags = definitionTags;
+ const termTags = await this.expandTags(definition.termTags, definition.dictionary);
+ definition.termTags = termTags;
+ }
+
+ const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence);
+ const secondarySearchResults = await this.getMergedSecondarySearchResults(text, result.expressions, secondarySearchTitles);
+
+ dictTermsMergeByGloss(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
+
+ for (const gloss in definitionsByGloss) {
+ const definition = definitionsByGloss[gloss];
+ dictTagsSort(definition.definitionTags);
+ result.definitions.push(definition);
+ }
+
+ dictTermsSort(result.definitions, dictionaries);
+
+ const expressions = [];
+ for (const expression of result.expressions.keys()) {
+ for (const reading of result.expressions.get(expression).keys()) {
+ const termTags = result.expressions.get(expression).get(reading);
+ const score = termTags.map(tag => tag.score).reduce((p, v) => p + v, 0);
+ expressions.push({
+ expression: expression,
+ reading: reading,
+ termTags: dictTagsSort(termTags),
+ termFrequency: Translator.scoreToTermFrequency(score)
+ });
}
+ }
- result.expressions = expressions;
+ result.expressions = expressions;
+ result.expression = Array.from(result.expression);
+ result.reading = Array.from(result.reading);
- result.expression = Array.from(result.expression);
- result.reading = Array.from(result.reading);
+ return result;
+ }
+
+ async findTermsMerged(text, dictionaries, alphanumeric, options) {
+ const secondarySearchTitles = Object.keys(options.dictionaries).filter(dict => options.dictionaries[dict].allowSecondarySearches);
+ const titles = Object.keys(dictionaries);
+ const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
+ const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary);
+ const definitionsMerged = [];
+ const mergedByTermIndices = new Set();
+ for (const sequencedDefinition of sequencedDefinitions) {
+ const result = await this.getMergedDefinition(
+ text,
+ dictionaries,
+ sequencedDefinition,
+ defaultDefinitions,
+ secondarySearchTitles,
+ mergedByTermIndices
+ );
definitionsMerged.push(result);
}
- const strayDefinitions = definitionsBySequence['-1'].filter((definition, index) => !mergedByTermIndices.has(index));
+ const strayDefinitions = defaultDefinitions.filter((definition, index) => !mergedByTermIndices.has(index));
for (const groupedDefinition of dictTermsGroup(strayDefinitions, dictionaries)) {
groupedDefinition.expressions = [{expression: groupedDefinition.expression, reading: groupedDefinition.reading}];
definitionsMerged.push(groupedDefinition);
@@ -277,33 +310,44 @@ class Translator {
}
async findKanji(text, dictionaries) {
- let definitions = [];
- const processed = {};
const titles = Object.keys(dictionaries);
+ const kanjiUnique = {};
+ const kanjiList = [];
for (const c of text) {
- if (!processed[c]) {
- definitions.push(...await this.database.findKanji(c, titles));
- processed[c] = true;
+ if (!kanjiUnique.hasOwnProperty(c)) {
+ kanjiList.push(c);
+ kanjiUnique[c] = true;
}
}
+ const definitions = await this.database.findKanjiBulk(kanjiList, titles);
+ if (definitions.length === 0) {
+ return definitions;
+ }
+
+ if (definitions.length > 1) {
+ definitions.sort((a, b) => a.index - b.index);
+ }
+
+ const kanjiList2 = [];
for (const definition of definitions) {
+ kanjiList2.push(definition.character);
+
const tags = await this.expandTags(definition.tags, definition.dictionary);
tags.push(dictTagBuildSource(definition.dictionary));
definition.tags = dictTagsSort(tags);
definition.stats = await this.expandStats(definition.stats, definition.dictionary);
-
definition.frequencies = [];
- for (const meta of await this.database.findKanjiMeta(definition.character, titles)) {
- if (meta.mode === 'freq') {
- definition.frequencies.push({
- character: meta.character,
- frequency: meta.data,
- dictionary: meta.dictionary
- });
- }
- }
+ }
+
+ for (const meta of await this.database.findKanjiMetaBulk(kanjiList2, titles)) {
+ if (meta.mode !== 'freq') { continue; }
+ definitions[meta.index].frequencies.push({
+ character: meta.character,
+ frequency: meta.data,
+ dictionary: meta.dictionary
+ });
}
return definitions;
@@ -359,56 +403,76 @@ class Translator {
}
async expandTags(names, title) {
- const tags = [];
- for (const name of names) {
- const base = Translator.getNameBase(name);
- let meta = this.database.findTagForTitleCached(base, title);
- if (typeof meta === 'undefined') {
- meta = await this.database.findTagForTitle(base, title);
- }
-
- const tag = Object.assign({}, meta !== null ? meta : {}, {name});
-
- tags.push(dictTagSanitize(tag));
- }
-
- return tags;
+ const tagMetaList = await this.getTagMetaList(names, title);
+ return tagMetaList.map((meta, index) => {
+ const name = names[index];
+ const tag = dictTagSanitize(Object.assign({}, meta !== null ? meta : {}, {name}));
+ return dictTagSanitize(tag);
+ });
}
async expandStats(items, title) {
- const stats = {};
- for (const name in items) {
- const base = Translator.getNameBase(name);
- let meta = this.database.findTagForTitleCached(base, title);
- if (typeof meta === 'undefined') {
- meta = await this.database.findTagForTitle(base, title);
- if (meta === null) {
- continue;
- }
- }
+ const names = Object.keys(items);
+ const tagMetaList = await this.getTagMetaList(names, title);
- const group = stats[meta.category] = stats[meta.category] || [];
+ const stats = {};
+ for (let i = 0; i < names.length; ++i) {
+ const name = names[i];
+ const meta = tagMetaList[i];
+ if (meta === null) { continue; }
+
+ const category = meta.category;
+ const group = (
+ stats.hasOwnProperty(category) ?
+ stats[category] :
+ (stats[category] = [])
+ );
const stat = Object.assign({}, meta, {name, value: items[name]});
-
group.push(dictTagSanitize(stat));
}
+ const sortCompare = (a, b) => a.notes - b.notes;
for (const category in stats) {
- stats[category].sort((a, b) => {
- if (a.notes < b.notes) {
- return -1;
- } else if (a.notes > b.notes) {
- return 1;
- } else {
- return 0;
- }
- });
+ stats[category].sort(sortCompare);
}
return stats;
}
+ async getTagMetaList(names, title) {
+ const tagMetaList = [];
+ const cache = (
+ this.tagCache.hasOwnProperty(title) ?
+ this.tagCache[title] :
+ (this.tagCache[title] = {})
+ );
+
+ for (const name of names) {
+ const base = Translator.getNameBase(name);
+
+ if (cache.hasOwnProperty(base)) {
+ tagMetaList.push(cache[base]);
+ } else {
+ const tagMeta = await this.database.findTagForTitle(base, title);
+ cache[base] = tagMeta;
+ tagMetaList.push(tagMeta);
+ }
+ }
+
+ return tagMetaList;
+ }
+
+ static scoreToTermFrequency(score) {
+ if (score > 0) {
+ return 'popular';
+ } else if (score < 0) {
+ return 'rare';
+ } else {
+ return 'normal';
+ }
+ }
+
static getNameBase(name) {
const pos = name.indexOf(':');
return (pos >= 0 ? name.substr(0, pos) : name);
diff --git a/ext/bg/js/util.js b/ext/bg/js/util.js
index 73a8396f..1ca0833b 100644
--- a/ext/bg/js/util.js
+++ b/ext/bg/js/util.js
@@ -89,7 +89,7 @@ function utilAnkiGetModelFieldNames(modelName) {
}
function utilDatabasePurge() {
- return utilBackend().translator.database.purge();
+ return utilBackend().translator.purgeDatabase();
}
async function utilDatabaseImport(data, progress, exceptions) {