summaryrefslogtreecommitdiff
path: root/ext/bg
diff options
context:
space:
mode:
authorAlex Yatskov <FooSoft@users.noreply.github.com>2019-10-05 09:18:09 -0700
committerGitHub <noreply@github.com>2019-10-05 09:18:09 -0700
commita369f8d0a41338a0d4aa85d904f0f20c71eb7817 (patch)
tree5d0c5f2503472070699e408f338c1c9e41080552 /ext/bg
parentd3d162ea785457d9533d57f861afa82d2c2b0e01 (diff)
parent44119eea2c8ad4c8eed38070ef1a3ce27fa9359e (diff)
Merge pull request #229 from toasted-nutbread/database-optimizations
Database optimizations
Diffstat (limited to 'ext/bg')
-rw-r--r--ext/bg/js/database.js180
-rw-r--r--ext/bg/js/deinflector.js122
-rw-r--r--ext/bg/js/translator.js179
3 files changed, 297 insertions, 184 deletions
diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js
index 093ec102..e8214c3c 100644
--- a/ext/bg/js/database.js
+++ b/ext/bg/js/database.js
@@ -68,24 +68,39 @@ class Database {
const results = [];
await this.db.terms.where('expression').equals(term).or('reading').equals(term).each(row => {
if (titles.includes(row.dictionary)) {
- results.push({
- expression: row.expression,
- reading: row.reading,
- definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
- termTags: dictFieldSplit(row.termTags || ''),
- rules: dictFieldSplit(row.rules),
- glossary: row.glossary,
- score: row.score,
- dictionary: row.dictionary,
- id: row.id,
- sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
- });
+ results.push(Database.createTerm(row));
}
});
return results;
}
+ async findTermsBulk(terms, titles) {
+ const promises = [];
+ const visited = {};
+ const results = [];
+ const createResult = Database.createTerm;
+ const filter = (row) => titles.includes(row.dictionary);
+
+ const db = this.db.backendDB();
+ const dbTransaction = db.transaction(['terms'], 'readonly');
+ const dbTerms = dbTransaction.objectStore('terms');
+ const dbIndex1 = dbTerms.index('expression');
+ const dbIndex2 = dbTerms.index('reading');
+
+ for (let i = 0; i < terms.length; ++i) {
+ const only = IDBKeyRange.only(terms[i]);
+ promises.push(
+ Database.getAll(dbIndex1, only, i, visited, filter, createResult, results),
+ Database.getAll(dbIndex2, only, i, visited, filter, createResult, results)
+ );
+ }
+
+ await Promise.all(promises);
+
+ return results;
+ }
+
async findTermsExact(term, reading, titles) {
if (!this.db) {
throw 'Database not initialized';
@@ -94,18 +109,7 @@ class Database {
const results = [];
await this.db.terms.where('expression').equals(term).each(row => {
if (row.reading === reading && titles.includes(row.dictionary)) {
- results.push({
- expression: row.expression,
- reading: row.reading,
- definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
- termTags: dictFieldSplit(row.termTags || ''),
- rules: dictFieldSplit(row.rules),
- glossary: row.glossary,
- score: row.score,
- dictionary: row.dictionary,
- id: row.id,
- sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
- });
+ results.push(Database.createTerm(row));
}
});
@@ -120,18 +124,7 @@ class Database {
const results = [];
await this.db.terms.where('sequence').equals(sequence).each(row => {
if (row.dictionary === mainDictionary) {
- results.push({
- expression: row.expression,
- reading: row.reading,
- definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
- termTags: dictFieldSplit(row.termTags || ''),
- rules: dictFieldSplit(row.rules),
- glossary: row.glossary,
- score: row.score,
- dictionary: row.dictionary,
- id: row.id,
- sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
- });
+ results.push(Database.createTerm(row));
}
});
@@ -157,6 +150,28 @@ class Database {
return results;
}
+ async findTermMetaBulk(terms, titles) {
+ const promises = [];
+ const visited = {};
+ const results = [];
+ const createResult = Database.createTermMeta;
+ const filter = (row) => titles.includes(row.dictionary);
+
+ const db = this.db.backendDB();
+ const dbTransaction = db.transaction(['termMeta'], 'readonly');
+ const dbTerms = dbTransaction.objectStore('termMeta');
+ const dbIndex = dbTerms.index('expression');
+
+ for (let i = 0; i < terms.length; ++i) {
+ const only = IDBKeyRange.only(terms[i]);
+ promises.push(Database.getAll(dbIndex, only, i, visited, filter, createResult, results));
+ }
+
+ await Promise.all(promises);
+
+ return results;
+ }
+
async findKanji(kanji, titles) {
if (!this.db) {
throw 'Database not initialized';
@@ -199,23 +214,30 @@ class Database {
return results;
}
+ findTagForTitleCached(name, title) {
+ if (this.tagCache.hasOwnProperty(title)) {
+ const cache = this.tagCache[title];
+ if (cache.hasOwnProperty(name)) {
+ return cache[name];
+ }
+ }
+ }
+
async findTagForTitle(name, title) {
if (!this.db) {
throw 'Database not initialized';
}
- this.tagCache[title] = this.tagCache[title] || {};
+ const cache = (this.tagCache.hasOwnProperty(title) ? this.tagCache[title] : (this.tagCache[title] = {}));
- let result = this.tagCache[title][name];
- if (!result) {
- await this.db.tagMeta.where('name').equals(name).each(row => {
- if (title === row.dictionary) {
- result = row;
- }
- });
+ let result = null;
+ await this.db.tagMeta.where('name').equals(name).each(row => {
+ if (title === row.dictionary) {
+ result = row;
+ }
+ });
- this.tagCache[title][name] = result;
- }
+ cache[name] = result;
return result;
}
@@ -489,4 +511,70 @@ class Database {
return summary;
}
+
+ static createTerm(row, index) {
+ return {
+ index,
+ expression: row.expression,
+ reading: row.reading,
+ definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
+ termTags: dictFieldSplit(row.termTags || ''),
+ rules: dictFieldSplit(row.rules),
+ glossary: row.glossary,
+ score: row.score,
+ dictionary: row.dictionary,
+ id: row.id,
+ sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
+ };
+ }
+
+ static createTermMeta(row, index) {
+ return {
+ index,
+ mode: row.mode,
+ data: row.data,
+ dictionary: row.dictionary
+ };
+ }
+
+ static getAll(dbIndex, query, index, visited, filter, createResult, results) {
+ const fn = typeof dbIndex.getAll === 'function' ? Database.getAllFast : Database.getAllUsingCursor;
+ return fn(dbIndex, query, index, visited, filter, createResult, results);
+ }
+
+ static getAllFast(dbIndex, query, index, visited, filter, createResult, results) {
+ return new Promise((resolve, reject) => {
+ const request = dbIndex.getAll(query);
+ request.onerror = (e) => reject(e);
+ request.onsuccess = (e) => {
+ for (const row of e.target.result) {
+ if (filter(row, index) && !visited.hasOwnProperty(row.id)) {
+ visited[row.id] = true;
+ results.push(createResult(row, index));
+ }
+ }
+ resolve();
+ };
+ });
+ }
+
+ static getAllUsingCursor(dbIndex, query, index, visited, filter, createResult, results) {
+ return new Promise((resolve, reject) => {
+ const request = dbIndex.openCursor(query, 'next');
+ request.onerror = (e) => reject(e);
+ request.onsuccess = (e) => {
+ const cursor = e.target.result;
+ if (cursor) {
+ const row = cursor.value;
+ if (filter(row, index) && !visited.hasOwnProperty(row.id)) {
+ visited[row.id] = true;
+ results.push(createResult(row, index));
+ }
+ cursor.continue();
+ } else {
+ resolve();
+ }
+ };
+ });
+ }
}
diff --git a/ext/bg/js/deinflector.js b/ext/bg/js/deinflector.js
index 0abde99d..ad77895c 100644
--- a/ext/bg/js/deinflector.js
+++ b/ext/bg/js/deinflector.js
@@ -17,103 +17,53 @@
*/
-class Deinflection {
- constructor(term, {rules=[], definitions=[], reason=''} = {}) {
- this.term = term;
- this.rules = rules;
- this.definitions = definitions;
- this.reason = reason;
- this.children = [];
+class Deinflector {
+ constructor(reasons) {
+ this.reasons = reasons;
}
- async deinflect(definer, reasons) {
- for (const reason in reasons) {
- for (const variant of reasons[reason]) {
- let accept = this.rules.length === 0;
- if (!accept) {
- for (const rule of this.rules) {
- if (variant.rulesIn.includes(rule)) {
- accept = true;
- break;
+ deinflect(source) {
+ const results = [{
+ source,
+ term: source,
+ rules: [],
+ definitions: [],
+ reasons: []
+ }];
+ for (let i = 0; i < results.length; ++i) {
+ const entry = results[i];
+
+ for (const reason in this.reasons) {
+ for (const variant of this.reasons[reason]) {
+ let accept = entry.rules.length === 0;
+ if (!accept) {
+ for (const rule of entry.rules) {
+ if (variant.rulesIn.includes(rule)) {
+ accept = true;
+ break;
+ }
}
}
- }
- if (!accept || !this.term.endsWith(variant.kanaIn)) {
- continue;
- }
-
- const term = this.term.slice(0, -variant.kanaIn.length) + variant.kanaOut;
- if (term.length === 0) {
- continue;
- }
-
- const child = new Deinflection(term, {reason, rules: variant.rulesOut});
- if (await child.deinflect(definer, reasons)) {
- this.children.push(child);
- }
- }
- }
-
- const definitions = await definer(this.term);
- if (this.rules.length === 0) {
- this.definitions = definitions;
- } else {
- for (const rule of this.rules) {
- for (const definition of definitions) {
- if (definition.rules.includes(rule)) {
- this.definitions.push(definition);
+ if (!accept || !entry.term.endsWith(variant.kanaIn)) {
+ continue;
}
- }
- }
- }
-
- if (this.definitions.length > 0 && this.children.length > 0) {
- const child = new Deinflection(this.term, {rules: this.rules, definitions: this.definitions});
- this.children.push(child);
- }
- return this.definitions.length > 0 || this.children.length > 0;
- }
-
- gather() {
- if (this.children.length === 0) {
- return [{
- source: this.term,
- rules: this.rules,
- definitions: this.definitions,
- reasons: this.reason.length > 0 ? [this.reason] : []
- }];
- }
+ const term = entry.term.slice(0, -variant.kanaIn.length) + variant.kanaOut;
+ if (term.length === 0) {
+ continue;
+ }
- const results = [];
- for (const child of this.children) {
- for (const result of child.gather()) {
- if (this.reason.length > 0) {
- result.reasons.push(this.reason);
+ results.push({
+ source,
+ term,
+ rules: variant.rulesOut,
+ definitions: [],
+ reasons: [reason, ...entry.reasons]
+ });
}
-
- result.source = this.term;
- results.push(result);
}
}
-
return results;
}
}
-
-
-class Deinflector {
- constructor(reasons) {
- this.reasons = reasons;
- }
-
- async deinflect(term, definer) {
- const node = new Deinflection(term);
- if (await node.deinflect(definer, this.reasons)) {
- return node.gather();
- } else {
- return [];
- }
- }
-}
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 7b952622..65d746ea 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -41,9 +41,7 @@ class Translator {
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
- for (const definition of definitionsGrouped) {
- await this.buildTermFrequencies(definition, titles);
- }
+ await this.buildTermFrequencies(definitionsGrouped, titles);
if (options.general.compactTags) {
for (const definition of definitionsGrouped) {
@@ -147,9 +145,7 @@ class Translator {
definitionsMerged.push(groupedDefinition);
}
- for (const definition of definitionsMerged) {
- await this.buildTermFrequencies(definition, titles);
- }
+ await this.buildTermFrequencies(definitionsMerged, titles);
if (options.general.compactTags) {
for (const definition of definitionsMerged) {
@@ -164,9 +160,7 @@ class Translator {
const titles = Object.keys(dictionaries);
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
- for (const definition of definitions) {
- await this.buildTermFrequencies(definition, titles);
- }
+ await this.buildTermFrequencies(definitions, titles);
return {length, definitions};
}
@@ -179,13 +173,9 @@ class Translator {
}
}
- const cache = {};
- const titles = Object.keys(dictionaries);
- let deinflections = await this.findTermDeinflections(text, titles, cache);
const textHiragana = jpKatakanaToHiragana(text);
- if (text !== textHiragana) {
- deinflections.push(...await this.findTermDeinflections(textHiragana, titles, cache));
- }
+ const titles = Object.keys(dictionaries);
+ const deinflections = await this.findTermDeinflections(text, textHiragana, titles);
let definitions = [];
for (const deinflection of deinflections) {
@@ -221,19 +211,77 @@ class Translator {
return {length, definitions};
}
- async findTermDeinflections(text, titles, cache) {
- const definer = async term => {
- if (cache.hasOwnProperty(term)) {
- return cache[term];
+ async findTermDeinflections(text, text2, titles) {
+ const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2));
+
+ if (deinflections.length === 0) {
+ return [];
+ }
+
+ const uniqueDeinflectionTerms = [];
+ const uniqueDeinflectionArrays = [];
+ const uniqueDeinflectionsMap = {};
+ for (const deinflection of deinflections) {
+ const term = deinflection.term;
+ let deinflectionArray;
+ if (uniqueDeinflectionsMap.hasOwnProperty(term)) {
+ deinflectionArray = uniqueDeinflectionsMap[term];
} else {
- return cache[term] = await this.database.findTerms(term, titles);
+ deinflectionArray = [];
+ uniqueDeinflectionTerms.push(term);
+ uniqueDeinflectionArrays.push(deinflectionArray);
+ uniqueDeinflectionsMap[term] = deinflectionArray;
}
- };
+ deinflectionArray.push(deinflection);
+ }
+
+ const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles);
+
+ for (const definition of definitions) {
+ for (const deinflection of uniqueDeinflectionArrays[definition.index]) {
+ if (Translator.definitionContainsAnyRule(definition, deinflection.rules)) {
+ deinflection.definitions.push(definition);
+ }
+ }
+ }
+
+ return deinflections.filter(e => e.definitions.length > 0);
+ }
+
+ static definitionContainsAnyRule(definition, rules) {
+ if (rules.length === 0) {
+ return true;
+ }
+ const definitionRules = definition.rules;
+ for (const rule of rules) {
+ if (definitionRules.includes(rule)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ getDeinflections(text) {
+ const deinflections = [];
+
+ for (let i = text.length; i > 0; --i) {
+ const textSlice = text.slice(0, i);
+ deinflections.push(...this.deinflector.deinflect(textSlice));
+ }
+
+ return deinflections;
+ }
+
+ getDeinflections2(text, text2) {
+ const deinflections = [];
- let deinflections = [];
for (let i = text.length; i > 0; --i) {
const textSlice = text.slice(0, i);
- deinflections.push(...await this.deinflector.deinflect(textSlice, definer));
+ const text2Slice = text2.slice(0, i);
+ deinflections.push(...this.deinflector.deinflect(textSlice));
+ if (textSlice !== text2Slice) {
+ deinflections.push(...this.deinflector.deinflect(text2Slice));
+ }
}
return deinflections;
@@ -272,24 +320,51 @@ class Translator {
return definitions;
}
- async buildTermFrequencies(definition, titles) {
- let terms = [];
- if (definition.expressions) {
- terms.push(...definition.expressions);
- } else {
- terms.push(definition);
+ async buildTermFrequencies(definitions, titles) {
+ const terms = [];
+ for (const definition of definitions) {
+ if (definition.expressions) {
+ terms.push(...definition.expressions);
+ } else {
+ terms.push(definition);
+ }
+ }
+
+ if (terms.length === 0) {
+ return;
}
- for (const term of terms) {
+ // Create mapping of unique terms
+ const expressionsUnique = [];
+ const termsUnique = [];
+ const termsUniqueMap = {};
+ for (let i = 0, ii = terms.length; i < ii; ++i) {
+ const term = terms[i];
+ const expression = term.expression;
term.frequencies = [];
- for (const meta of await this.database.findTermMeta(term.expression, titles)) {
- if (meta.mode === 'freq') {
- term.frequencies.push({
- expression: meta.expression,
- frequency: meta.data,
- dictionary: meta.dictionary
- });
- }
+
+ if (termsUniqueMap.hasOwnProperty(expression)) {
+ termsUniqueMap[expression].push(term);
+ } else {
+ const termList = [term];
+ expressionsUnique.push(expression);
+ termsUnique.push(termList);
+ termsUniqueMap[expression] = termList;
+ }
+ }
+
+ const metas = await this.database.findTermMetaBulk(expressionsUnique, titles);
+ for (const meta of metas) {
+ if (meta.mode !== 'freq') {
+ continue;
+ }
+
+ for (const term of termsUnique[meta.index]) {
+ term.frequencies.push({
+ expression: meta.expression,
+ frequency: meta.data,
+ dictionary: meta.dictionary
+ });
}
}
}
@@ -298,15 +373,13 @@ class Translator {
const tags = [];
for (const name of names) {
const base = Translator.getNameBase(name);
- const meta = await this.database.findTagForTitle(base, title);
-
- const tag = {name};
- for (const prop in meta || {}) {
- if (prop !== 'name') {
- tag[prop] = meta[prop];
- }
+ let meta = this.database.findTagForTitleCached(base, title);
+ if (typeof meta === 'undefined') {
+ meta = await this.database.findTagForTitle(base, title);
}
+ const tag = Object.assign({}, meta !== null ? meta : {}, {name});
+
tags.push(dictTagSanitize(tag));
}
@@ -317,16 +390,18 @@ class Translator {
const stats = {};
for (const name in items) {
const base = Translator.getNameBase(name);
- const meta = await this.database.findTagForTitle(base, title);
- const group = stats[meta.category] = stats[meta.category] || [];
-
- const stat = {name, value: items[name]};
- for (const prop in meta || {}) {
- if (prop !== 'name') {
- stat[prop] = meta[prop];
+ let meta = this.database.findTagForTitleCached(base, title);
+ if (typeof meta === 'undefined') {
+ meta = await this.database.findTagForTitle(base, title);
+ if (meta === null) {
+ continue;
}
}
+ const group = stats[meta.category] = stats[meta.category] || [];
+
+ const stat = Object.assign({}, meta, {name, value: items[name]});
+
group.push(dictTagSanitize(stat));
}