diff options
author | Alex Yatskov <FooSoft@users.noreply.github.com> | 2019-10-05 09:18:09 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-05 09:18:09 -0700 |
commit | a369f8d0a41338a0d4aa85d904f0f20c71eb7817 (patch) | |
tree | 5d0c5f2503472070699e408f338c1c9e41080552 /ext/bg | |
parent | d3d162ea785457d9533d57f861afa82d2c2b0e01 (diff) | |
parent | 44119eea2c8ad4c8eed38070ef1a3ce27fa9359e (diff) |
Merge pull request #229 from toasted-nutbread/database-optimizations
Database optimizations
Diffstat (limited to 'ext/bg')
-rw-r--r-- | ext/bg/js/database.js | 180 | ||||
-rw-r--r-- | ext/bg/js/deinflector.js | 122 | ||||
-rw-r--r-- | ext/bg/js/translator.js | 179 |
3 files changed, 297 insertions, 184 deletions
diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js index 093ec102..e8214c3c 100644 --- a/ext/bg/js/database.js +++ b/ext/bg/js/database.js @@ -68,24 +68,39 @@ class Database { const results = []; await this.db.terms.where('expression').equals(term).or('reading').equals(term).each(row => { if (titles.includes(row.dictionary)) { - results.push({ - expression: row.expression, - reading: row.reading, - definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''), - termTags: dictFieldSplit(row.termTags || ''), - rules: dictFieldSplit(row.rules), - glossary: row.glossary, - score: row.score, - dictionary: row.dictionary, - id: row.id, - sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence - }); + results.push(Database.createTerm(row)); } }); return results; } + async findTermsBulk(terms, titles) { + const promises = []; + const visited = {}; + const results = []; + const createResult = Database.createTerm; + const filter = (row) => titles.includes(row.dictionary); + + const db = this.db.backendDB(); + const dbTransaction = db.transaction(['terms'], 'readonly'); + const dbTerms = dbTransaction.objectStore('terms'); + const dbIndex1 = dbTerms.index('expression'); + const dbIndex2 = dbTerms.index('reading'); + + for (let i = 0; i < terms.length; ++i) { + const only = IDBKeyRange.only(terms[i]); + promises.push( + Database.getAll(dbIndex1, only, i, visited, filter, createResult, results), + Database.getAll(dbIndex2, only, i, visited, filter, createResult, results) + ); + } + + await Promise.all(promises); + + return results; + } + async findTermsExact(term, reading, titles) { if (!this.db) { throw 'Database not initialized'; @@ -94,18 +109,7 @@ class Database { const results = []; await this.db.terms.where('expression').equals(term).each(row => { if (row.reading === reading && titles.includes(row.dictionary)) { - results.push({ - expression: row.expression, - reading: row.reading, - definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''), - termTags: dictFieldSplit(row.termTags || ''), - rules: dictFieldSplit(row.rules), - glossary: row.glossary, - score: row.score, - dictionary: row.dictionary, - id: row.id, - sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence - }); + results.push(Database.createTerm(row)); } }); @@ -120,18 +124,7 @@ class Database { const results = []; await this.db.terms.where('sequence').equals(sequence).each(row => { if (row.dictionary === mainDictionary) { - results.push({ - expression: row.expression, - reading: row.reading, - definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''), - termTags: dictFieldSplit(row.termTags || ''), - rules: dictFieldSplit(row.rules), - glossary: row.glossary, - score: row.score, - dictionary: row.dictionary, - id: row.id, - sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence - }); + results.push(Database.createTerm(row)); } }); @@ -157,6 +150,28 @@ class Database { return results; } + async findTermMetaBulk(terms, titles) { + const promises = []; + const visited = {}; + const results = []; + const createResult = Database.createTermMeta; + const filter = (row) => titles.includes(row.dictionary); + + const db = this.db.backendDB(); + const dbTransaction = db.transaction(['termMeta'], 'readonly'); + const dbTerms = dbTransaction.objectStore('termMeta'); + const dbIndex = dbTerms.index('expression'); + + for (let i = 0; i < terms.length; ++i) { + const only = IDBKeyRange.only(terms[i]); + promises.push(Database.getAll(dbIndex, only, i, visited, filter, createResult, results)); + } + + await Promise.all(promises); + + return results; + } + async findKanji(kanji, titles) { if (!this.db) { throw 'Database not initialized'; @@ -199,23 +214,30 @@ class Database { return results; } + findTagForTitleCached(name, title) { + if (this.tagCache.hasOwnProperty(title)) { + const cache = this.tagCache[title]; + if (cache.hasOwnProperty(name)) { + return cache[name]; + } + } + } + async findTagForTitle(name, title) { if (!this.db) { throw 'Database not initialized'; } - this.tagCache[title] = this.tagCache[title] || {}; + const cache = (this.tagCache.hasOwnProperty(title) ? this.tagCache[title] : (this.tagCache[title] = {})); - let result = this.tagCache[title][name]; - if (!result) { - await this.db.tagMeta.where('name').equals(name).each(row => { - if (title === row.dictionary) { - result = row; - } - }); + let result = null; + await this.db.tagMeta.where('name').equals(name).each(row => { + if (title === row.dictionary) { + result = row; + } + }); - this.tagCache[title][name] = result; - } + cache[name] = result; return result; } @@ -489,4 +511,70 @@ class Database { return summary; } + + static createTerm(row, index) { + return { + index, + expression: row.expression, + reading: row.reading, + definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''), + termTags: dictFieldSplit(row.termTags || ''), + rules: dictFieldSplit(row.rules), + glossary: row.glossary, + score: row.score, + dictionary: row.dictionary, + id: row.id, + sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence + }; + } + + static createTermMeta(row, index) { + return { + index, + mode: row.mode, + data: row.data, + dictionary: row.dictionary + }; + } + + static getAll(dbIndex, query, index, visited, filter, createResult, results) { + const fn = typeof dbIndex.getAll === 'function' ? Database.getAllFast : Database.getAllUsingCursor; + return fn(dbIndex, query, index, visited, filter, createResult, results); + } + + static getAllFast(dbIndex, query, index, visited, filter, createResult, results) { + return new Promise((resolve, reject) => { + const request = dbIndex.getAll(query); + request.onerror = (e) => reject(e); + request.onsuccess = (e) => { + for (const row of e.target.result) { + if (filter(row, index) && !visited.hasOwnProperty(row.id)) { + visited[row.id] = true; + results.push(createResult(row, index)); + } + } + resolve(); + }; + }); + } + + static getAllUsingCursor(dbIndex, query, index, visited, filter, createResult, results) { + return new Promise((resolve, reject) => { + const request = dbIndex.openCursor(query, 'next'); + request.onerror = (e) => reject(e); + request.onsuccess = (e) => { + const cursor = e.target.result; + if (cursor) { + const row = cursor.value; + if (filter(row, index) && !visited.hasOwnProperty(row.id)) { + visited[row.id] = true; + results.push(createResult(row, index)); + } + cursor.continue(); + } else { + resolve(); + } + }; + }); + } } diff --git a/ext/bg/js/deinflector.js b/ext/bg/js/deinflector.js index 0abde99d..ad77895c 100644 --- a/ext/bg/js/deinflector.js +++ b/ext/bg/js/deinflector.js @@ -17,103 +17,53 @@ */ -class Deinflection { - constructor(term, {rules=[], definitions=[], reason=''} = {}) { - this.term = term; - this.rules = rules; - this.definitions = definitions; - this.reason = reason; - this.children = []; +class Deinflector { + constructor(reasons) { + this.reasons = reasons; } - async deinflect(definer, reasons) { - for (const reason in reasons) { - for (const variant of reasons[reason]) { - let accept = this.rules.length === 0; - if (!accept) { - for (const rule of this.rules) { - if (variant.rulesIn.includes(rule)) { - accept = true; - break; + deinflect(source) { + const results = [{ + source, + term: source, + rules: [], + definitions: [], + reasons: [] + }]; + for (let i = 0; i < results.length; ++i) { + const entry = results[i]; + + for (const reason in this.reasons) { + for (const variant of this.reasons[reason]) { + let accept = entry.rules.length === 0; + if (!accept) { + for (const rule of entry.rules) { + if (variant.rulesIn.includes(rule)) { + accept = true; + break; + } } } - } - if (!accept || !this.term.endsWith(variant.kanaIn)) { - continue; - } - - const term = this.term.slice(0, -variant.kanaIn.length) + variant.kanaOut; - if (term.length === 0) { - continue; - } - - const child = new Deinflection(term, {reason, rules: variant.rulesOut}); - if (await child.deinflect(definer, reasons)) { - this.children.push(child); - } - } - } - - const definitions = await definer(this.term); - if (this.rules.length === 0) { - this.definitions = definitions; - } else { - for (const rule of this.rules) { - for (const definition of definitions) { - if (definition.rules.includes(rule)) { - this.definitions.push(definition); + if (!accept || !entry.term.endsWith(variant.kanaIn)) { + continue; } - } - } - } - - if (this.definitions.length > 0 && this.children.length > 0) { - const child = new Deinflection(this.term, {rules: this.rules, definitions: this.definitions}); - this.children.push(child); - } - return this.definitions.length > 0 || this.children.length > 0; - } - - gather() { - if (this.children.length === 0) { - return [{ - source: this.term, - rules: this.rules, - definitions: this.definitions, - reasons: this.reason.length > 0 ? [this.reason] : [] - }]; - } + const term = entry.term.slice(0, -variant.kanaIn.length) + variant.kanaOut; + if (term.length === 0) { + continue; + } - const results = []; - for (const child of this.children) { - for (const result of child.gather()) { - if (this.reason.length > 0) { - result.reasons.push(this.reason); + results.push({ + source, + term, + rules: variant.rulesOut, + definitions: [], + reasons: [reason, ...entry.reasons] + }); } - - result.source = this.term; - results.push(result); } } - return results; } } - - -class Deinflector { - constructor(reasons) { - this.reasons = reasons; - } - - async deinflect(term, definer) { - const node = new Deinflection(term); - if (await node.deinflect(definer, this.reasons)) { - return node.gather(); - } else { - return []; - } - } -} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 7b952622..65d746ea 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -41,9 +41,7 @@ class Translator { const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); const definitionsGrouped = dictTermsGroup(definitions, dictionaries); - for (const definition of definitionsGrouped) { - await this.buildTermFrequencies(definition, titles); - } + await this.buildTermFrequencies(definitionsGrouped, titles); if (options.general.compactTags) { for (const definition of definitionsGrouped) { @@ -147,9 +145,7 @@ class Translator { definitionsMerged.push(groupedDefinition); } - for (const definition of definitionsMerged) { - await this.buildTermFrequencies(definition, titles); - } + await this.buildTermFrequencies(definitionsMerged, titles); if (options.general.compactTags) { for (const definition of definitionsMerged) { @@ -164,9 +160,7 @@ class Translator { const titles = Object.keys(dictionaries); const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); - for (const definition of definitions) { - await this.buildTermFrequencies(definition, titles); - } + await this.buildTermFrequencies(definitions, titles); return {length, definitions}; } @@ -179,13 +173,9 @@ class Translator { } } - const cache = {}; - const titles = Object.keys(dictionaries); - let deinflections = await this.findTermDeinflections(text, titles, cache); const textHiragana = jpKatakanaToHiragana(text); - if (text !== textHiragana) { - deinflections.push(...await this.findTermDeinflections(textHiragana, titles, cache)); - } + const titles = Object.keys(dictionaries); + const deinflections = await this.findTermDeinflections(text, textHiragana, titles); let definitions = []; for (const deinflection of deinflections) { @@ -221,19 +211,77 @@ class Translator { return {length, definitions}; } - async findTermDeinflections(text, titles, cache) { - const definer = async term => { - if (cache.hasOwnProperty(term)) { - return cache[term]; + async findTermDeinflections(text, text2, titles) { + const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2)); + + if (deinflections.length === 0) { + return []; + } + + const uniqueDeinflectionTerms = []; + const uniqueDeinflectionArrays = []; + const uniqueDeinflectionsMap = {}; + for (const deinflection of deinflections) { + const term = deinflection.term; + let deinflectionArray; + if (uniqueDeinflectionsMap.hasOwnProperty(term)) { + deinflectionArray = uniqueDeinflectionsMap[term]; } else { - return cache[term] = await this.database.findTerms(term, titles); + deinflectionArray = []; + uniqueDeinflectionTerms.push(term); + uniqueDeinflectionArrays.push(deinflectionArray); + uniqueDeinflectionsMap[term] = deinflectionArray; } - }; + deinflectionArray.push(deinflection); + } + + const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles); + + for (const definition of definitions) { + for (const deinflection of uniqueDeinflectionArrays[definition.index]) { + if (Translator.definitionContainsAnyRule(definition, deinflection.rules)) { + deinflection.definitions.push(definition); + } + } + } + + return deinflections.filter(e => e.definitions.length > 0); + } + + static definitionContainsAnyRule(definition, rules) { + if (rules.length === 0) { + return true; + } + const definitionRules = definition.rules; + for (const rule of rules) { + if (definitionRules.includes(rule)) { + return true; + } + } + return false; + } + + getDeinflections(text) { + const deinflections = []; + + for (let i = text.length; i > 0; --i) { + const textSlice = text.slice(0, i); + deinflections.push(...this.deinflector.deinflect(textSlice)); + } + + return deinflections; + } + + getDeinflections2(text, text2) { + const deinflections = []; - let deinflections = []; for (let i = text.length; i > 0; --i) { const textSlice = text.slice(0, i); - deinflections.push(...await this.deinflector.deinflect(textSlice, definer)); + const text2Slice = text2.slice(0, i); + deinflections.push(...this.deinflector.deinflect(textSlice)); + if (textSlice !== text2Slice) { + deinflections.push(...this.deinflector.deinflect(text2Slice)); + } } return deinflections; @@ -272,24 +320,51 @@ class Translator { return definitions; } - async buildTermFrequencies(definition, titles) { - let terms = []; - if (definition.expressions) { - terms.push(...definition.expressions); - } else { - terms.push(definition); + async buildTermFrequencies(definitions, titles) { + const terms = []; + for (const definition of definitions) { + if (definition.expressions) { + terms.push(...definition.expressions); + } else { + terms.push(definition); + } + } + + if (terms.length === 0) { + return; } - for (const term of terms) { + // Create mapping of unique terms + const expressionsUnique = []; + const termsUnique = []; + const termsUniqueMap = {}; + for (let i = 0, ii = terms.length; i < ii; ++i) { + const term = terms[i]; + const expression = term.expression; term.frequencies = []; - for (const meta of await this.database.findTermMeta(term.expression, titles)) { - if (meta.mode === 'freq') { - term.frequencies.push({ - expression: meta.expression, - frequency: meta.data, - dictionary: meta.dictionary - }); - } + + if (termsUniqueMap.hasOwnProperty(expression)) { + termsUniqueMap[expression].push(term); + } else { + const termList = [term]; + expressionsUnique.push(expression); + termsUnique.push(termList); + termsUniqueMap[expression] = termList; + } + } + + const metas = await this.database.findTermMetaBulk(expressionsUnique, titles); + for (const meta of metas) { + if (meta.mode !== 'freq') { + continue; + } + + for (const term of termsUnique[meta.index]) { + term.frequencies.push({ + expression: meta.expression, + frequency: meta.data, + dictionary: meta.dictionary + }); } } } @@ -298,15 +373,13 @@ class Translator { const tags = []; for (const name of names) { const base = Translator.getNameBase(name); - const meta = await this.database.findTagForTitle(base, title); - - const tag = {name}; - for (const prop in meta || {}) { - if (prop !== 'name') { - tag[prop] = meta[prop]; - } + let meta = this.database.findTagForTitleCached(base, title); + if (typeof meta === 'undefined') { + meta = await this.database.findTagForTitle(base, title); } + const tag = Object.assign({}, meta !== null ? meta : {}, {name}); + tags.push(dictTagSanitize(tag)); } @@ -317,16 +390,18 @@ class Translator { const stats = {}; for (const name in items) { const base = Translator.getNameBase(name); - const meta = await this.database.findTagForTitle(base, title); - const group = stats[meta.category] = stats[meta.category] || []; - - const stat = {name, value: items[name]}; - for (const prop in meta || {}) { - if (prop !== 'name') { - stat[prop] = meta[prop]; + let meta = this.database.findTagForTitleCached(base, title); + if (typeof meta === 'undefined') { + meta = await this.database.findTagForTitle(base, title); + if (meta === null) { + continue; } } + const group = stats[meta.category] = stats[meta.category] || []; + + const stat = Object.assign({}, meta, {name, value: items[name]}); + group.push(dictTagSanitize(stat)); } |