diff options
| author | Alex Yatskov <FooSoft@users.noreply.github.com> | 2019-10-05 09:18:09 -0700 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-10-05 09:18:09 -0700 | 
| commit | a369f8d0a41338a0d4aa85d904f0f20c71eb7817 (patch) | |
| tree | 5d0c5f2503472070699e408f338c1c9e41080552 /ext | |
| parent | d3d162ea785457d9533d57f861afa82d2c2b0e01 (diff) | |
| parent | 44119eea2c8ad4c8eed38070ef1a3ce27fa9359e (diff) | |
Merge pull request #229 from toasted-nutbread/database-optimizations
Database optimizations
Diffstat (limited to 'ext')
| -rw-r--r-- | ext/bg/js/database.js | 180 | ||||
| -rw-r--r-- | ext/bg/js/deinflector.js | 122 | ||||
| -rw-r--r-- | ext/bg/js/translator.js | 179 | 
3 files changed, 297 insertions, 184 deletions
| diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js index 093ec102..e8214c3c 100644 --- a/ext/bg/js/database.js +++ b/ext/bg/js/database.js @@ -68,24 +68,39 @@ class Database {          const results = [];          await this.db.terms.where('expression').equals(term).or('reading').equals(term).each(row => {              if (titles.includes(row.dictionary)) { -                results.push({ -                    expression: row.expression, -                    reading: row.reading, -                    definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''), -                    termTags: dictFieldSplit(row.termTags || ''), -                    rules: dictFieldSplit(row.rules), -                    glossary: row.glossary, -                    score: row.score, -                    dictionary: row.dictionary, -                    id: row.id, -                    sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence -                }); +                results.push(Database.createTerm(row));              }          });          return results;      } +    async findTermsBulk(terms, titles) { +        const promises = []; +        const visited = {}; +        const results = []; +        const createResult = Database.createTerm; +        const filter = (row) => titles.includes(row.dictionary); + +        const db = this.db.backendDB(); +        const dbTransaction = db.transaction(['terms'], 'readonly'); +        const dbTerms = dbTransaction.objectStore('terms'); +        const dbIndex1 = dbTerms.index('expression'); +        const dbIndex2 = dbTerms.index('reading'); + +        for (let i = 0; i < terms.length; ++i) { +            const only = IDBKeyRange.only(terms[i]); +            promises.push( +                Database.getAll(dbIndex1, only, i, visited, filter, createResult, results), +                Database.getAll(dbIndex2, only, i, visited, filter, createResult, results) +            ); +        } + +        await Promise.all(promises); + +        return results; +    } +      async findTermsExact(term, reading, titles) {          if (!this.db) {              throw 'Database not initialized'; @@ -94,18 +109,7 @@ class Database {          const results = [];          await this.db.terms.where('expression').equals(term).each(row => {              if (row.reading === reading && titles.includes(row.dictionary)) { -                results.push({ -                    expression: row.expression, -                    reading: row.reading, -                    definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''), -                    termTags: dictFieldSplit(row.termTags || ''), -                    rules: dictFieldSplit(row.rules), -                    glossary: row.glossary, -                    score: row.score, -                    dictionary: row.dictionary, -                    id: row.id, -                    sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence -                }); +                results.push(Database.createTerm(row));              }          }); @@ -120,18 +124,7 @@ class Database {          const results = [];          await this.db.terms.where('sequence').equals(sequence).each(row => {              if (row.dictionary === mainDictionary) { -                results.push({ -                    expression: row.expression, -                    reading: row.reading, -                    definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''), -                    termTags: dictFieldSplit(row.termTags || ''), -                    rules: dictFieldSplit(row.rules), -                    glossary: row.glossary, -                    score: row.score, -                    dictionary: row.dictionary, -                    id: row.id, -                    sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence -                }); +                results.push(Database.createTerm(row));              }          }); @@ -157,6 +150,28 @@ class Database {          return results;      } +    async findTermMetaBulk(terms, titles) { +        const promises = []; +        const visited = {}; +        const results = []; +        const createResult = Database.createTermMeta; +        const filter = (row) => titles.includes(row.dictionary); + +        const db = this.db.backendDB(); +        const dbTransaction = db.transaction(['termMeta'], 'readonly'); +        const dbTerms = dbTransaction.objectStore('termMeta'); +        const dbIndex = dbTerms.index('expression'); + +        for (let i = 0; i < terms.length; ++i) { +            const only = IDBKeyRange.only(terms[i]); +            promises.push(Database.getAll(dbIndex, only, i, visited, filter, createResult, results)); +        } + +        await Promise.all(promises); + +        return results; +    } +      async findKanji(kanji, titles) {          if (!this.db) {              throw 'Database not initialized'; @@ -199,23 +214,30 @@ class Database {          return results;      } +    findTagForTitleCached(name, title) { +        if (this.tagCache.hasOwnProperty(title)) { +            const cache = this.tagCache[title]; +            if (cache.hasOwnProperty(name)) { +                return cache[name]; +            } +        } +    } +      async findTagForTitle(name, title) {          if (!this.db) {              throw 'Database not initialized';          } -        this.tagCache[title] = this.tagCache[title] || {}; +        const cache = (this.tagCache.hasOwnProperty(title) ? this.tagCache[title] : (this.tagCache[title] = {})); -        let result = this.tagCache[title][name]; -        if (!result) { -            await this.db.tagMeta.where('name').equals(name).each(row => { -                if (title === row.dictionary) { -                    result = row; -                } -            }); +        let result = null; +        await this.db.tagMeta.where('name').equals(name).each(row => { +            if (title === row.dictionary) { +                result = row; +            } +        }); -            this.tagCache[title][name] = result; -        } +        cache[name] = result;          return result;      } @@ -489,4 +511,70 @@ class Database {          return summary;      } + +    static createTerm(row, index) { +        return { +            index, +            expression: row.expression, +            reading: row.reading, +            definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''), +            termTags: dictFieldSplit(row.termTags || ''), +            rules: dictFieldSplit(row.rules), +            glossary: row.glossary, +            score: row.score, +            dictionary: row.dictionary, +            id: row.id, +            sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence +        }; +    } + +    static createTermMeta(row, index) { +        return { +            index, +            mode: row.mode, +            data: row.data, +            dictionary: row.dictionary +        }; +    } + +    static getAll(dbIndex, query, index, visited, filter, createResult, results) { +        const fn = typeof dbIndex.getAll === 'function' ? Database.getAllFast : Database.getAllUsingCursor; +        return fn(dbIndex, query, index, visited, filter, createResult, results); +    } + +    static getAllFast(dbIndex, query, index, visited, filter, createResult, results) { +        return new Promise((resolve, reject) => { +            const request = dbIndex.getAll(query); +            request.onerror = (e) => reject(e); +            request.onsuccess = (e) => { +                for (const row of e.target.result) { +                    if (filter(row, index) && !visited.hasOwnProperty(row.id)) { +                        visited[row.id] = true; +                        results.push(createResult(row, index)); +                    } +                } +                resolve(); +            }; +        }); +    } + +    static getAllUsingCursor(dbIndex, query, index, visited, filter, createResult, results) { +        return new Promise((resolve, reject) => { +            const request = dbIndex.openCursor(query, 'next'); +            request.onerror = (e) => reject(e); +            request.onsuccess = (e) => { +                const cursor = e.target.result; +                if (cursor) { +                    const row = cursor.value; +                    if (filter(row, index) && !visited.hasOwnProperty(row.id)) { +                        visited[row.id] = true; +                        results.push(createResult(row, index)); +                    } +                    cursor.continue(); +                } else { +                    resolve(); +                } +            }; +        }); +    }  } diff --git a/ext/bg/js/deinflector.js b/ext/bg/js/deinflector.js index 0abde99d..ad77895c 100644 --- a/ext/bg/js/deinflector.js +++ b/ext/bg/js/deinflector.js @@ -17,103 +17,53 @@   */ -class Deinflection { -    constructor(term, {rules=[], definitions=[], reason=''} = {}) { -        this.term = term; -        this.rules = rules; -        this.definitions = definitions; -        this.reason = reason; -        this.children = []; +class Deinflector { +    constructor(reasons) { +        this.reasons = reasons;      } -    async deinflect(definer, reasons) { -        for (const reason in reasons) { -            for (const variant of reasons[reason]) { -                let accept = this.rules.length === 0; -                if (!accept) { -                    for (const rule of this.rules) { -                        if (variant.rulesIn.includes(rule)) { -                            accept = true; -                            break; +    deinflect(source) { +        const results = [{ +            source, +            term: source, +            rules: [], +            definitions: [], +            reasons: [] +        }]; +        for (let i = 0; i < results.length; ++i) { +            const entry = results[i]; + +            for (const reason in this.reasons) { +                for (const variant of this.reasons[reason]) { +                    let accept = entry.rules.length === 0; +                    if (!accept) { +                        for (const rule of entry.rules) { +                            if (variant.rulesIn.includes(rule)) { +                                accept = true; +                                break; +                            }                          }                      } -                } -                if (!accept || !this.term.endsWith(variant.kanaIn)) { -                    continue; -                } - -                const term = this.term.slice(0, -variant.kanaIn.length) + variant.kanaOut; -                if (term.length === 0) { -                    continue; -                } - -                const child = new Deinflection(term, {reason, rules: variant.rulesOut}); -                if (await child.deinflect(definer, reasons)) { -                    this.children.push(child); -                } -            } -        } - -        const definitions = await definer(this.term); -        if (this.rules.length === 0) { -            this.definitions = definitions; -        } else { -            for (const rule of this.rules) { -                for (const definition of definitions) { -                    if (definition.rules.includes(rule)) { -                        this.definitions.push(definition); +                    if (!accept || !entry.term.endsWith(variant.kanaIn)) { +                        continue;                      } -                } -            } -        } - -        if (this.definitions.length > 0 && this.children.length > 0) { -            const child = new Deinflection(this.term, {rules: this.rules, definitions: this.definitions}); -            this.children.push(child); -        } -        return this.definitions.length > 0 || this.children.length > 0; -    } - -    gather() { -        if (this.children.length === 0) { -            return [{ -                source: this.term, -                rules: this.rules, -                definitions: this.definitions, -                reasons: this.reason.length > 0 ? [this.reason] : [] -            }]; -        } +                    const term = entry.term.slice(0, -variant.kanaIn.length) + variant.kanaOut; +                    if (term.length === 0) { +                        continue; +                    } -        const results = []; -        for (const child of this.children) { -            for (const result of child.gather()) { -                if (this.reason.length > 0) { -                    result.reasons.push(this.reason); +                    results.push({ +                        source, +                        term, +                        rules: variant.rulesOut, +                        definitions: [], +                        reasons: [reason, ...entry.reasons] +                    });                  } - -                result.source = this.term; -                results.push(result);              }          } -          return results;      }  } - - -class Deinflector { -    constructor(reasons) { -        this.reasons = reasons; -    } - -    async deinflect(term, definer) { -        const node = new Deinflection(term); -        if (await node.deinflect(definer, this.reasons)) { -            return node.gather(); -        } else { -            return []; -        } -    } -} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 7b952622..65d746ea 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -41,9 +41,7 @@ class Translator {          const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);          const definitionsGrouped = dictTermsGroup(definitions, dictionaries); -        for (const definition of definitionsGrouped) { -            await this.buildTermFrequencies(definition, titles); -        } +        await this.buildTermFrequencies(definitionsGrouped, titles);          if (options.general.compactTags) {              for (const definition of definitionsGrouped) { @@ -147,9 +145,7 @@ class Translator {              definitionsMerged.push(groupedDefinition);          } -        for (const definition of definitionsMerged) { -            await this.buildTermFrequencies(definition, titles); -        } +        await this.buildTermFrequencies(definitionsMerged, titles);          if (options.general.compactTags) {              for (const definition of definitionsMerged) { @@ -164,9 +160,7 @@ class Translator {          const titles = Object.keys(dictionaries);          const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); -        for (const definition of definitions) { -            await this.buildTermFrequencies(definition, titles); -        } +        await this.buildTermFrequencies(definitions, titles);          return {length, definitions};      } @@ -179,13 +173,9 @@ class Translator {              }          } -        const cache = {}; -        const titles = Object.keys(dictionaries); -        let deinflections = await this.findTermDeinflections(text, titles, cache);          const textHiragana = jpKatakanaToHiragana(text); -        if (text !== textHiragana) { -            deinflections.push(...await this.findTermDeinflections(textHiragana, titles, cache)); -        } +        const titles = Object.keys(dictionaries); +        const deinflections = await this.findTermDeinflections(text, textHiragana, titles);          let definitions = [];          for (const deinflection of deinflections) { @@ -221,19 +211,77 @@ class Translator {          return {length, definitions};      } -    async findTermDeinflections(text, titles, cache) { -        const definer = async term => { -            if (cache.hasOwnProperty(term)) { -                return cache[term]; +    async findTermDeinflections(text, text2, titles) { +        const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2)); + +        if (deinflections.length === 0) { +            return []; +        } + +        const uniqueDeinflectionTerms = []; +        const uniqueDeinflectionArrays = []; +        const uniqueDeinflectionsMap = {}; +        for (const deinflection of deinflections) { +            const term = deinflection.term; +            let deinflectionArray; +            if (uniqueDeinflectionsMap.hasOwnProperty(term)) { +                deinflectionArray = uniqueDeinflectionsMap[term];              } else { -                return cache[term] = await this.database.findTerms(term, titles); +                deinflectionArray = []; +                uniqueDeinflectionTerms.push(term); +                uniqueDeinflectionArrays.push(deinflectionArray); +                uniqueDeinflectionsMap[term] = deinflectionArray;              } -        }; +            deinflectionArray.push(deinflection); +        } + +        const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles); + +        for (const definition of definitions) { +            for (const deinflection of uniqueDeinflectionArrays[definition.index]) { +                if (Translator.definitionContainsAnyRule(definition, deinflection.rules)) { +                    deinflection.definitions.push(definition); +                } +            } +        } + +        return deinflections.filter(e => e.definitions.length > 0); +    } + +    static definitionContainsAnyRule(definition, rules) { +        if (rules.length === 0) { +            return true; +        } +        const definitionRules = definition.rules; +        for (const rule of rules) { +            if (definitionRules.includes(rule)) { +                return true; +            } +        } +        return false; +    } + +    getDeinflections(text) { +        const deinflections = []; + +        for (let i = text.length; i > 0; --i) { +            const textSlice = text.slice(0, i); +            deinflections.push(...this.deinflector.deinflect(textSlice)); +        } + +        return deinflections; +    } + +    getDeinflections2(text, text2) { +        const deinflections = []; -        let deinflections = [];          for (let i = text.length; i > 0; --i) {              const textSlice = text.slice(0, i); -            deinflections.push(...await this.deinflector.deinflect(textSlice, definer)); +            const text2Slice = text2.slice(0, i); +            deinflections.push(...this.deinflector.deinflect(textSlice)); +            if (textSlice !== text2Slice) { +                deinflections.push(...this.deinflector.deinflect(text2Slice)); +            }          }          return deinflections; @@ -272,24 +320,51 @@ class Translator {          return definitions;      } -    async buildTermFrequencies(definition, titles) { -        let terms = []; -        if (definition.expressions) { -            terms.push(...definition.expressions); -        } else { -            terms.push(definition); +    async buildTermFrequencies(definitions, titles) { +        const terms = []; +        for (const definition of definitions) { +            if (definition.expressions) { +                terms.push(...definition.expressions); +            } else { +                terms.push(definition); +            } +        } + +        if (terms.length === 0) { +            return;          } -        for (const term of terms) { +        // Create mapping of unique terms +        const expressionsUnique = []; +        const termsUnique = []; +        const termsUniqueMap = {}; +        for (let i = 0, ii = terms.length; i < ii; ++i) { +            const term = terms[i]; +            const expression = term.expression;              term.frequencies = []; -            for (const meta of await this.database.findTermMeta(term.expression, titles)) { -                if (meta.mode === 'freq') { -                    term.frequencies.push({ -                        expression: meta.expression, -                        frequency: meta.data, -                        dictionary: meta.dictionary -                    }); -                } + +            if (termsUniqueMap.hasOwnProperty(expression)) { +                termsUniqueMap[expression].push(term); +            } else { +                const termList = [term]; +                expressionsUnique.push(expression); +                termsUnique.push(termList); +                termsUniqueMap[expression] = termList; +            } +        } + +        const metas = await this.database.findTermMetaBulk(expressionsUnique, titles); +        for (const meta of metas) { +            if (meta.mode !== 'freq') { +                continue; +            } + +            for (const term of termsUnique[meta.index]) { +                term.frequencies.push({ +                    expression: meta.expression, +                    frequency: meta.data, +                    dictionary: meta.dictionary +                });              }          }      } @@ -298,15 +373,13 @@ class Translator {          const tags = [];          for (const name of names) {              const base = Translator.getNameBase(name); -            const meta = await this.database.findTagForTitle(base, title); - -            const tag = {name}; -            for (const prop in meta || {}) { -                if (prop !== 'name') { -                    tag[prop] = meta[prop]; -                } +            let meta = this.database.findTagForTitleCached(base, title); +            if (typeof meta === 'undefined') { +                meta = await this.database.findTagForTitle(base, title);              } +            const tag = Object.assign({}, meta !== null ? meta : {}, {name}); +              tags.push(dictTagSanitize(tag));          } @@ -317,16 +390,18 @@ class Translator {          const stats = {};          for (const name in items) {              const base = Translator.getNameBase(name); -            const meta = await this.database.findTagForTitle(base, title); -            const group = stats[meta.category] = stats[meta.category] || []; - -            const stat = {name, value: items[name]}; -            for (const prop in meta || {}) { -                if (prop !== 'name') { -                    stat[prop] = meta[prop]; +            let meta = this.database.findTagForTitleCached(base, title); +            if (typeof meta === 'undefined') { +                meta = await this.database.findTagForTitle(base, title); +                if (meta === null) { +                    continue;                  }              } +            const group = stats[meta.category] = stats[meta.category] || []; + +            const stat = Object.assign({}, meta, {name, value: items[name]}); +              group.push(dictTagSanitize(stat));          } |