diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2019-08-30 21:06:21 -0400 | 
|---|---|---|
| committer | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2019-09-28 22:05:10 -0400 | 
| commit | 1286b5115f101a822e0083d0c642b38a28b4fea7 (patch) | |
| tree | 60da2c0f125e28ad9eca8f1c930bead9c660f65e | |
| parent | 664a318d7f5062bd397a8f40241c476dd3107f47 (diff) | |
Use bulk database searches
| -rw-r--r-- | ext/bg/js/database.js | 101 | ||||
| -rw-r--r-- | ext/bg/js/deinflector.js | 122 | ||||
| -rw-r--r-- | ext/bg/js/translator.js | 133 | 
3 files changed, 229 insertions, 127 deletions
| diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js index 12a2577f..e8214c3c 100644 --- a/ext/bg/js/database.js +++ b/ext/bg/js/database.js @@ -75,6 +75,32 @@ class Database {          return results;      } +    async findTermsBulk(terms, titles) { +        const promises = []; +        const visited = {}; +        const results = []; +        const createResult = Database.createTerm; +        const filter = (row) => titles.includes(row.dictionary); + +        const db = this.db.backendDB(); +        const dbTransaction = db.transaction(['terms'], 'readonly'); +        const dbTerms = dbTransaction.objectStore('terms'); +        const dbIndex1 = dbTerms.index('expression'); +        const dbIndex2 = dbTerms.index('reading'); + +        for (let i = 0; i < terms.length; ++i) { +            const only = IDBKeyRange.only(terms[i]); +            promises.push( +                Database.getAll(dbIndex1, only, i, visited, filter, createResult, results), +                Database.getAll(dbIndex2, only, i, visited, filter, createResult, results) +            ); +        } + +        await Promise.all(promises); + +        return results; +    } +      async findTermsExact(term, reading, titles) {          if (!this.db) {              throw 'Database not initialized'; @@ -124,6 +150,28 @@ class Database {          return results;      } +    async findTermMetaBulk(terms, titles) { +        const promises = []; +        const visited = {}; +        const results = []; +        const createResult = Database.createTermMeta; +        const filter = (row) => titles.includes(row.dictionary); + +        const db = this.db.backendDB(); +        const dbTransaction = db.transaction(['termMeta'], 'readonly'); +        const dbTerms = dbTransaction.objectStore('termMeta'); +        const dbIndex = dbTerms.index('expression'); + +        for (let i = 0; i < terms.length; ++i) { +            const only = IDBKeyRange.only(terms[i]); +            promises.push(Database.getAll(dbIndex, only, i, visited, filter, createResult, results)); +        } + +        await Promise.all(promises); + +        return results; +    } +      async findKanji(kanji, titles) {          if (!this.db) {              throw 'Database not initialized'; @@ -464,8 +512,9 @@ class Database {          return summary;      } -    static createTerm(row) { +    static createTerm(row, index) {          return { +            index,              expression: row.expression,              reading: row.reading,              definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''), @@ -478,4 +527,54 @@ class Database {              sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence          };      } + +    static createTermMeta(row, index) { +        return { +            index, +            mode: row.mode, +            data: row.data, +            dictionary: row.dictionary +        }; +    } + +    static getAll(dbIndex, query, index, visited, filter, createResult, results) { +        const fn = typeof dbIndex.getAll === 'function' ? Database.getAllFast : Database.getAllUsingCursor; +        return fn(dbIndex, query, index, visited, filter, createResult, results); +    } + +    static getAllFast(dbIndex, query, index, visited, filter, createResult, results) { +        return new Promise((resolve, reject) => { +            const request = dbIndex.getAll(query); +            request.onerror = (e) => reject(e); +            request.onsuccess = (e) => { +                for (const row of e.target.result) { +                    if (filter(row, index) && !visited.hasOwnProperty(row.id)) { +                        visited[row.id] = true; +                        results.push(createResult(row, index)); +                    } +                } +                resolve(); +            }; +        }); +    } + +    static getAllUsingCursor(dbIndex, query, index, visited, filter, createResult, results) { +        return new Promise((resolve, reject) => { +            const request = dbIndex.openCursor(query, 'next'); +            request.onerror = (e) => reject(e); +            request.onsuccess = (e) => { +                const cursor = e.target.result; +                if (cursor) { +                    const row = cursor.value; +                    if (filter(row, index) && !visited.hasOwnProperty(row.id)) { +                        visited[row.id] = true; +                        results.push(createResult(row, index)); +                    } +                    cursor.continue(); +                } else { +                    resolve(); +                } +            }; +        }); +    }  } diff --git a/ext/bg/js/deinflector.js b/ext/bg/js/deinflector.js index 0abde99d..ad77895c 100644 --- a/ext/bg/js/deinflector.js +++ b/ext/bg/js/deinflector.js @@ -17,103 +17,53 @@   */ -class Deinflection { -    constructor(term, {rules=[], definitions=[], reason=''} = {}) { -        this.term = term; -        this.rules = rules; -        this.definitions = definitions; -        this.reason = reason; -        this.children = []; +class Deinflector { +    constructor(reasons) { +        this.reasons = reasons;      } -    async deinflect(definer, reasons) { -        for (const reason in reasons) { -            for (const variant of reasons[reason]) { -                let accept = this.rules.length === 0; -                if (!accept) { -                    for (const rule of this.rules) { -                        if (variant.rulesIn.includes(rule)) { -                            accept = true; -                            break; +    deinflect(source) { +        const results = [{ +            source, +            term: source, +            rules: [], +            definitions: [], +            reasons: [] +        }]; +        for (let i = 0; i < results.length; ++i) { +            const entry = results[i]; + +            for (const reason in this.reasons) { +                for (const variant of this.reasons[reason]) { +                    let accept = entry.rules.length === 0; +                    if (!accept) { +                        for (const rule of entry.rules) { +                            if (variant.rulesIn.includes(rule)) { +                                accept = true; +                                break; +                            }                          }                      } -                } -                if (!accept || !this.term.endsWith(variant.kanaIn)) { -                    continue; -                } - -                const term = this.term.slice(0, -variant.kanaIn.length) + variant.kanaOut; -                if (term.length === 0) { -                    continue; -                } - -                const child = new Deinflection(term, {reason, rules: variant.rulesOut}); -                if (await child.deinflect(definer, reasons)) { -                    this.children.push(child); -                } -            } -        } - -        const definitions = await definer(this.term); -        if (this.rules.length === 0) { -            this.definitions = definitions; -        } else { -            for (const rule of this.rules) { -                for (const definition of definitions) { -                    if (definition.rules.includes(rule)) { -                        this.definitions.push(definition); +                    if (!accept || !entry.term.endsWith(variant.kanaIn)) { +                        continue;                      } -                } -            } -        } - -        if (this.definitions.length > 0 && this.children.length > 0) { -            const child = new Deinflection(this.term, {rules: this.rules, definitions: this.definitions}); -            this.children.push(child); -        } -        return this.definitions.length > 0 || this.children.length > 0; -    } - -    gather() { -        if (this.children.length === 0) { -            return [{ -                source: this.term, -                rules: this.rules, -                definitions: this.definitions, -                reasons: this.reason.length > 0 ? [this.reason] : [] -            }]; -        } +                    const term = entry.term.slice(0, -variant.kanaIn.length) + variant.kanaOut; +                    if (term.length === 0) { +                        continue; +                    } -        const results = []; -        for (const child of this.children) { -            for (const result of child.gather()) { -                if (this.reason.length > 0) { -                    result.reasons.push(this.reason); +                    results.push({ +                        source, +                        term, +                        rules: variant.rulesOut, +                        definitions: [], +                        reasons: [reason, ...entry.reasons] +                    });                  } - -                result.source = this.term; -                results.push(result);              }          } -          return results;      }  } - - -class Deinflector { -    constructor(reasons) { -        this.reasons = reasons; -    } - -    async deinflect(term, definer) { -        const node = new Deinflection(term); -        if (await node.deinflect(definer, this.reasons)) { -            return node.gather(); -        } else { -            return []; -        } -    } -} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 21f40fe9..4eb4b03a 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -41,9 +41,7 @@ class Translator {          const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);          const definitionsGrouped = dictTermsGroup(definitions, dictionaries); -        for (const definition of definitionsGrouped) { -            await this.buildTermFrequencies(definition, titles); -        } +        await this.buildTermFrequencies(definitionsGrouped, titles);          if (options.general.compactTags) {              for (const definition of definitionsGrouped) { @@ -147,9 +145,7 @@ class Translator {              definitionsMerged.push(groupedDefinition);          } -        for (const definition of definitionsMerged) { -            await this.buildTermFrequencies(definition, titles); -        } +        await this.buildTermFrequencies(definitionsMerged, titles);          if (options.general.compactTags) {              for (const definition of definitionsMerged) { @@ -164,9 +160,7 @@ class Translator {          const titles = Object.keys(dictionaries);          const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); -        for (const definition of definitions) { -            await this.buildTermFrequencies(definition, titles); -        } +        await this.buildTermFrequencies(definitions, titles);          return {length, definitions};      } @@ -179,13 +173,9 @@ class Translator {              }          } -        const cache = {}; -        const titles = Object.keys(dictionaries); -        let deinflections = await this.findTermDeinflections(text, titles, cache);          const textHiragana = jpKatakanaToHiragana(text); -        if (text !== textHiragana) { -            deinflections.push(...await this.findTermDeinflections(textHiragana, titles, cache)); -        } +        const titles = Object.keys(dictionaries); +        const deinflections = await this.findTermDeinflections(text, textHiragana, titles);          let definitions = [];          for (const deinflection of deinflections) { @@ -221,24 +211,60 @@ class Translator {          return {length, definitions};      } -    async findTermDeinflections(text, titles, cache) { -        const definer = async term => { -            if (cache.hasOwnProperty(term)) { -                return cache[term]; -            } else { -                return cache[term] = await this.database.findTerms(term, titles); -            } -        }; +    async findTermDeinflections(text, text2, titles) { +        const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2)); + +        if (deinflections.length === 0) { +            return []; +        } + +        const definitions = await this.database.findTermsBulk(deinflections.map(e => e.term), titles); + +        for (const d of definitions) { +            deinflections[d.index].definitions.push(d); +        } + +        return deinflections.filter(e => e.definitions.length > 0); +    } + +    getDeinflections(text) { +        const deinflections = []; +        const deinflectionsKeys = {}; -        let deinflections = [];          for (let i = text.length; i > 0; --i) {              const textSlice = text.slice(0, i); -            deinflections.push(...await this.deinflector.deinflect(textSlice, definer)); +            Translator.addUniqueDeinflections(this.deinflector.deinflect(textSlice), deinflections, deinflectionsKeys);          }          return deinflections;      } +    getDeinflections2(text, text2) { +        const deinflections = []; +        const deinflectionsKeys = {}; + +        for (let i = text.length; i > 0; --i) { +            const textSlice = text.slice(0, i); +            const text2Slice = text2.slice(0, i); +            Translator.addUniqueDeinflections(this.deinflector.deinflect(textSlice), deinflections, deinflectionsKeys); +            if (textSlice !== text2Slice) { +                Translator.addUniqueDeinflections(this.deinflector.deinflect(text2Slice), deinflections, deinflectionsKeys); +            } +        } + +        return deinflections; +    } + +    static addUniqueDeinflections(newValues, deinflections, deinflectionsKeys) { +        for (const value of newValues) { +            const key = value.term; +            if (!deinflectionsKeys.hasOwnProperty(key)) { +                deinflections.push(value); +                deinflectionsKeys[key] = true; +            } +        } +    } +      async findKanji(text, dictionaries) {          let definitions = [];          const processed = {}; @@ -272,24 +298,51 @@ class Translator {          return definitions;      } -    async buildTermFrequencies(definition, titles) { -        let terms = []; -        if (definition.expressions) { -            terms.push(...definition.expressions); -        } else { -            terms.push(definition); +    async buildTermFrequencies(definitions, titles) { +        const terms = []; +        for (const definition of definitions) { +            if (definition.expressions) { +                terms.push(...definition.expressions); +            } else { +                terms.push(definition); +            } +        } + +        if (terms.length === 0) { +            return;          } -        for (const term of terms) { +        // Create mapping of unique terms +        const expressionsUnique = []; +        const termsUnique = []; +        const termsUniqueMap = {}; +        for (let i = 0, ii = terms.length; i < ii; ++i) { +            const term = terms[i]; +            const expression = term.expression;              term.frequencies = []; -            for (const meta of await this.database.findTermMeta(term.expression, titles)) { -                if (meta.mode === 'freq') { -                    term.frequencies.push({ -                        expression: meta.expression, -                        frequency: meta.data, -                        dictionary: meta.dictionary -                    }); -                } + +            if (termsUniqueMap.hasOwnProperty(expression)) { +                termsUniqueMap[expression].push(term); +            } else { +                const termList = [term]; +                expressionsUnique.push(expression); +                termsUnique.push(termList); +                termsUniqueMap[expression] = termList; +            } +        } + +        const metas = await this.database.findTermMetaBulk(expressionsUnique, titles); +        for (const meta of metas) { +            if (meta.mode !== 'freq') { +                continue; +            } + +            for (const term of termsUnique[meta.index]) { +                term.frequencies.push({ +                    expression: meta.expression, +                    frequency: meta.data, +                    dictionary: meta.dictionary +                });              }          }      } |