aboutsummaryrefslogtreecommitdiff
path: root/ext/bg/js/translator.js
diff options
context:
space:
mode:
authortoasted-nutbread <toasted-nutbread@users.noreply.github.com>2019-08-30 21:06:21 -0400
committertoasted-nutbread <toasted-nutbread@users.noreply.github.com>2019-09-28 22:05:10 -0400
commit1286b5115f101a822e0083d0c642b38a28b4fea7 (patch)
tree60da2c0f125e28ad9eca8f1c930bead9c660f65e /ext/bg/js/translator.js
parent664a318d7f5062bd397a8f40241c476dd3107f47 (diff)
Use bulk database searches
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r--ext/bg/js/translator.js133
1 files changed, 93 insertions, 40 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 21f40fe9..4eb4b03a 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -41,9 +41,7 @@ class Translator {
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
- for (const definition of definitionsGrouped) {
- await this.buildTermFrequencies(definition, titles);
- }
+ await this.buildTermFrequencies(definitionsGrouped, titles);
if (options.general.compactTags) {
for (const definition of definitionsGrouped) {
@@ -147,9 +145,7 @@ class Translator {
definitionsMerged.push(groupedDefinition);
}
- for (const definition of definitionsMerged) {
- await this.buildTermFrequencies(definition, titles);
- }
+ await this.buildTermFrequencies(definitionsMerged, titles);
if (options.general.compactTags) {
for (const definition of definitionsMerged) {
@@ -164,9 +160,7 @@ class Translator {
const titles = Object.keys(dictionaries);
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
- for (const definition of definitions) {
- await this.buildTermFrequencies(definition, titles);
- }
+ await this.buildTermFrequencies(definitions, titles);
return {length, definitions};
}
@@ -179,13 +173,9 @@ class Translator {
}
}
- const cache = {};
- const titles = Object.keys(dictionaries);
- let deinflections = await this.findTermDeinflections(text, titles, cache);
const textHiragana = jpKatakanaToHiragana(text);
- if (text !== textHiragana) {
- deinflections.push(...await this.findTermDeinflections(textHiragana, titles, cache));
- }
+ const titles = Object.keys(dictionaries);
+ const deinflections = await this.findTermDeinflections(text, textHiragana, titles);
let definitions = [];
for (const deinflection of deinflections) {
@@ -221,24 +211,60 @@ class Translator {
return {length, definitions};
}
- async findTermDeinflections(text, titles, cache) {
- const definer = async term => {
- if (cache.hasOwnProperty(term)) {
- return cache[term];
- } else {
- return cache[term] = await this.database.findTerms(term, titles);
- }
- };
+ async findTermDeinflections(text, text2, titles) {
+ const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2));
+
+ if (deinflections.length === 0) {
+ return [];
+ }
+
+ const definitions = await this.database.findTermsBulk(deinflections.map(e => e.term), titles);
+
+ for (const d of definitions) {
+ deinflections[d.index].definitions.push(d);
+ }
+
+ return deinflections.filter(e => e.definitions.length > 0);
+ }
+
+ getDeinflections(text) {
+ const deinflections = [];
+ const deinflectionsKeys = {};
- let deinflections = [];
for (let i = text.length; i > 0; --i) {
const textSlice = text.slice(0, i);
- deinflections.push(...await this.deinflector.deinflect(textSlice, definer));
+ Translator.addUniqueDeinflections(this.deinflector.deinflect(textSlice), deinflections, deinflectionsKeys);
}
return deinflections;
}
+ getDeinflections2(text, text2) {
+ const deinflections = [];
+ const deinflectionsKeys = {};
+
+ for (let i = text.length; i > 0; --i) {
+ const textSlice = text.slice(0, i);
+ const text2Slice = text2.slice(0, i);
+ Translator.addUniqueDeinflections(this.deinflector.deinflect(textSlice), deinflections, deinflectionsKeys);
+ if (textSlice !== text2Slice) {
+ Translator.addUniqueDeinflections(this.deinflector.deinflect(text2Slice), deinflections, deinflectionsKeys);
+ }
+ }
+
+ return deinflections;
+ }
+
+ static addUniqueDeinflections(newValues, deinflections, deinflectionsKeys) {
+ for (const value of newValues) {
+ const key = value.term;
+ if (!deinflectionsKeys.hasOwnProperty(key)) {
+ deinflections.push(value);
+ deinflectionsKeys[key] = true;
+ }
+ }
+ }
+
async findKanji(text, dictionaries) {
let definitions = [];
const processed = {};
@@ -272,24 +298,51 @@ class Translator {
return definitions;
}
- async buildTermFrequencies(definition, titles) {
- let terms = [];
- if (definition.expressions) {
- terms.push(...definition.expressions);
- } else {
- terms.push(definition);
+ async buildTermFrequencies(definitions, titles) {
+ const terms = [];
+ for (const definition of definitions) {
+ if (definition.expressions) {
+ terms.push(...definition.expressions);
+ } else {
+ terms.push(definition);
+ }
+ }
+
+ if (terms.length === 0) {
+ return;
}
- for (const term of terms) {
+ // Create mapping of unique terms
+ const expressionsUnique = [];
+ const termsUnique = [];
+ const termsUniqueMap = {};
+ for (let i = 0, ii = terms.length; i < ii; ++i) {
+ const term = terms[i];
+ const expression = term.expression;
term.frequencies = [];
- for (const meta of await this.database.findTermMeta(term.expression, titles)) {
- if (meta.mode === 'freq') {
- term.frequencies.push({
- expression: meta.expression,
- frequency: meta.data,
- dictionary: meta.dictionary
- });
- }
+
+ if (termsUniqueMap.hasOwnProperty(expression)) {
+ termsUniqueMap[expression].push(term);
+ } else {
+ const termList = [term];
+ expressionsUnique.push(expression);
+ termsUnique.push(termList);
+ termsUniqueMap[expression] = termList;
+ }
+ }
+
+ const metas = await this.database.findTermMetaBulk(expressionsUnique, titles);
+ for (const meta of metas) {
+ if (meta.mode !== 'freq') {
+ continue;
+ }
+
+ for (const term of termsUnique[meta.index]) {
+ term.frequencies.push({
+ expression: meta.expression,
+ frequency: meta.data,
+ dictionary: meta.dictionary
+ });
}
}
}