diff options
author | Alex Yatskov <alex@foosoft.net> | 2019-10-05 10:14:07 -0700 |
---|---|---|
committer | Alex Yatskov <alex@foosoft.net> | 2019-10-05 10:14:07 -0700 |
commit | 7f5f3951f057b84864f7f37825804f9049ddab10 (patch) | |
tree | 1a724bbd4d8cf2b053f3e2b99b46ba41008508b8 /ext/bg/js/translator.js | |
parent | f4b6527ed6ed1f0f4f5a63b94766b20f3b90e6ec (diff) | |
parent | 46ab36180f486a19332c538401b4db12ffe1bda1 (diff) |
Merge branch 'master' into testing
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r-- | ext/bg/js/translator.js | 179 |
1 files changed, 127 insertions, 52 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 7b952622..65d746ea 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -41,9 +41,7 @@ class Translator { const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); const definitionsGrouped = dictTermsGroup(definitions, dictionaries); - for (const definition of definitionsGrouped) { - await this.buildTermFrequencies(definition, titles); - } + await this.buildTermFrequencies(definitionsGrouped, titles); if (options.general.compactTags) { for (const definition of definitionsGrouped) { @@ -147,9 +145,7 @@ class Translator { definitionsMerged.push(groupedDefinition); } - for (const definition of definitionsMerged) { - await this.buildTermFrequencies(definition, titles); - } + await this.buildTermFrequencies(definitionsMerged, titles); if (options.general.compactTags) { for (const definition of definitionsMerged) { @@ -164,9 +160,7 @@ class Translator { const titles = Object.keys(dictionaries); const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); - for (const definition of definitions) { - await this.buildTermFrequencies(definition, titles); - } + await this.buildTermFrequencies(definitions, titles); return {length, definitions}; } @@ -179,13 +173,9 @@ class Translator { } } - const cache = {}; - const titles = Object.keys(dictionaries); - let deinflections = await this.findTermDeinflections(text, titles, cache); const textHiragana = jpKatakanaToHiragana(text); - if (text !== textHiragana) { - deinflections.push(...await this.findTermDeinflections(textHiragana, titles, cache)); - } + const titles = Object.keys(dictionaries); + const deinflections = await this.findTermDeinflections(text, textHiragana, titles); let definitions = []; for (const deinflection of deinflections) { @@ -221,19 +211,77 @@ class Translator { return {length, definitions}; } - async findTermDeinflections(text, titles, cache) { - const definer = async term => { - if (cache.hasOwnProperty(term)) { - return cache[term]; + async findTermDeinflections(text, text2, titles) { + const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2)); + + if (deinflections.length === 0) { + return []; + } + + const uniqueDeinflectionTerms = []; + const uniqueDeinflectionArrays = []; + const uniqueDeinflectionsMap = {}; + for (const deinflection of deinflections) { + const term = deinflection.term; + let deinflectionArray; + if (uniqueDeinflectionsMap.hasOwnProperty(term)) { + deinflectionArray = uniqueDeinflectionsMap[term]; } else { - return cache[term] = await this.database.findTerms(term, titles); + deinflectionArray = []; + uniqueDeinflectionTerms.push(term); + uniqueDeinflectionArrays.push(deinflectionArray); + uniqueDeinflectionsMap[term] = deinflectionArray; } - }; + deinflectionArray.push(deinflection); + } + + const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles); + + for (const definition of definitions) { + for (const deinflection of uniqueDeinflectionArrays[definition.index]) { + if (Translator.definitionContainsAnyRule(definition, deinflection.rules)) { + deinflection.definitions.push(definition); + } + } + } + + return deinflections.filter(e => e.definitions.length > 0); + } + + static definitionContainsAnyRule(definition, rules) { + if (rules.length === 0) { + return true; + } + const definitionRules = definition.rules; + for (const rule of rules) { + if (definitionRules.includes(rule)) { + return true; + } + } + return false; + } + + getDeinflections(text) { + const deinflections = []; + + for (let i = text.length; i > 0; --i) { + const textSlice = text.slice(0, i); + deinflections.push(...this.deinflector.deinflect(textSlice)); + } + + return deinflections; + } + + getDeinflections2(text, text2) { + const deinflections = []; - let deinflections = []; for (let i = text.length; i > 0; --i) { const textSlice = text.slice(0, i); - deinflections.push(...await this.deinflector.deinflect(textSlice, definer)); + const text2Slice = text2.slice(0, i); + deinflections.push(...this.deinflector.deinflect(textSlice)); + if (textSlice !== text2Slice) { + deinflections.push(...this.deinflector.deinflect(text2Slice)); + } } return deinflections; @@ -272,24 +320,51 @@ class Translator { return definitions; } - async buildTermFrequencies(definition, titles) { - let terms = []; - if (definition.expressions) { - terms.push(...definition.expressions); - } else { - terms.push(definition); + async buildTermFrequencies(definitions, titles) { + const terms = []; + for (const definition of definitions) { + if (definition.expressions) { + terms.push(...definition.expressions); + } else { + terms.push(definition); + } + } + + if (terms.length === 0) { + return; } - for (const term of terms) { + // Create mapping of unique terms + const expressionsUnique = []; + const termsUnique = []; + const termsUniqueMap = {}; + for (let i = 0, ii = terms.length; i < ii; ++i) { + const term = terms[i]; + const expression = term.expression; term.frequencies = []; - for (const meta of await this.database.findTermMeta(term.expression, titles)) { - if (meta.mode === 'freq') { - term.frequencies.push({ - expression: meta.expression, - frequency: meta.data, - dictionary: meta.dictionary - }); - } + + if (termsUniqueMap.hasOwnProperty(expression)) { + termsUniqueMap[expression].push(term); + } else { + const termList = [term]; + expressionsUnique.push(expression); + termsUnique.push(termList); + termsUniqueMap[expression] = termList; + } + } + + const metas = await this.database.findTermMetaBulk(expressionsUnique, titles); + for (const meta of metas) { + if (meta.mode !== 'freq') { + continue; + } + + for (const term of termsUnique[meta.index]) { + term.frequencies.push({ + expression: meta.expression, + frequency: meta.data, + dictionary: meta.dictionary + }); } } } @@ -298,15 +373,13 @@ class Translator { const tags = []; for (const name of names) { const base = Translator.getNameBase(name); - const meta = await this.database.findTagForTitle(base, title); - - const tag = {name}; - for (const prop in meta || {}) { - if (prop !== 'name') { - tag[prop] = meta[prop]; - } + let meta = this.database.findTagForTitleCached(base, title); + if (typeof meta === 'undefined') { + meta = await this.database.findTagForTitle(base, title); } + const tag = Object.assign({}, meta !== null ? meta : {}, {name}); + tags.push(dictTagSanitize(tag)); } @@ -317,16 +390,18 @@ class Translator { const stats = {}; for (const name in items) { const base = Translator.getNameBase(name); - const meta = await this.database.findTagForTitle(base, title); - const group = stats[meta.category] = stats[meta.category] || []; - - const stat = {name, value: items[name]}; - for (const prop in meta || {}) { - if (prop !== 'name') { - stat[prop] = meta[prop]; + let meta = this.database.findTagForTitleCached(base, title); + if (typeof meta === 'undefined') { + meta = await this.database.findTagForTitle(base, title); + if (meta === null) { + continue; } } + const group = stats[meta.category] = stats[meta.category] || []; + + const stat = Object.assign({}, meta, {name, value: items[name]}); + group.push(dictTagSanitize(stat)); } |