diff options
author | Alex Yatskov <alex@foosoft.net> | 2019-10-20 11:23:20 -0700 |
---|---|---|
committer | Alex Yatskov <alex@foosoft.net> | 2019-10-20 11:23:20 -0700 |
commit | 438498435227cfa59cf9ed3430045b288cd2a7c0 (patch) | |
tree | 6a05520e5d6fa8d26d372673a9ed3e5d2da7e3fd /ext/bg/js/translator.js | |
parent | 06d7713189be9eb51669d3842b78278371e6cfa4 (diff) | |
parent | d32fd1381b6cd5141a21c22f9ef639b2fe9774fb (diff) |
Merge branch 'master' into testing
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r-- | ext/bg/js/translator.js | 306 |
1 files changed, 185 insertions, 121 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 601ee30c..ee012d96 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -21,6 +21,7 @@ class Translator { constructor() { this.database = null; this.deinflector = null; + this.tagCache = {}; } async prepare() { @@ -36,6 +37,11 @@ class Translator { } } + async purgeDatabase() { + this.tagCache = {}; + await this.database.purge(); + } + async findTermsGrouped(text, dictionaries, alphanumeric, options) { const titles = Object.keys(dictionaries); const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); @@ -52,94 +58,121 @@ class Translator { return {length, definitions: definitionsGrouped}; } - async findTermsMerged(text, dictionaries, alphanumeric, options) { - const secondarySearchTitles = Object.keys(options.dictionaries).filter(dict => options.dictionaries[dict].allowSecondarySearches); - const titles = Object.keys(dictionaries); - const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); + async getSequencedDefinitions(definitions, mainDictionary) { + const definitionsBySequence = dictTermsMergeBySequence(definitions, mainDictionary); + const defaultDefinitions = definitionsBySequence['-1']; - const definitionsBySequence = dictTermsMergeBySequence(definitions, options.general.mainDictionary); + const sequenceList = Object.keys(definitionsBySequence).map(v => Number(v)).filter(v => v >= 0); + const sequencedDefinitions = sequenceList.map((key) => ({ + definitions: definitionsBySequence[key], + rawDefinitions: [] + })); - const definitionsMerged = []; - const mergedByTermIndices = new Set(); - for (const sequence in definitionsBySequence) { - if (sequence < 0) { - continue; - } + for (const definition of await this.database.findTermsBySequenceBulk(sequenceList, mainDictionary)) { + sequencedDefinitions[definition.index].rawDefinitions.push(definition); + } - const result = definitionsBySequence[sequence]; + return {sequencedDefinitions, defaultDefinitions}; + } - const rawDefinitionsBySequence = await this.database.findTermsBySequence(Number(sequence), options.general.mainDictionary); + async getMergedSecondarySearchResults(text, expressionsMap, secondarySearchTitles) { + if (secondarySearchTitles.length === 0) { + return []; + } - for (const definition of rawDefinitionsBySequence) { - const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); - definitionTags.push(dictTagBuildSource(definition.dictionary)); - definition.definitionTags = definitionTags; - const termTags = await this.expandTags(definition.termTags, definition.dictionary); - definition.termTags = termTags; + const expressionList = []; + const readingList = []; + for (const expression of expressionsMap.keys()) { + if (expression === text) { continue; } + for (const reading of expressionsMap.get(expression).keys()) { + expressionList.push(expression); + readingList.push(reading); } + } - const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence); - - const secondarySearchResults = []; - if (secondarySearchTitles.length > 0) { - for (const expression of result.expressions.keys()) { - if (expression === text) { - continue; - } - - for (const reading of result.expressions.get(expression).keys()) { - for (const definition of await this.database.findTermsExact(expression, reading, secondarySearchTitles)) { - const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); - definitionTags.push(dictTagBuildSource(definition.dictionary)); - definition.definitionTags = definitionTags; - const termTags = await this.expandTags(definition.termTags, definition.dictionary); - definition.termTags = termTags; - secondarySearchResults.push(definition); - } - } - } - } + const definitions = await this.database.findTermsExactBulk(expressionList, readingList, secondarySearchTitles); + for (const definition of definitions) { + const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); + definitionTags.push(dictTagBuildSource(definition.dictionary)); + definition.definitionTags = definitionTags; + const termTags = await this.expandTags(definition.termTags, definition.dictionary); + definition.termTags = termTags; + } - dictTermsMergeByGloss(result, definitionsBySequence['-1'].concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices); + if (definitions.length > 1) { + definitions.sort((a, b) => a.index - b.index); + } - for (const gloss in definitionsByGloss) { - const definition = definitionsByGloss[gloss]; - dictTagsSort(definition.definitionTags); - result.definitions.push(definition); - } + return definitions; + } - dictTermsSort(result.definitions, dictionaries); - - const expressions = []; - for (const expression of result.expressions.keys()) { - for (const reading of result.expressions.get(expression).keys()) { - const termTags = result.expressions.get(expression).get(reading); - expressions.push({ - expression: expression, - reading: reading, - termTags: dictTagsSort(termTags), - termFrequency: (score => { - if (score > 0) { - return 'popular'; - } else if (score < 0) { - return 'rare'; - } else { - return 'normal'; - } - })(termTags.map(tag => tag.score).reduce((p, v) => p + v, 0)) - }); - } + async getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchTitles, mergedByTermIndices) { + const result = sequencedDefinition.definitions; + const rawDefinitionsBySequence = sequencedDefinition.rawDefinitions; + + for (const definition of rawDefinitionsBySequence) { + const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); + definitionTags.push(dictTagBuildSource(definition.dictionary)); + definition.definitionTags = definitionTags; + const termTags = await this.expandTags(definition.termTags, definition.dictionary); + definition.termTags = termTags; + } + + const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence); + const secondarySearchResults = await this.getMergedSecondarySearchResults(text, result.expressions, secondarySearchTitles); + + dictTermsMergeByGloss(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices); + + for (const gloss in definitionsByGloss) { + const definition = definitionsByGloss[gloss]; + dictTagsSort(definition.definitionTags); + result.definitions.push(definition); + } + + dictTermsSort(result.definitions, dictionaries); + + const expressions = []; + for (const expression of result.expressions.keys()) { + for (const reading of result.expressions.get(expression).keys()) { + const termTags = result.expressions.get(expression).get(reading); + const score = termTags.map(tag => tag.score).reduce((p, v) => p + v, 0); + expressions.push({ + expression: expression, + reading: reading, + termTags: dictTagsSort(termTags), + termFrequency: Translator.scoreToTermFrequency(score) + }); } + } - result.expressions = expressions; + result.expressions = expressions; + result.expression = Array.from(result.expression); + result.reading = Array.from(result.reading); - result.expression = Array.from(result.expression); - result.reading = Array.from(result.reading); + return result; + } + + async findTermsMerged(text, dictionaries, alphanumeric, options) { + const secondarySearchTitles = Object.keys(options.dictionaries).filter(dict => options.dictionaries[dict].allowSecondarySearches); + const titles = Object.keys(dictionaries); + const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); + const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary); + const definitionsMerged = []; + const mergedByTermIndices = new Set(); + for (const sequencedDefinition of sequencedDefinitions) { + const result = await this.getMergedDefinition( + text, + dictionaries, + sequencedDefinition, + defaultDefinitions, + secondarySearchTitles, + mergedByTermIndices + ); definitionsMerged.push(result); } - const strayDefinitions = definitionsBySequence['-1'].filter((definition, index) => !mergedByTermIndices.has(index)); + const strayDefinitions = defaultDefinitions.filter((definition, index) => !mergedByTermIndices.has(index)); for (const groupedDefinition of dictTermsGroup(strayDefinitions, dictionaries)) { groupedDefinition.expressions = [{expression: groupedDefinition.expression, reading: groupedDefinition.reading}]; definitionsMerged.push(groupedDefinition); @@ -277,33 +310,44 @@ class Translator { } async findKanji(text, dictionaries) { - let definitions = []; - const processed = {}; const titles = Object.keys(dictionaries); + const kanjiUnique = {}; + const kanjiList = []; for (const c of text) { - if (!processed[c]) { - definitions.push(...await this.database.findKanji(c, titles)); - processed[c] = true; + if (!kanjiUnique.hasOwnProperty(c)) { + kanjiList.push(c); + kanjiUnique[c] = true; } } + const definitions = await this.database.findKanjiBulk(kanjiList, titles); + if (definitions.length === 0) { + return definitions; + } + + if (definitions.length > 1) { + definitions.sort((a, b) => a.index - b.index); + } + + const kanjiList2 = []; for (const definition of definitions) { + kanjiList2.push(definition.character); + const tags = await this.expandTags(definition.tags, definition.dictionary); tags.push(dictTagBuildSource(definition.dictionary)); definition.tags = dictTagsSort(tags); definition.stats = await this.expandStats(definition.stats, definition.dictionary); - definition.frequencies = []; - for (const meta of await this.database.findKanjiMeta(definition.character, titles)) { - if (meta.mode === 'freq') { - definition.frequencies.push({ - character: meta.character, - frequency: meta.data, - dictionary: meta.dictionary - }); - } - } + } + + for (const meta of await this.database.findKanjiMetaBulk(kanjiList2, titles)) { + if (meta.mode !== 'freq') { continue; } + definitions[meta.index].frequencies.push({ + character: meta.character, + frequency: meta.data, + dictionary: meta.dictionary + }); } return definitions; @@ -359,56 +403,76 @@ class Translator { } async expandTags(names, title) { - const tags = []; - for (const name of names) { - const base = Translator.getNameBase(name); - let meta = this.database.findTagForTitleCached(base, title); - if (typeof meta === 'undefined') { - meta = await this.database.findTagForTitle(base, title); - } - - const tag = Object.assign({}, meta !== null ? meta : {}, {name}); - - tags.push(dictTagSanitize(tag)); - } - - return tags; + const tagMetaList = await this.getTagMetaList(names, title); + return tagMetaList.map((meta, index) => { + const name = names[index]; + const tag = dictTagSanitize(Object.assign({}, meta !== null ? meta : {}, {name})); + return dictTagSanitize(tag); + }); } async expandStats(items, title) { - const stats = {}; - for (const name in items) { - const base = Translator.getNameBase(name); - let meta = this.database.findTagForTitleCached(base, title); - if (typeof meta === 'undefined') { - meta = await this.database.findTagForTitle(base, title); - if (meta === null) { - continue; - } - } + const names = Object.keys(items); + const tagMetaList = await this.getTagMetaList(names, title); - const group = stats[meta.category] = stats[meta.category] || []; + const stats = {}; + for (let i = 0; i < names.length; ++i) { + const name = names[i]; + const meta = tagMetaList[i]; + if (meta === null) { continue; } + + const category = meta.category; + const group = ( + stats.hasOwnProperty(category) ? + stats[category] : + (stats[category] = []) + ); const stat = Object.assign({}, meta, {name, value: items[name]}); - group.push(dictTagSanitize(stat)); } + const sortCompare = (a, b) => a.notes - b.notes; for (const category in stats) { - stats[category].sort((a, b) => { - if (a.notes < b.notes) { - return -1; - } else if (a.notes > b.notes) { - return 1; - } else { - return 0; - } - }); + stats[category].sort(sortCompare); } return stats; } + async getTagMetaList(names, title) { + const tagMetaList = []; + const cache = ( + this.tagCache.hasOwnProperty(title) ? + this.tagCache[title] : + (this.tagCache[title] = {}) + ); + + for (const name of names) { + const base = Translator.getNameBase(name); + + if (cache.hasOwnProperty(base)) { + tagMetaList.push(cache[base]); + } else { + const tagMeta = await this.database.findTagForTitle(base, title); + cache[base] = tagMeta; + tagMetaList.push(tagMeta); + } + } + + return tagMetaList; + } + + static scoreToTermFrequency(score) { + if (score > 0) { + return 'popular'; + } else if (score < 0) { + return 'rare'; + } else { + return 'normal'; + } + } + static getNameBase(name) { const pos = name.indexOf(':'); return (pos >= 0 ? name.substr(0, pos) : name); |