diff options
author | Alex Yatskov <alex@foosoft.net> | 2016-12-23 15:44:16 -0800 |
---|---|---|
committer | Alex Yatskov <alex@foosoft.net> | 2016-12-23 15:44:16 -0800 |
commit | 3047f8c4fce60fb392051572928bac04dfe10c08 (patch) | |
tree | 8e2c29b3deb9f2463890c1a07cff42d23d8327b8 /ext/bg/js | |
parent | 39fa11f72bae62985ee5b27103e5959dab30316c (diff) |
fixing various deinflection issues
Diffstat (limited to 'ext/bg/js')
-rw-r--r-- | ext/bg/js/translator.js | 81 | ||||
-rw-r--r-- | ext/bg/js/util.js | 17 |
2 files changed, 51 insertions, 47 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index f29b90c9..dc374a5e 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -43,34 +43,40 @@ class Translator { findTerm(text, dictionaries, enableSoftKatakanaSearch) { const cache = {}; - return this.findDeinflectionGroups(text, dictionaries, cache).then(groups => { + return this.findTermDeinflections(text, dictionaries, cache).then(deinfHiragana => { const textHiragana = wanakana._katakanaToHiragana(text); if (text !== textHiragana && enableSoftKatakanaSearch) { - return this.findDeinflectionGroups(textHiragana, dictionaries, cache).then(groupsHiragana => { - for (const key in groupsHiragana) { - groups[key] = groups[key] || groupsHiragana[key]; - } - - return groups; - }); + return this.findTermDeinflections(textHiragana, dictionaries, cache).then(deinfHiragana => deinfHiragana.concat(deinfHiragana)); } else { - return groups; + return deinfHiragana; } - }).then(groups => { - const definitions = []; - for (const key in groups) { - definitions.push(groups[key]); + }).then(deinflections => { + let definitions = []; + for (const deinflection of deinflections) { + for (const definition of deinflection.definitions) { + definitions.push({ + source: deinflection.source, + reasons: deinflection.reasons, + score: definition.score, + id: definition.id, + dictionary: definition.dictionary, + expression: definition.expression, + reading: definition.reading, + glossary: definition.glossary, + tags: sortTags(definition.tags.map(tag => buildTag(tag, definition.tagMeta))) + }); + } } + definitions = undupeTermDefs(definitions); + definitions = sortTermDefs(definitions); + let length = 0; - for (const result of definitions) { - length = Math.max(length, result.source.length); + for (const definition of definitions) { + length = Math.max(length, definition.source.length); } - return { - length, - definitions: sortTermDefs(definitions) - }; + return {length, definitions}; }); } @@ -86,7 +92,7 @@ class Translator { return Promise.all(promises).then(sets => this.processKanji(sets.reduce((a, b) => a.concat(b), []))); } - findDeinflectionGroups(text, dictionaries, cache) { + findTermDeinflections(text, dictionaries, cache) { const definer = term => { if (cache.hasOwnProperty(term)) { return Promise.resolve(cache[term]); @@ -95,38 +101,19 @@ class Translator { return this.database.findTerm(term, dictionaries).then(definitions => cache[term] = definitions); }; - const groups = {}, promises = []; + const promises = []; for (let i = text.length; i > 0; --i) { - promises.push( - this.deinflector.deinflect(text.slice(0, i), definer).then(deinflections => { - for (const deinflection of deinflections) { - this.processDeinflection(groups, deinflection); - } - }) - ); + promises.push(this.deinflector.deinflect(text.slice(0, i), definer)); } - return Promise.all(promises).then(() => groups); - } - - processDeinflection(groups, {source, rules, reasons, definitions}, dictionaries) { - for (const definition of definitions) { - if (definition.id in groups) { - continue; + return Promise.all(promises).then(results => { + let deinflections = []; + for (const result of results) { + deinflections = deinflections.concat(result); } - const tags = definition.tags.map(tag => buildTag(tag, definition.tagMeta)); - groups[definition.id] = { - source, - reasons, - score: definition.score, - dictionary: definition.dictionary, - expression: definition.expression, - reading: definition.reading, - glossary: definition.glossary, - tags: sortTags(tags) - }; - } + return deinflections; + }); } processKanji(definitions) { diff --git a/ext/bg/js/util.js b/ext/bg/js/util.js index a0fca270..a37e223c 100644 --- a/ext/bg/js/util.js +++ b/ext/bg/js/util.js @@ -96,6 +96,23 @@ function sortTermDefs(definitions) { }); } +function undupeTermDefs(definitions) { + const definitionGroups = {}; + for (const definition of definitions) { + const definitionExisting = definitionGroups[definition.id]; + if (!definitionGroups.hasOwnProperty(definition.id) || definition.expression.length > definitionExisting.expression.length) { + definitionGroups[definition.id] = definition; + } + } + + const definitionsUnique = []; + for (const key in definitionGroups) { + definitionsUnique.push(definitionGroups[key]); + } + + return definitionsUnique; +} + function buildTag(name, meta) { const tag = {name}; const symbol = name.split(':')[0]; |