diff options
author | siikamiika <siikamiika@users.noreply.github.com> | 2017-10-03 07:20:02 +0300 |
---|---|---|
committer | siikamiika <siikamiika@users.noreply.github.com> | 2017-10-03 07:20:02 +0300 |
commit | 69ad4a7c9b1f859733909a75534e2005a9f56178 (patch) | |
tree | 93cec595a2e8183a95fc362b5551e2e68cefc9d8 /ext/bg/js/dictionary.js | |
parent | 3b664dd908b94e1306a211e7c8b9cde74694c018 (diff) |
merged mode: implement missing stuff, refactoring
- use correct tags
- indicate popular and rare terms
- indicate definitions restricted to specific terms
- frequencies (Innocent Corpus)
Diffstat (limited to 'ext/bg/js/dictionary.js')
-rw-r--r-- | ext/bg/js/dictionary.js | 110 |
1 files changed, 110 insertions, 0 deletions
// diff --git a/ext/bg/js/dictionary.js b/ext/bg/js/dictionary.js
// index f3f573d3..2b289a23 100644
// --- a/ext/bg/js/dictionary.js
// +++ b/ext/bg/js/dictionary.js
//
// Hunk 1: @@ -144,6 +144,77 @@ function dictTermsGroup(definitions, dictionaries) {
// Leading context (tail of dictTermsGroup; its beginning is outside this hunk):
//     return dictTermsSort(results);
// }

/**
 * Buckets term definitions by their `sequence` number.
 *
 * For every definition whose `sequence` is greater than zero, an accumulator
 * object is created the first time that sequence is seen. The accumulator
 * copies `reasons`, `source` and `dictionary` from that first definition and
 * starts with empty `expression`/`reading` sets, an empty `expressions` map
 * and an empty `definitions` array; this function does not populate those
 * collections. Its `score` ends up as the maximum `score` of all definitions
 * sharing the sequence.
 *
 * Every other definition is appended unchanged to the list stored under the
 * key `'-1'`.
 *
 * @param {Iterable<Object>} definitions - Definitions to bucket.
 * @returns {Object} Map-like object: `'-1'` holds an array of unsequenced
 *     definitions, every other key is a sequence number mapped to its
 *     accumulator.
 */
function dictTermsMergeBySequence(definitions) {
    const definitionsBySequence = {'-1': []};

    for (const definition of definitions) {
        // Anything without a positive sequence (including a missing one)
        // cannot be merged and is passed through as-is.
        if (!(definition.sequence > 0)) {
            definitionsBySequence['-1'].push(definition);
            continue;
        }

        let merged = definitionsBySequence[definition.sequence];
        if (!merged) {
            merged = {
                reasons: definition.reasons,
                score: Number.MIN_SAFE_INTEGER,
                expression: new Set(),
                reading: new Set(),
                expressions: new Map(),
                source: definition.source,
                dictionary: definition.dictionary,
                definitions: []
            };
            definitionsBySequence[definition.sequence] = merged;
        }

        // Keep the best score seen for this sequence.
        merged.score = Math.max(merged.score, definition.score);
    }

    return definitionsBySequence;
}

/**
 * Groups definitions that share an identical glossary and records their
 * expressions, readings and tags.
 *
 * The grouping key is `JSON.stringify(definition.glossary)`. The entry for a
 * key takes `score`, `id` and `dictionary` from the first definition with that
 * glossary and `source` from `result`.
 *
 * Side effects on `result`: each definition's expression and reading are added
 * to `result.expression` and `result.reading`, and `result.expressions` is
 * extended to the shape expression -> reading -> Set of tags. Tags accepted by
 * `dictIsJmdictTermTag` are stored in that per-expression/reading set; all
 * other tags are stored in the glossary entry's `tags` set.
 *
 * @param {Object} result - Accumulator providing `source`, the `expression`
 *     and `reading` sets and the `expressions` map; mutated in place.
 * @param {Iterable<Object>} definitions - Definitions to group.
 * @returns {Object} Object keyed by the stringified glossary.
 */
function dictTermsMergeByGloss(result, definitions) {
    const definitionsByGloss = {};

    for (const definition of definitions) {
        const gloss = JSON.stringify(definition.glossary);

        let glossEntry = definitionsByGloss[gloss];
        if (!glossEntry) {
            glossEntry = {
                expression: new Set(),
                reading: new Set(),
                tags: new Set(),
                source: result.source,
                reasons: [],
                score: definition.score,
                id: definition.id,
                dictionary: definition.dictionary
            };
            definitionsByGloss[gloss] = glossEntry;
        }

        glossEntry.expression.add(definition.expression);
        glossEntry.reading.add(definition.reading);

        result.expression.add(definition.expression);
        result.reading.add(definition.reading);

        // result->expressions[ Expression1[ Reading1[ Tag1, Tag2 ] ], Expression2, ... ]
        if (!result.expressions.has(definition.expression)) {
            result.expressions.set(definition.expression, new Map());
        }
        const readings = result.expressions.get(definition.expression);
        if (!readings.has(definition.reading)) {
            readings.set(definition.reading, new Set());
        }
        const termTags = readings.get(definition.reading);

        for (const tag of definition.tags) {
            if (dictIsJmdictTermTag(tag)) {
                // TODO: expand tags
                termTags.add(tag);
            } else {
                glossEntry.tags.add(tag);
            }
        }
    }

    return definitionsByGloss;
}

// Trailing context of hunk 1 (unchanged by the commit).
function dictTagBuildSource(name) {
    return dictTagSanitize({name, category: 'dictionary', order: 100});
}

// Hunk 2: @@ -178,6 +249,45 @@ function dictTagsSort(tags) {
// Leading context (tail of dictTagsSort; its beginning is outside this hunk):
//     });
// }

/**
 * Tells whether a tag name is one of the term-level tags in the fixed list
 * below.
 *
 * @param {string} tag - Tag name to test.
 * @returns {boolean} `true` when `tag` is in the list.
 */
function dictIsJmdictTermTag(tag) {
    return [
        'P',
        'news',
        'ichi',
        'spec',
        'gai',
        'ik',
        'iK',
        'ok',
        'oK',
        'ek',
        'eK',
        'io',
        'oik',
        'ateji',
        'gikun'
    ].includes(tag);
}

/**
 * Tells whether any of the given tags is in the fixed "rare" list below.
 *
 * @param {Iterable<string>} tags - Tag names; any iterable (array, Set, ...)
 *     is accepted because the tags are only iterated.
 * @returns {boolean} `true` as soon as one rare tag is found, else `false`.
 */
function dictJmdictTermTagsRare(tags) {
    const rareTags = [
        'ik',
        'iK',
        'ok',
        'oK',
        'ek',
        'eK',
        'io',
        'oik'
    ];

    // for...of rather than Array.prototype.some so that Sets keep working.
    for (const tag of tags) {
        if (rareTags.includes(tag)) {
            return true;
        }
    }

    return false;
}

// Trailing context of hunk 2 (unchanged by the commit).
function dictFieldSplit(field) {
    return field.length === 0 ? [] : field.split(' ');
}