summaryrefslogtreecommitdiff
path: root/ext/bg/js/translator.js
diff options
context:
space:
mode:
authorsiikamiika <siikamiika@users.noreply.github.com>2017-10-03 07:20:02 +0300
committersiikamiika <siikamiika@users.noreply.github.com>2017-10-03 07:20:02 +0300
commit69ad4a7c9b1f859733909a75534e2005a9f56178 (patch)
tree93cec595a2e8183a95fc362b5551e2e68cefc9d8 /ext/bg/js/translator.js
parent3b664dd908b94e1306a211e7c8b9cde74694c018 (diff)
merged mode: implement missing stuff, refactoring
- use correct tags - indicate popular and rare terms - indicate definitions restricted to specific terms - frequencies (Innocent Corpus)
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r--ext/bg/js/translator.js137
1 files changed, 74 insertions, 63 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index db287d63..81253374 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -49,77 +49,79 @@ class Translator {
}
async findTermsMerged(text, dictionaries, alphanumeric) {
- // const titles = Object.keys(dictionaries);
+ const titles = Object.keys(dictionaries);
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
- // const definitionsMerged = dictTermsMerge(definitions, dictionaries, this.database);
- // for (const definition of definitionsMerged) {
- // await this.buildTermFrequencies(definition, titles);
- // }
+ const definitionsBySequence = dictTermsMergeBySequence(definitions);
- const sequences = {};
- const stray = [];
- for (const definition of definitions) {
- if (typeof definition.sequence !== 'undefined') {
- if (!sequences[definition.sequence]) {
- sequences[definition.sequence] = {
- reasons: definition.reasons,
- score: Number.MIN_SAFE_INTEGER,
- expression: new Set(),
- reading: new Set(),
- source: definition.source,
- definitions: []
- };
- }
- const seq = sequences[definition.sequence];
- seq.score = Math.max(seq.score, definition.score);
- } else {
- stray.push(definition);
+ const definitionsMerged = dictTermsGroup(definitionsBySequence['-1'], dictionaries);
+ for (const sequence in definitionsBySequence) {
+ if (!(sequence > 0)) {
+ continue;
}
- }
- const definitionsMerged = dictTermsGroup(stray, dictionaries);
- for (const sequence in sequences) {
- const entry = await this.database.findEntry(Number(sequence));
-
- const result = sequences[sequence];
- const glossaries = new Map();
- for (const definition of entry) {
-
- const gloss = definition.glossary.join('||');
- if (!glossaries.get(gloss)) {
- const tags = await this.expandTags(definition.tags, definition.dictionary);
- tags.push(dictTagBuildSource(definition.dictionary));
- glossaries.set(gloss, {
- expressions: new Set(),
- readings: new Set(),
- tags: dictTagsSort(tags), // TODO: use correct tags
- source: result.source,
- reasons: [],
- score: definition.score,
- id: definition.id,
- dictionary: definition.dictionary
- });
+ const result = definitionsBySequence[sequence];
+
+ const rawDefinitionsBySequence = await this.database.findTermsBySequence(Number(sequence));
+ const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence);
+
+ // postprocess glossaries
+ for (const gloss in definitionsByGloss) {
+ const definition = definitionsByGloss[gloss];
+ definition.glossary = JSON.parse(gloss);
+
+ const tags = await this.expandTags(definition.tags, definition.dictionary);
+ tags.push(dictTagBuildSource(definition.dictionary));
+ definition.tags = dictTagsSort(tags);
+
+ definition.only = [];
+ if (!utilSetEqual(definition.expression, result.expression)) {
+ for (const expression of utilSetIntersection(definition.expression, result.expression)) {
+ definition.only.push(expression);
+ }
+ }
+ if (!utilSetEqual(definition.reading, result.reading)) {
+ for (const reading of utilSetIntersection(definition.reading, result.reading)) {
+ definition.only.push(reading);
+ }
}
- glossaries.get(gloss).expressions.add(definition.expression);
- glossaries.get(gloss).readings.add(definition.reading);
- result.expression.add(definition.expression);
- result.reading.add(definition.reading);
+ result.definitions.push(definition);
}
- for (const gloss of glossaries.keys()) {
- const definition = glossaries.get(gloss);
- definition.glossary = gloss.split('||');
- result.definitions.push(definition);
+ result.definitions.sort(definition => -definition.id);
+
+ // turn the Map()/Set() mess to [{expression: E1, reading: R1}, {...}] and tag popular/normal/rare instead of actual tags
+ const expressions = [];
+ for (const expression of result.expressions.keys()) {
+ for (const reading of result.expressions.get(expression).keys()) {
+ expressions.push({
+ expression: expression,
+ reading: reading,
+ jmdictTermFrequency: (tags => {
+ if (tags.has('P')) {
+ return 'popular';
+ } else if (dictJmdictTermTagsRare(tags)) {
+ return 'rare';
+ } else {
+ return 'normal';
+ }
+ })(result.expressions.get(expression).get(reading))
+ });
+ }
}
- //dictTermsSort(groupDefs, dictionaries)
+
+ result.expressions = expressions;
result.expression = Array.from(result.expression).join(', ');
result.reading = Array.from(result.reading).join(', ');
definitionsMerged.push(result);
}
+ for (const definition of definitionsMerged) {
+ await this.buildTermFrequencies(definition, titles);
+ }
+
return {length, definitions: dictTermsSort(definitionsMerged)};
}
@@ -234,14 +236,23 @@ class Translator {
}
async buildTermFrequencies(definition, titles) {
- definition.frequencies = [];
- for (const meta of await this.database.findTermMeta(definition.expression, titles)) {
- if (meta.mode === 'freq') {
- definition.frequencies.push({
- expression: meta.expression,
- frequency: meta.data,
- dictionary: meta.dictionary
- });
+ let terms = [];
+ if (definition.expressions) {
+ terms = terms.concat(definition.expressions);
+ } else {
+ terms.push(definition);
+ }
+
+ for (const term of terms) {
+ term.frequencies = [];
+ for (const meta of await this.database.findTermMeta(term.expression, titles)) {
+ if (meta.mode === 'freq') {
+ term.frequencies.push({
+ expression: meta.expression,
+ frequency: meta.data,
+ dictionary: meta.dictionary
+ });
+ }
}
}
}