aboutsummaryrefslogtreecommitdiff
path: root/ext/bg
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2020-10-04 18:10:10 -0400
committer: GitHub <noreply@github.com> 2020-10-04 18:10:10 -0400
commit: 561e36e88dab984d6f071ea888cc2b92039a86f0 (patch)
tree: 289fbf4541cdb543ea14b5366f3c722376ebc520 /ext/bg
parent: 7e31dcca3960a269be537196971f06112b6a6be9 (diff)
Translator definition source term (and other info) (#881)
* Add sourceTerm field to 'term' and 'termGrouped' definitions
* Fix comparison of expressions which are always the same
* Rename/restructure term info map
* Add source term
* Add sourceTerm to expressions array
* Reuse furiganaSegments
* Add helper function _createExpressionDetailsListFromTermInfoMap
* Add expressions array to termMergedByGlossary
* Add expressions to definition types 'term' and 'termGrouped'
* Rename expressionDetails* to termDetails*
* Correct the source/rawSource/sourceTerm for related sequenced definitions
* Simplify structure of sequencedDefinitions
* Remove TODO
Diffstat (limited to 'ext/bg')
-rw-r--r-- ext/bg/js/translator.js 132
1 files changed, 74 insertions, 58 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 3dbae411..4b66dd9c 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -148,24 +148,17 @@ class Translator {
for (const definition of definitions) {
const {sequence, dictionary} = definition;
if (mainDictionary === dictionary && sequence >= 0) {
- const {score} = definition;
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
if (typeof sequencedDefinition === 'undefined') {
- const {reasons, source, rawSource} = definition;
sequencedDefinition = {
- reasons,
- score,
- source,
- rawSource,
- dictionary,
- definitions: []
+ sourceDefinitions: [],
+ relatedDefinitions: []
};
sequencedDefinitionMap.set(sequence, sequencedDefinition);
sequencedDefinitions.push(sequencedDefinition);
sequenceList.push(sequence);
- } else {
- sequencedDefinition.score = Math.max(sequencedDefinition.score, score);
}
+ sequencedDefinition.sourceDefinitions.push(definition);
} else {
unsequencedDefinitions.push(definition);
}
@@ -174,9 +167,10 @@ class Translator {
if (sequenceList.length > 0) {
const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
for (const databaseDefinition of databaseDefinitions) {
- const {definitions: definitions2, source, rawSource, reasons} = sequencedDefinitions[databaseDefinition.index];
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
- definitions2.push(definition);
+ const {relatedDefinitions} = sequencedDefinitions[databaseDefinition.index];
+ const {expression} = databaseDefinition;
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, expression, expression, expression, [], enabledDictionaryMap);
+ relatedDefinitions.push(definition);
}
}
@@ -203,26 +197,27 @@ class Translator {
const definitions = [];
for (const databaseDefinition of databaseDefinitions) {
const source = expressionList[databaseDefinition.index];
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, [], secondarySearchDictionaryMap);
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap);
definitions.push(definition);
}
return definitions;
}
- async _getMergedDefinition(sequencedDefinition, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
- const {reasons, score, source, rawSource, dictionary, definitions} = sequencedDefinition;
- const definitionDetailsMap = new Map();
+ async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
+ const {reasons, source, rawSource, dictionary} = sourceDefinitions[0];
+ const score = this._getMaxDefinitionScore(sourceDefinitions);
+ const termInfoMap = new Map();
const glossaryDefinitions = [];
const glossaryDefinitionGroupMap = new Map();
- this._mergeByGlossary(definitions, glossaryDefinitionGroupMap);
- this._addDefinitionDetails(definitions, definitionDetailsMap);
+ this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap);
+ this._addUniqueTermInfos(relatedDefinitions, termInfoMap);
- let secondaryDefinitions = await this._getMergedSecondarySearchResults(definitionDetailsMap, secondarySearchDictionaryMap);
+ let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap);
secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions];
- this._removeUsedDefinitions(secondaryDefinitions, definitionDetailsMap, usedDefinitions);
+ this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions);
this._removeDuplicateDefinitions(secondaryDefinitions);
this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
@@ -249,14 +244,7 @@ class Translator {
this._sortDefinitions(glossaryDefinitions, true);
- const expressionDetailsList = [];
- for (const [expression, readingMap] of definitionDetailsMap.entries()) {
- for (const [reading, termTagsMap] of readingMap.entries()) {
- const termTags = [...termTagsMap.values()];
- this._sortTags(termTags);
- expressionDetailsList.push(this._createExpressionDetails(expression, reading, termTags));
- }
- }
+ const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
return this._createMergedTermDefinition(
source,
@@ -264,18 +252,18 @@ class Translator {
glossaryDefinitions,
[...allExpressions],
[...allReadings],
- expressionDetailsList,
+ termDetailsList,
reasons,
dictionary,
score
);
}
- _removeUsedDefinitions(definitions, definitionDetailsMap, usedDefinitions) {
+ _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) {
for (let i = 0, ii = definitions.length; i < ii; ++i) {
const definition = definitions[i];
const {expression, reading} = definition;
- const expressionMap = definitionDetailsMap.get(expression);
+ const expressionMap = termInfoMap.get(expression);
if (
typeof expressionMap !== 'undefined' &&
typeof expressionMap.get(reading) !== 'undefined'
@@ -333,9 +321,10 @@ class Translator {
const definitionsMerged = [];
const usedDefinitions = new Set();
- for (const sequencedDefinition of sequencedDefinitions) {
+ for (const {sourceDefinitions, relatedDefinitions} of sequencedDefinitions) {
const result = await this._getMergedDefinition(
- sequencedDefinition,
+ sourceDefinitions,
+ relatedDefinitions,
unsequencedDefinitions,
secondarySearchDictionaryMap,
usedDefinitions
@@ -345,15 +334,15 @@ class Translator {
const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
- const {reasons, score, expression, reading, source, rawSource, dictionary, termTags} = groupedDefinition;
- const expressionDetails = this._createExpressionDetails(expression, reading, termTags);
+ const {reasons, score, expression, reading, source, rawSource, sourceTerm, dictionary, furiganaSegments, termTags} = groupedDefinition;
+ const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
const compatibilityDefinition = this._createMergedTermDefinition(
source,
rawSource,
definitions,
[expression],
[reading],
- [expressionDetails],
+ termDetailsList,
reasons,
dictionary,
score
@@ -403,11 +392,11 @@ class Translator {
let maxLength = 0;
const definitions = [];
- for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) {
+ for (const {databaseDefinitions, source, rawSource, term, reasons} of deinflections) {
if (databaseDefinitions.length === 0) { continue; }
maxLength = Math.max(maxLength, rawSource.length);
for (const databaseDefinition of databaseDefinitions) {
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, term, reasons, enabledDictionaryMap);
definitions.push(definition);
}
}
@@ -773,7 +762,7 @@ class Translator {
}
let removeIndex = i;
- if (definition.expression.length > existing[1].expression.length) {
+ if (definition.source.length > existing[1].source.length) {
definitionGroups.set(id, [i, definition]);
removeIndex = existing[0];
}
@@ -877,20 +866,25 @@ class Translator {
}
}
- _addDefinitionDetails(definitions, definitionDetailsMap) {
- for (const {expression, reading, termTags} of definitions) {
- let readingMap = definitionDetailsMap.get(expression);
+ _addUniqueTermInfos(definitions, termInfoMap) {
+ for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
+ let readingMap = termInfoMap.get(expression);
if (typeof readingMap === 'undefined') {
readingMap = new Map();
- definitionDetailsMap.set(expression, readingMap);
+ termInfoMap.set(expression, readingMap);
}
- let termTagsMap = readingMap.get(reading);
- if (typeof termTagsMap === 'undefined') {
- termTagsMap = new Map();
- readingMap.set(reading, termTagsMap);
+ let termInfo = readingMap.get(reading);
+ if (typeof termInfo === 'undefined') {
+ termInfo = {
+ sourceTerm,
+ furiganaSegments,
+ termTagsMap: new Map()
+ };
+ readingMap.set(reading, termInfo);
}
+ const {termTagsMap} = termInfo;
for (const tag of termTags) {
const {name} = tag;
if (termTagsMap.has(name)) { continue; }
@@ -973,7 +967,7 @@ class Translator {
};
}
- async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap) {
+ async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, reasons, enabledDictionaryMap) {
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
const termTagsExpanded = await this._expandTags(termTags, dictionary);
@@ -984,12 +978,14 @@ class Translator {
this._sortTags(termTagsExpanded);
const furiganaSegments = jp.distributeFurigana(expression, reading);
+ const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
return {
type: 'term',
id,
source,
rawSource,
+ sourceTerm,
reasons,
score,
sequence,
@@ -997,7 +993,7 @@ class Translator {
dictionaryPriority,
expression,
reading,
- // expressions
+ expressions: termDetailsList,
furiganaSegments,
glossary,
definitionTags: definitionTagsExpanded,
@@ -1010,14 +1006,16 @@ class Translator {
}
_createGroupedTermDefinition(definitions) {
- const {expression, reading, furiganaSegments, reasons, termTags, source, rawSource} = definitions[0];
+ const {expression, reading, furiganaSegments, reasons, termTags, source, rawSource, sourceTerm} = definitions[0];
const score = this._getMaxDefinitionScore(definitions);
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
+ const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
return {
type: 'termGrouped',
// id
source,
rawSource,
+ sourceTerm,
reasons: [...reasons],
score,
// sequence
@@ -1025,7 +1023,7 @@ class Translator {
dictionaryPriority,
expression,
reading,
- // expressions
+ expressions: termDetailsList,
furiganaSegments, // Contains duplicate data
// glossary
// definitionTags
@@ -1037,13 +1035,14 @@ class Translator {
};
}
- _createMergedTermDefinition(source, rawSource, definitions, expressions, readings, expressionDetailsList, reasons, dictionary, score) {
+ _createMergedTermDefinition(source, rawSource, definitions, expressions, readings, termDetailsList, reasons, dictionary, score) {
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
return {
type: 'termMerged',
// id
source,
rawSource,
+ // sourceTerm
reasons,
score,
// sequence
@@ -1051,7 +1050,7 @@ class Translator {
dictionaryPriority,
expression: expressions,
reading: readings,
- expressions: expressionDetailsList,
+ expressions: termDetailsList,
// furiganaSegments
// glossary
// definitionTags
@@ -1072,6 +1071,10 @@ class Translator {
only.push(...getSetIntersection(readings, allReadings));
}
+ const termInfoMap = new Map();
+ this._addUniqueTermInfos(definitions, termInfoMap);
+ const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
+
const definitionTags = this._getUniqueDefinitionTags(definitions);
this._sortTags(definitionTags);
@@ -1083,6 +1086,7 @@ class Translator {
// id
source,
rawSource,
+ // sourceTerm
reasons: [],
score,
// sequence
@@ -1090,7 +1094,7 @@ class Translator {
dictionaryPriority,
expression: [...expressions],
reading: [...readings],
- // expressions
+ expressions: termDetailsList,
// furiganaSegments
glossary: [...glossary],
definitionTags,
@@ -1102,13 +1106,25 @@ class Translator {
};
}
- _createExpressionDetails(expression, reading, termTags) {
+ _createTermDetailsListFromTermInfoMap(termInfoMap) {
+ const termDetailsList = [];
+ for (const [expression, readingMap] of termInfoMap.entries()) {
+ for (const [reading, {termTagsMap, sourceTerm, furiganaSegments}] of readingMap.entries()) {
+ const termTags = [...termTagsMap.values()];
+ this._sortTags(termTags);
+ termDetailsList.push(this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags));
+ }
+ }
+ return termDetailsList;
+ }
+
+ _createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags) {
const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
- const furiganaSegments = jp.distributeFurigana(expression, reading);
return {
+ sourceTerm,
expression,
reading,
- furiganaSegments,
+ furiganaSegments, // Contains duplicate data
termTags,
termFrequency,
frequencies: [],