summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-03-01 22:17:23 -0500
committer: GitHub <noreply@github.com> 2021-03-01 22:17:23 -0500
commit: 0dab38f0a62c0bde4d8c32ec16f9d2a0672b85eb (patch)
tree: 51b64a85450e3bb59015b222376451719d055cc9
parent: b477da97d43883592d420883f20d137d06e8e2cd (diff)
Refactor translator merged mode (#1474)
* Remove sourceDefinitions
* Add id
* Remove related definitions from unsequencedDefinitions
* Add separate _addRelatedDefinitions function
* Add secondary definitions
* Update how secondary definitions are added
* Update expression/reading source
* Move _mergeByGlossary body
* Refactor _createTermDetailsListFromTermInfoMap
* Move _addUniqueTermInfos body
* Rename function
* Organize
* Simplify duplicate check
* Rename relatedDefinitionIds to definitionIds
* Refactor secondary definition adding
* Early exit
* Add matching unsequencedDefinitions to secondaryDefinitions
* Clean
* Fix incorrect condition
* Move _addSecondaryDefinitions call
* Add comments
-rw-r--r-- ext/js/language/translator.js 301
1 files changed, 153 insertions, 148 deletions
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index f5885c05..394e5eac 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -178,26 +178,16 @@ class Translator {
async _findTermsMerged(text, options) {
const {mainDictionary, enabledDictionaryMap} = options;
- const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
-
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
const definitionsMerged = [];
- const usedDefinitions = new Set();
-
- for (const {sourceDefinitions, relatedDefinitions} of sequencedDefinitions) {
- const result = await this._getMergedDefinition(
- sourceDefinitions,
- relatedDefinitions,
- unsequencedDefinitions,
- secondarySearchDictionaryMap,
- usedDefinitions
- );
- definitionsMerged.push(result);
+
+ for (const {relatedDefinitions, secondaryDefinitions} of sequencedDefinitions) {
+ const mergedDefinition = this._getMergedDefinition(relatedDefinitions, secondaryDefinitions);
+ definitionsMerged.push(mergedDefinition);
}
- const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
- for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
+ for (const groupedDefinition of this._groupTerms(unsequencedDefinitions, enabledDictionaryMap)) {
const {reasons, score, expression, reading, source, rawSource, sourceTerm, furiganaSegments, termTags, definitions: definitions2} = groupedDefinition;
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
const compatibilityDefinition = this._createMergedTermDefinition(
@@ -240,16 +230,19 @@ class Translator {
let maxLength = 0;
const definitions = [];
+ const definitionIds = new Set();
for (const {databaseDefinitions, source, rawSource, term, reasons} of deinflections) {
if (databaseDefinitions.length === 0) { continue; }
maxLength = Math.max(maxLength, rawSource.length);
for (const databaseDefinition of databaseDefinitions) {
+ const {id} = databaseDefinition;
+ if (definitionIds.has(id)) { continue; }
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, term, reasons, true, enabledDictionaryMap);
definitions.push(definition);
+ definitionIds.add(id);
}
}
- this._removeDuplicateDefinitions(definitions);
return [definitions, maxLength];
}
@@ -364,104 +357,160 @@ class Translator {
* @param enabledDictionaryMap The map of enabled dictionaries and their settings.
*/
async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) {
+ const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
const sequenceList = [];
const sequencedDefinitionMap = new Map();
const sequencedDefinitions = [];
- const unsequencedDefinitions = [];
+ const unsequencedDefinitions = new Map();
for (const definition of definitions) {
- const {sequence, dictionary} = definition;
+ const {sequence, dictionary, id} = definition;
if (mainDictionary === dictionary && sequence >= 0) {
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
if (typeof sequencedDefinition === 'undefined') {
sequencedDefinition = {
- sourceDefinitions: [],
relatedDefinitions: [],
- relatedDefinitionIds: new Set()
+ definitionIds: new Set(),
+ secondaryDefinitions: []
};
sequencedDefinitionMap.set(sequence, sequencedDefinition);
sequencedDefinitions.push(sequencedDefinition);
sequenceList.push(sequence);
}
- sequencedDefinition.sourceDefinitions.push(definition);
sequencedDefinition.relatedDefinitions.push(definition);
- sequencedDefinition.relatedDefinitionIds.add(definition.id);
+ sequencedDefinition.definitionIds.add(id);
} else {
- unsequencedDefinitions.push(definition);
+ unsequencedDefinitions.set(id, definition);
}
}
if (sequenceList.length > 0) {
- const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
- for (const databaseDefinition of databaseDefinitions) {
- const {relatedDefinitions, relatedDefinitionIds} = sequencedDefinitions[databaseDefinition.index];
- const {id} = databaseDefinition;
- if (relatedDefinitionIds.has(id)) { continue; }
-
- const {source, rawSource, sourceTerm} = relatedDefinitions[0];
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, [], false, enabledDictionaryMap);
- relatedDefinitions.push(definition);
- }
+ await this._addRelatedDefinitions(sequencedDefinitions, unsequencedDefinitions, sequenceList, mainDictionary, enabledDictionaryMap);
+ await this._addSecondaryDefinitions(sequencedDefinitions, unsequencedDefinitions, enabledDictionaryMap, secondarySearchDictionaryMap);
}
for (const {relatedDefinitions} of sequencedDefinitions) {
this._sortDefinitionsById(relatedDefinitions);
}
- return {sequencedDefinitions, unsequencedDefinitions};
+ return {sequencedDefinitions, unsequencedDefinitions: [...unsequencedDefinitions.values()]};
}
- async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) {
- if (secondarySearchDictionaryMap.size === 0) {
- return [];
+ async _addRelatedDefinitions(sequencedDefinitions, unsequencedDefinitions, sequenceList, mainDictionary, enabledDictionaryMap) {
+ const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
+ for (const databaseDefinition of databaseDefinitions) {
+ const {relatedDefinitions, definitionIds} = sequencedDefinitions[databaseDefinition.index];
+ const {id} = databaseDefinition;
+ if (definitionIds.has(id)) { continue; }
+
+ const {source, rawSource, sourceTerm} = relatedDefinitions[0];
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, [], false, enabledDictionaryMap);
+ relatedDefinitions.push(definition);
+ definitionIds.add(id);
+ unsequencedDefinitions.delete(id);
}
+ }
+ async _addSecondaryDefinitions(sequencedDefinitions, unsequencedDefinitions, enabledDictionaryMap, secondarySearchDictionaryMap) {
+ if (unsequencedDefinitions.size === 0 && secondarySearchDictionaryMap.size === 0) { return; } // unsequencedDefinitions is a Map, so its entry count is .size; .length is undefined and the early exit could never trigger
+
+ // Prepare grouping info
const expressionList = [];
const readingList = [];
- for (const [expression, readingMap] of expressionsMap.entries()) {
- for (const reading of readingMap.keys()) {
- expressionList.push(expression);
- readingList.push(reading);
+ const targetList = [];
+ const targetMap = new Map();
+
+ for (const sequencedDefinition of sequencedDefinitions) {
+ const {relatedDefinitions} = sequencedDefinition;
+ for (const definition of relatedDefinitions) {
+ const {expressions: [{expression, reading}]} = definition;
+ const key = this._createMapKey([expression, reading]);
+ let target = targetMap.get(key);
+ if (typeof target === 'undefined') {
+ target = {
+ sequencedDefinitions: [],
+ searchSecondary: false
+ };
+ targetMap.set(key, target);
+ }
+ target.sequencedDefinitions.push(sequencedDefinition);
+ if (!definition.isPrimary && !target.searchSecondary) {
+ target.searchSecondary = true;
+ expressionList.push(expression);
+ readingList.push(reading);
+ targetList.push(target);
+ }
}
}
+ // Group unsequenced definitions with sequenced definitions that have a matching [expression, reading].
+ for (const [id, definition] of unsequencedDefinitions.entries()) {
+ const {expressions: [{expression, reading}]} = definition;
+ const key = this._createMapKey([expression, reading]);
+ const target = targetMap.get(key);
+ if (typeof target === 'undefined') { continue; }
+
+ for (const {definitionIds, secondaryDefinitions} of target.sequencedDefinitions) {
+ if (definitionIds.has(id)) { continue; }
+
+ secondaryDefinitions.push(definition);
+ definitionIds.add(id);
+ unsequencedDefinitions.delete(id);
+ break;
+ }
+ }
+
+ // Search database for additional secondary terms
+ if (expressionList.length === 0 || secondarySearchDictionaryMap.size === 0) { return; }
+
const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap);
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
- const definitions = [];
for (const databaseDefinition of databaseDefinitions) {
- const source = expressionList[databaseDefinition.index];
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], false, secondarySearchDictionaryMap);
- definitions.push(definition);
+ const {index, id} = databaseDefinition;
+ const source = expressionList[index];
+ const target = targetList[index];
+ for (const {definitionIds, secondaryDefinitions} of target.sequencedDefinitions) {
+ if (definitionIds.has(id)) { continue; }
+
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], false, enabledDictionaryMap);
+ secondaryDefinitions.push(definition);
+ definitionIds.add(id);
+ unsequencedDefinitions.delete(id);
+ }
}
-
- return definitions;
}
- async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
- const {reasons, source, rawSource} = sourceDefinitions[0];
- const score = this._getMaxDefinitionScore(sourceDefinitions);
- const termInfoMap = new Map();
- const glossaryDefinitions = [];
- const glossaryDefinitionGroupMap = new Map();
-
- this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap);
- this._addUniqueTermInfos(relatedDefinitions, termInfoMap);
-
- let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap);
- secondaryDefinitions = [...unsequencedDefinitions, ...secondaryDefinitions];
-
- this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions);
- this._removeDuplicateDefinitions(secondaryDefinitions);
-
- this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
+ _getMergedDefinition(relatedDefinitions, secondaryDefinitions) {
+ const {reasons, source, rawSource} = relatedDefinitions[0];
+ const allDefinitions = secondaryDefinitions.length > 0 ? [...relatedDefinitions, ...secondaryDefinitions] : relatedDefinitions;
+ const score = this._getMaxPrimaryDefinitionScore(allDefinitions);
+ // Merge by glossary
const allExpressions = new Set();
const allReadings = new Set();
- for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) {
- for (const expression of expressions) { allExpressions.add(expression); }
- for (const reading of readings) { allReadings.add(reading); }
+ const glossaryDefinitionGroupMap = new Map();
+ for (const definition of allDefinitions) {
+ const {dictionary, glossary, expressions: [{expression, reading}]} = definition;
+
+ const key = this._createMapKey([dictionary, ...glossary]);
+ let group = glossaryDefinitionGroupMap.get(key);
+ if (typeof group === 'undefined') {
+ group = {
+ expressions: new Set(),
+ readings: new Set(),
+ definitions: []
+ };
+ glossaryDefinitionGroupMap.set(key, group);
+ }
+
+ allExpressions.add(expression);
+ allReadings.add(reading);
+ group.expressions.add(expression);
+ group.readings.add(reading);
+ group.definitions.push(definition);
}
+ const glossaryDefinitions = [];
for (const {expressions, readings, definitions} of glossaryDefinitionGroupMap.values()) {
const glossaryDefinition = this._createMergedGlossaryTermDefinition(
source,
@@ -474,10 +523,9 @@ class Translator {
);
glossaryDefinitions.push(glossaryDefinition);
}
-
this._sortDefinitions(glossaryDefinitions);
- const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
+ const termDetailsList = this._createTermDetailsList(allDefinitions);
return this._createMergedTermDefinition(
source,
@@ -521,29 +569,6 @@ class Translator {
return [...definitionTagsMap.values()];
}
- _removeDuplicateDefinitions(definitions) {
- const definitionGroups = new Map();
- for (let i = 0, ii = definitions.length; i < ii; ++i) {
- const definition = definitions[i];
- const {id} = definition;
- const existing = definitionGroups.get(id);
- if (typeof existing === 'undefined') {
- definitionGroups.set(id, [i, definition]);
- continue;
- }
-
- let removeIndex = i;
- if (definition.source.length > existing[1].source.length) {
- definitionGroups.set(id, [i, definition]);
- removeIndex = existing[0];
- }
-
- definitions.splice(removeIndex, 1);
- --i;
- --ii;
- }
- }
-
_flagRedundantDefinitionTags(definitions) {
let lastDictionary = null;
let lastPartOfSpeech = '';
@@ -599,58 +624,6 @@ class Translator {
return results;
}
- _mergeByGlossary(definitions, glossaryDefinitionGroupMap) {
- for (const definition of definitions) {
- const {expression, reading, dictionary, glossary, id} = definition;
-
- const key = this._createMapKey([dictionary, ...glossary]);
- let group = glossaryDefinitionGroupMap.get(key);
- if (typeof group === 'undefined') {
- group = {
- expressions: new Set(),
- readings: new Set(),
- definitions: [],
- definitionIds: new Set()
- };
- glossaryDefinitionGroupMap.set(key, group);
- }
-
- const {definitionIds} = group;
- if (definitionIds.has(id)) { continue; }
- definitionIds.add(id);
- group.expressions.add(expression);
- group.readings.add(reading);
- group.definitions.push(definition);
- }
- }
-
- _addUniqueTermInfos(definitions, termInfoMap) {
- for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
- let readingMap = termInfoMap.get(expression);
- if (typeof readingMap === 'undefined') {
- readingMap = new Map();
- termInfoMap.set(expression, readingMap);
- }
-
- let termInfo = readingMap.get(reading);
- if (typeof termInfo === 'undefined') {
- termInfo = {
- sourceTerm,
- furiganaSegments,
- termTagsMap: new Map()
- };
- readingMap.set(reading, termInfo);
- }
-
- const {termTagsMap} = termInfo;
- for (const tag of termTags) {
- const {name} = tag;
- if (termTagsMap.has(name)) { continue; }
- termTagsMap.set(name, this._cloneTag(tag));
- }
- }
- }
-
_convertTermDefinitionsToMergedGlossaryTermDefinitions(definitions) {
const convertedDefinitions = [];
for (const definition of definitions) {
@@ -1029,6 +1002,14 @@ class Translator {
return result;
}
+ _getMaxPrimaryDefinitionScore(definitions) {
+ let result = Number.MIN_SAFE_INTEGER;
+ for (const {isPrimary, score} of definitions) {
+ if (isPrimary && score > result) { result = score; }
+ }
+ return result;
+ }
+
_getMinDictionaryOrder(definitions) {
let result = Number.MAX_SAFE_INTEGER;
for (const {dictionaryOrder} of definitions) {
@@ -1212,9 +1193,7 @@ class Translator {
const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions);
const dictionaryNames = this._getUniqueDictionaryNames(definitions);
- const termInfoMap = new Map();
- this._addUniqueTermInfos(definitions, termInfoMap);
- const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
+ const termDetailsList = this._createTermDetailsList(definitions);
const definitionTags = this._getUniqueDefinitionTags(definitions);
this._sortTags(definitionTags);
@@ -1250,7 +1229,33 @@ class Translator {
};
}
- _createTermDetailsListFromTermInfoMap(termInfoMap) {
+ _createTermDetailsList(definitions) {
+ const termInfoMap = new Map();
+ for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
+ let readingMap = termInfoMap.get(expression);
+ if (typeof readingMap === 'undefined') {
+ readingMap = new Map();
+ termInfoMap.set(expression, readingMap);
+ }
+
+ let termInfo = readingMap.get(reading);
+ if (typeof termInfo === 'undefined') {
+ termInfo = {
+ sourceTerm,
+ furiganaSegments,
+ termTagsMap: new Map()
+ };
+ readingMap.set(reading, termInfo);
+ }
+
+ const {termTagsMap} = termInfo;
+ for (const tag of termTags) {
+ const {name} = tag;
+ if (termTagsMap.has(name)) { continue; }
+ termTagsMap.set(name, this._cloneTag(tag));
+ }
+ }
+
const termDetailsList = [];
for (const [expression, readingMap] of termInfoMap.entries()) {
for (const [reading, {termTagsMap, sourceTerm, furiganaSegments}] of readingMap.entries()) {