about | summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2020-10-04 11:12:15 -0400
committer: GitHub <noreply@github.com> 2020-10-04 11:12:15 -0400
commit: 86c64ac4c27279fef6bd6f49aa807a10cd9d08bf (patch)
tree: 976b5e8a93ed24a76b1bac703d8a514894370638 /ext
parent: ef333b6d72c00879b221e035c853c87298c12496 (diff)
Translator refactoring (#878)
* Add helper function _getSecondarySearchDictionaryMap
* Use spreads instead of concat
* Create helper function for assigning disambiguation info
* Add _getTermTagsScoreSum helper
* Rename variables
* Move assignment of definition details
* Convert databaseDefinitions to standard definitions
* Move creation of definitions
* Always pass definitionsByGlossary directly to _mergeByGlossary
* Refactor how used definitions are tracked
* Change _mergeByGlossary parameters
* Simplify _mergeByGlossary
* Generate definition structure at the end of the function
* Add reusable function to clone a tag
* Restructure merged sub-definition creation
* Move setup of totalExpressionSet/totalReadingSet
* Track all definitions with matching glossary
* Move creation of definitionTags
* Update score calculation for multiple definitions
* Disambiguate function name
* Rename variables
* Add helper function _createMergedGlossaryTermDefinition
* Remove rarely used check for secondary search results
  (unclear what the purpose was, since it was almost never used)
* Update data structure
* Rename function/variable
* Add helper functions _createMergedTermDefinition
* Assign expressionDetails to variable
* Add rawSource to grouped definitions
* Use shared function for creating unused definition entries
* Add helper function _createMapKey
* Refactor _createExpressionDetails
* Change argument order
* Add _getTagNamesWithCategory helper
* Optimize _compressDefinitionTags
* Add _createGroupedTermDefinition helper
* Add type information
* Use consistent order for term definition fields, add comments for omitted fields
* Remove unused index field
* Add helper function _createKanjiDefinition
* Update types
* Add frequencies/pitches arrays
* Update terms array setup
* Simplify for loop
* Remove redundant property assignment
* Simplify how deinflections with no definitions are ignored
* Ensure duplicates are removed
* Rename variable
* Add comments about duplicate data
* Refactor variable/parameter names
* Move functions
* Rename functions
* Rename variable
* Add dictionaryPriority field
* Simplify priority comparison
* Rename argument
* Optimize comparison function when dictionary priority is not needed
* Add length check
* Remove unused parameter
* Rename variables/parameters
* Rename dictionaries to enabledDictionaryMap
* Store compactTags/mainDictionary options to variables
* Refactor _getSearchableText
Diffstat (limited to 'ext')
-rw-r--r-- ext/bg/js/translator.js 693
1 files changed, 420 insertions, 273 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 9446dfb6..34b6ca34 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -54,45 +54,36 @@ class Translator {
}
async findKanji(text, options) {
- const dictionaries = this._getEnabledDictionaryMap(options);
+ const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const kanjiUnique = new Set();
for (const c of text) {
kanjiUnique.add(c);
}
- const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries);
+ const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], enabledDictionaryMap);
if (databaseDefinitions.length === 0) { return []; }
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
const definitions = [];
- for (const {index, character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
+ for (const {character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
const expandedStats = await this._expandStats(stats, dictionary);
const expandedTags = await this._expandTags(tags, dictionary);
expandedTags.push(this._createDictionaryTag(dictionary));
this._sortTags(expandedTags);
- definitions.push({
- index,
- character,
- onyomi,
- kunyomi,
- tags: expandedTags,
- glossary,
- stats: expandedStats,
- dictionary,
- frequencies: []
- });
+ const definition = this._createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, expandedTags, expandedStats);
+ definitions.push(definition);
}
- await this._buildKanjiMeta(definitions, dictionaries);
+ await this._buildKanjiMeta(definitions, enabledDictionaryMap);
return definitions;
}
// Private
- async _getSequencedDefinitions(definitions, mainDictionary) {
+ async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) {
const sequenceList = [];
const sequencedDefinitionMap = new Map();
const sequencedDefinitions = [];
@@ -103,13 +94,14 @@ class Translator {
const {score} = definition;
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
if (typeof sequencedDefinition === 'undefined') {
- const {reasons, source} = definition;
+ const {reasons, source, rawSource} = definition;
sequencedDefinition = {
reasons,
score,
source,
+ rawSource,
dictionary,
- databaseDefinitions: []
+ definitions: []
};
sequencedDefinitionMap.set(sequence, sequencedDefinition);
sequencedDefinitions.push(sequencedDefinition);
@@ -122,155 +114,202 @@ class Translator {
}
}
- const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
- for (const databaseDefinition of databaseDefinitions) {
- sequencedDefinitions[databaseDefinition.index].databaseDefinitions.push(databaseDefinition);
+ if (sequenceList.length > 0) {
+ const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
+ for (const databaseDefinition of databaseDefinitions) {
+ const {definitions: definitions2, source, rawSource, reasons} = sequencedDefinitions[databaseDefinition.index];
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
+ definitions2.push(definition);
+ }
}
return {sequencedDefinitions, unsequencedDefinitions};
}
- async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) {
- if (secondarySearchDictionaries.size === 0) {
+ async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) {
+ if (secondarySearchDictionaryMap.size === 0) {
return [];
}
const expressionList = [];
const readingList = [];
for (const [expression, readingMap] of expressionsMap.entries()) {
- if (expression === text) { continue; }
for (const reading of readingMap.keys()) {
expressionList.push(expression);
readingList.push(reading);
}
}
- const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
+ const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap);
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
const definitions = [];
for (const databaseDefinition of databaseDefinitions) {
const source = expressionList[databaseDefinition.index];
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, []);
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, [], secondarySearchDictionaryMap);
definitions.push(definition);
}
return definitions;
}
- async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) {
- const {reasons, score, source, dictionary, databaseDefinitions} = sequencedDefinition;
- const result = {
- reasons,
- score,
- expression: new Set(),
- reading: new Set(),
- expressions: new Map(),
- source,
- dictionary,
- definitions: []
- };
+ async _getMergedDefinition(sequencedDefinition, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
+ const {reasons, score, source, rawSource, dictionary, definitions} = sequencedDefinition;
+ const definitionDetailsMap = new Map();
+ const glossaryDefinitions = [];
+ const glossaryDefinitionGroupMap = new Map();
- for (const definition of databaseDefinitions) {
- const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
- definitionTags.push(this._createDictionaryTag(definition.dictionary));
- definition.definitionTags = definitionTags;
- const termTags = await this._expandTags(definition.termTags, definition.dictionary);
- definition.termTags = termTags;
- }
+ this._mergeByGlossary(definitions, glossaryDefinitionGroupMap);
+ this._addDefinitionDetails(definitions, definitionDetailsMap);
+
+ let secondaryDefinitions = await this._getMergedSecondarySearchResults(definitionDetailsMap, secondarySearchDictionaryMap);
+ secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions];
- const definitionsByGloss = this._mergeByGlossary(result, databaseDefinitions);
- const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
+ this._removeUsedDefinitions(secondaryDefinitions, definitionDetailsMap, usedDefinitions);
+ this._removeDuplicateDefinitions(secondaryDefinitions);
- this._mergeByGlossary(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
+ this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
- for (const definition of definitionsByGloss.values()) {
- this._sortTags(definition.definitionTags);
- result.definitions.push(definition);
+ const allExpressions = new Set();
+ const allReadings = new Set();
+ for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) {
+ for (const expression of expressions) { allExpressions.add(expression); }
+ for (const reading of readings) { allReadings.add(reading); }
}
- this._sortDefinitions(result.definitions, dictionaries);
+ for (const {expressions, readings, definitions: definitions2} of glossaryDefinitionGroupMap.values()) {
+ const glossaryDefinition = this._createMergedGlossaryTermDefinition(
+ source,
+ rawSource,
+ definitions2,
+ expressions,
+ readings,
+ allExpressions,
+ allReadings
+ );
+ glossaryDefinitions.push(glossaryDefinition);
+ }
- const expressions = [];
- for (const [expression, readingMap] of result.expressions.entries()) {
+ this._sortDefinitions(glossaryDefinitions, true);
+
+ const expressionDetailsList = [];
+ for (const [expression, readingMap] of definitionDetailsMap.entries()) {
for (const [reading, termTagsMap] of readingMap.entries()) {
const termTags = [...termTagsMap.values()];
- const score2 = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
this._sortTags(termTags);
- expressions.push(this._createExpression(expression, reading, termTags, this._scoreToTermFrequency(score2)));
+ expressionDetailsList.push(this._createExpressionDetails(expression, reading, termTags));
+ }
+ }
+
+ return this._createMergedTermDefinition(
+ source,
+ rawSource,
+ glossaryDefinitions,
+ [...allExpressions],
+ [...allReadings],
+ expressionDetailsList,
+ reasons,
+ dictionary,
+ score
+ );
+ }
+
+ _removeUsedDefinitions(definitions, definitionDetailsMap, usedDefinitions) {
+ for (let i = 0, ii = definitions.length; i < ii; ++i) {
+ const definition = definitions[i];
+ const {expression, reading} = definition;
+ const expressionMap = definitionDetailsMap.get(expression);
+ if (
+ typeof expressionMap !== 'undefined' &&
+ typeof expressionMap.get(reading) !== 'undefined'
+ ) {
+ usedDefinitions.add(definition);
+ } else {
+ definitions.splice(i, 1);
+ --i;
+ --ii;
}
}
+ }
- result.expressions = expressions;
- result.expression = Array.from(result.expression);
- result.reading = Array.from(result.reading);
+ _getUniqueDefinitionTags(definitions) {
+ const definitionTagsMap = new Map();
+ for (const {definitionTags} of definitions) {
+ for (const tag of definitionTags) {
+ const {name} = tag;
+ if (definitionTagsMap.has(name)) { continue; }
+ definitionTagsMap.set(name, this._cloneTag(tag));
+ }
+ }
+ return [...definitionTagsMap.values()];
+ }
+ _getTermTagsScoreSum(termTags) {
+ let result = 0;
+ for (const {score} of termTags) { result += score; }
return result;
}
async _findTermsGrouped(text, details, options) {
- const dictionaries = this._getEnabledDictionaryMap(options);
- const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
+ const {general: {compactTags}} = options;
+ const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
- const definitionsGrouped = this._groupTerms(definitions, dictionaries);
- await this._buildTermMeta(definitionsGrouped, dictionaries);
- this._sortDefinitions(definitionsGrouped, null);
+ const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
+ await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
+ this._sortDefinitions(groupedDefinitions, false);
- if (options.general.compactTags) {
- for (const definition of definitionsGrouped) {
+ if (compactTags) {
+ for (const definition of groupedDefinitions) {
this._compressDefinitionTags(definition.definitions);
}
}
- return [definitionsGrouped, length];
+ return [groupedDefinitions, length];
}
async _findTermsMerged(text, details, options) {
- const dictionaries = this._getEnabledDictionaryMap(options);
- const secondarySearchDictionaries = new Map();
- for (const [title, dictionary] of dictionaries.entries()) {
- if (!dictionary.allowSecondarySearches) { continue; }
- secondarySearchDictionaries.set(title, dictionary);
- }
+ const {general: {compactTags, mainDictionary}} = options;
+ const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
+ const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
- const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
- const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary);
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+ const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
const definitionsMerged = [];
- const mergedByTermIndices = new Set();
+ const usedDefinitions = new Set();
for (const sequencedDefinition of sequencedDefinitions) {
const result = await this._getMergedDefinition(
- text,
- dictionaries,
sequencedDefinition,
unsequencedDefinitions,
- secondarySearchDictionaries,
- mergedByTermIndices
+ secondarySearchDictionaryMap,
+ usedDefinitions
);
definitionsMerged.push(result);
}
- const strayDefinitions = unsequencedDefinitions.filter((definition, index) => !mergedByTermIndices.has(index));
- for (const groupedDefinition of this._groupTerms(strayDefinitions, dictionaries)) {
- // from dictTermsMergeBySequence
- const {reasons, score, expression, reading, source, dictionary} = groupedDefinition;
- const compatibilityDefinition = {
- reasons,
- score,
- expression: [expression],
- reading: [reading],
- expressions: [this._createExpression(groupedDefinition.expression, groupedDefinition.reading)],
+ const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
+ for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
+ const {reasons, score, expression, reading, source, rawSource, dictionary, termTags} = groupedDefinition;
+ const expressionDetails = this._createExpressionDetails(expression, reading, termTags);
+ const compatibilityDefinition = this._createMergedTermDefinition(
source,
+ rawSource,
+ definitions,
+ [expression],
+ [reading],
+ [expressionDetails],
+ reasons,
dictionary,
- definitions: groupedDefinition.definitions
- };
+ score
+ );
definitionsMerged.push(compatibilityDefinition);
}
- await this._buildTermMeta(definitionsMerged, dictionaries);
- this._sortDefinitions(definitionsMerged, null);
+ await this._buildTermMeta(definitionsMerged, enabledDictionaryMap);
+ this._sortDefinitions(definitionsMerged, false);
- if (options.general.compactTags) {
+ if (compactTags) {
for (const definition of definitionsMerged) {
this._compressDefinitionTags(definition.definitions);
}
@@ -280,38 +319,39 @@ class Translator {
}
async _findTermsSplit(text, details, options) {
- const dictionaries = this._getEnabledDictionaryMap(options);
- const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
- await this._buildTermMeta(definitions, dictionaries);
- this._sortDefinitions(definitions, dictionaries);
+ const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+ await this._buildTermMeta(definitions, enabledDictionaryMap);
+ this._sortDefinitions(definitions, true);
return [definitions, length];
}
async _findTermsSimple(text, details, options) {
- const dictionaries = this._getEnabledDictionaryMap(options);
- const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
- this._sortDefinitions(definitions, null);
+ const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+ this._sortDefinitions(definitions, false);
return [definitions, length];
}
- async _findTermsInternal(text, dictionaries, details, options) {
- text = this._getSearchableText(text, options);
+ async _findTermsInternal(text, enabledDictionaryMap, details, options) {
+ text = this._getSearchableText(text, options.scanning.alphanumeric);
if (text.length === 0) {
return [[], 0];
}
const deinflections = (
details.wildcard ?
- await this._findTermWildcard(text, dictionaries, details.wildcard) :
- await this._findTermDeinflections(text, dictionaries, options)
+ await this._findTermWildcard(text, enabledDictionaryMap, details.wildcard) :
+ await this._findTermDeinflections(text, enabledDictionaryMap, options)
);
let maxLength = 0;
const definitions = [];
for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) {
+ if (databaseDefinitions.length === 0) { continue; }
maxLength = Math.max(maxLength, rawSource.length);
for (const databaseDefinition of databaseDefinitions) {
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons);
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
definitions.push(definition);
}
}
@@ -320,8 +360,8 @@ class Translator {
return [definitions, maxLength];
}
- async _findTermWildcard(text, dictionaries, wildcard) {
- const databaseDefinitions = await this._database.findTermsBulk([text], dictionaries, wildcard);
+ async _findTermWildcard(text, enabledDictionaryMap, wildcard) {
+ const databaseDefinitions = await this._database.findTermsBulk([text], enabledDictionaryMap, wildcard);
if (databaseDefinitions.length === 0) {
return [];
}
@@ -336,7 +376,7 @@ class Translator {
}];
}
- async _findTermDeinflections(text, dictionaries, options) {
+ async _findTermDeinflections(text, enabledDictionaryMap, options) {
const deinflections = this._getAllDeinflections(text, options);
if (deinflections.length === 0) {
@@ -358,7 +398,7 @@ class Translator {
deinflectionArray.push(deinflection);
}
- const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null);
+ const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, null);
for (const databaseDefinition of databaseDefinitions) {
const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules);
@@ -370,7 +410,7 @@ class Translator {
}
}
- return deinflections.filter((e) => e.databaseDefinitions.length > 0);
+ return deinflections;
}
_getAllDeinflections(text, options) {
@@ -438,13 +478,17 @@ class Translator {
}
}
- async _buildTermMeta(definitions, dictionaries) {
+ async _buildTermMeta(definitions, enabledDictionaryMap) {
const terms = [];
for (const definition of definitions) {
- if (definition.expressions) {
- terms.push(...definition.expressions);
- } else {
- terms.push(definition);
+ switch (definition.type) {
+ case 'term':
+ case 'termGrouped':
+ terms.push(definition);
+ break;
+ case 'termMerged':
+ terms.push(...definition.expressions);
+ break;
}
}
@@ -456,9 +500,8 @@ class Translator {
const expressionsUnique = [];
const termsUnique = [];
const termsUniqueMap = new Map();
- for (let i = 0, ii = terms.length; i < ii; ++i) {
- const term = terms[i];
- const expression = term.expression;
+ for (const term of terms) {
+ const {expression} = term;
let termList = termsUniqueMap.get(expression);
if (typeof termList === 'undefined') {
termList = [];
@@ -467,13 +510,9 @@ class Translator {
termsUniqueMap.set(expression, termList);
}
termList.push(term);
-
- // New data
- term.frequencies = [];
- term.pitches = [];
}
- const metas = await this._database.findTermMetaBulk(expressionsUnique, dictionaries);
+ const metas = await this._database.findTermMetaBulk(expressionsUnique, enabledDictionaryMap);
for (const {expression, mode, data, dictionary, index} of metas) {
switch (mode) {
case 'freq':
@@ -494,13 +533,13 @@ class Translator {
}
}
- async _buildKanjiMeta(definitions, dictionaries) {
+ async _buildKanjiMeta(definitions, enabledDictionaryMap) {
const kanjiList = [];
for (const {character} of definitions) {
kanjiList.push(character);
}
- const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries);
+ const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
for (const {character, mode, data, dictionary, index} of metas) {
switch (mode) {
case 'freq':
@@ -638,19 +677,19 @@ class Translator {
}
}
- _getSearchableText(text, options) {
- if (!options.scanning.alphanumeric) {
- let newText = '';
- for (const c of text) {
- if (!jp.isCodePointJapanese(c.codePointAt(0))) {
- break;
- }
- newText += c;
- }
- text = newText;
+ _getSearchableText(text, allowAlphanumericCharacters) {
+ if (allowAlphanumericCharacters) {
+ return text;
}
- return text;
+ let newText = '';
+ for (const c of text) {
+ if (!jp.isCodePointJapanese(c.codePointAt(0))) {
+ break;
+ }
+ newText += c;
+ }
+ return newText;
}
async _fetchJsonAsset(url) {
@@ -677,6 +716,20 @@ class Translator {
return enabledDictionaryMap;
}
+ _getSecondarySearchDictionaryMap(enabledDictionaryMap) {
+ const secondarySearchDictionaryMap = new Map();
+ for (const [title, dictionary] of enabledDictionaryMap.entries()) {
+ if (!dictionary.allowSecondarySearches) { continue; }
+ secondarySearchDictionaryMap.set(title, dictionary);
+ }
+ return secondarySearchDictionaryMap;
+ }
+
+ _getDictionaryPriority(dictionary, enabledDictionaryMap) {
+ const info = enabledDictionaryMap.get(dictionary);
+ return typeof info !== 'undefined' ? info.priority : 0;
+ }
+
_removeDuplicateDefinitions(definitions) {
const definitionGroups = new Map();
for (let i = 0, ii = definitions.length; i < ii; ++i) {
@@ -703,43 +756,60 @@ class Translator {
_compressDefinitionTags(definitions) {
let lastDictionary = '';
let lastPartOfSpeech = '';
+ const removeCategoriesSet = new Set();
- for (const definition of definitions) {
- const dictionary = JSON.stringify(definition.definitionTags.filter((tag) => tag.category === 'dictionary').map((tag) => tag.name).sort());
- const partOfSpeech = JSON.stringify(definition.definitionTags.filter((tag) => tag.category === 'partOfSpeech').map((tag) => tag.name).sort());
-
- const filterOutCategories = [];
+ for (const {definitionTags} of definitions) {
+ const dictionary = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'dictionary'));
+ const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech'));
if (lastDictionary === dictionary) {
- filterOutCategories.push('dictionary');
+ removeCategoriesSet.add('dictionary');
} else {
lastDictionary = dictionary;
lastPartOfSpeech = '';
}
if (lastPartOfSpeech === partOfSpeech) {
- filterOutCategories.push('partOfSpeech');
+ removeCategoriesSet.add('partOfSpeech');
} else {
lastPartOfSpeech = partOfSpeech;
}
- definition.definitionTags = definition.definitionTags.filter((tag) => !filterOutCategories.includes(tag.category));
+ if (removeCategoriesSet.size > 0) {
+ this._removeTagsWithCategory(definitionTags, removeCategoriesSet);
+ removeCategoriesSet.clear();
+ }
+ }
+ }
+
+ _getTagNamesWithCategory(tags, category) {
+ const results = [];
+ for (const tag of tags) {
+ if (tag.category !== category) { continue; }
+ results.push(tag.name);
+ }
+ results.sort();
+ return results;
+ }
+
+ _removeTagsWithCategory(tags, removeCategoriesSet) {
+ for (let i = 0, ii = tags.length; i < ii; ++i) {
+ const {category} = tags[i];
+ if (!removeCategoriesSet.has(category)) { continue; }
+ tags.splice(i, 1);
+ --i;
+ --ii;
}
}
- _groupTerms(definitions, dictionaries) {
+ _groupTerms(definitions) {
const groups = new Map();
for (const definition of definitions) {
- const key = [definition.source, definition.expression, ...definition.reasons];
- if (definition.reading) {
- key.push(definition.reading);
- }
-
- const keyString = key.toString();
- let groupDefinitions = groups.get(keyString);
+ const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]);
+ let groupDefinitions = groups.get(key);
if (typeof groupDefinitions === 'undefined') {
groupDefinitions = [];
- groups.set(keyString, groupDefinitions);
+ groups.set(key, groupDefinitions);
}
groupDefinitions.push(definition);
@@ -747,124 +817,88 @@ class Translator {
const results = [];
for (const groupDefinitions of groups.values()) {
- const firstDef = groupDefinitions[0];
- this._sortDefinitions(groupDefinitions, dictionaries);
- results.push({
- definitions: groupDefinitions,
- expression: firstDef.expression,
- reading: firstDef.reading,
- furiganaSegments: firstDef.furiganaSegments,
- reasons: firstDef.reasons,
- termTags: firstDef.termTags,
- score: groupDefinitions.reduce((p, v) => v.score > p ? v.score : p, Number.MIN_SAFE_INTEGER),
- source: firstDef.source
- });
+ this._sortDefinitions(groupDefinitions, true);
+ const definition = this._createGroupedTermDefinition(groupDefinitions);
+ results.push(definition);
}
return results;
}
- _mergeByGlossary(result, definitions, appendTo=null, mergedIndices=null) {
- const definitionsByGlossary = appendTo !== null ? appendTo : new Map();
-
- const resultExpressionsMap = result.expressions;
- const resultExpressionSet = result.expression;
- const resultReadingSet = result.reading;
- const resultSource = result.source;
-
- for (let i = 0, ii = definitions.length; i < ii; ++i) {
- const definition = definitions[i];
- const {expression, reading} = definition;
-
- if (mergedIndices !== null) {
- const expressionMap = resultExpressionsMap.get(expression);
- if (
- typeof expressionMap !== 'undefined' &&
- typeof expressionMap.get(reading) !== 'undefined'
- ) {
- mergedIndices.add(i);
- } else {
- continue;
- }
- }
+ _mergeByGlossary(definitions, glossaryDefinitionGroupMap) {
+ for (const definition of definitions) {
+ const {expression, reading, dictionary, glossary} = definition;
- const gloss = JSON.stringify(definition.glossary.concat(definition.dictionary));
- let glossDefinition = definitionsByGlossary.get(gloss);
- if (typeof glossDefinition === 'undefined') {
- glossDefinition = {
- expression: new Set(),
- reading: new Set(),
- definitionTags: [],
- glossary: definition.glossary,
- source: resultSource,
- reasons: [],
- score: definition.score,
- id: definition.id,
- dictionary: definition.dictionary
+ const key = this._createMapKey([dictionary, ...glossary]);
+ let group = glossaryDefinitionGroupMap.get(key);
+ if (typeof group === 'undefined') {
+ group = {
+ expressions: new Set(),
+ readings: new Set(),
+ definitions: []
};
- definitionsByGlossary.set(gloss, glossDefinition);
+ glossaryDefinitionGroupMap.set(key, group);
}
- glossDefinition.expression.add(expression);
- glossDefinition.reading.add(reading);
+ group.expressions.add(expression);
+ group.readings.add(reading);
+ group.definitions.push(definition);
+ }
+ }
- resultExpressionSet.add(expression);
- resultReadingSet.add(reading);
+ _addDefinitionDetails(definitions, definitionDetailsMap) {
+ for (const {expression, reading, termTags} of definitions) {
+ let readingMap = definitionDetailsMap.get(expression);
+ if (typeof readingMap === 'undefined') {
+ readingMap = new Map();
+ definitionDetailsMap.set(expression, readingMap);
+ }
- for (const tag of definition.definitionTags) {
- if (!glossDefinition.definitionTags.find((existingTag) => existingTag.name === tag.name)) {
- glossDefinition.definitionTags.push(tag);
- }
+ let termTagsMap = readingMap.get(reading);
+ if (typeof termTagsMap === 'undefined') {
+ termTagsMap = new Map();
+ readingMap.set(reading, termTagsMap);
}
- if (appendTo === null) {
- /*
- Data layout:
- resultExpressionsMap = new Map([
- [expression, new Map([
- [reading, new Map([
- [tagName, tagInfo],
- ...
- ])],
- ...
- ])],
- ...
- ]);
- */
- let readingMap = resultExpressionsMap.get(expression);
- if (typeof readingMap === 'undefined') {
- readingMap = new Map();
- resultExpressionsMap.set(expression, readingMap);
- }
+ for (const tag of termTags) {
+ const {name} = tag;
+ if (termTagsMap.has(name)) { continue; }
+ termTagsMap.set(name, this._cloneTag(tag));
+ }
+ }
+ }
- let termTagsMap = readingMap.get(reading);
- if (typeof termTagsMap === 'undefined') {
- termTagsMap = new Map();
- readingMap.set(reading, termTagsMap);
- }
+ _getMaxDefinitionScore(definitions) {
+ let result = Number.MIN_SAFE_INTEGER;
+ for (const {score} of definitions) {
+ if (score > result) { result = score; }
+ }
+ return result;
+ }
- for (const tag of definition.termTags) {
- if (!termTagsMap.has(tag.name)) {
- termTagsMap.set(tag.name, tag);
- }
- }
- }
+ _getMaxDictionaryPriority(definitions) {
+ let result = Number.MIN_SAFE_INTEGER;
+ for (const {dictionaryPriority} of definitions) {
+ if (dictionaryPriority > result) { result = dictionaryPriority; }
}
+ return result;
+ }
- for (const definition of definitionsByGlossary.values()) {
- const only = [];
- const expressionSet = definition.expression;
- const readingSet = definition.reading;
- if (!areSetsEqual(expressionSet, resultExpressionSet)) {
- only.push(...getSetIntersection(expressionSet, resultExpressionSet));
- }
- if (!areSetsEqual(readingSet, resultReadingSet)) {
- only.push(...getSetIntersection(readingSet, resultReadingSet));
- }
- definition.only = only;
+ _cloneTag(tag) {
+ const {name, category, notes, order, score, dictionary} = tag;
+ return this._createTag(name, category, notes, order, score, dictionary);
+ }
+
+ _cloneTags(tags) {
+ const results = [];
+ for (const tag of tags) {
+ results.push(this._cloneTag(tag));
}
+ return results;
+ }
- return definitionsByGlossary;
+ _createMapKey(array) {
+ return JSON.stringify(array);
}
_createDictionaryTag(name) {
@@ -894,8 +928,23 @@ class Translator {
};
}
- async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons) {
+ _createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, tags, stats) {
+ return {
+ type: 'kanji',
+ character,
+ dictionary,
+ onyomi,
+ kunyomi,
+ glossary,
+ tags,
+ stats,
+ frequencies: []
+ };
+ }
+
+ async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap) {
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
+ const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
const termTagsExpanded = await this._expandTags(termTags, dictionary);
const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary);
definitionTagsExpanded.push(this._createDictionaryTag(dictionary));
@@ -906,30 +955,133 @@ class Translator {
const furiganaSegments = jp.distributeFurigana(expression, reading);
return {
+ type: 'term',
+ id,
source,
rawSource,
reasons,
score,
- id,
+ sequence,
dictionary,
+ dictionaryPriority,
expression,
reading,
+ // expressions
furiganaSegments,
glossary,
definitionTags: definitionTagsExpanded,
termTags: termTagsExpanded,
- sequence
+ // definitions
+ frequencies: [],
+ pitches: []
+ // only
};
}
- _createExpression(expression, reading, termTags=null, termFrequency=null) {
+ _createGroupedTermDefinition(definitions) {
+ const {expression, reading, furiganaSegments, reasons, termTags, source, rawSource} = definitions[0];
+ const score = this._getMaxDefinitionScore(definitions);
+ const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
+ return {
+ type: 'termGrouped',
+ // id
+ source,
+ rawSource,
+ reasons: [...reasons],
+ score,
+ // sequence
+ // dictionary
+ dictionaryPriority,
+ expression,
+ reading,
+ // expressions
+ furiganaSegments, // Contains duplicate data
+ // glossary
+ // definitionTags
+ termTags: this._cloneTags(termTags),
+ definitions,
+ frequencies: [],
+ pitches: []
+ // only
+ };
+ }
+
+ _createMergedTermDefinition(source, rawSource, definitions, expressions, readings, expressionDetailsList, reasons, dictionary, score) {
+ const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
+ return {
+ type: 'termMerged',
+ // id
+ source,
+ rawSource,
+ reasons,
+ score,
+ // sequence
+ dictionary,
+ dictionaryPriority,
+ expression: expressions,
+ reading: readings,
+ expressions: expressionDetailsList,
+ // furiganaSegments
+ // glossary
+ // definitionTags
+ // termTags
+ definitions,
+ frequencies: [],
+ pitches: []
+ // only
+ };
+ }
+
+ _createMergedGlossaryTermDefinition(source, rawSource, definitions, expressions, readings, allExpressions, allReadings) {
+ const only = [];
+ if (!areSetsEqual(expressions, allExpressions)) {
+ only.push(...getSetIntersection(expressions, allExpressions));
+ }
+ if (!areSetsEqual(readings, allReadings)) {
+ only.push(...getSetIntersection(readings, allReadings));
+ }
+
+ const definitionTags = this._getUniqueDefinitionTags(definitions);
+ this._sortTags(definitionTags);
+
+ const {glossary, dictionary} = definitions[0];
+ const score = this._getMaxDefinitionScore(definitions);
+ const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
+ return {
+ type: 'termMergedByGlossary',
+ // id
+ source,
+ rawSource,
+ reasons: [],
+ score,
+ // sequence
+ dictionary,
+ dictionaryPriority,
+ expression: [...expressions],
+ reading: [...readings],
+ // expressions
+ // furiganaSegments
+ glossary: [...glossary],
+ definitionTags,
+ // termTags
+ definitions, // Contains duplicate data
+ frequencies: [],
+ pitches: [],
+ only
+ };
+ }
+
+ _createExpressionDetails(expression, reading, termTags) {
+ const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
const furiganaSegments = jp.distributeFurigana(expression, reading);
return {
expression,
reading,
furiganaSegments,
termTags,
- termFrequency
+ termFrequency,
+ frequencies: [],
+ pitches: []
};
}
@@ -944,21 +1096,11 @@ class Translator {
});
}
- _sortDefinitions(definitions, dictionaries) {
+ _sortDefinitions(definitions, useDictionaryPriority) {
if (definitions.length <= 1) { return; }
const stringComparer = this._stringComparer;
- definitions.sort((v1, v2) => {
- let i;
- if (dictionaries !== null) {
- const dictionaryInfo1 = dictionaries.get(v1.dictionary);
- const dictionaryInfo2 = dictionaries.get(v2.dictionary);
- const priority1 = typeof dictionaryInfo1 !== 'undefined' ? dictionaryInfo1.priority : 0;
- const priority2 = typeof dictionaryInfo2 !== 'undefined' ? dictionaryInfo2.priority : 0;
- i = priority2 - priority1;
- if (i !== 0) { return i; }
- }
-
- i = v2.source.length - v1.source.length;
+ const compareFunction1 = (v1, v2) => {
+ let i = v2.source.length - v1.source.length;
if (i !== 0) { return i; }
i = v1.reasons.length - v2.reasons.length;
@@ -973,7 +1115,12 @@ class Translator {
if (i !== 0) { return i; }
return stringComparer.compare(expression1, expression2);
- });
+ };
+ const compareFunction2 = (v1, v2) => {
+ const i = v2.dictionaryPriority - v1.dictionaryPriority;
+ return (i !== 0) ? i : compareFunction1(v1, v2);
+ };
+ definitions.sort(useDictionaryPriority ? compareFunction2 : compareFunction1);
}
_sortDatabaseDefinitionsByIndex(definitions) {