summary | refs | log | tree | commit | diff
path: root/ext/bg/js
diff options
context:
space:
mode:
Diffstat (limited to 'ext/bg/js')
-rw-r--r-- ext/bg/js/translator.js 630
1 files changed, 323 insertions, 307 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 5f91205d..883e035a 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -138,167 +138,21 @@ class Translator {
return definitions;
}
- // Private
+ // Find terms core functions
- async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) {
- const sequenceList = [];
- const sequencedDefinitionMap = new Map();
- const sequencedDefinitions = [];
- const unsequencedDefinitions = [];
- for (const definition of definitions) {
- const {sequence, dictionary} = definition;
- if (mainDictionary === dictionary && sequence >= 0) {
- let sequencedDefinition = sequencedDefinitionMap.get(sequence);
- if (typeof sequencedDefinition === 'undefined') {
- sequencedDefinition = {
- sourceDefinitions: [],
- relatedDefinitions: []
- };
- sequencedDefinitionMap.set(sequence, sequencedDefinition);
- sequencedDefinitions.push(sequencedDefinition);
- sequenceList.push(sequence);
- }
- sequencedDefinition.sourceDefinitions.push(definition);
- } else {
- unsequencedDefinitions.push(definition);
- }
- }
-
- if (sequenceList.length > 0) {
- const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
- for (const databaseDefinition of databaseDefinitions) {
- const {relatedDefinitions} = sequencedDefinitions[databaseDefinition.index];
- const {expression} = databaseDefinition;
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, expression, expression, expression, [], enabledDictionaryMap);
- relatedDefinitions.push(definition);
- }
- }
-
- return {sequencedDefinitions, unsequencedDefinitions};
- }
-
- async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) {
- if (secondarySearchDictionaryMap.size === 0) {
- return [];
- }
-
- const expressionList = [];
- const readingList = [];
- for (const [expression, readingMap] of expressionsMap.entries()) {
- for (const reading of readingMap.keys()) {
- expressionList.push(expression);
- readingList.push(reading);
- }
- }
-
- const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap);
- this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
-
- const definitions = [];
- for (const databaseDefinition of databaseDefinitions) {
- const source = expressionList[databaseDefinition.index];
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap);
- definitions.push(definition);
- }
-
- return definitions;
- }
-
- async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
- const {reasons, source, rawSource, dictionary} = sourceDefinitions[0];
- const score = this._getMaxDefinitionScore(sourceDefinitions);
- const termInfoMap = new Map();
- const glossaryDefinitions = [];
- const glossaryDefinitionGroupMap = new Map();
-
- this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap);
- this._addUniqueTermInfos(relatedDefinitions, termInfoMap);
-
- let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap);
- secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions];
-
- this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions);
- this._removeDuplicateDefinitions(secondaryDefinitions);
-
- this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
-
- const allExpressions = new Set();
- const allReadings = new Set();
- for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) {
- for (const expression of expressions) { allExpressions.add(expression); }
- for (const reading of readings) { allReadings.add(reading); }
- }
-
- for (const {expressions, readings, definitions: definitions2} of glossaryDefinitionGroupMap.values()) {
- const glossaryDefinition = this._createMergedGlossaryTermDefinition(
- source,
- rawSource,
- definitions2,
- expressions,
- readings,
- allExpressions,
- allReadings
- );
- glossaryDefinitions.push(glossaryDefinition);
- }
-
- this._sortDefinitions(glossaryDefinitions, true);
-
- const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
-
- return this._createMergedTermDefinition(
- source,
- rawSource,
- glossaryDefinitions,
- [...allExpressions],
- [...allReadings],
- termDetailsList,
- reasons,
- dictionary,
- score
- );
- }
-
- _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) {
- for (let i = 0, ii = definitions.length; i < ii; ++i) {
- const definition = definitions[i];
- const {expression, reading} = definition;
- const expressionMap = termInfoMap.get(expression);
- if (
- typeof expressionMap !== 'undefined' &&
- typeof expressionMap.get(reading) !== 'undefined'
- ) {
- usedDefinitions.add(definition);
- } else {
- definitions.splice(i, 1);
- --i;
- --ii;
- }
- }
- }
-
- _getUniqueDefinitionTags(definitions) {
- const definitionTagsMap = new Map();
- for (const {definitionTags} of definitions) {
- for (const tag of definitionTags) {
- const {name} = tag;
- if (definitionTagsMap.has(name)) { continue; }
- definitionTagsMap.set(name, this._cloneTag(tag));
- }
- }
- return [...definitionTagsMap.values()];
- }
-
- _getTermTagsScoreSum(termTags) {
- let result = 0;
- for (const {score} of termTags) { result += score; }
- return result;
+ async _findTermsSimple(text, options) {
+ const {enabledDictionaryMap} = options;
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
+ this._sortDefinitions(definitions, false);
+ return [definitions, length];
}
- _getSourceTermMatchCountSum(definitions) {
- let result = 0;
- for (const {sourceTermExactMatchCount} of definitions) { result += sourceTermExactMatchCount; }
- return result;
+ async _findTermsSplit(text, options) {
+ const {enabledDictionaryMap} = options;
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
+ await this._buildTermMeta(definitions, enabledDictionaryMap);
+ this._sortDefinitions(definitions, true);
+ return [definitions, length];
}
async _findTermsGrouped(text, options) {
@@ -368,20 +222,7 @@ class Translator {
return [definitionsMerged, length];
}
- async _findTermsSplit(text, options) {
- const {enabledDictionaryMap} = options;
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
- await this._buildTermMeta(definitions, enabledDictionaryMap);
- this._sortDefinitions(definitions, true);
- return [definitions, length];
- }
-
- async _findTermsSimple(text, options) {
- const {enabledDictionaryMap} = options;
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
- this._sortDefinitions(definitions, false);
- return [definitions, length];
- }
+ // Find terms internal implementation
async _findTermsInternal(text, enabledDictionaryMap, options) {
const {alphanumeric, wildcard} = options;
@@ -520,14 +361,280 @@ class Translator {
return deinflections;
}
- _getTextOptionEntryVariants(value) {
- switch (value) {
- case 'true': return [true];
- case 'variant': return [false, true];
- default: return [false];
+ async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) {
+ const sequenceList = [];
+ const sequencedDefinitionMap = new Map();
+ const sequencedDefinitions = [];
+ const unsequencedDefinitions = [];
+ for (const definition of definitions) {
+ const {sequence, dictionary} = definition;
+ if (mainDictionary === dictionary && sequence >= 0) {
+ let sequencedDefinition = sequencedDefinitionMap.get(sequence);
+ if (typeof sequencedDefinition === 'undefined') {
+ sequencedDefinition = {
+ sourceDefinitions: [],
+ relatedDefinitions: []
+ };
+ sequencedDefinitionMap.set(sequence, sequencedDefinition);
+ sequencedDefinitions.push(sequencedDefinition);
+ sequenceList.push(sequence);
+ }
+ sequencedDefinition.sourceDefinitions.push(definition);
+ } else {
+ unsequencedDefinitions.push(definition);
+ }
+ }
+
+ if (sequenceList.length > 0) {
+ const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
+ for (const databaseDefinition of databaseDefinitions) {
+ const {relatedDefinitions} = sequencedDefinitions[databaseDefinition.index];
+ const {expression} = databaseDefinition;
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, expression, expression, expression, [], enabledDictionaryMap);
+ relatedDefinitions.push(definition);
+ }
+ }
+
+ return {sequencedDefinitions, unsequencedDefinitions};
+ }
+
+ async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) {
+ if (secondarySearchDictionaryMap.size === 0) {
+ return [];
+ }
+
+ const expressionList = [];
+ const readingList = [];
+ for (const [expression, readingMap] of expressionsMap.entries()) {
+ for (const reading of readingMap.keys()) {
+ expressionList.push(expression);
+ readingList.push(reading);
+ }
+ }
+
+ const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap);
+ this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
+
+ const definitions = [];
+ for (const databaseDefinition of databaseDefinitions) {
+ const source = expressionList[databaseDefinition.index];
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap);
+ definitions.push(definition);
+ }
+
+ return definitions;
+ }
+
+ async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
+ const {reasons, source, rawSource, dictionary} = sourceDefinitions[0];
+ const score = this._getMaxDefinitionScore(sourceDefinitions);
+ const termInfoMap = new Map();
+ const glossaryDefinitions = [];
+ const glossaryDefinitionGroupMap = new Map();
+
+ this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap);
+ this._addUniqueTermInfos(relatedDefinitions, termInfoMap);
+
+ let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap);
+ secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions];
+
+ this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions);
+ this._removeDuplicateDefinitions(secondaryDefinitions);
+
+ this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
+
+ const allExpressions = new Set();
+ const allReadings = new Set();
+ for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) {
+ for (const expression of expressions) { allExpressions.add(expression); }
+ for (const reading of readings) { allReadings.add(reading); }
+ }
+
+ for (const {expressions, readings, definitions: definitions2} of glossaryDefinitionGroupMap.values()) {
+ const glossaryDefinition = this._createMergedGlossaryTermDefinition(
+ source,
+ rawSource,
+ definitions2,
+ expressions,
+ readings,
+ allExpressions,
+ allReadings
+ );
+ glossaryDefinitions.push(glossaryDefinition);
+ }
+
+ this._sortDefinitions(glossaryDefinitions, true);
+
+ const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
+
+ return this._createMergedTermDefinition(
+ source,
+ rawSource,
+ glossaryDefinitions,
+ [...allExpressions],
+ [...allReadings],
+ termDetailsList,
+ reasons,
+ dictionary,
+ score
+ );
+ }
+
+ _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) {
+ for (let i = 0, ii = definitions.length; i < ii; ++i) {
+ const definition = definitions[i];
+ const {expression, reading} = definition;
+ const expressionMap = termInfoMap.get(expression);
+ if (
+ typeof expressionMap !== 'undefined' &&
+ typeof expressionMap.get(reading) !== 'undefined'
+ ) {
+ usedDefinitions.add(definition);
+ } else {
+ definitions.splice(i, 1);
+ --i;
+ --ii;
+ }
+ }
+ }
+
+ _getUniqueDefinitionTags(definitions) {
+ const definitionTagsMap = new Map();
+ for (const {definitionTags} of definitions) {
+ for (const tag of definitionTags) {
+ const {name} = tag;
+ if (definitionTagsMap.has(name)) { continue; }
+ definitionTagsMap.set(name, this._cloneTag(tag));
+ }
+ }
+ return [...definitionTagsMap.values()];
+ }
+
+ _removeDuplicateDefinitions(definitions) {
+ const definitionGroups = new Map();
+ for (let i = 0, ii = definitions.length; i < ii; ++i) {
+ const definition = definitions[i];
+ const {id} = definition;
+ const existing = definitionGroups.get(id);
+ if (typeof existing === 'undefined') {
+ definitionGroups.set(id, [i, definition]);
+ continue;
+ }
+
+ let removeIndex = i;
+ if (definition.source.length > existing[1].source.length) {
+ definitionGroups.set(id, [i, definition]);
+ removeIndex = existing[0];
+ }
+
+ definitions.splice(removeIndex, 1);
+ --i;
+ --ii;
+ }
+ }
+
+ _compressDefinitionTags(definitions) {
+ let lastDictionary = '';
+ let lastPartOfSpeech = '';
+ const removeCategoriesSet = new Set();
+
+ for (const {definitionTags} of definitions) {
+ const dictionary = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'dictionary'));
+ const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech'));
+
+ if (lastDictionary === dictionary) {
+ removeCategoriesSet.add('dictionary');
+ } else {
+ lastDictionary = dictionary;
+ lastPartOfSpeech = '';
+ }
+
+ if (lastPartOfSpeech === partOfSpeech) {
+ removeCategoriesSet.add('partOfSpeech');
+ } else {
+ lastPartOfSpeech = partOfSpeech;
+ }
+
+ if (removeCategoriesSet.size > 0) {
+ this._removeTagsWithCategory(definitionTags, removeCategoriesSet);
+ removeCategoriesSet.clear();
+ }
+ }
+ }
+
+ _groupTerms(definitions) {
+ const groups = new Map();
+ for (const definition of definitions) {
+ const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]);
+ let groupDefinitions = groups.get(key);
+ if (typeof groupDefinitions === 'undefined') {
+ groupDefinitions = [];
+ groups.set(key, groupDefinitions);
+ }
+
+ groupDefinitions.push(definition);
+ }
+
+ const results = [];
+ for (const groupDefinitions of groups.values()) {
+ this._sortDefinitions(groupDefinitions, true);
+ const definition = this._createGroupedTermDefinition(groupDefinitions);
+ results.push(definition);
}
+
+ return results;
}
+ _mergeByGlossary(definitions, glossaryDefinitionGroupMap) {
+ for (const definition of definitions) {
+ const {expression, reading, dictionary, glossary} = definition;
+
+ const key = this._createMapKey([dictionary, ...glossary]);
+ let group = glossaryDefinitionGroupMap.get(key);
+ if (typeof group === 'undefined') {
+ group = {
+ expressions: new Set(),
+ readings: new Set(),
+ definitions: []
+ };
+ glossaryDefinitionGroupMap.set(key, group);
+ }
+
+ group.expressions.add(expression);
+ group.readings.add(reading);
+ group.definitions.push(definition);
+ }
+ }
+
+ _addUniqueTermInfos(definitions, termInfoMap) {
+ for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
+ let readingMap = termInfoMap.get(expression);
+ if (typeof readingMap === 'undefined') {
+ readingMap = new Map();
+ termInfoMap.set(expression, readingMap);
+ }
+
+ let termInfo = readingMap.get(reading);
+ if (typeof termInfo === 'undefined') {
+ termInfo = {
+ sourceTerm,
+ furiganaSegments,
+ termTagsMap: new Map()
+ };
+ readingMap.set(reading, termInfo);
+ }
+
+ const {termTagsMap} = termInfo;
+ for (const tag of termTags) {
+ const {name} = tag;
+ if (termTagsMap.has(name)) { continue; }
+ termTagsMap.set(name, this._cloneTag(tag));
+ }
+ }
+ }
+
+ // Metadata building
+
async _buildTermMeta(definitions, enabledDictionaryMap) {
const terms = [];
for (const definition of definitions) {
@@ -692,6 +799,8 @@ class Translator {
return {reading, pitches, dictionary};
}
+ // Simple helpers
+
_scoreToTermFrequency(score) {
if (score > 0) {
return 'popular';
@@ -707,26 +816,6 @@ class Translator {
return (pos >= 0 ? name.substring(0, pos) : name);
}
- *_getArrayVariants(arrayVariants) {
- const ii = arrayVariants.length;
-
- let total = 1;
- for (let i = 0; i < ii; ++i) {
- total *= arrayVariants[i].length;
- }
-
- for (let a = 0; a < total; ++a) {
- const variant = [];
- let index = a;
- for (let i = 0; i < ii; ++i) {
- const entryVariants = arrayVariants[i];
- variant.push(entryVariants[index % entryVariants.length]);
- index = Math.floor(index / entryVariants.length);
- }
- yield variant;
- }
- }
-
_getSearchableText(text, allowAlphanumericCharacters) {
if (allowAlphanumericCharacters) {
return text;
@@ -742,6 +831,14 @@ class Translator {
return newText;
}
+ _getTextOptionEntryVariants(value) {
+ switch (value) {
+ case 'true': return [true];
+ case 'variant': return [false, true];
+ default: return [false];
+ }
+ }
+
_getSecondarySearchDictionaryMap(enabledDictionaryMap) {
const secondarySearchDictionaryMap = new Map();
for (const [title, dictionary] of enabledDictionaryMap.entries()) {
@@ -756,58 +853,6 @@ class Translator {
return typeof info !== 'undefined' ? info.priority : 0;
}
- _removeDuplicateDefinitions(definitions) {
- const definitionGroups = new Map();
- for (let i = 0, ii = definitions.length; i < ii; ++i) {
- const definition = definitions[i];
- const {id} = definition;
- const existing = definitionGroups.get(id);
- if (typeof existing === 'undefined') {
- definitionGroups.set(id, [i, definition]);
- continue;
- }
-
- let removeIndex = i;
- if (definition.source.length > existing[1].source.length) {
- definitionGroups.set(id, [i, definition]);
- removeIndex = existing[0];
- }
-
- definitions.splice(removeIndex, 1);
- --i;
- --ii;
- }
- }
-
- _compressDefinitionTags(definitions) {
- let lastDictionary = '';
- let lastPartOfSpeech = '';
- const removeCategoriesSet = new Set();
-
- for (const {definitionTags} of definitions) {
- const dictionary = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'dictionary'));
- const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech'));
-
- if (lastDictionary === dictionary) {
- removeCategoriesSet.add('dictionary');
- } else {
- lastDictionary = dictionary;
- lastPartOfSpeech = '';
- }
-
- if (lastPartOfSpeech === partOfSpeech) {
- removeCategoriesSet.add('partOfSpeech');
- } else {
- lastPartOfSpeech = partOfSpeech;
- }
-
- if (removeCategoriesSet.size > 0) {
- this._removeTagsWithCategory(definitionTags, removeCategoriesSet);
- removeCategoriesSet.clear();
- }
- }
- }
-
_getTagNamesWithCategory(tags, category) {
const results = [];
for (const tag of tags) {
@@ -828,75 +873,42 @@ class Translator {
}
}
- _groupTerms(definitions) {
- const groups = new Map();
- for (const definition of definitions) {
- const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]);
- let groupDefinitions = groups.get(key);
- if (typeof groupDefinitions === 'undefined') {
- groupDefinitions = [];
- groups.set(key, groupDefinitions);
- }
+ *_getArrayVariants(arrayVariants) {
+ const ii = arrayVariants.length;
- groupDefinitions.push(definition);
+ let total = 1;
+ for (let i = 0; i < ii; ++i) {
+ total *= arrayVariants[i].length;
}
- const results = [];
- for (const groupDefinitions of groups.values()) {
- this._sortDefinitions(groupDefinitions, true);
- const definition = this._createGroupedTermDefinition(groupDefinitions);
- results.push(definition);
+ for (let a = 0; a < total; ++a) {
+ const variant = [];
+ let index = a;
+ for (let i = 0; i < ii; ++i) {
+ const entryVariants = arrayVariants[i];
+ variant.push(entryVariants[index % entryVariants.length]);
+ index = Math.floor(index / entryVariants.length);
+ }
+ yield variant;
}
-
- return results;
}
- _mergeByGlossary(definitions, glossaryDefinitionGroupMap) {
- for (const definition of definitions) {
- const {expression, reading, dictionary, glossary} = definition;
+ // Reduction functions
- const key = this._createMapKey([dictionary, ...glossary]);
- let group = glossaryDefinitionGroupMap.get(key);
- if (typeof group === 'undefined') {
- group = {
- expressions: new Set(),
- readings: new Set(),
- definitions: []
- };
- glossaryDefinitionGroupMap.set(key, group);
- }
-
- group.expressions.add(expression);
- group.readings.add(reading);
- group.definitions.push(definition);
+ _getTermTagsScoreSum(termTags) {
+ let result = 0;
+ for (const {score} of termTags) {
+ result += score;
}
+ return result;
}
- _addUniqueTermInfos(definitions, termInfoMap) {
- for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
- let readingMap = termInfoMap.get(expression);
- if (typeof readingMap === 'undefined') {
- readingMap = new Map();
- termInfoMap.set(expression, readingMap);
- }
-
- let termInfo = readingMap.get(reading);
- if (typeof termInfo === 'undefined') {
- termInfo = {
- sourceTerm,
- furiganaSegments,
- termTagsMap: new Map()
- };
- readingMap.set(reading, termInfo);
- }
-
- const {termTagsMap} = termInfo;
- for (const tag of termTags) {
- const {name} = tag;
- if (termTagsMap.has(name)) { continue; }
- termTagsMap.set(name, this._cloneTag(tag));
- }
+ _getSourceTermMatchCountSum(definitions) {
+ let result = 0;
+ for (const {sourceTermExactMatchCount} of definitions) {
+ result += sourceTermExactMatchCount;
}
+ return result;
}
_getMaxDefinitionScore(definitions) {
@@ -915,6 +927,8 @@ class Translator {
return result;
}
+ // Common data creation and cloning functions
+
_cloneTag(tag) {
const {name, category, notes, order, score, dictionary} = tag;
return this._createTag(name, category, notes, order, score, dictionary);
@@ -1147,6 +1161,8 @@ class Translator {
};
}
+ // Sorting functions
+
_sortTags(tags) {
if (tags.length <= 1) { return; }
const stringComparer = this._stringComparer;