diff options
Diffstat (limited to 'ext/bg/js')
-rw-r--r-- | ext/bg/js/translator.js | 630 |
1 files changed, 323 insertions, 307 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 5f91205d..883e035a 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -138,167 +138,21 @@ class Translator { return definitions; } - // Private + // Find terms core functions - async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) { - const sequenceList = []; - const sequencedDefinitionMap = new Map(); - const sequencedDefinitions = []; - const unsequencedDefinitions = []; - for (const definition of definitions) { - const {sequence, dictionary} = definition; - if (mainDictionary === dictionary && sequence >= 0) { - let sequencedDefinition = sequencedDefinitionMap.get(sequence); - if (typeof sequencedDefinition === 'undefined') { - sequencedDefinition = { - sourceDefinitions: [], - relatedDefinitions: [] - }; - sequencedDefinitionMap.set(sequence, sequencedDefinition); - sequencedDefinitions.push(sequencedDefinition); - sequenceList.push(sequence); - } - sequencedDefinition.sourceDefinitions.push(definition); - } else { - unsequencedDefinitions.push(definition); - } - } - - if (sequenceList.length > 0) { - const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary); - for (const databaseDefinition of databaseDefinitions) { - const {relatedDefinitions} = sequencedDefinitions[databaseDefinition.index]; - const {expression} = databaseDefinition; - const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, expression, expression, expression, [], enabledDictionaryMap); - relatedDefinitions.push(definition); - } - } - - return {sequencedDefinitions, unsequencedDefinitions}; - } - - async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) { - if (secondarySearchDictionaryMap.size === 0) { - return []; - } - - const expressionList = []; - const readingList = []; - for (const [expression, readingMap] of expressionsMap.entries()) { - for (const reading of readingMap.keys()) { - expressionList.push(expression); - readingList.push(reading); - } - } - - const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap); - this._sortDatabaseDefinitionsByIndex(databaseDefinitions); - - const definitions = []; - for (const databaseDefinition of databaseDefinitions) { - const source = expressionList[databaseDefinition.index]; - const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap); - definitions.push(definition); - } - - return definitions; - } - - async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) { - const {reasons, source, rawSource, dictionary} = sourceDefinitions[0]; - const score = this._getMaxDefinitionScore(sourceDefinitions); - const termInfoMap = new Map(); - const glossaryDefinitions = []; - const glossaryDefinitionGroupMap = new Map(); - - this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap); - this._addUniqueTermInfos(relatedDefinitions, termInfoMap); - - let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap); - secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions]; - - this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions); - this._removeDuplicateDefinitions(secondaryDefinitions); - - this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap); - - const allExpressions = new Set(); - const allReadings = new Set(); - for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) { - for (const expression of expressions) { allExpressions.add(expression); } - for (const reading of readings) { allReadings.add(reading); } - } - - for (const {expressions, readings, definitions: definitions2} of glossaryDefinitionGroupMap.values()) { - const glossaryDefinition = this._createMergedGlossaryTermDefinition( - source, - rawSource, - definitions2, - expressions, - readings, - allExpressions, - allReadings - ); - glossaryDefinitions.push(glossaryDefinition); - } - - this._sortDefinitions(glossaryDefinitions, true); - - const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap); - - return this._createMergedTermDefinition( - source, - rawSource, - glossaryDefinitions, - [...allExpressions], - [...allReadings], - termDetailsList, - reasons, - dictionary, - score - ); - } - - _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) { - for (let i = 0, ii = definitions.length; i < ii; ++i) { - const definition = definitions[i]; - const {expression, reading} = definition; - const expressionMap = termInfoMap.get(expression); - if ( - typeof expressionMap !== 'undefined' && - typeof expressionMap.get(reading) !== 'undefined' - ) { - usedDefinitions.add(definition); - } else { - definitions.splice(i, 1); - --i; - --ii; - } - } - } - - _getUniqueDefinitionTags(definitions) { - const definitionTagsMap = new Map(); - for (const {definitionTags} of definitions) { - for (const tag of definitionTags) { - const {name} = tag; - if (definitionTagsMap.has(name)) { continue; } - definitionTagsMap.set(name, this._cloneTag(tag)); - } - } - return [...definitionTagsMap.values()]; - } - - _getTermTagsScoreSum(termTags) { - let result = 0; - for (const {score} of termTags) { result += score; } - return result; + async _findTermsSimple(text, options) { + const {enabledDictionaryMap} = options; + const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); + this._sortDefinitions(definitions, false); + return [definitions, length]; } - _getSourceTermMatchCountSum(definitions) { - let result = 0; - for (const {sourceTermExactMatchCount} of definitions) { result += sourceTermExactMatchCount; } - return result; + async _findTermsSplit(text, options) { + const {enabledDictionaryMap} = options; + const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); + await this._buildTermMeta(definitions, enabledDictionaryMap); + this._sortDefinitions(definitions, true); + return [definitions, length]; } async _findTermsGrouped(text, options) { @@ -368,20 +222,7 @@ class Translator { return [definitionsMerged, length]; } - async _findTermsSplit(text, options) { - const {enabledDictionaryMap} = options; - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); - await this._buildTermMeta(definitions, enabledDictionaryMap); - this._sortDefinitions(definitions, true); - return [definitions, length]; - } - - async _findTermsSimple(text, options) { - const {enabledDictionaryMap} = options; - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); - this._sortDefinitions(definitions, false); - return [definitions, length]; - } + // Find terms internal implementation async _findTermsInternal(text, enabledDictionaryMap, options) { const {alphanumeric, wildcard} = options; @@ -520,14 +361,280 @@ class Translator { return deinflections; } - _getTextOptionEntryVariants(value) { - switch (value) { - case 'true': return [true]; - case 'variant': return [false, true]; - default: return [false]; + async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) { + const sequenceList = []; + const sequencedDefinitionMap = new Map(); + const sequencedDefinitions = []; + const unsequencedDefinitions = []; + for (const definition of definitions) { + const {sequence, dictionary} = definition; + if (mainDictionary === dictionary && sequence >= 0) { + let sequencedDefinition = sequencedDefinitionMap.get(sequence); + if (typeof sequencedDefinition === 'undefined') { + sequencedDefinition = { + sourceDefinitions: [], + relatedDefinitions: [] + }; + sequencedDefinitionMap.set(sequence, sequencedDefinition); + sequencedDefinitions.push(sequencedDefinition); + sequenceList.push(sequence); + } + sequencedDefinition.sourceDefinitions.push(definition); + } else { + unsequencedDefinitions.push(definition); + } + } + + if (sequenceList.length > 0) { + const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary); + for (const databaseDefinition of databaseDefinitions) { + const {relatedDefinitions} = sequencedDefinitions[databaseDefinition.index]; + const {expression} = databaseDefinition; + const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, expression, expression, expression, [], enabledDictionaryMap); + relatedDefinitions.push(definition); + } + } + + return {sequencedDefinitions, unsequencedDefinitions}; + } + + async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) { + if (secondarySearchDictionaryMap.size === 0) { + return []; + } + + const expressionList = []; + const readingList = []; + for (const [expression, readingMap] of expressionsMap.entries()) { + for (const reading of readingMap.keys()) { + expressionList.push(expression); + readingList.push(reading); + } + } + + const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap); + this._sortDatabaseDefinitionsByIndex(databaseDefinitions); + + const definitions = []; + for (const databaseDefinition of databaseDefinitions) { + const source = expressionList[databaseDefinition.index]; + const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap); + definitions.push(definition); + } + + return definitions; + } + + async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) { + const {reasons, source, rawSource, dictionary} = sourceDefinitions[0]; + const score = this._getMaxDefinitionScore(sourceDefinitions); + const termInfoMap = new Map(); + const glossaryDefinitions = []; + const glossaryDefinitionGroupMap = new Map(); + + this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap); + this._addUniqueTermInfos(relatedDefinitions, termInfoMap); + + let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap); + secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions]; + + this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions); + this._removeDuplicateDefinitions(secondaryDefinitions); + + this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap); + + const allExpressions = new Set(); + const allReadings = new Set(); + for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) { + for (const expression of expressions) { allExpressions.add(expression); } + for (const reading of readings) { allReadings.add(reading); } + } + + for (const {expressions, readings, definitions: definitions2} of glossaryDefinitionGroupMap.values()) { + const glossaryDefinition = this._createMergedGlossaryTermDefinition( + source, + rawSource, + definitions2, + expressions, + readings, + allExpressions, + allReadings + ); + glossaryDefinitions.push(glossaryDefinition); + } + + this._sortDefinitions(glossaryDefinitions, true); + + const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap); + + return this._createMergedTermDefinition( + source, + rawSource, + glossaryDefinitions, + [...allExpressions], + [...allReadings], + termDetailsList, + reasons, + dictionary, + score + ); + } + + _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) { + for (let i = 0, ii = definitions.length; i < ii; ++i) { + const definition = definitions[i]; + const {expression, reading} = definition; + const expressionMap = termInfoMap.get(expression); + if ( + typeof expressionMap !== 'undefined' && + typeof expressionMap.get(reading) !== 'undefined' + ) { + usedDefinitions.add(definition); + } else { + definitions.splice(i, 1); + --i; + --ii; + } + } + } + + _getUniqueDefinitionTags(definitions) { + const definitionTagsMap = new Map(); + for (const {definitionTags} of definitions) { + for (const tag of definitionTags) { + const {name} = tag; + if (definitionTagsMap.has(name)) { continue; } + definitionTagsMap.set(name, this._cloneTag(tag)); + } + } + return [...definitionTagsMap.values()]; + } + + _removeDuplicateDefinitions(definitions) { + const definitionGroups = new Map(); + for (let i = 0, ii = definitions.length; i < ii; ++i) { + const definition = definitions[i]; + const {id} = definition; + const existing = definitionGroups.get(id); + if (typeof existing === 'undefined') { + definitionGroups.set(id, [i, definition]); + continue; + } + + let removeIndex = i; + if (definition.source.length > existing[1].source.length) { + definitionGroups.set(id, [i, definition]); + removeIndex = existing[0]; + } + + definitions.splice(removeIndex, 1); + --i; + --ii; + } + } + + _compressDefinitionTags(definitions) { + let lastDictionary = ''; + let lastPartOfSpeech = ''; + const removeCategoriesSet = new Set(); + + for (const {definitionTags} of definitions) { + const dictionary = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'dictionary')); + const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech')); + + if (lastDictionary === dictionary) { + removeCategoriesSet.add('dictionary'); + } else { + lastDictionary = dictionary; + lastPartOfSpeech = ''; + } + + if (lastPartOfSpeech === partOfSpeech) { + removeCategoriesSet.add('partOfSpeech'); + } else { + lastPartOfSpeech = partOfSpeech; + } + + if (removeCategoriesSet.size > 0) { + this._removeTagsWithCategory(definitionTags, removeCategoriesSet); + removeCategoriesSet.clear(); + } + } + } + + _groupTerms(definitions) { + const groups = new Map(); + for (const definition of definitions) { + const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]); + let groupDefinitions = groups.get(key); + if (typeof groupDefinitions === 'undefined') { + groupDefinitions = []; + groups.set(key, groupDefinitions); + } + + groupDefinitions.push(definition); + } + + const results = []; + for (const groupDefinitions of groups.values()) { + this._sortDefinitions(groupDefinitions, true); + const definition = this._createGroupedTermDefinition(groupDefinitions); + results.push(definition); } + + return results; } + _mergeByGlossary(definitions, glossaryDefinitionGroupMap) { + for (const definition of definitions) { + const {expression, reading, dictionary, glossary} = definition; + + const key = this._createMapKey([dictionary, ...glossary]); + let group = glossaryDefinitionGroupMap.get(key); + if (typeof group === 'undefined') { + group = { + expressions: new Set(), + readings: new Set(), + definitions: [] + }; + glossaryDefinitionGroupMap.set(key, group); + } + + group.expressions.add(expression); + group.readings.add(reading); + group.definitions.push(definition); + } + } + + _addUniqueTermInfos(definitions, termInfoMap) { + for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) { + let readingMap = termInfoMap.get(expression); + if (typeof readingMap === 'undefined') { + readingMap = new Map(); + termInfoMap.set(expression, readingMap); + } + + let termInfo = readingMap.get(reading); + if (typeof termInfo === 'undefined') { + termInfo = { + sourceTerm, + furiganaSegments, + termTagsMap: new Map() + }; + readingMap.set(reading, termInfo); + } + + const {termTagsMap} = termInfo; + for (const tag of termTags) { + const {name} = tag; + if (termTagsMap.has(name)) { continue; } + termTagsMap.set(name, this._cloneTag(tag)); + } + } + } + + // Metadata building + async _buildTermMeta(definitions, enabledDictionaryMap) { const terms = []; for (const definition of definitions) { @@ -692,6 +799,8 @@ class Translator { return {reading, pitches, dictionary}; } + // Simple helpers + _scoreToTermFrequency(score) { if (score > 0) { return 'popular'; @@ -707,26 +816,6 @@ class Translator { return (pos >= 0 ? name.substring(0, pos) : name); } - *_getArrayVariants(arrayVariants) { - const ii = arrayVariants.length; - - let total = 1; - for (let i = 0; i < ii; ++i) { - total *= arrayVariants[i].length; - } - - for (let a = 0; a < total; ++a) { - const variant = []; - let index = a; - for (let i = 0; i < ii; ++i) { - const entryVariants = arrayVariants[i]; - variant.push(entryVariants[index % entryVariants.length]); - index = Math.floor(index / entryVariants.length); - } - yield variant; - } - } - _getSearchableText(text, allowAlphanumericCharacters) { if (allowAlphanumericCharacters) { return text; @@ -742,6 +831,14 @@ class Translator { return newText; } + _getTextOptionEntryVariants(value) { + switch (value) { + case 'true': return [true]; + case 'variant': return [false, true]; + default: return [false]; + } + } + _getSecondarySearchDictionaryMap(enabledDictionaryMap) { const secondarySearchDictionaryMap = new Map(); for (const [title, dictionary] of enabledDictionaryMap.entries()) { @@ -756,58 +853,6 @@ class Translator { return typeof info !== 'undefined' ? info.priority : 0; } - _removeDuplicateDefinitions(definitions) { - const definitionGroups = new Map(); - for (let i = 0, ii = definitions.length; i < ii; ++i) { - const definition = definitions[i]; - const {id} = definition; - const existing = definitionGroups.get(id); - if (typeof existing === 'undefined') { - definitionGroups.set(id, [i, definition]); - continue; - } - - let removeIndex = i; - if (definition.source.length > existing[1].source.length) { - definitionGroups.set(id, [i, definition]); - removeIndex = existing[0]; - } - - definitions.splice(removeIndex, 1); - --i; - --ii; - } - } - - _compressDefinitionTags(definitions) { - let lastDictionary = ''; - let lastPartOfSpeech = ''; - const removeCategoriesSet = new Set(); - - for (const {definitionTags} of definitions) { - const dictionary = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'dictionary')); - const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech')); - - if (lastDictionary === dictionary) { - removeCategoriesSet.add('dictionary'); - } else { - lastDictionary = dictionary; - lastPartOfSpeech = ''; - } - - if (lastPartOfSpeech === partOfSpeech) { - removeCategoriesSet.add('partOfSpeech'); - } else { - lastPartOfSpeech = partOfSpeech; - } - - if (removeCategoriesSet.size > 0) { - this._removeTagsWithCategory(definitionTags, removeCategoriesSet); - removeCategoriesSet.clear(); - } - } - } - _getTagNamesWithCategory(tags, category) { const results = []; for (const tag of tags) { @@ -828,75 +873,42 @@ class Translator { } } - _groupTerms(definitions) { - const groups = new Map(); - for (const definition of definitions) { - const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]); - let groupDefinitions = groups.get(key); - if (typeof groupDefinitions === 'undefined') { - groupDefinitions = []; - groups.set(key, groupDefinitions); - } + *_getArrayVariants(arrayVariants) { + const ii = arrayVariants.length; - groupDefinitions.push(definition); + let total = 1; + for (let i = 0; i < ii; ++i) { + total *= arrayVariants[i].length; } - const results = []; - for (const groupDefinitions of groups.values()) { - this._sortDefinitions(groupDefinitions, true); - const definition = this._createGroupedTermDefinition(groupDefinitions); - results.push(definition); + for (let a = 0; a < total; ++a) { + const variant = []; + let index = a; + for (let i = 0; i < ii; ++i) { + const entryVariants = arrayVariants[i]; + variant.push(entryVariants[index % entryVariants.length]); + index = Math.floor(index / entryVariants.length); + } + yield variant; } - - return results; } - _mergeByGlossary(definitions, glossaryDefinitionGroupMap) { - for (const definition of definitions) { - const {expression, reading, dictionary, glossary} = definition; + // Reduction functions - const key = this._createMapKey([dictionary, ...glossary]); - let group = glossaryDefinitionGroupMap.get(key); - if (typeof group === 'undefined') { - group = { - expressions: new Set(), - readings: new Set(), - definitions: [] - }; - glossaryDefinitionGroupMap.set(key, group); - } - - group.expressions.add(expression); - group.readings.add(reading); - group.definitions.push(definition); + _getTermTagsScoreSum(termTags) { + let result = 0; + for (const {score} of termTags) { + result += score; } + return result; } - _addUniqueTermInfos(definitions, termInfoMap) { - for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) { - let readingMap = termInfoMap.get(expression); - if (typeof readingMap === 'undefined') { - readingMap = new Map(); - termInfoMap.set(expression, readingMap); - } - - let termInfo = readingMap.get(reading); - if (typeof termInfo === 'undefined') { - termInfo = { - sourceTerm, - furiganaSegments, - termTagsMap: new Map() - }; - readingMap.set(reading, termInfo); - } - - const {termTagsMap} = termInfo; - for (const tag of termTags) { - const {name} = tag; - if (termTagsMap.has(name)) { continue; } - termTagsMap.set(name, this._cloneTag(tag)); - } + _getSourceTermMatchCountSum(definitions) { + let result = 0; + for (const {sourceTermExactMatchCount} of definitions) { + result += sourceTermExactMatchCount; } + return result; } _getMaxDefinitionScore(definitions) { @@ -915,6 +927,8 @@ class Translator { return result; } + // Common data creation and cloning functions + _cloneTag(tag) { const {name, category, notes, order, score, dictionary} = tag; return this._createTag(name, category, notes, order, score, dictionary); @@ -1147,6 +1161,8 @@ class Translator { }; } + // Sorting functions + _sortTags(tags) { if (tags.length <= 1) { return; } const stringComparer = this._stringComparer; |