diff options
| -rw-r--r-- | ext/bg/js/translator.js | 630 | 
1 files changed, 323 insertions, 307 deletions
| diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 5f91205d..883e035a 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -138,167 +138,21 @@ class Translator {          return definitions;      } -    // Private +    // Find terms core functions -    async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) { -        const sequenceList = []; -        const sequencedDefinitionMap = new Map(); -        const sequencedDefinitions = []; -        const unsequencedDefinitions = []; -        for (const definition of definitions) { -            const {sequence, dictionary} = definition; -            if (mainDictionary === dictionary && sequence >= 0) { -                let sequencedDefinition = sequencedDefinitionMap.get(sequence); -                if (typeof sequencedDefinition === 'undefined') { -                    sequencedDefinition = { -                        sourceDefinitions: [], -                        relatedDefinitions: [] -                    }; -                    sequencedDefinitionMap.set(sequence, sequencedDefinition); -                    sequencedDefinitions.push(sequencedDefinition); -                    sequenceList.push(sequence); -                } -                sequencedDefinition.sourceDefinitions.push(definition); -            } else { -                unsequencedDefinitions.push(definition); -            } -        } - -        if (sequenceList.length > 0) { -            const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary); -            for (const databaseDefinition of databaseDefinitions) { -                const {relatedDefinitions} = sequencedDefinitions[databaseDefinition.index]; -                const {expression} = databaseDefinition; -                const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, expression, expression, expression, [], enabledDictionaryMap); -                relatedDefinitions.push(definition); -            } -        } - -        return {sequencedDefinitions, unsequencedDefinitions}; -    } - -    async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) { -        if (secondarySearchDictionaryMap.size === 0) { -            return []; -        } - -        const expressionList = []; -        const readingList = []; -        for (const [expression, readingMap] of expressionsMap.entries()) { -            for (const reading of readingMap.keys()) { -                expressionList.push(expression); -                readingList.push(reading); -            } -        } - -        const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap); -        this._sortDatabaseDefinitionsByIndex(databaseDefinitions); - -        const definitions = []; -        for (const databaseDefinition of databaseDefinitions) { -            const source = expressionList[databaseDefinition.index]; -            const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap); -            definitions.push(definition); -        } - -        return definitions; -    } - -    async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) { -        const {reasons, source, rawSource, dictionary} = sourceDefinitions[0]; -        const score = this._getMaxDefinitionScore(sourceDefinitions); -        const termInfoMap = new Map(); -        const glossaryDefinitions = []; -        const glossaryDefinitionGroupMap = new Map(); - -        this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap); -        this._addUniqueTermInfos(relatedDefinitions, termInfoMap); - -        let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap); -        secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions]; - -        this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions); -        this._removeDuplicateDefinitions(secondaryDefinitions); - -        this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap); - -        const allExpressions = new Set(); -        const allReadings = new Set(); -        for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) { -            for (const expression of expressions) { allExpressions.add(expression); } -            for (const reading of readings) { allReadings.add(reading); } -        } - -        for (const {expressions, readings, definitions: definitions2} of glossaryDefinitionGroupMap.values()) { -            const glossaryDefinition = this._createMergedGlossaryTermDefinition( -                source, -                rawSource, -                definitions2, -                expressions, -                readings, -                allExpressions, -                allReadings -            ); -            glossaryDefinitions.push(glossaryDefinition); -        } - -        this._sortDefinitions(glossaryDefinitions, true); - -        const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap); - -        return this._createMergedTermDefinition( -            source, -            rawSource, -            glossaryDefinitions, -            [...allExpressions], -            [...allReadings], -            termDetailsList, -            reasons, -            dictionary, -            score -        ); -    } - -    _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) { -        for (let i = 0, ii = definitions.length; i < ii; ++i) { -            const definition = definitions[i]; -            const {expression, reading} = definition; -            const expressionMap = termInfoMap.get(expression); -            if ( -                typeof expressionMap !== 'undefined' && -                typeof expressionMap.get(reading) !== 'undefined' -            ) { -                usedDefinitions.add(definition); -            } else { -                definitions.splice(i, 1); -                --i; -                --ii; -            } -        } -    } - -    _getUniqueDefinitionTags(definitions) { -        const definitionTagsMap = new Map(); -        for (const {definitionTags} of definitions) { -            for (const tag of definitionTags) { -                const {name} = tag; -                if (definitionTagsMap.has(name)) { continue; } -                definitionTagsMap.set(name, this._cloneTag(tag)); -            } -        } -        return [...definitionTagsMap.values()]; -    } - -    _getTermTagsScoreSum(termTags) { -        let result = 0; -        for (const {score} of termTags) { result += score; } -        return result; +    async _findTermsSimple(text, options) { +        const {enabledDictionaryMap} = options; +        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); +        this._sortDefinitions(definitions, false); +        return [definitions, length];      } -    _getSourceTermMatchCountSum(definitions) { -        let result = 0; -        for (const {sourceTermExactMatchCount} of definitions) { result += sourceTermExactMatchCount; } -        return result; +    async _findTermsSplit(text, options) { +        const {enabledDictionaryMap} = options; +        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); +        await this._buildTermMeta(definitions, enabledDictionaryMap); +        this._sortDefinitions(definitions, true); +        return [definitions, length];      }      async _findTermsGrouped(text, options) { @@ -368,20 +222,7 @@ class Translator {          return [definitionsMerged, length];      } -    async _findTermsSplit(text, options) { -        const {enabledDictionaryMap} = options; -        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); -        await this._buildTermMeta(definitions, enabledDictionaryMap); -        this._sortDefinitions(definitions, true); -        return [definitions, length]; -    } - -    async _findTermsSimple(text, options) { -        const {enabledDictionaryMap} = options; -        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); -        this._sortDefinitions(definitions, false); -        return [definitions, length]; -    } +    // Find terms internal implementation      async _findTermsInternal(text, enabledDictionaryMap, options) {          const {alphanumeric, wildcard} = options; @@ -520,14 +361,280 @@ class Translator {          return deinflections;      } -    _getTextOptionEntryVariants(value) { -        switch (value) { -            case 'true': return [true]; -            case 'variant': return [false, true]; -            default: return [false]; +    async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) { +        const sequenceList = []; +        const sequencedDefinitionMap = new Map(); +        const sequencedDefinitions = []; +        const unsequencedDefinitions = []; +        for (const definition of definitions) { +            const {sequence, dictionary} = definition; +            if (mainDictionary === dictionary && sequence >= 0) { +                let sequencedDefinition = sequencedDefinitionMap.get(sequence); +                if (typeof sequencedDefinition === 'undefined') { +                    sequencedDefinition = { +                        sourceDefinitions: [], +                        relatedDefinitions: [] +                    }; +                    sequencedDefinitionMap.set(sequence, sequencedDefinition); +                    sequencedDefinitions.push(sequencedDefinition); +                    sequenceList.push(sequence); +                } +                sequencedDefinition.sourceDefinitions.push(definition); +            } else { +                unsequencedDefinitions.push(definition); +            } +        } + +        if (sequenceList.length > 0) { +            const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary); +            for (const databaseDefinition of databaseDefinitions) { +                const {relatedDefinitions} = sequencedDefinitions[databaseDefinition.index]; +                const {expression} = databaseDefinition; +                const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, expression, expression, expression, [], enabledDictionaryMap); +                relatedDefinitions.push(definition); +            } +        } + +        return {sequencedDefinitions, unsequencedDefinitions}; +    } + +    async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) { +        if (secondarySearchDictionaryMap.size === 0) { +            return []; +        } + +        const expressionList = []; +        const readingList = []; +        for (const [expression, readingMap] of expressionsMap.entries()) { +            for (const reading of readingMap.keys()) { +                expressionList.push(expression); +                readingList.push(reading); +            } +        } + +        const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap); +        this._sortDatabaseDefinitionsByIndex(databaseDefinitions); + +        const definitions = []; +        for (const databaseDefinition of databaseDefinitions) { +            const source = expressionList[databaseDefinition.index]; +            const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap); +            definitions.push(definition); +        } + +        return definitions; +    } + +    async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) { +        const {reasons, source, rawSource, dictionary} = sourceDefinitions[0]; +        const score = this._getMaxDefinitionScore(sourceDefinitions); +        const termInfoMap = new Map(); +        const glossaryDefinitions = []; +        const glossaryDefinitionGroupMap = new Map(); + +        this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap); +        this._addUniqueTermInfos(relatedDefinitions, termInfoMap); + +        let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap); +        secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions]; + +        this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions); +        this._removeDuplicateDefinitions(secondaryDefinitions); + +        this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap); + +        const allExpressions = new Set(); +        const allReadings = new Set(); +        for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) { +            for (const expression of expressions) { allExpressions.add(expression); } +            for (const reading of readings) { allReadings.add(reading); } +        } + +        for (const {expressions, readings, definitions: definitions2} of glossaryDefinitionGroupMap.values()) { +            const glossaryDefinition = this._createMergedGlossaryTermDefinition( +                source, +                rawSource, +                definitions2, +                expressions, +                readings, +                allExpressions, +                allReadings +            ); +            glossaryDefinitions.push(glossaryDefinition); +        } + +        this._sortDefinitions(glossaryDefinitions, true); + +        const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap); + +        return this._createMergedTermDefinition( +            source, +            rawSource, +            glossaryDefinitions, +            [...allExpressions], +            [...allReadings], +            termDetailsList, +            reasons, +            dictionary, +            score +        ); +    } + +    _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) { +        for (let i = 0, ii = definitions.length; i < ii; ++i) { +            const definition = definitions[i]; +            const {expression, reading} = definition; +            const expressionMap = termInfoMap.get(expression); +            if ( +                typeof expressionMap !== 'undefined' && +                typeof expressionMap.get(reading) !== 'undefined' +            ) { +                usedDefinitions.add(definition); +            } else { +                definitions.splice(i, 1); +                --i; +                --ii; +            } +        } +    } + +    _getUniqueDefinitionTags(definitions) { +        const definitionTagsMap = new Map(); +        for (const {definitionTags} of definitions) { +            for (const tag of definitionTags) { +                const {name} = tag; +                if (definitionTagsMap.has(name)) { continue; } +                definitionTagsMap.set(name, this._cloneTag(tag)); +            } +        } +        return [...definitionTagsMap.values()]; +    } + +    _removeDuplicateDefinitions(definitions) { +        const definitionGroups = new Map(); +        for (let i = 0, ii = definitions.length; i < ii; ++i) { +            const definition = definitions[i]; +            const {id} = definition; +            const existing = definitionGroups.get(id); +            if (typeof existing === 'undefined') { +                definitionGroups.set(id, [i, definition]); +                continue; +            } + +            let removeIndex = i; +            if (definition.source.length > existing[1].source.length) { +                definitionGroups.set(id, [i, definition]); +                removeIndex = existing[0]; +            } + +            definitions.splice(removeIndex, 1); +            --i; +            --ii; +        } +    } + +    _compressDefinitionTags(definitions) { +        let lastDictionary = ''; +        let lastPartOfSpeech = ''; +        const removeCategoriesSet = new Set(); + +        for (const {definitionTags} of definitions) { +            const dictionary = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'dictionary')); +            const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech')); + +            if (lastDictionary === dictionary) { +                removeCategoriesSet.add('dictionary'); +            } else { +                lastDictionary = dictionary; +                lastPartOfSpeech = ''; +            } + +            if (lastPartOfSpeech === partOfSpeech) { +                removeCategoriesSet.add('partOfSpeech'); +            } else { +                lastPartOfSpeech = partOfSpeech; +            } + +            if (removeCategoriesSet.size > 0) { +                this._removeTagsWithCategory(definitionTags, removeCategoriesSet); +                removeCategoriesSet.clear(); +            } +        } +    } + +    _groupTerms(definitions) { +        const groups = new Map(); +        for (const definition of definitions) { +            const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]); +            let groupDefinitions = groups.get(key); +            if (typeof groupDefinitions === 'undefined') { +                groupDefinitions = []; +                groups.set(key, groupDefinitions); +            } + +            groupDefinitions.push(definition); +        } + +        const results = []; +        for (const groupDefinitions of groups.values()) { +            this._sortDefinitions(groupDefinitions, true); +            const definition = this._createGroupedTermDefinition(groupDefinitions); +            results.push(definition);          } + +        return results;      } +    _mergeByGlossary(definitions, glossaryDefinitionGroupMap) { +        for (const definition of definitions) { +            const {expression, reading, dictionary, glossary} = definition; + +            const key = this._createMapKey([dictionary, ...glossary]); +            let group = glossaryDefinitionGroupMap.get(key); +            if (typeof group === 'undefined') { +                group = { +                    expressions: new Set(), +                    readings: new Set(), +                    definitions: [] +                }; +                glossaryDefinitionGroupMap.set(key, group); +            } + +            group.expressions.add(expression); +            group.readings.add(reading); +            group.definitions.push(definition); +        } +    } + +    _addUniqueTermInfos(definitions, termInfoMap) { +        for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) { +            let readingMap = termInfoMap.get(expression); +            if (typeof readingMap === 'undefined') { +                readingMap = new Map(); +                termInfoMap.set(expression, readingMap); +            } + +            let termInfo = readingMap.get(reading); +            if (typeof termInfo === 'undefined') { +                termInfo = { +                    sourceTerm, +                    furiganaSegments, +                    termTagsMap: new Map() +                }; +                readingMap.set(reading, termInfo); +            } + +            const {termTagsMap} = termInfo; +            for (const tag of termTags) { +                const {name} = tag; +                if (termTagsMap.has(name)) { continue; } +                termTagsMap.set(name, this._cloneTag(tag)); +            } +        } +    } + +    // Metadata building +      async _buildTermMeta(definitions, enabledDictionaryMap) {          const terms = [];          for (const definition of definitions) { @@ -692,6 +799,8 @@ class Translator {          return {reading, pitches, dictionary};      } +    // Simple helpers +      _scoreToTermFrequency(score) {          if (score > 0) {              return 'popular'; @@ -707,26 +816,6 @@ class Translator {          return (pos >= 0 ? name.substring(0, pos) : name);      } -    *_getArrayVariants(arrayVariants) { -        const ii = arrayVariants.length; - -        let total = 1; -        for (let i = 0; i < ii; ++i) { -            total *= arrayVariants[i].length; -        } - -        for (let a = 0; a < total; ++a) { -            const variant = []; -            let index = a; -            for (let i = 0; i < ii; ++i) { -                const entryVariants = arrayVariants[i]; -                variant.push(entryVariants[index % entryVariants.length]); -                index = Math.floor(index / entryVariants.length); -            } -            yield variant; -        } -    } -      _getSearchableText(text, allowAlphanumericCharacters) {          if (allowAlphanumericCharacters) {              return text; @@ -742,6 +831,14 @@ class Translator {          return newText;      } +    _getTextOptionEntryVariants(value) { +        switch (value) { +            case 'true': return [true]; +            case 'variant': return [false, true]; +            default: return [false]; +        } +    } +      _getSecondarySearchDictionaryMap(enabledDictionaryMap) {          const secondarySearchDictionaryMap = new Map();          for (const [title, dictionary] of enabledDictionaryMap.entries()) { @@ -756,58 +853,6 @@ class Translator {          return typeof info !== 'undefined' ? info.priority : 0;      } -    _removeDuplicateDefinitions(definitions) { -        const definitionGroups = new Map(); -        for (let i = 0, ii = definitions.length; i < ii; ++i) { -            const definition = definitions[i]; -            const {id} = definition; -            const existing = definitionGroups.get(id); -            if (typeof existing === 'undefined') { -                definitionGroups.set(id, [i, definition]); -                continue; -            } - -            let removeIndex = i; -            if (definition.source.length > existing[1].source.length) { -                definitionGroups.set(id, [i, definition]); -                removeIndex = existing[0]; -            } - -            definitions.splice(removeIndex, 1); -            --i; -            --ii; -        } -    } - -    _compressDefinitionTags(definitions) { -        let lastDictionary = ''; -        let lastPartOfSpeech = ''; -        const removeCategoriesSet = new Set(); - -        for (const {definitionTags} of definitions) { -            const dictionary = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'dictionary')); -            const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech')); - -            if (lastDictionary === dictionary) { -                removeCategoriesSet.add('dictionary'); -            } else { -                lastDictionary = dictionary; -                lastPartOfSpeech = ''; -            } - -            if (lastPartOfSpeech === partOfSpeech) { -                removeCategoriesSet.add('partOfSpeech'); -            } else { -                lastPartOfSpeech = partOfSpeech; -            } - -            if (removeCategoriesSet.size > 0) { -                this._removeTagsWithCategory(definitionTags, removeCategoriesSet); -                removeCategoriesSet.clear(); -            } -        } -    } -      _getTagNamesWithCategory(tags, category) {          const results = [];          for (const tag of tags) { @@ -828,75 +873,42 @@ class Translator {          }      } -    _groupTerms(definitions) { -        const groups = new Map(); -        for (const definition of definitions) { -            const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]); -            let groupDefinitions = groups.get(key); -            if (typeof groupDefinitions === 'undefined') { -                groupDefinitions = []; -                groups.set(key, groupDefinitions); -            } +    *_getArrayVariants(arrayVariants) { +        const ii = arrayVariants.length; -            groupDefinitions.push(definition); +        let total = 1; +        for (let i = 0; i < ii; ++i) { +            total *= arrayVariants[i].length;          } -        const results = []; -        for (const groupDefinitions of groups.values()) { -            this._sortDefinitions(groupDefinitions, true); -            const definition = this._createGroupedTermDefinition(groupDefinitions); -            results.push(definition); +        for (let a = 0; a < total; ++a) { +            const variant = []; +            let index = a; +            for (let i = 0; i < ii; ++i) { +                const entryVariants = arrayVariants[i]; +                variant.push(entryVariants[index % entryVariants.length]); +                index = Math.floor(index / entryVariants.length); +            } +            yield variant;          } - -        return results;      } -    _mergeByGlossary(definitions, glossaryDefinitionGroupMap) { -        for (const definition of definitions) { -            const {expression, reading, dictionary, glossary} = definition; +    // Reduction functions -            const key = this._createMapKey([dictionary, ...glossary]); -            let group = glossaryDefinitionGroupMap.get(key); -            if (typeof group === 'undefined') { -                group = { -                    expressions: new Set(), -                    readings: new Set(), -                    definitions: [] -                }; -                glossaryDefinitionGroupMap.set(key, group); -            } - -            group.expressions.add(expression); -            group.readings.add(reading); -            group.definitions.push(definition); +    _getTermTagsScoreSum(termTags) { +        let result = 0; +        for (const {score} of termTags) { +            result += score;          } +        return result;      } -    _addUniqueTermInfos(definitions, termInfoMap) { -        for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) { -            let readingMap = termInfoMap.get(expression); -            if (typeof readingMap === 'undefined') { -                readingMap = new Map(); -                termInfoMap.set(expression, readingMap); -            } - -            let termInfo = readingMap.get(reading); -            if (typeof termInfo === 'undefined') { -                termInfo = { -                    sourceTerm, -                    furiganaSegments, -                    termTagsMap: new Map() -                }; -                readingMap.set(reading, termInfo); -            } - -            const {termTagsMap} = termInfo; -            for (const tag of termTags) { -                const {name} = tag; -                if (termTagsMap.has(name)) { continue; } -                termTagsMap.set(name, this._cloneTag(tag)); -            } +    _getSourceTermMatchCountSum(definitions) { +        let result = 0; +        for (const {sourceTermExactMatchCount} of definitions) { +            result += sourceTermExactMatchCount;          } +        return result;      }      _getMaxDefinitionScore(definitions) { @@ -915,6 +927,8 @@ class Translator {          return result;      } +    // Common data creation and cloning functions +      _cloneTag(tag) {          const {name, category, notes, order, score, dictionary} = tag;          return this._createTag(name, category, notes, order, score, dictionary); @@ -1147,6 +1161,8 @@ class Translator {          };      } +    // Sorting functions +      _sortTags(tags) {          if (tags.length <= 1) { return; }          const stringComparer = this._stringComparer; |