aboutsummaryrefslogtreecommitdiff
path: root/ext/bg/js/translator.js
diff options
context:
space:
mode:
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r--ext/bg/js/translator.js1397
1 files changed, 0 insertions, 1397 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
deleted file mode 100644
index 729c8294..00000000
--- a/ext/bg/js/translator.js
+++ /dev/null
@@ -1,1397 +0,0 @@
-/*
- * Copyright (C) 2016-2021 Yomichan Authors
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-/* global
- * Deinflector
- * TextSourceMap
- */
-
-/**
- * Class which finds term and kanji definitions for text.
- */
-class Translator {
- /**
- * Creates a new Translator instance.
- * @param database An instance of DictionaryDatabase.
- */
- constructor({japaneseUtil, database}) {
- this._japaneseUtil = japaneseUtil;
- this._database = database;
- this._deinflector = null;
- this._tagCache = new Map();
- this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
- }
-
- /**
- * Initializes the instance for use. The public API should not be used until
- * this function has been called.
- * @param deinflectionReasons The raw deinflections reasons data that the Deinflector uses.
- */
- prepare(deinflectionReasons) {
- this._deinflector = new Deinflector(deinflectionReasons);
- }
-
- /**
- * Clears the database tag cache. This should be executed if the database is changed.
- */
- clearDatabaseCaches() {
- this._tagCache.clear();
- }
-
- /**
- * Finds term definitions for the given text.
- * @param mode The mode to use for finding terms, which determines the format of the resulting array.
- * One of: 'group', 'merge', 'split', 'simple'
- * @param text The text to find terms for.
- * @param options An object using the following structure:
- * {
- * wildcard: (enum: null, 'prefix', 'suffix'),
- * mainDictionary: (string),
- * alphanumeric: (boolean),
- * convertHalfWidthCharacters: (enum: 'false', 'true', 'variant'),
- * convertNumericCharacters: (enum: 'false', 'true', 'variant'),
- * convertAlphabeticCharacters: (enum: 'false', 'true', 'variant'),
- * convertHiraganaToKatakana: (enum: 'false', 'true', 'variant'),
- * convertKatakanaToHiragana: (enum: 'false', 'true', 'variant'),
- * collapseEmphaticSequences: (enum: 'false', 'true', 'full'),
- * textReplacements: [
- * (null or [
- * {pattern: (RegExp), replacement: (string)}
- * ...
- * ])
- * ...
- * ],
- * enabledDictionaryMap: (Map of [
- * (string),
- * {
- * priority: (number),
- * allowSecondarySearches: (boolean)
- * }
- * ])
- * }
- * @returns An array of [definitions, textLength]. The structure of each definition depends on the
- * mode parameter, see the _create?TermDefinition?() functions for structure details.
- */
- async findTerms(mode, text, options) {
- switch (mode) {
- case 'group':
- return await this._findTermsGrouped(text, options);
- case 'merge':
- return await this._findTermsMerged(text, options);
- case 'split':
- return await this._findTermsSplit(text, options);
- case 'simple':
- return await this._findTermsSimple(text, options);
- default:
- return [[], 0];
- }
- }
-
- /**
- * Finds kanji definitions for the given text.
- * @param text The text to find kanji definitions for. This string can be of any length,
- * but is typically just one character, which is a single kanji. If the string is multiple
- * characters long, each character will be searched in the database.
- * @param options An object using the following structure:
- * {
- * enabledDictionaryMap: (Map of [
- * (string),
- * {
- * priority: (number)
- * }
- * ])
- * }
- * @returns An array of definitions. See the _createKanjiDefinition() function for structure details.
- */
- async findKanji(text, options) {
- const {enabledDictionaryMap} = options;
- const kanjiUnique = new Set();
- for (const c of text) {
- kanjiUnique.add(c);
- }
-
- const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], enabledDictionaryMap);
- if (databaseDefinitions.length === 0) { return []; }
-
- this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
-
- const definitions = [];
- for (const {character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
- const expandedStats = await this._expandStats(stats, dictionary);
- const expandedTags = await this._expandTags(tags, dictionary);
- this._sortTags(expandedTags);
-
- const definition = this._createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, expandedTags, expandedStats);
- definitions.push(definition);
- }
-
- await this._buildKanjiMeta(definitions, enabledDictionaryMap);
-
- return definitions;
- }
-
- // Find terms core functions
-
- async _findTermsSimple(text, options) {
- const {enabledDictionaryMap} = options;
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
- this._sortDefinitions(definitions, false);
- return [definitions, length];
- }
-
- async _findTermsSplit(text, options) {
- const {enabledDictionaryMap} = options;
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
- await this._buildTermMeta(definitions, enabledDictionaryMap);
- this._sortDefinitions(definitions, true);
- return [definitions, length];
- }
-
- async _findTermsGrouped(text, options) {
- const {enabledDictionaryMap} = options;
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
-
- const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
- await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
- this._sortDefinitions(groupedDefinitions, false);
-
- for (const definition of groupedDefinitions) {
- this._flagRedundantDefinitionTags(definition.definitions);
- }
-
- return [groupedDefinitions, length];
- }
-
- async _findTermsMerged(text, options) {
- const {mainDictionary, enabledDictionaryMap} = options;
- const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
-
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
- const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
- const definitionsMerged = [];
- const usedDefinitions = new Set();
-
- for (const {sourceDefinitions, relatedDefinitions} of sequencedDefinitions) {
- const result = await this._getMergedDefinition(
- sourceDefinitions,
- relatedDefinitions,
- unsequencedDefinitions,
- secondarySearchDictionaryMap,
- usedDefinitions
- );
- definitionsMerged.push(result);
- }
-
- const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
- for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
- const {reasons, score, expression, reading, source, rawSource, sourceTerm, furiganaSegments, termTags, definitions: definitions2} = groupedDefinition;
- const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
- const compatibilityDefinition = this._createMergedTermDefinition(
- source,
- rawSource,
- this._convertTermDefinitionsToMergedGlossaryTermDefinitions(definitions2),
- [expression],
- [reading],
- termDetailsList,
- reasons,
- score
- );
- definitionsMerged.push(compatibilityDefinition);
- }
-
- await this._buildTermMeta(definitionsMerged, enabledDictionaryMap);
- this._sortDefinitions(definitionsMerged, false);
-
- for (const definition of definitionsMerged) {
- this._flagRedundantDefinitionTags(definition.definitions);
- }
-
- return [definitionsMerged, length];
- }
-
- // Find terms internal implementation
-
- async _findTermsInternal(text, enabledDictionaryMap, options) {
- const {alphanumeric, wildcard} = options;
- text = this._getSearchableText(text, alphanumeric);
- if (text.length === 0) {
- return [[], 0];
- }
-
- const deinflections = (
- wildcard ?
- await this._findTermWildcard(text, enabledDictionaryMap, wildcard) :
- await this._findTermDeinflections(text, enabledDictionaryMap, options)
- );
-
- let maxLength = 0;
- const definitions = [];
- for (const {databaseDefinitions, source, rawSource, term, reasons} of deinflections) {
- if (databaseDefinitions.length === 0) { continue; }
- maxLength = Math.max(maxLength, rawSource.length);
- for (const databaseDefinition of databaseDefinitions) {
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, term, reasons, enabledDictionaryMap);
- definitions.push(definition);
- }
- }
-
- this._removeDuplicateDefinitions(definitions);
- return [definitions, maxLength];
- }
-
- async _findTermWildcard(text, enabledDictionaryMap, wildcard) {
- const databaseDefinitions = await this._database.findTermsBulk([text], enabledDictionaryMap, wildcard);
- if (databaseDefinitions.length === 0) {
- return [];
- }
-
- return [{
- source: text,
- rawSource: text,
- term: text,
- rules: 0,
- reasons: [],
- databaseDefinitions
- }];
- }
-
- async _findTermDeinflections(text, enabledDictionaryMap, options) {
- const deinflections = this._getAllDeinflections(text, options);
-
- if (deinflections.length === 0) {
- return [];
- }
-
- const uniqueDeinflectionTerms = [];
- const uniqueDeinflectionArrays = [];
- const uniqueDeinflectionsMap = new Map();
- for (const deinflection of deinflections) {
- const term = deinflection.term;
- let deinflectionArray = uniqueDeinflectionsMap.get(term);
- if (typeof deinflectionArray === 'undefined') {
- deinflectionArray = [];
- uniqueDeinflectionTerms.push(term);
- uniqueDeinflectionArrays.push(deinflectionArray);
- uniqueDeinflectionsMap.set(term, deinflectionArray);
- }
- deinflectionArray.push(deinflection);
- }
-
- const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, null);
-
- for (const databaseDefinition of databaseDefinitions) {
- const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules);
- for (const deinflection of uniqueDeinflectionArrays[databaseDefinition.index]) {
- const deinflectionRules = deinflection.rules;
- if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) {
- deinflection.databaseDefinitions.push(databaseDefinition);
- }
- }
- }
-
- return deinflections;
- }
-
- _getAllDeinflections(text, options) {
- const textOptionVariantArray = [
- this._getTextReplacementsVariants(options),
- this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
- this._getTextOptionEntryVariants(options.convertNumericCharacters),
- this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
- this._getTextOptionEntryVariants(options.convertHiraganaToKatakana),
- this._getTextOptionEntryVariants(options.convertKatakanaToHiragana),
- this._getCollapseEmphaticOptions(options)
- ];
-
- const jp = this._japaneseUtil;
- const deinflections = [];
- const used = new Set();
- for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
- let text2 = text;
- const sourceMap = new TextSourceMap(text2);
- if (textReplacements !== null) {
- text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
- }
- if (halfWidth) {
- text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
- }
- if (numeric) {
- text2 = jp.convertNumericToFullWidth(text2);
- }
- if (alphabetic) {
- text2 = jp.convertAlphabeticToKana(text2, sourceMap);
- }
- if (katakana) {
- text2 = jp.convertHiraganaToKatakana(text2);
- }
- if (hiragana) {
- text2 = jp.convertKatakanaToHiragana(text2);
- }
- if (collapseEmphatic) {
- text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
- }
-
- for (let i = text2.length; i > 0; --i) {
- const text2Substring = text2.substring(0, i);
- if (used.has(text2Substring)) { break; }
- used.add(text2Substring);
- const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
- for (const deinflection of this._deinflector.deinflect(text2Substring, rawSource)) {
- deinflections.push(deinflection);
- }
- }
- }
- return deinflections;
- }
-
- async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) {
- const sequenceList = [];
- const sequencedDefinitionMap = new Map();
- const sequencedDefinitions = [];
- const unsequencedDefinitions = [];
- for (const definition of definitions) {
- const {sequence, dictionary} = definition;
- if (mainDictionary === dictionary && sequence >= 0) {
- let sequencedDefinition = sequencedDefinitionMap.get(sequence);
- if (typeof sequencedDefinition === 'undefined') {
- sequencedDefinition = {
- sourceDefinitions: [],
- relatedDefinitions: [],
- relatedDefinitionIds: new Set()
- };
- sequencedDefinitionMap.set(sequence, sequencedDefinition);
- sequencedDefinitions.push(sequencedDefinition);
- sequenceList.push(sequence);
- }
- sequencedDefinition.sourceDefinitions.push(definition);
- sequencedDefinition.relatedDefinitions.push(definition);
- sequencedDefinition.relatedDefinitionIds.add(definition.id);
- } else {
- unsequencedDefinitions.push(definition);
- }
- }
-
- if (sequenceList.length > 0) {
- const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
- for (const databaseDefinition of databaseDefinitions) {
- const {relatedDefinitions, relatedDefinitionIds} = sequencedDefinitions[databaseDefinition.index];
- const {id} = databaseDefinition;
- if (relatedDefinitionIds.has(id)) { continue; }
-
- const {source, rawSource, sourceTerm} = relatedDefinitions[0];
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, [], enabledDictionaryMap);
- relatedDefinitions.push(definition);
- }
- }
-
- for (const {relatedDefinitions} of sequencedDefinitions) {
- this._sortDefinitionsById(relatedDefinitions);
- }
-
- return {sequencedDefinitions, unsequencedDefinitions};
- }
-
- async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) {
- if (secondarySearchDictionaryMap.size === 0) {
- return [];
- }
-
- const expressionList = [];
- const readingList = [];
- for (const [expression, readingMap] of expressionsMap.entries()) {
- for (const reading of readingMap.keys()) {
- expressionList.push(expression);
- readingList.push(reading);
- }
- }
-
- const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap);
- this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
-
- const definitions = [];
- for (const databaseDefinition of databaseDefinitions) {
- const source = expressionList[databaseDefinition.index];
- const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap);
- definitions.push(definition);
- }
-
- return definitions;
- }
-
- async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
- const {reasons, source, rawSource} = sourceDefinitions[0];
- const score = this._getMaxDefinitionScore(sourceDefinitions);
- const termInfoMap = new Map();
- const glossaryDefinitions = [];
- const glossaryDefinitionGroupMap = new Map();
-
- this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap);
- this._addUniqueTermInfos(relatedDefinitions, termInfoMap);
-
- let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap);
- secondaryDefinitions = [...unsequencedDefinitions, ...secondaryDefinitions];
-
- this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions);
- this._removeDuplicateDefinitions(secondaryDefinitions);
-
- this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
-
- const allExpressions = new Set();
- const allReadings = new Set();
- for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) {
- for (const expression of expressions) { allExpressions.add(expression); }
- for (const reading of readings) { allReadings.add(reading); }
- }
-
- for (const {expressions, readings, definitions} of glossaryDefinitionGroupMap.values()) {
- const glossaryDefinition = this._createMergedGlossaryTermDefinition(
- source,
- rawSource,
- definitions,
- expressions,
- readings,
- allExpressions,
- allReadings
- );
- glossaryDefinitions.push(glossaryDefinition);
- }
-
- this._sortDefinitions(glossaryDefinitions, true);
-
- const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
-
- return this._createMergedTermDefinition(
- source,
- rawSource,
- glossaryDefinitions,
- [...allExpressions],
- [...allReadings],
- termDetailsList,
- reasons,
- score
- );
- }
-
- _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) {
- for (let i = 0, ii = definitions.length; i < ii; ++i) {
- const definition = definitions[i];
- const {expression, reading} = definition;
- const expressionMap = termInfoMap.get(expression);
- if (
- typeof expressionMap !== 'undefined' &&
- typeof expressionMap.get(reading) !== 'undefined'
- ) {
- usedDefinitions.add(definition);
- } else {
- definitions.splice(i, 1);
- --i;
- --ii;
- }
- }
- }
-
- _getUniqueDefinitionTags(definitions) {
- const definitionTagsMap = new Map();
- for (const {definitionTags} of definitions) {
- for (const tag of definitionTags) {
- const {name} = tag;
- if (definitionTagsMap.has(name)) { continue; }
- definitionTagsMap.set(name, this._cloneTag(tag));
- }
- }
- return [...definitionTagsMap.values()];
- }
-
- _removeDuplicateDefinitions(definitions) {
- const definitionGroups = new Map();
- for (let i = 0, ii = definitions.length; i < ii; ++i) {
- const definition = definitions[i];
- const {id} = definition;
- const existing = definitionGroups.get(id);
- if (typeof existing === 'undefined') {
- definitionGroups.set(id, [i, definition]);
- continue;
- }
-
- let removeIndex = i;
- if (definition.source.length > existing[1].source.length) {
- definitionGroups.set(id, [i, definition]);
- removeIndex = existing[0];
- }
-
- definitions.splice(removeIndex, 1);
- --i;
- --ii;
- }
- }
-
- _flagRedundantDefinitionTags(definitions) {
- let lastDictionary = null;
- let lastPartOfSpeech = '';
- const removeCategoriesSet = new Set();
-
- for (const {dictionary, definitionTags} of definitions) {
- const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech'));
-
- if (lastDictionary !== dictionary) {
- lastDictionary = dictionary;
- lastPartOfSpeech = '';
- }
-
- if (lastPartOfSpeech === partOfSpeech) {
- removeCategoriesSet.add('partOfSpeech');
- } else {
- lastPartOfSpeech = partOfSpeech;
- }
-
- if (removeCategoriesSet.size > 0) {
- this._flagTagsWithCategoryAsRedundant(definitionTags, removeCategoriesSet);
- removeCategoriesSet.clear();
- }
- }
- }
-
- _groupTerms(definitions) {
- const groups = new Map();
- for (const definition of definitions) {
- const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]);
- let groupDefinitions = groups.get(key);
- if (typeof groupDefinitions === 'undefined') {
- groupDefinitions = [];
- groups.set(key, groupDefinitions);
- }
-
- groupDefinitions.push(definition);
- }
-
- const results = [];
- for (const groupDefinitions of groups.values()) {
- this._sortDefinitions(groupDefinitions, true);
- const definition = this._createGroupedTermDefinition(groupDefinitions);
- results.push(definition);
- }
-
- return results;
- }
-
- _mergeByGlossary(definitions, glossaryDefinitionGroupMap) {
- for (const definition of definitions) {
- const {expression, reading, dictionary, glossary, id} = definition;
-
- const key = this._createMapKey([dictionary, ...glossary]);
- let group = glossaryDefinitionGroupMap.get(key);
- if (typeof group === 'undefined') {
- group = {
- expressions: new Set(),
- readings: new Set(),
- definitions: [],
- definitionIds: new Set()
- };
- glossaryDefinitionGroupMap.set(key, group);
- }
-
- const {definitionIds} = group;
- if (definitionIds.has(id)) { continue; }
- definitionIds.add(id);
- group.expressions.add(expression);
- group.readings.add(reading);
- group.definitions.push(definition);
- }
- }
-
- _addUniqueTermInfos(definitions, termInfoMap) {
- for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
- let readingMap = termInfoMap.get(expression);
- if (typeof readingMap === 'undefined') {
- readingMap = new Map();
- termInfoMap.set(expression, readingMap);
- }
-
- let termInfo = readingMap.get(reading);
- if (typeof termInfo === 'undefined') {
- termInfo = {
- sourceTerm,
- furiganaSegments,
- termTagsMap: new Map()
- };
- readingMap.set(reading, termInfo);
- }
-
- const {termTagsMap} = termInfo;
- for (const tag of termTags) {
- const {name} = tag;
- if (termTagsMap.has(name)) { continue; }
- termTagsMap.set(name, this._cloneTag(tag));
- }
- }
- }
-
- _convertTermDefinitionsToMergedGlossaryTermDefinitions(definitions) {
- const convertedDefinitions = [];
- for (const definition of definitions) {
- const {source, rawSource, expression, reading} = definition;
- const expressions = new Set([expression]);
- const readings = new Set([reading]);
- const convertedDefinition = this._createMergedGlossaryTermDefinition(source, rawSource, [definition], expressions, readings, expressions, readings);
- convertedDefinitions.push(convertedDefinition);
- }
- return convertedDefinitions;
- }
-
- // Metadata building
-
- async _buildTermMeta(definitions, enabledDictionaryMap) {
- const addMetadataTargetInfo = (targetMap1, target, parents) => {
- let {expression, reading} = target;
- if (!reading) { reading = expression; }
-
- let targetMap2 = targetMap1.get(expression);
- if (typeof targetMap2 === 'undefined') {
- targetMap2 = new Map();
- targetMap1.set(expression, targetMap2);
- }
-
- let targets = targetMap2.get(reading);
- if (typeof targets === 'undefined') {
- targets = new Set([target, ...parents]);
- targetMap2.set(reading, targets);
- } else {
- targets.add(target);
- for (const parent of parents) {
- targets.add(parent);
- }
- }
- };
-
- const targetMap = new Map();
- const definitionsQueue = definitions.map((definition) => ({definition, parents: []}));
- while (definitionsQueue.length > 0) {
- const {definition, parents} = definitionsQueue.shift();
- const childDefinitions = definition.definitions;
- if (Array.isArray(childDefinitions)) {
- for (const definition2 of childDefinitions) {
- definitionsQueue.push({definition: definition2, parents: [...parents, definition]});
- }
- } else {
- addMetadataTargetInfo(targetMap, definition, parents);
- }
-
- for (const target of definition.expressions) {
- addMetadataTargetInfo(targetMap, target, []);
- }
- }
- const targetMapEntries = [...targetMap.entries()];
- const uniqueExpressions = targetMapEntries.map(([expression]) => expression);
-
- const metas = await this._database.findTermMetaBulk(uniqueExpressions, enabledDictionaryMap);
- for (const {expression, mode, data, dictionary, index} of metas) {
- const targetMap2 = targetMapEntries[index][1];
- for (const [reading, targets] of targetMap2) {
- switch (mode) {
- case 'freq':
- {
- const frequencyData = this._getTermFrequencyData(expression, reading, dictionary, data);
- if (frequencyData === null) { continue; }
- for (const {frequencies} of targets) { frequencies.push(frequencyData); }
- }
- break;
- case 'pitch':
- {
- const pitchData = await this._getPitchData(expression, reading, dictionary, data);
- if (pitchData === null) { continue; }
- for (const {pitches} of targets) { pitches.push(pitchData); }
- }
- break;
- }
- }
- }
- }
-
- async _buildKanjiMeta(definitions, enabledDictionaryMap) {
- const kanjiList = [];
- for (const {character} of definitions) {
- kanjiList.push(character);
- }
-
- const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
- for (const {character, mode, data, dictionary, index} of metas) {
- switch (mode) {
- case 'freq':
- {
- const frequencyData = this._getKanjiFrequencyData(character, dictionary, data);
- definitions[index].frequencies.push(frequencyData);
- }
- break;
- }
- }
- }
-
- async _expandTags(names, dictionary) {
- const tagMetaList = await this._getTagMetaList(names, dictionary);
- const results = [];
- for (let i = 0, ii = tagMetaList.length; i < ii; ++i) {
- const meta = tagMetaList[i];
- const name = names[i];
- const {category, notes, order, score} = (meta !== null ? meta : {});
- const tag = this._createTag(name, category, notes, order, score, dictionary, false);
- results.push(tag);
- }
- return results;
- }
-
- async _expandStats(items, dictionary) {
- const names = Object.keys(items);
- const tagMetaList = await this._getTagMetaList(names, dictionary);
-
- const statsGroups = new Map();
- for (let i = 0; i < names.length; ++i) {
- const name = names[i];
- const meta = tagMetaList[i];
- if (meta === null) { continue; }
-
- const {category, notes, order, score} = meta;
- let group = statsGroups.get(category);
- if (typeof group === 'undefined') {
- group = [];
- statsGroups.set(category, group);
- }
-
- const value = items[name];
- const stat = this._createKanjiStat(name, category, notes, order, score, dictionary, value);
- group.push(stat);
- }
-
- const stats = {};
- for (const [category, group] of statsGroups.entries()) {
- this._sortKanjiStats(group);
- stats[category] = group;
- }
- return stats;
- }
-
- async _getTagMetaList(names, dictionary) {
- const tagMetaList = [];
- let cache = this._tagCache.get(dictionary);
- if (typeof cache === 'undefined') {
- cache = new Map();
- this._tagCache.set(dictionary, cache);
- }
-
- for (const name of names) {
- const base = this._getNameBase(name);
-
- let tagMeta = cache.get(base);
- if (typeof tagMeta === 'undefined') {
- tagMeta = await this._database.findTagForTitle(base, dictionary);
- cache.set(base, tagMeta);
- }
-
- tagMetaList.push(tagMeta);
- }
-
- return tagMetaList;
- }
-
- _getTermFrequencyData(expression, reading, dictionary, data) {
- let frequency = data;
- const hasReading = (data !== null && typeof data === 'object');
- if (hasReading) {
- if (data.reading !== reading) { return null; }
- frequency = data.frequency;
- }
- return {dictionary, expression, reading, hasReading, frequency};
- }
-
- _getKanjiFrequencyData(character, dictionary, data) {
- return {dictionary, character, frequency: data};
- }
-
- async _getPitchData(expression, reading, dictionary, data) {
- if (data.reading !== reading) { return null; }
-
- const pitches = [];
- for (let {position, tags} of data.pitches) {
- tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
- pitches.push({position, tags});
- }
-
- return {expression, reading, dictionary, pitches};
- }
-
- // Simple helpers
-
- _scoreToTermFrequency(score) {
- if (score > 0) {
- return 'popular';
- } else if (score < 0) {
- return 'rare';
- } else {
- return 'normal';
- }
- }
-
- _getNameBase(name) {
- const pos = name.indexOf(':');
- return (pos >= 0 ? name.substring(0, pos) : name);
- }
-
- _getSearchableText(text, allowAlphanumericCharacters) {
- if (allowAlphanumericCharacters) {
- return text;
- }
-
- const jp = this._japaneseUtil;
- let newText = '';
- for (const c of text) {
- if (!jp.isCodePointJapanese(c.codePointAt(0))) {
- break;
- }
- newText += c;
- }
- return newText;
- }
-
- _getTextOptionEntryVariants(value) {
- switch (value) {
- case 'true': return [true];
- case 'variant': return [false, true];
- default: return [false];
- }
- }
-
- _getCollapseEmphaticOptions(options) {
- const collapseEmphaticOptions = [[false, false]];
- switch (options.collapseEmphaticSequences) {
- case 'true':
- collapseEmphaticOptions.push([true, false]);
- break;
- case 'full':
- collapseEmphaticOptions.push([true, false], [true, true]);
- break;
- }
- return collapseEmphaticOptions;
- }
-
- _getTextReplacementsVariants(options) {
- return options.textReplacements;
- }
-
- _getSecondarySearchDictionaryMap(enabledDictionaryMap) {
- const secondarySearchDictionaryMap = new Map();
- for (const [dictionary, details] of enabledDictionaryMap.entries()) {
- if (!details.allowSecondarySearches) { continue; }
- secondarySearchDictionaryMap.set(dictionary, details);
- }
- return secondarySearchDictionaryMap;
- }
-
- _getDictionaryPriority(dictionary, enabledDictionaryMap) {
- const info = enabledDictionaryMap.get(dictionary);
- return typeof info !== 'undefined' ? info.priority : 0;
- }
-
- _getTagNamesWithCategory(tags, category) {
- const results = [];
- for (const tag of tags) {
- if (tag.category !== category) { continue; }
- results.push(tag.name);
- }
- results.sort();
- return results;
- }
-
- _flagTagsWithCategoryAsRedundant(tags, removeCategoriesSet) {
- for (const tag of tags) {
- if (removeCategoriesSet.has(tag.category)) {
- tag.redundant = true;
- }
- }
- }
-
- _getUniqueDictionaryNames(definitions) {
- const uniqueDictionaryNames = new Set();
- for (const {dictionaryNames} of definitions) {
- for (const dictionaryName of dictionaryNames) {
- uniqueDictionaryNames.add(dictionaryName);
- }
- }
- return [...uniqueDictionaryNames];
- }
-
- _getUniqueTermTags(definitions) {
- const newTermTags = [];
- if (definitions.length <= 1) {
- for (const {termTags} of definitions) {
- for (const tag of termTags) {
- newTermTags.push(this._cloneTag(tag));
- }
- }
- } else {
- const tagsSet = new Set();
- let checkTagsMap = false;
- for (const {termTags} of definitions) {
- for (const tag of termTags) {
- const key = this._getTagMapKey(tag);
- if (checkTagsMap && tagsSet.has(key)) { continue; }
- tagsSet.add(key);
- newTermTags.push(this._cloneTag(tag));
- }
- checkTagsMap = true;
- }
- }
- return newTermTags;
- }
-
- *_getArrayVariants(arrayVariants) {
- const ii = arrayVariants.length;
-
- let total = 1;
- for (let i = 0; i < ii; ++i) {
- total *= arrayVariants[i].length;
- }
-
- for (let a = 0; a < total; ++a) {
- const variant = [];
- let index = a;
- for (let i = 0; i < ii; ++i) {
- const entryVariants = arrayVariants[i];
- variant.push(entryVariants[index % entryVariants.length]);
- index = Math.floor(index / entryVariants.length);
- }
- yield variant;
- }
- }
-
- _areSetsEqual(set1, set2) {
- if (set1.size !== set2.size) {
- return false;
- }
-
- for (const value of set1) {
- if (!set2.has(value)) {
- return false;
- }
- }
-
- return true;
- }
-
- _getSetIntersection(set1, set2) {
- const result = [];
- for (const value of set1) {
- if (set2.has(value)) {
- result.push(value);
- }
- }
- return result;
- }
-
- // Reduction functions
-
- _getTermTagsScoreSum(termTags) {
- let result = 0;
- for (const {score} of termTags) {
- result += score;
- }
- return result;
- }
-
- _getSourceTermMatchCountSum(definitions) {
- let result = 0;
- for (const {sourceTermExactMatchCount} of definitions) {
- result += sourceTermExactMatchCount;
- }
- return result;
- }
-
- _getMaxDefinitionScore(definitions) {
- let result = Number.MIN_SAFE_INTEGER;
- for (const {score} of definitions) {
- if (score > result) { result = score; }
- }
- return result;
- }
-
- _getMaxDictionaryPriority(definitions) {
- let result = Number.MIN_SAFE_INTEGER;
- for (const {dictionaryPriority} of definitions) {
- if (dictionaryPriority > result) { result = dictionaryPriority; }
- }
- return result;
- }
-
- // Common data creation and cloning functions
-
- _cloneTag(tag) {
- const {name, category, notes, order, score, dictionary, redundant} = tag;
- return this._createTag(name, category, notes, order, score, dictionary, redundant);
- }
-
- _getTagMapKey(tag) {
- const {name, category, notes} = tag;
- return this._createMapKey([name, category, notes]);
- }
-
- _createMapKey(array) {
- return JSON.stringify(array);
- }
-
- _createTag(name, category, notes, order, score, dictionary, redundant) {
- return {
- name,
- category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
- notes: (typeof notes === 'string' ? notes : ''),
- order: (typeof order === 'number' ? order : 0),
- score: (typeof score === 'number' ? score : 0),
- dictionary: (typeof dictionary === 'string' ? dictionary : null),
- redundant
- };
- }
-
- _createKanjiStat(name, category, notes, order, score, dictionary, value) {
- return {
- name,
- category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
- notes: (typeof notes === 'string' ? notes : ''),
- order: (typeof order === 'number' ? order : 0),
- score: (typeof score === 'number' ? score : 0),
- dictionary: (typeof dictionary === 'string' ? dictionary : null),
- value
- };
- }
-
- _createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, tags, stats) {
- return {
- type: 'kanji',
- character,
- dictionary,
- onyomi,
- kunyomi,
- glossary,
- tags,
- stats,
- frequencies: []
- };
- }
-
- async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, reasons, enabledDictionaryMap) {
- const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
- const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
- const termTagsExpanded = await this._expandTags(termTags, dictionary);
- const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary);
-
- this._sortTags(definitionTagsExpanded);
- this._sortTags(termTagsExpanded);
-
- const furiganaSegments = this._japaneseUtil.distributeFurigana(expression, reading);
- const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTagsExpanded)];
- const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0);
-
- return {
- type: 'term',
- id,
- source,
- rawSource,
- sourceTerm,
- reasons,
- score,
- sequence,
- dictionary,
- dictionaryPriority,
- dictionaryNames: [dictionary],
- expression,
- reading,
- expressions: termDetailsList,
- furiganaSegments,
- glossary,
- definitionTags: definitionTagsExpanded,
- termTags: termTagsExpanded,
- // definitions
- frequencies: [],
- pitches: [],
- // only
- sourceTermExactMatchCount
- };
- }
-
- _createGroupedTermDefinition(definitions) {
- const {expression, reading, furiganaSegments, reasons, source, rawSource, sourceTerm} = definitions[0];
- const score = this._getMaxDefinitionScore(definitions);
- const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
- const dictionaryNames = this._getUniqueDictionaryNames(definitions);
- const termTags = this._getUniqueTermTags(definitions);
- const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
- const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0);
- return {
- type: 'termGrouped',
- // id
- source,
- rawSource,
- sourceTerm,
- reasons: [...reasons],
- score,
- // sequence
- dictionary: dictionaryNames[0],
- dictionaryPriority,
- dictionaryNames,
- expression,
- reading,
- expressions: termDetailsList,
- furiganaSegments, // Contains duplicate data
- // glossary
- // definitionTags
- termTags,
- definitions, // type: 'term'
- frequencies: [],
- pitches: [],
- // only
- sourceTermExactMatchCount
- };
- }
-
- _createMergedTermDefinition(source, rawSource, definitions, expressions, readings, termDetailsList, reasons, score) {
- const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
- const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions);
- const dictionaryNames = this._getUniqueDictionaryNames(definitions);
- return {
- type: 'termMerged',
- // id
- source,
- rawSource,
- // sourceTerm
- reasons,
- score,
- // sequence
- dictionary: dictionaryNames[0],
- dictionaryPriority,
- dictionaryNames,
- expression: expressions,
- reading: readings,
- expressions: termDetailsList,
- // furiganaSegments
- // glossary
- // definitionTags
- // termTags
- definitions, // type: 'termMergedByGlossary'
- frequencies: [],
- pitches: [],
- // only
- sourceTermExactMatchCount
- };
- }
-
- _createMergedGlossaryTermDefinition(source, rawSource, definitions, expressions, readings, allExpressions, allReadings) {
- const only = [];
- if (!this._areSetsEqual(expressions, allExpressions)) {
- only.push(...this._getSetIntersection(expressions, allExpressions));
- }
- if (!this._areSetsEqual(readings, allReadings)) {
- only.push(...this._getSetIntersection(readings, allReadings));
- }
-
- const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions);
- const dictionaryNames = this._getUniqueDictionaryNames(definitions);
-
- const termInfoMap = new Map();
- this._addUniqueTermInfos(definitions, termInfoMap);
- const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
-
- const definitionTags = this._getUniqueDefinitionTags(definitions);
- this._sortTags(definitionTags);
-
- const {glossary} = definitions[0];
- const score = this._getMaxDefinitionScore(definitions);
- const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
- return {
- type: 'termMergedByGlossary',
- // id
- source,
- rawSource,
- // sourceTerm
- reasons: [],
- score,
- // sequence
- dictionary: dictionaryNames[0],
- dictionaryPriority,
- dictionaryNames,
- expression: [...expressions],
- reading: [...readings],
- expressions: termDetailsList,
- // furiganaSegments
- glossary: [...glossary],
- definitionTags,
- // termTags
- definitions, // type: 'term'; contains duplicate data
- frequencies: [],
- pitches: [],
- only,
- sourceTermExactMatchCount
- };
- }
-
- _createTermDetailsListFromTermInfoMap(termInfoMap) {
- const termDetailsList = [];
- for (const [expression, readingMap] of termInfoMap.entries()) {
- for (const [reading, {termTagsMap, sourceTerm, furiganaSegments}] of readingMap.entries()) {
- const termTags = [...termTagsMap.values()];
- this._sortTags(termTags);
- termDetailsList.push(this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags));
- }
- }
- return termDetailsList;
- }
-
- _createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags) {
- const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
- return {
- sourceTerm,
- expression,
- reading,
- furiganaSegments, // Contains duplicate data
- termTags,
- termFrequency,
- frequencies: [],
- pitches: []
- };
- }
-
- // Sorting functions
-
- _sortTags(tags) {
- if (tags.length <= 1) { return; }
- const stringComparer = this._stringComparer;
- tags.sort((v1, v2) => {
- const i = v1.order - v2.order;
- if (i !== 0) { return i; }
-
- return stringComparer.compare(v1.name, v2.name);
- });
- }
-
- _sortDefinitions(definitions, useDictionaryPriority) {
- if (definitions.length <= 1) { return; }
- const stringComparer = this._stringComparer;
- const compareFunction1 = (v1, v2) => {
- let i = v2.source.length - v1.source.length;
- if (i !== 0) { return i; }
-
- i = v1.reasons.length - v2.reasons.length;
- if (i !== 0) { return i; }
-
- i = v2.sourceTermExactMatchCount - v1.sourceTermExactMatchCount;
- if (i !== 0) { return i; }
-
- i = v2.score - v1.score;
- if (i !== 0) { return i; }
-
- const expression1 = v1.expression;
- const expression2 = v2.expression;
- if (typeof expression1 !== 'string' || typeof expression2 !== 'string') { return 0; } // Skip if either is not a string (array)
-
- i = expression2.length - expression1.length;
- if (i !== 0) { return i; }
-
- return stringComparer.compare(expression1, expression2);
- };
- const compareFunction2 = (v1, v2) => {
- const i = v2.dictionaryPriority - v1.dictionaryPriority;
- return (i !== 0) ? i : compareFunction1(v1, v2);
- };
- definitions.sort(useDictionaryPriority ? compareFunction2 : compareFunction1);
- }
-
- _sortDatabaseDefinitionsByIndex(definitions) {
- if (definitions.length <= 1) { return; }
- definitions.sort((a, b) => a.index - b.index);
- }
-
- _sortDefinitionsById(definitions) {
- if (definitions.length <= 1) { return; }
- definitions.sort((a, b) => a.id - b.id);
- }
-
- _sortKanjiStats(stats) {
- if (stats.length <= 1) { return; }
- const stringComparer = this._stringComparer;
- stats.sort((v1, v2) => {
- const i = v1.order - v2.order;
- if (i !== 0) { return i; }
-
- return stringComparer.compare(v1.notes, v2.notes);
- });
- }
-
- // Regex functions
-
- _applyTextReplacements(text, sourceMap, replacements) {
- for (const {pattern, replacement} of replacements) {
- text = this._applyTextReplacement(text, sourceMap, pattern, replacement);
- }
- return text;
- }
-
- /**
-  * Applies a single regular expression replacement to the text and records
-  * each substitution in the source map. Every match is replaced when the
-  * pattern has the global flag; otherwise only the first match is replaced.
-  * @param text The text to transform.
-  * @param sourceMap An object whose combine and insert methods are called for
-  *   every substitution (presumably a TextSourceMap; confirm against callers).
-  * @param pattern The RegExp to match. Its lastIndex is modified.
-  * @param replacement The replacement template passed to _applyMatchReplacement.
-  * @returns The text with the replacement(s) applied.
-  */
- _applyTextReplacement(text, sourceMap, pattern, replacement) {
- const isGlobal = pattern.global;
- // A global RegExp is stateful, so scanning always starts from the beginning.
- if (isGlobal) { pattern.lastIndex = 0; }
- // The loop body runs once for a non-global pattern and until no match is left otherwise.
- for (let loop = true; loop; loop = isGlobal) {
- const match = pattern.exec(text);
- if (match === null) { break; }
-
- const matchText = match[0];
- const index = match.index;
- const actualReplacement = this._applyMatchReplacement(replacement, match);
- const actualReplacementLength = actualReplacement.length;
- // Length change caused by the substitution. An empty match is treated as
- // length -1, which moves lastIndex one position further below
- // (presumably so the same empty match is not found again; confirm).
- const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1);
-
- text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`;
- // Move the scan position so that the next search starts after the inserted replacement.
- pattern.lastIndex += delta;
-
- // NOTE(review): the exact meaning of combine/insert is defined by the
- // sourceMap implementation, which is not visible here.
- if (actualReplacementLength > 0) {
- sourceMap.combine(Math.max(0, index - 1), matchText.length);
- sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0)));
- } else {
- sourceMap.combine(index, matchText.length);
- }
- }
- return text;
- }
-
- _applyMatchReplacement(replacement, match) {
- const pattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g;
- return replacement.replace(pattern, (g0, g1, g2) => {
- if (typeof g1 !== 'undefined') {
- const matchIndex = Number.parseInt(g1, 10);
- if (matchIndex >= 1 && matchIndex <= match.length) {
- return match[matchIndex];
- }
- } else if (typeof g2 !== 'undefined') {
- const {groups} = match;
- if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) {
- return groups[g2];
- }
- } else {
- switch (g0) {
- case '$': return '$';
- case '&': return match[0];
- case '`': return replacement.substring(0, match.index);
- case '\'': return replacement.substring(match.index + g0.length);
- }
- }
- return g0;
- });
- }
-}