From 8e548a17eba180b5773a9900de3f3cb3a92ec6ff Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 17 Dec 2021 16:44:14 -0500 Subject: Dictionary database term source info (#2039) * Update DictionaryDatabase._findMultiBulk's createResult callback signature * Simplify _splitField use * Update sequence * Expose new fields 'matchType' and 'matchSource' as part of term data * Expose matchType and matchSource as part of TermSource * Update sourceTermExactMatchCount calculation * Update test data * Expose matchType and matchSource info in HTML attributes * Add primaryMatchTypes attribute --- ext/js/display/display-generator.js | 17 +++++++++-- ext/js/language/dictionary-database.js | 54 +++++++++++++++++++++------------- ext/js/language/translator.js | 20 +++++++------ 3 files changed, 58 insertions(+), 33 deletions(-) (limited to 'ext/js') diff --git a/ext/js/display/display-generator.js b/ext/js/display/display-generator.js index fc377df1..409706c0 100644 --- a/ext/js/display/display-generator.js +++ b/ext/js/display/display-generator.js @@ -64,9 +64,14 @@ class DisplayGenerator { const uniqueTerms = new Set(); const uniqueReadings = new Set(); - for (const {term, reading} of headwords) { + const primaryMatchTypes = new Set(); + for (const {term, reading, sources} of headwords) { uniqueTerms.add(term); uniqueReadings.add(reading); + for (const {matchType, isPrimary} of sources) { + if (!isPrimary) { continue; } + primaryMatchTypes.add(matchType); + } } node.dataset.format = type; @@ -78,6 +83,7 @@ class DisplayGenerator { node.dataset.uniqueReadingCount = `${uniqueReadings.size}`; node.dataset.frequencyCount = `${frequencies.length}`; node.dataset.groupedFrequencyCount = `${groupedFrequencies.length}`; + node.dataset.primaryMatchTypes = [...primaryMatchTypes].join(' '); for (let i = 0, ii = headwords.length; i < ii; ++i) { const node2 = this._createTermHeadword(headwords[i], i, pronunciations); @@ -235,11 +241,14 @@ class DisplayGenerator { const {term, reading, tags, sources} = headword; let isPrimaryAny = false; - for (const {isPrimary} of sources) { + const matchTypes = new Set(); + const matchSources = new Set(); + for (const {matchType, matchSource, isPrimary} of sources) { if (isPrimary) { isPrimaryAny = true; - break; } + matchTypes.add(matchType); + matchSources.add(matchSource); } const node = this._templates.instantiate('headword'); @@ -249,6 +258,8 @@ class DisplayGenerator { node.dataset.isPrimary = `${isPrimaryAny}`; node.dataset.readingIsSame = `${reading === term}`; node.dataset.frequency = DictionaryDataUtil.getTermFrequency(tags); + node.dataset.matchTypes = [...matchTypes].join(' '); + node.dataset.matchSources = [...matchSources].join(' '); const {wordClasses} = headword; const pronunciationCategories = this._getPronunciationCategories(reading, pronunciations, wordClasses, headwordIndex); diff --git a/ext/js/language/dictionary-database.js b/ext/js/language/dictionary-database.js index c20921b5..fa37ec34 100644 --- a/ext/js/language/dictionary-database.js +++ b/ext/js/language/dictionary-database.js @@ -30,7 +30,8 @@ class DictionaryDatabase { this._createOnlyQuery4 = (item) => IDBKeyRange.only(item.path); this._createBoundQuery1 = (item) => IDBKeyRange.bound(item, `${item}\uffff`, false, false); this._createBoundQuery2 = (item) => { item = stringReverse(item); return IDBKeyRange.bound(item, `${item}\uffff`, false, false); }; - this._createTermBind = this._createTerm.bind(this); + this._createTermBind1 = this._createTerm.bind(this, 'term', 'exact'); + this._createTermBind2 = this._createTerm.bind(this, 'sequence', 'exact'); this._createTermMetaBind = this._createTermMeta.bind(this); this._createKanjiBind = this._createKanji.bind(this); this._createKanjiMetaBind = this._createKanjiMeta.bind(this); @@ -208,7 +209,7 @@ class DictionaryDatabase { const indexNames = (matchType === 'suffix') ? ['expressionReverse', 'readingReverse'] : ['expression', 'reading']; - let createQuery; + let createQuery = this._createOnlyQuery1; switch (matchType) { case 'prefix': createQuery = this._createBoundQuery1; @@ -216,22 +217,21 @@ class DictionaryDatabase { case 'suffix': createQuery = this._createBoundQuery2; break; - default: // 'exact' - createQuery = this._createOnlyQuery1; - break; } - return this._findMultiBulk('terms', indexNames, termList, createQuery, predicate, this._createTermBind); + const createResult = this._createTermGeneric.bind(this, matchType); + + return this._findMultiBulk('terms', indexNames, termList, createQuery, predicate, createResult); } findTermsExactBulk(termList, dictionaries) { const predicate = (row, item) => (row.reading === item.reading && dictionaries.has(row.dictionary)); - return this._findMultiBulk('terms', ['expression'], termList, this._createOnlyQuery3, predicate, this._createTermBind); + return this._findMultiBulk('terms', ['expression'], termList, this._createOnlyQuery3, predicate, this._createTermBind1); } findTermsBySequenceBulk(items) { const predicate = (row, item) => (row.dictionary === item.dictionary); - return this._findMultiBulk('terms', ['sequence'], items, this._createOnlyQuery2, predicate, this._createTermBind); + return this._findMultiBulk('terms', ['sequence'], items, this._createOnlyQuery2, predicate, this._createTermBind2); } findTermMetaBulk(termList, dictionaries) { @@ -352,10 +352,10 @@ class DictionaryDatabase { } let completeCount = 0; const requiredCompleteCount = itemCount * indexCount; - const onGetAll = (rows, {item, itemIndex}) => { + const onGetAll = (rows, data) => { for (const row of rows) { - if (predicate(row, item)) { - results.push(createResult(row, itemIndex)); + if (predicate(row, data.item)) { + results.push(createResult(row, data)); } } if (++completeCount >= requiredCompleteCount) { @@ -366,7 +366,7 @@ class DictionaryDatabase { const item = items[i]; const query = createQuery(item); for (let j = 0; j < indexCount; ++j) { - this._db.getAll(indexList[j], query, onGetAll, reject, {item, itemIndex: i}); + this._db.getAll(indexList[j], query, onGetAll, reject, {item, itemIndex: i, indexIndex: j}); } } }); @@ -399,23 +399,35 @@ class DictionaryDatabase { }); } - _createTerm(row, index) { + _createTermGeneric(matchType, row, data) { + const matchSourceIsTerm = (data.indexIndex === 0); + const matchSource = (matchSourceIsTerm ? 'term' : 'reading'); + if ((matchSourceIsTerm ? row.expression : row.reading) === data.item) { + matchType = 'exact'; + } + return this._createTerm(matchSource, matchType, row, data); + } + + _createTerm(matchSource, matchType, row, {itemIndex: index}) { + const {sequence} = row; return { index, + matchType, + matchSource, term: row.expression, reading: row.reading, - definitionTags: this._splitField(row.definitionTags || row.tags || ''), - termTags: this._splitField(row.termTags || ''), + definitionTags: this._splitField(row.definitionTags || row.tags), + termTags: this._splitField(row.termTags), rules: this._splitField(row.rules), definitions: row.glossary, score: row.score, dictionary: row.dictionary, id: row.id, - sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence + sequence: typeof sequence === 'number' ? sequence : -1 }; } - _createKanji(row, index) { + _createKanji(row, {itemIndex: index}) { return { index, character: row.character, @@ -428,19 +440,19 @@ class DictionaryDatabase { }; } - _createTermMeta({expression: term, mode, data, dictionary}, index) { + _createTermMeta({expression: term, mode, data, dictionary}, {itemIndex: index}) { return {term, mode, data, dictionary, index}; } - _createKanjiMeta({character, mode, data, dictionary}, index) { + _createKanjiMeta({character, mode, data, dictionary}, {itemIndex: index}) { return {character, mode, data, dictionary, index}; } - _createMedia(row, index) { + _createMedia(row, {itemIndex: index}) { return Object.assign({}, row, {index}); } _splitField(field) { - return field.length === 0 ? [] : field.split(' '); + return typeof field === 'string' && field.length > 0 ? field.split(' ') : []; } } diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 5db781dd..dd7d54a0 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -1114,8 +1114,8 @@ class Translator { return {dictionary, tagNames}; } - _createSource(originalText, transformedText, deinflectedText, isPrimary) { - return {originalText, transformedText, deinflectedText, isPrimary}; + _createSource(originalText, transformedText, deinflectedText, matchType, matchSource, isPrimary) { + return {originalText, transformedText, deinflectedText, matchType, matchSource, isPrimary}; } _createTermHeadword(index, term, reading, sources, tags, wordClasses) { @@ -1166,11 +1166,11 @@ class Translator { } _createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, reasons, isPrimary, enabledDictionaryMap) { - const {term, reading: rawReading, definitionTags, termTags, definitions, score, dictionary, id, sequence: rawSequence, rules} = databaseEntry; + const {matchType, matchSource, term, reading: rawReading, definitionTags, termTags, definitions, score, dictionary, id, sequence: rawSequence, rules} = databaseEntry; const reading = (rawReading.length > 0 ? rawReading : term); const {index: dictionaryIndex, priority: dictionaryPriority} = this._getDictionaryOrder(dictionary, enabledDictionaryMap); const sourceTermExactMatchCount = (isPrimary && deinflectedText === term ? 1 : 0); - const source = this._createSource(originalText, transformedText, deinflectedText, isPrimary); + const source = this._createSource(originalText, transformedText, deinflectedText, matchType, matchSource, isPrimary); const maxTransformedTextLength = transformedText.length; const hasSequence = (rawSequence >= 0); const sequence = hasSequence ? rawSequence : -1; @@ -1239,9 +1239,9 @@ class Translator { const headwordsArray = [...headwords.values()]; let sourceTermExactMatchCount = 0; - for (const {term, sources} of headwordsArray) { - for (const {deinflectedText, isPrimary: isPrimary2} of sources) { - if (isPrimary2 && deinflectedText === term) { + for (const {sources} of headwordsArray) { + for (const source of sources) { + if (source.isPrimary && source.matchSource === 'term') { ++sourceTermExactMatchCount; break; } @@ -1278,13 +1278,15 @@ class Translator { return; } for (const newSource of newSources) { - const {originalText, transformedText, deinflectedText, isPrimary} = newSource; + const {originalText, transformedText, deinflectedText, matchType, matchSource, isPrimary} = newSource; let has = false; for (const source of sources) { if ( source.deinflectedText === deinflectedText && source.transformedText === transformedText && - source.originalText === originalText + source.originalText === originalText && + source.matchType === matchType && + source.matchSource === matchSource ) { if (isPrimary) { source.isPrimary = true; } has = true; -- cgit v1.2.3