about | summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-12-17 16:44:14 -0500
committer: GitHub <noreply@github.com> 2021-12-17 16:44:14 -0500
commit: 8e548a17eba180b5773a9900de3f3cb3a92ec6ff (patch)
tree: a0aa76481432fd5c510c3769ccadade86c7f15fa /ext
parent: 19ab9df6e4110ef7d5927c95993141a1f8960c53 (diff)
Dictionary database term source info (#2039)
* Update DictionaryDatabase._findMultiBulk's createResult callback signature
* Simplify _splitField use
* Update sequence
* Expose new fields 'matchType' and 'matchSource' as part of term data
* Expose matchType and matchSource as part of TermSource
* Update sourceTermExactMatchCount calculation
* Update test data
* Expose matchType and matchSource info in HTML attributes
* Add primaryMatchTypes attribute
Diffstat (limited to 'ext')
-rw-r--r--  ext/js/display/display-generator.js     17
-rw-r--r--  ext/js/language/dictionary-database.js  54
-rw-r--r--  ext/js/language/translator.js           20
3 files changed, 58 insertions, 33 deletions
diff --git a/ext/js/display/display-generator.js b/ext/js/display/display-generator.js
index fc377df1..409706c0 100644
--- a/ext/js/display/display-generator.js
+++ b/ext/js/display/display-generator.js
@@ -64,9 +64,14 @@ class DisplayGenerator {
const uniqueTerms = new Set();
const uniqueReadings = new Set();
- for (const {term, reading} of headwords) {
+ const primaryMatchTypes = new Set();
+ for (const {term, reading, sources} of headwords) {
uniqueTerms.add(term);
uniqueReadings.add(reading);
+ for (const {matchType, isPrimary} of sources) {
+ if (!isPrimary) { continue; }
+ primaryMatchTypes.add(matchType);
+ }
}
node.dataset.format = type;
@@ -78,6 +83,7 @@ class DisplayGenerator {
node.dataset.uniqueReadingCount = `${uniqueReadings.size}`;
node.dataset.frequencyCount = `${frequencies.length}`;
node.dataset.groupedFrequencyCount = `${groupedFrequencies.length}`;
+ node.dataset.primaryMatchTypes = [...primaryMatchTypes].join(' ');
for (let i = 0, ii = headwords.length; i < ii; ++i) {
const node2 = this._createTermHeadword(headwords[i], i, pronunciations);
@@ -235,11 +241,14 @@ class DisplayGenerator {
const {term, reading, tags, sources} = headword;
let isPrimaryAny = false;
- for (const {isPrimary} of sources) {
+ const matchTypes = new Set();
+ const matchSources = new Set();
+ for (const {matchType, matchSource, isPrimary} of sources) {
if (isPrimary) {
isPrimaryAny = true;
- break;
}
+ matchTypes.add(matchType);
+ matchSources.add(matchSource);
}
const node = this._templates.instantiate('headword');
@@ -249,6 +258,8 @@ class DisplayGenerator {
node.dataset.isPrimary = `${isPrimaryAny}`;
node.dataset.readingIsSame = `${reading === term}`;
node.dataset.frequency = DictionaryDataUtil.getTermFrequency(tags);
+ node.dataset.matchTypes = [...matchTypes].join(' ');
+ node.dataset.matchSources = [...matchSources].join(' ');
const {wordClasses} = headword;
const pronunciationCategories = this._getPronunciationCategories(reading, pronunciations, wordClasses, headwordIndex);
diff --git a/ext/js/language/dictionary-database.js b/ext/js/language/dictionary-database.js
index c20921b5..fa37ec34 100644
--- a/ext/js/language/dictionary-database.js
+++ b/ext/js/language/dictionary-database.js
@@ -30,7 +30,8 @@ class DictionaryDatabase {
this._createOnlyQuery4 = (item) => IDBKeyRange.only(item.path);
this._createBoundQuery1 = (item) => IDBKeyRange.bound(item, `${item}\uffff`, false, false);
this._createBoundQuery2 = (item) => { item = stringReverse(item); return IDBKeyRange.bound(item, `${item}\uffff`, false, false); };
- this._createTermBind = this._createTerm.bind(this);
+ this._createTermBind1 = this._createTerm.bind(this, 'term', 'exact');
+ this._createTermBind2 = this._createTerm.bind(this, 'sequence', 'exact');
this._createTermMetaBind = this._createTermMeta.bind(this);
this._createKanjiBind = this._createKanji.bind(this);
this._createKanjiMetaBind = this._createKanjiMeta.bind(this);
@@ -208,7 +209,7 @@ class DictionaryDatabase {
const indexNames = (matchType === 'suffix') ? ['expressionReverse', 'readingReverse'] : ['expression', 'reading'];
- let createQuery;
+ let createQuery = this._createOnlyQuery1;
switch (matchType) {
case 'prefix':
createQuery = this._createBoundQuery1;
@@ -216,22 +217,21 @@ class DictionaryDatabase {
case 'suffix':
createQuery = this._createBoundQuery2;
break;
- default: // 'exact'
- createQuery = this._createOnlyQuery1;
- break;
}
- return this._findMultiBulk('terms', indexNames, termList, createQuery, predicate, this._createTermBind);
+ const createResult = this._createTermGeneric.bind(this, matchType);
+
+ return this._findMultiBulk('terms', indexNames, termList, createQuery, predicate, createResult);
}
findTermsExactBulk(termList, dictionaries) {
const predicate = (row, item) => (row.reading === item.reading && dictionaries.has(row.dictionary));
- return this._findMultiBulk('terms', ['expression'], termList, this._createOnlyQuery3, predicate, this._createTermBind);
+ return this._findMultiBulk('terms', ['expression'], termList, this._createOnlyQuery3, predicate, this._createTermBind1);
}
findTermsBySequenceBulk(items) {
const predicate = (row, item) => (row.dictionary === item.dictionary);
- return this._findMultiBulk('terms', ['sequence'], items, this._createOnlyQuery2, predicate, this._createTermBind);
+ return this._findMultiBulk('terms', ['sequence'], items, this._createOnlyQuery2, predicate, this._createTermBind2);
}
findTermMetaBulk(termList, dictionaries) {
@@ -352,10 +352,10 @@ class DictionaryDatabase {
}
let completeCount = 0;
const requiredCompleteCount = itemCount * indexCount;
- const onGetAll = (rows, {item, itemIndex}) => {
+ const onGetAll = (rows, data) => {
for (const row of rows) {
- if (predicate(row, item)) {
- results.push(createResult(row, itemIndex));
+ if (predicate(row, data.item)) {
+ results.push(createResult(row, data));
}
}
if (++completeCount >= requiredCompleteCount) {
@@ -366,7 +366,7 @@ class DictionaryDatabase {
const item = items[i];
const query = createQuery(item);
for (let j = 0; j < indexCount; ++j) {
- this._db.getAll(indexList[j], query, onGetAll, reject, {item, itemIndex: i});
+ this._db.getAll(indexList[j], query, onGetAll, reject, {item, itemIndex: i, indexIndex: j});
}
}
});
@@ -399,23 +399,35 @@ class DictionaryDatabase {
});
}
- _createTerm(row, index) {
+ _createTermGeneric(matchType, row, data) {
+ const matchSourceIsTerm = (data.indexIndex === 0);
+ const matchSource = (matchSourceIsTerm ? 'term' : 'reading');
+ if ((matchSourceIsTerm ? row.expression : row.reading) === data.item) {
+ matchType = 'exact';
+ }
+ return this._createTerm(matchSource, matchType, row, data);
+ }
+
+ _createTerm(matchSource, matchType, row, {itemIndex: index}) {
+ const {sequence} = row;
return {
index,
+ matchType,
+ matchSource,
term: row.expression,
reading: row.reading,
- definitionTags: this._splitField(row.definitionTags || row.tags || ''),
- termTags: this._splitField(row.termTags || ''),
+ definitionTags: this._splitField(row.definitionTags || row.tags),
+ termTags: this._splitField(row.termTags),
rules: this._splitField(row.rules),
definitions: row.glossary,
score: row.score,
dictionary: row.dictionary,
id: row.id,
- sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
+ sequence: typeof sequence === 'number' ? sequence : -1
};
}
- _createKanji(row, index) {
+ _createKanji(row, {itemIndex: index}) {
return {
index,
character: row.character,
@@ -428,19 +440,19 @@ class DictionaryDatabase {
};
}
- _createTermMeta({expression: term, mode, data, dictionary}, index) {
+ _createTermMeta({expression: term, mode, data, dictionary}, {itemIndex: index}) {
return {term, mode, data, dictionary, index};
}
- _createKanjiMeta({character, mode, data, dictionary}, index) {
+ _createKanjiMeta({character, mode, data, dictionary}, {itemIndex: index}) {
return {character, mode, data, dictionary, index};
}
- _createMedia(row, index) {
+ _createMedia(row, {itemIndex: index}) {
return Object.assign({}, row, {index});
}
_splitField(field) {
- return field.length === 0 ? [] : field.split(' ');
+ return typeof field === 'string' && field.length > 0 ? field.split(' ') : [];
}
}
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index 5db781dd..dd7d54a0 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -1114,8 +1114,8 @@ class Translator {
return {dictionary, tagNames};
}
- _createSource(originalText, transformedText, deinflectedText, isPrimary) {
- return {originalText, transformedText, deinflectedText, isPrimary};
+ _createSource(originalText, transformedText, deinflectedText, matchType, matchSource, isPrimary) {
+ return {originalText, transformedText, deinflectedText, matchType, matchSource, isPrimary};
}
_createTermHeadword(index, term, reading, sources, tags, wordClasses) {
@@ -1166,11 +1166,11 @@ class Translator {
}
_createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, reasons, isPrimary, enabledDictionaryMap) {
- const {term, reading: rawReading, definitionTags, termTags, definitions, score, dictionary, id, sequence: rawSequence, rules} = databaseEntry;
+ const {matchType, matchSource, term, reading: rawReading, definitionTags, termTags, definitions, score, dictionary, id, sequence: rawSequence, rules} = databaseEntry;
const reading = (rawReading.length > 0 ? rawReading : term);
const {index: dictionaryIndex, priority: dictionaryPriority} = this._getDictionaryOrder(dictionary, enabledDictionaryMap);
const sourceTermExactMatchCount = (isPrimary && deinflectedText === term ? 1 : 0);
- const source = this._createSource(originalText, transformedText, deinflectedText, isPrimary);
+ const source = this._createSource(originalText, transformedText, deinflectedText, matchType, matchSource, isPrimary);
const maxTransformedTextLength = transformedText.length;
const hasSequence = (rawSequence >= 0);
const sequence = hasSequence ? rawSequence : -1;
@@ -1239,9 +1239,9 @@ class Translator {
const headwordsArray = [...headwords.values()];
let sourceTermExactMatchCount = 0;
- for (const {term, sources} of headwordsArray) {
- for (const {deinflectedText, isPrimary: isPrimary2} of sources) {
- if (isPrimary2 && deinflectedText === term) {
+ for (const {sources} of headwordsArray) {
+ for (const source of sources) {
+ if (source.isPrimary && source.matchSource === 'term') {
++sourceTermExactMatchCount;
break;
}
@@ -1278,13 +1278,15 @@ class Translator {
return;
}
for (const newSource of newSources) {
- const {originalText, transformedText, deinflectedText, isPrimary} = newSource;
+ const {originalText, transformedText, deinflectedText, matchType, matchSource, isPrimary} = newSource;
let has = false;
for (const source of sources) {
if (
source.deinflectedText === deinflectedText &&
source.transformedText === transformedText &&
- source.originalText === originalText
+ source.originalText === originalText &&
+ source.matchType === matchType &&
+ source.matchSource === matchSource
) {
if (isPrimary) { source.isPrimary = true; }
has = true;