summary | refs | log | tree | commit | diff
path: root/ext/js/language/dictionary-data-util.js
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-03-25 19:55:31 -0400
committer: GitHub <noreply@github.com> 2021-03-25 19:55:31 -0400
commit: 4be5c8fd9f7860e701d0b7d3c8c0ee934bc60a4f (patch)
tree: dcd78316afdf00bbb67d3d1aa6555a9c8ea3efec /ext/js/language/dictionary-data-util.js
parent: e7035dcff41d94f20c0bc8865d413412afc7c229 (diff)
Refactor Translator and dictionary entry format (#1553)
* Update test data
* Move translator.js
* Create new version of Translator
* Update Backend
* Update DictionaryDataUtil
* Update DisplayGenerator
* Create AnkiNoteDataCreator
* Replace AnkiNoteData with AnkiNoteDataCreator
* Update tests
* Remove AnkiNoteData
* Update test data
* Remove translator-old.js
* Add TypeScript interface definitions for the new translator data format
Diffstat (limited to 'ext/js/language/dictionary-data-util.js')
-rw-r--r-- ext/js/language/dictionary-data-util.js | 136
1 file changed, 78 insertions, 58 deletions
diff --git a/ext/js/language/dictionary-data-util.js b/ext/js/language/dictionary-data-util.js
index dff9d212..f44b81c5 100644
--- a/ext/js/language/dictionary-data-util.js
+++ b/ext/js/language/dictionary-data-util.js
@@ -16,40 +16,41 @@
*/
class DictionaryDataUtil {
- static groupTermTags(definition) {
- const {expressions} = definition;
- const expressionsLength = expressions.length;
- const uniqueCheck = (expressionsLength > 1);
- const resultsMap = new Map();
+ static groupTermTags(dictionaryEntry) {
+ const {headwords} = dictionaryEntry;
+ const headwordCount = headwords.length;
+ const uniqueCheck = (headwordCount > 1);
+ const resultsIndexMap = new Map();
const results = [];
- for (let i = 0; i < expressionsLength; ++i) {
- const {termTags, expression, reading} = expressions[i];
- for (const tag of termTags) {
+ for (let i = 0; i < headwordCount; ++i) {
+ const {tags} = headwords[i];
+ for (const tag of tags) {
if (uniqueCheck) {
const {name, category, notes, dictionary} = tag;
const key = this._createMapKey([name, category, notes, dictionary]);
- const index = resultsMap.get(key);
+ const index = resultsIndexMap.get(key);
if (typeof index !== 'undefined') {
const existingItem = results[index];
- existingItem.expressions.push({index: i, expression, reading});
+ existingItem.headwordIndices.push(i);
continue;
}
- resultsMap.set(key, results.length);
+ resultsIndexMap.set(key, results.length);
}
- const item = {
- tag,
- expressions: [{index: i, expression, reading}]
- };
+ const item = {tag, headwordIndices: [i]};
results.push(item);
}
}
return results;
}
- static groupTermFrequencies(frequencies) {
+ static groupTermFrequencies(dictionaryEntry) {
+ const {headwords, frequencies} = dictionaryEntry;
+
const map1 = new Map();
- for (const {dictionary, expression, reading, hasReading, frequency} of frequencies) {
+ for (const {headwordIndex, dictionary, hasReading, frequency} of frequencies) {
+ const {term, reading} = headwords[headwordIndex];
+
let map2 = map1.get(dictionary);
if (typeof map2 === 'undefined') {
map2 = new Map();
@@ -57,14 +58,14 @@ class DictionaryDataUtil {
}
const readingKey = hasReading ? reading : null;
- const key = this._createMapKey([expression, readingKey]);
+ const key = this._createMapKey([term, readingKey]);
let frequencyData = map2.get(key);
if (typeof frequencyData === 'undefined') {
- frequencyData = {expression, reading: readingKey, frequencies: new Set()};
+ frequencyData = {term, reading: readingKey, values: new Set()};
map2.set(key, frequencyData);
}
- frequencyData.frequencies.add(frequency);
+ frequencyData.values.add(frequency);
}
return this._createFrequencyGroupsFromMap(map1);
}
@@ -80,64 +81,66 @@ class DictionaryDataUtil {
let frequencyData = map2.get(character);
if (typeof frequencyData === 'undefined') {
- frequencyData = {character, frequencies: new Set()};
+ frequencyData = {character, values: new Set()};
map2.set(character, frequencyData);
}
- frequencyData.frequencies.add(frequency);
+ frequencyData.values.add(frequency);
}
return this._createFrequencyGroupsFromMap(map1);
}
- static getPitchAccentInfos(definition) {
- if (definition.type === 'kanji') { return []; }
+ static getPitchAccentInfos(dictionaryEntry) {
+ const {headwords, pronunciations} = dictionaryEntry;
- const results = new Map();
const allExpressions = new Set();
const allReadings = new Set();
-
- for (const {expression, reading, pitches: expressionPitches} of definition.expressions) {
- allExpressions.add(expression);
+ for (const {term, reading} of headwords) {
+ allExpressions.add(term);
allReadings.add(reading);
+ }
- for (const {pitches, dictionary} of expressionPitches) {
- let dictionaryResults = results.get(dictionary);
- if (typeof dictionaryResults === 'undefined') {
- dictionaryResults = [];
- results.set(dictionary, dictionaryResults);
- }
-
- for (const {position, tags} of pitches) {
- let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, tags, dictionaryResults);
- if (pitchAccentInfo === null) {
- pitchAccentInfo = {expressions: new Set(), reading, position, tags};
- dictionaryResults.push(pitchAccentInfo);
- }
- pitchAccentInfo.expressions.add(expression);
+ const pitchAccentInfoMap = new Map();
+ for (const {headwordIndex, dictionary, pitches} of pronunciations) {
+ const {term, reading} = headwords[headwordIndex];
+ let dictionaryPitchAccentInfoList = pitchAccentInfoMap.get(dictionary);
+ if (typeof dictionaryPitchAccentInfoList === 'undefined') {
+ dictionaryPitchAccentInfoList = [];
+ pitchAccentInfoMap.set(dictionary, dictionaryPitchAccentInfoList);
+ }
+ for (const {position, tags} of pitches) {
+ let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, tags, dictionaryPitchAccentInfoList);
+ if (pitchAccentInfo === null) {
+ pitchAccentInfo = {
+ terms: new Set(),
+ reading,
+ position,
+ tags,
+ exclusiveTerms: [],
+ exclusiveReadings: []
+ };
+ dictionaryPitchAccentInfoList.push(pitchAccentInfo);
}
+ pitchAccentInfo.terms.add(term);
}
}
const multipleReadings = (allReadings.size > 1);
- for (const dictionaryResults of results.values()) {
- for (const result of dictionaryResults) {
- const exclusiveExpressions = [];
- const exclusiveReadings = [];
- const resultExpressions = result.expressions;
- if (!this._areSetsEqual(resultExpressions, allExpressions)) {
- exclusiveExpressions.push(...this._getSetIntersection(resultExpressions, allExpressions));
+ for (const dictionaryPitchAccentInfoList of pitchAccentInfoMap.values()) {
+ for (const pitchAccentInfo of dictionaryPitchAccentInfoList) {
+ const {terms, reading, exclusiveTerms, exclusiveReadings} = pitchAccentInfo;
+ if (!this._areSetsEqual(terms, allExpressions)) {
+ exclusiveTerms.push(...this._getSetIntersection(terms, allExpressions));
}
if (multipleReadings) {
- exclusiveReadings.push(result.reading);
+ exclusiveReadings.push(reading);
}
- result.expressions = [...resultExpressions];
- result.exclusiveExpressions = exclusiveExpressions;
- result.exclusiveReadings = exclusiveReadings;
+ pitchAccentInfo.terms = [...terms];
}
}
const results2 = [];
- for (const [dictionary, pitches] of results.entries()) {
+ for (const [dictionary, pitches] of pitchAccentInfoMap.entries()) {
results2.push({dictionary, pitches});
}
return results2;
@@ -157,17 +160,34 @@ class DictionaryDataUtil {
}
}
+ static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) {
+ if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; }
+
+ const terms = new Set();
+ const readings = new Set();
+ for (const headwordIndex of headwordIndices) {
+ const {term, reading} = headwords[headwordIndex];
+ terms.add(term);
+ readings.add(reading);
+ }
+
+ const disambiguations = [];
+ if (!this._areSetsEqual(terms, allTermsSet)) { disambiguations.push(...this._getSetIntersection(terms, allTermsSet)); }
+ if (!this._areSetsEqual(readings, allReadingsSet)) { disambiguations.push(...this._getSetIntersection(readings, allReadingsSet)); }
+ return disambiguations;
+ }
+
// Private
static _createFrequencyGroupsFromMap(map) {
const results = [];
for (const [dictionary, map2] of map.entries()) {
- const frequencyDataArray = [];
+ const frequencies = [];
for (const frequencyData of map2.values()) {
- frequencyData.frequencies = [...frequencyData.frequencies];
- frequencyDataArray.push(frequencyData);
+ frequencyData.values = [...frequencyData.values];
+ frequencies.push(frequencyData);
}
- results.push({dictionary, frequencyData: frequencyDataArray});
+ results.push({dictionary, frequencies});
}
return results;
}