about | summary | refs | log | tree | commit | diff
path: root/ext/js/language
diff options
context:
space:
mode:
author toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-02-27 22:27:00 -0500
committer GitHub <noreply@github.com> 2021-02-27 22:27:00 -0500
commit e4a4e5f85f61775ff61ae741d3ba6f28924637cb (patch)
tree 0508d6b777b3595fb623a6a0f292428c9fd3eab6 /ext/js/language
parent 7d6915ec3b8d5185782f7001c89e2b529b4ebded (diff)
Improve term meta ordering (#1455)
* Update implementation of _buildTermMeta
* Remove old implementation
* Expose dictionaryPriority on frequencies and pitch accents
* Update how meta data is generated; add index
* Update order
* Update names
* Expose expressionIndex as part of pitch/frequency data
* Implement meta sorting
* Update test data
Diffstat (limited to 'ext/js/language')
-rw-r--r-- ext/js/language/translator.js 179
1 files changed, 105 insertions, 74 deletions
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index 61d76b36..117007a4 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -655,70 +655,72 @@ class Translator {
// Metadata building
async _buildTermMeta(definitions, enabledDictionaryMap) {
- const addMetadataTargetInfo = (targetMap1, target, parents) => {
- let {expression, reading} = target;
- if (!reading) { reading = expression; }
-
- let targetMap2 = targetMap1.get(expression);
- if (typeof targetMap2 === 'undefined') {
- targetMap2 = new Map();
- targetMap1.set(expression, targetMap2);
- }
-
- let targets = targetMap2.get(reading);
- if (typeof targets === 'undefined') {
- targets = new Set([target, ...parents]);
- targetMap2.set(reading, targets);
- } else {
- targets.add(target);
- for (const parent of parents) {
- targets.add(parent);
+ const allDefinitions = this._getAllDefinitions(definitions);
+ const expressionMap = new Map();
+ const expressionValues = [];
+ const expressionKeys = [];
+
+ for (const {expressions, frequencies: frequencies1, pitches: pitches1} of allDefinitions) {
+ for (let i = 0, ii = expressions.length; i < ii; ++i) {
+ const {expression, reading, frequencies: frequencies2, pitches: pitches2} = expressions[i];
+ let readingMap = expressionMap.get(expression);
+ if (typeof readingMap === 'undefined') {
+ readingMap = new Map();
+ expressionMap.set(expression, readingMap);
+ expressionValues.push(readingMap);
+ expressionKeys.push(expression);
}
- }
- };
-
- const targetMap = new Map();
- const definitionsQueue = definitions.map((definition) => ({definition, parents: []}));
- while (definitionsQueue.length > 0) {
- const {definition, parents} = definitionsQueue.shift();
- const childDefinitions = definition.definitions;
- if (Array.isArray(childDefinitions)) {
- for (const definition2 of childDefinitions) {
- definitionsQueue.push({definition: definition2, parents: [...parents, definition]});
+ let targets = readingMap.get(reading);
+ if (typeof targets === 'undefined') {
+ targets = [];
+ readingMap.set(reading, targets);
}
- } else {
- addMetadataTargetInfo(targetMap, definition, parents);
- }
-
- for (const target of definition.expressions) {
- addMetadataTargetInfo(targetMap, target, []);
+ targets.push(
+ {frequencies: frequencies1, pitches: pitches1, index: i},
+ {frequencies: frequencies2, pitches: pitches2, index: i}
+ );
}
}
- const targetMapEntries = [...targetMap.entries()];
- const uniqueExpressions = targetMapEntries.map(([expression]) => expression);
- const metas = await this._database.findTermMetaBulk(uniqueExpressions, enabledDictionaryMap);
+ const metas = await this._database.findTermMetaBulk(expressionKeys, enabledDictionaryMap);
for (const {expression, mode, data, dictionary, index} of metas) {
- const targetMap2 = targetMapEntries[index][1];
- for (const [reading, targets] of targetMap2) {
+ const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
+ const map2 = expressionValues[index];
+ for (const [reading, targets] of map2.entries()) {
switch (mode) {
case 'freq':
{
- const frequencyData = this._getTermFrequencyData(expression, reading, dictionary, data);
- if (frequencyData === null) { continue; }
- for (const {frequencies} of targets) { frequencies.push(frequencyData); }
+ let frequency = data;
+ const hasReading = (data !== null && typeof data === 'object');
+ if (hasReading) {
+ if (data.reading !== reading) { continue; }
+ frequency = data.frequency;
+ }
+ for (const {frequencies, index: expressionIndex} of targets) {
+ frequencies.push({index: frequencies.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, hasReading, frequency});
+ }
}
break;
case 'pitch':
{
- const pitchData = await this._getPitchData(expression, reading, dictionary, data);
- if (pitchData === null) { continue; }
- for (const {pitches} of targets) { pitches.push(pitchData); }
+ if (data.reading !== reading) { continue; }
+ const pitches2 = [];
+ for (let {position, tags} of data.pitches) {
+ tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
+ pitches2.push({position, tags});
+ }
+ for (const {pitches, index: expressionIndex} of targets) {
+ pitches.push({index: pitches.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, pitches: pitches2});
+ }
}
break;
}
}
}
+
+ for (const definition of allDefinitions) {
+ this._sortTermDefinitionMeta(definition);
+ }
}
async _buildKanjiMeta(definitions, enabledDictionaryMap) {
@@ -729,15 +731,20 @@ class Translator {
const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
for (const {character, mode, data, dictionary, index} of metas) {
+ const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
switch (mode) {
case 'freq':
{
- const frequencyData = this._getKanjiFrequencyData(character, dictionary, data);
- definitions[index].frequencies.push(frequencyData);
+ const {frequencies} = definitions[index];
+ frequencies.push({index: frequencies.length, dictionary, dictionaryPriority, character, frequency: data});
}
break;
}
}
+
+ for (const definition of definitions) {
+ this._sortKanjiDefinitionMeta(definition);
+ }
}
async _expandTags(names, dictionary) {
@@ -806,32 +813,6 @@ class Translator {
return tagMetaList;
}
- _getTermFrequencyData(expression, reading, dictionary, data) {
- let frequency = data;
- const hasReading = (data !== null && typeof data === 'object');
- if (hasReading) {
- if (data.reading !== reading) { return null; }
- frequency = data.frequency;
- }
- return {dictionary, expression, reading, hasReading, frequency};
- }
-
- _getKanjiFrequencyData(character, dictionary, data) {
- return {dictionary, character, frequency: data};
- }
-
- async _getPitchData(expression, reading, dictionary, data) {
- if (data.reading !== reading) { return null; }
-
- const pitches = [];
- for (let {position, tags} of data.pitches) {
- tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
- pitches.push({position, tags});
- }
-
- return {expression, reading, dictionary, pitches};
- }
-
// Simple helpers
_scoreToTermFrequency(score) {
@@ -1000,6 +981,17 @@ class Translator {
return result;
}
+ _getAllDefinitions(definitions) {
+ definitions = [...definitions];
+ for (let i = 0; i < definitions.length; ++i) {
+ const childDefinitions = definitions[i].definitions;
+ if (Array.isArray(childDefinitions)) {
+ definitions.push(...childDefinitions);
+ }
+ }
+ return definitions;
+ }
+
// Reduction functions
_getTermTagsScoreSum(termTags) {
@@ -1334,6 +1326,45 @@ class Translator {
});
}
+ _sortTermDefinitionMeta(definition) {
+ const compareFunction = (v1, v2) => {
+ // Sort by dictionary
+ let i = v2.dictionaryPriority - v1.dictionaryPriority;
+ if (i !== 0) { return i; }
+
+ // Sory by expression order
+ i = v1.expressionIndex - v2.expressionIndex;
+ if (i !== 0) { return i; }
+
+ // Default order
+ i = v1.index - v2.index;
+ return i;
+ };
+
+ const {expressions, frequencies: frequencies1, pitches: pitches1} = definition;
+ frequencies1.sort(compareFunction);
+ pitches1.sort(compareFunction);
+ for (const {frequencies: frequencies2, pitches: pitches2} of expressions) {
+ frequencies2.sort(compareFunction);
+ pitches2.sort(compareFunction);
+ }
+ }
+
+ _sortKanjiDefinitionMeta(definition) {
+ const compareFunction = (v1, v2) => {
+ // Sort by dictionary
+ let i = v2.dictionaryPriority - v1.dictionaryPriority;
+ if (i !== 0) { return i; }
+
+ // Default order
+ i = v1.index - v2.index;
+ return i;
+ };
+
+ const {frequencies} = definition;
+ frequencies.sort(compareFunction);
+ }
+
// Regex functions
_applyTextReplacements(text, sourceMap, replacements) {