diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-02-27 22:27:00 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-27 22:27:00 -0500 |
commit | e4a4e5f85f61775ff61ae741d3ba6f28924637cb (patch) | |
tree | 0508d6b777b3595fb623a6a0f292428c9fd3eab6 /ext/js/language/translator.js | |
parent | 7d6915ec3b8d5185782f7001c89e2b529b4ebded (diff) |
Improve term meta ordering (#1455)
* Update implementation of _buildTermMeta
* Remove old implementation
* Expose dictionaryPriority on frequencies and pitch accents
* Update how meta data is generated; add index
* Update order
* Update names
* Expose expressionIndex as part of pitch/frequency data
* Implement meta sorting
* Update test data
Diffstat (limited to 'ext/js/language/translator.js')
-rw-r--r-- | ext/js/language/translator.js | 179 |
1 files changed, 105 insertions, 74 deletions
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index 61d76b36..117007a4 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -655,70 +655,72 @@ class Translator {
     // Metadata building
 
     async _buildTermMeta(definitions, enabledDictionaryMap) {
-        const addMetadataTargetInfo = (targetMap1, target, parents) => {
-            let {expression, reading} = target;
-            if (!reading) { reading = expression; }
-
-            let targetMap2 = targetMap1.get(expression);
-            if (typeof targetMap2 === 'undefined') {
-                targetMap2 = new Map();
-                targetMap1.set(expression, targetMap2);
-            }
-
-            let targets = targetMap2.get(reading);
-            if (typeof targets === 'undefined') {
-                targets = new Set([target, ...parents]);
-                targetMap2.set(reading, targets);
-            } else {
-                targets.add(target);
-                for (const parent of parents) {
-                    targets.add(parent);
+        const allDefinitions = this._getAllDefinitions(definitions);
+        const expressionMap = new Map();
+        const expressionValues = [];
+        const expressionKeys = [];
+
+        for (const {expressions, frequencies: frequencies1, pitches: pitches1} of allDefinitions) {
+            for (let i = 0, ii = expressions.length; i < ii; ++i) {
+                const {expression, reading, frequencies: frequencies2, pitches: pitches2} = expressions[i];
+                let readingMap = expressionMap.get(expression);
+                if (typeof readingMap === 'undefined') {
+                    readingMap = new Map();
+                    expressionMap.set(expression, readingMap);
+                    expressionValues.push(readingMap);
+                    expressionKeys.push(expression);
                 }
-            }
-        };
-
-        const targetMap = new Map();
-        const definitionsQueue = definitions.map((definition) => ({definition, parents: []}));
-        while (definitionsQueue.length > 0) {
-            const {definition, parents} = definitionsQueue.shift();
-            const childDefinitions = definition.definitions;
-            if (Array.isArray(childDefinitions)) {
-                for (const definition2 of childDefinitions) {
-                    definitionsQueue.push({definition: definition2, parents: [...parents, definition]});
+                let targets = readingMap.get(reading);
+                if (typeof targets === 'undefined') {
+                    targets = [];
+                    readingMap.set(reading, targets);
                 }
-            } else {
-                addMetadataTargetInfo(targetMap, definition, parents);
-            }
-
-            for (const target of definition.expressions) {
-                addMetadataTargetInfo(targetMap, target, []);
+                targets.push(
+                    {frequencies: frequencies1, pitches: pitches1, index: i},
+                    {frequencies: frequencies2, pitches: pitches2, index: i}
+                );
             }
         }
 
-        const targetMapEntries = [...targetMap.entries()];
-        const uniqueExpressions = targetMapEntries.map(([expression]) => expression);
-        const metas = await this._database.findTermMetaBulk(uniqueExpressions, enabledDictionaryMap);
+        const metas = await this._database.findTermMetaBulk(expressionKeys, enabledDictionaryMap);
         for (const {expression, mode, data, dictionary, index} of metas) {
-            const targetMap2 = targetMapEntries[index][1];
-            for (const [reading, targets] of targetMap2) {
+            const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
+            const map2 = expressionValues[index];
+            for (const [reading, targets] of map2.entries()) {
                 switch (mode) {
                     case 'freq':
                         {
-                            const frequencyData = this._getTermFrequencyData(expression, reading, dictionary, data);
-                            if (frequencyData === null) { continue; }
-                            for (const {frequencies} of targets) { frequencies.push(frequencyData); }
+                            let frequency = data;
+                            const hasReading = (data !== null && typeof data === 'object');
+                            if (hasReading) {
+                                if (data.reading !== reading) { continue; }
+                                frequency = data.frequency;
+                            }
+                            for (const {frequencies, index: expressionIndex} of targets) {
+                                frequencies.push({index: frequencies.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, hasReading, frequency});
+                            }
                         }
                         break;
                     case 'pitch':
                         {
-                            const pitchData = await this._getPitchData(expression, reading, dictionary, data);
-                            if (pitchData === null) { continue; }
-                            for (const {pitches} of targets) { pitches.push(pitchData); }
+                            if (data.reading !== reading) { continue; }
+                            const pitches2 = [];
+                            for (let {position, tags} of data.pitches) {
+                                tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
+                                pitches2.push({position, tags});
+                            }
+                            for (const {pitches, index: expressionIndex} of targets) {
+                                pitches.push({index: pitches.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, pitches: pitches2});
+                            }
                         }
                         break;
                 }
             }
         }
+
+        for (const definition of allDefinitions) {
+            this._sortTermDefinitionMeta(definition);
+        }
     }
 
     async _buildKanjiMeta(definitions, enabledDictionaryMap) {
@@ -729,15 +731,20 @@ class Translator {
 
         const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
         for (const {character, mode, data, dictionary, index} of metas) {
+            const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
             switch (mode) {
                 case 'freq':
                     {
-                        const frequencyData = this._getKanjiFrequencyData(character, dictionary, data);
-                        definitions[index].frequencies.push(frequencyData);
+                        const {frequencies} = definitions[index];
+                        frequencies.push({index: frequencies.length, dictionary, dictionaryPriority, character, frequency: data});
                     }
                     break;
             }
         }
+
+        for (const definition of definitions) {
+            this._sortKanjiDefinitionMeta(definition);
+        }
     }
 
     async _expandTags(names, dictionary) {
@@ -806,32 +813,6 @@ class Translator {
         return tagMetaList;
     }
 
-    _getTermFrequencyData(expression, reading, dictionary, data) {
-        let frequency = data;
-        const hasReading = (data !== null && typeof data === 'object');
-        if (hasReading) {
-            if (data.reading !== reading) { return null; }
-            frequency = data.frequency;
-        }
-        return {dictionary, expression, reading, hasReading, frequency};
-    }
-
-    _getKanjiFrequencyData(character, dictionary, data) {
-        return {dictionary, character, frequency: data};
-    }
-
-    async _getPitchData(expression, reading, dictionary, data) {
-        if (data.reading !== reading) { return null; }
-
-        const pitches = [];
-        for (let {position, tags} of data.pitches) {
-            tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
-            pitches.push({position, tags});
-        }
-
-        return {expression, reading, dictionary, pitches};
-    }
-
     // Simple helpers
 
     _scoreToTermFrequency(score) {
@@ -1000,6 +981,17 @@ class Translator {
         return result;
     }
 
+    _getAllDefinitions(definitions) {
+        definitions = [...definitions];
+        for (let i = 0; i < definitions.length; ++i) {
+            const childDefinitions = definitions[i].definitions;
+            if (Array.isArray(childDefinitions)) {
+                definitions.push(...childDefinitions);
+            }
+        }
+        return definitions;
+    }
+
     // Reduction functions
 
     _getTermTagsScoreSum(termTags) {
@@ -1334,6 +1326,45 @@ class Translator {
         });
     }
 
+    _sortTermDefinitionMeta(definition) {
+        const compareFunction = (v1, v2) => {
+            // Sort by dictionary
+            let i = v2.dictionaryPriority - v1.dictionaryPriority;
+            if (i !== 0) { return i; }
+
+            // Sory by expression order
+            i = v1.expressionIndex - v2.expressionIndex;
+            if (i !== 0) { return i; }
+
+            // Default order
+            i = v1.index - v2.index;
+            return i;
+        };
+
+        const {expressions, frequencies: frequencies1, pitches: pitches1} = definition;
+        frequencies1.sort(compareFunction);
+        pitches1.sort(compareFunction);
+        for (const {frequencies: frequencies2, pitches: pitches2} of expressions) {
+            frequencies2.sort(compareFunction);
+            pitches2.sort(compareFunction);
+        }
+    }
+
+    _sortKanjiDefinitionMeta(definition) {
+        const compareFunction = (v1, v2) => {
+            // Sort by dictionary
+            let i = v2.dictionaryPriority - v1.dictionaryPriority;
+            if (i !== 0) { return i; }
+
+            // Default order
+            i = v1.index - v2.index;
+            return i;
+        };
+
+        const {frequencies} = definition;
+        frequencies.sort(compareFunction);
+    }
+
     // Regex functions
 
     _applyTextReplacements(text, sourceMap, replacements) {