diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-02-27 22:27:00 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-02-27 22:27:00 -0500 | 
| commit | e4a4e5f85f61775ff61ae741d3ba6f28924637cb (patch) | |
| tree | 0508d6b777b3595fb623a6a0f292428c9fd3eab6 /ext/js | |
| parent | 7d6915ec3b8d5185782f7001c89e2b529b4ebded (diff) | |
Improve term meta ordering (#1455)
* Update implementation of _buildTermMeta
* Remove old implementation
* Expose dictionaryPriority on frequencies and pitch accents
* Update how meta data is generated; add index
* Update order
* Update names
* Expose expressionIndex as part of pitch/frequency data
* Implement meta sorting
* Update test data
Diffstat (limited to 'ext/js')
| -rw-r--r-- | ext/js/language/translator.js | 179 | 
1 file changed, 105 insertions, 74 deletions
| diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 61d76b36..117007a4 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -655,70 +655,72 @@ class Translator {      // Metadata building      async _buildTermMeta(definitions, enabledDictionaryMap) { -        const addMetadataTargetInfo = (targetMap1, target, parents) => { -            let {expression, reading} = target; -            if (!reading) { reading = expression; } - -            let targetMap2 = targetMap1.get(expression); -            if (typeof targetMap2 === 'undefined') { -                targetMap2 = new Map(); -                targetMap1.set(expression, targetMap2); -            } - -            let targets = targetMap2.get(reading); -            if (typeof targets === 'undefined') { -                targets = new Set([target, ...parents]); -                targetMap2.set(reading, targets); -            } else { -                targets.add(target); -                for (const parent of parents) { -                    targets.add(parent); +        const allDefinitions = this._getAllDefinitions(definitions); +        const expressionMap = new Map(); +        const expressionValues = []; +        const expressionKeys = []; + +        for (const {expressions, frequencies: frequencies1, pitches: pitches1} of allDefinitions) { +            for (let i = 0, ii = expressions.length; i < ii; ++i) { +                const {expression, reading, frequencies: frequencies2, pitches: pitches2} = expressions[i]; +                let readingMap = expressionMap.get(expression); +                if (typeof readingMap === 'undefined') { +                    readingMap = new Map(); +                    expressionMap.set(expression, readingMap); +                    expressionValues.push(readingMap); +                    expressionKeys.push(expression);                  } -            } -        }; - -        const targetMap = new Map(); -        const 
definitionsQueue = definitions.map((definition) => ({definition, parents: []})); -        while (definitionsQueue.length > 0) { -            const {definition, parents} = definitionsQueue.shift(); -            const childDefinitions = definition.definitions; -            if (Array.isArray(childDefinitions)) { -                for (const definition2 of childDefinitions) { -                    definitionsQueue.push({definition: definition2, parents: [...parents, definition]}); +                let targets = readingMap.get(reading); +                if (typeof targets === 'undefined') { +                    targets = []; +                    readingMap.set(reading, targets);                  } -            } else { -                addMetadataTargetInfo(targetMap, definition, parents); -            } - -            for (const target of definition.expressions) { -                addMetadataTargetInfo(targetMap, target, []); +                targets.push( +                    {frequencies: frequencies1, pitches: pitches1, index: i}, +                    {frequencies: frequencies2, pitches: pitches2, index: i} +                );              }          } -        const targetMapEntries = [...targetMap.entries()]; -        const uniqueExpressions = targetMapEntries.map(([expression]) => expression); -        const metas = await this._database.findTermMetaBulk(uniqueExpressions, enabledDictionaryMap); +        const metas = await this._database.findTermMetaBulk(expressionKeys, enabledDictionaryMap);          for (const {expression, mode, data, dictionary, index} of metas) { -            const targetMap2 = targetMapEntries[index][1]; -            for (const [reading, targets] of targetMap2) { +            const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap); +            const map2 = expressionValues[index]; +            for (const [reading, targets] of map2.entries()) {                  switch (mode) {                      case 'freq':  
                        { -                            const frequencyData = this._getTermFrequencyData(expression, reading, dictionary, data); -                            if (frequencyData === null) { continue; } -                            for (const {frequencies} of targets) { frequencies.push(frequencyData); } +                            let frequency = data; +                            const hasReading = (data !== null && typeof data === 'object'); +                            if (hasReading) { +                                if (data.reading !== reading) { continue; } +                                frequency = data.frequency; +                            } +                            for (const {frequencies, index: expressionIndex} of targets) { +                                frequencies.push({index: frequencies.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, hasReading, frequency}); +                            }                          }                          break;                      case 'pitch':                          { -                            const pitchData = await this._getPitchData(expression, reading, dictionary, data); -                            if (pitchData === null) { continue; } -                            for (const {pitches} of targets) { pitches.push(pitchData); } +                            if (data.reading !== reading) { continue; } +                            const pitches2 = []; +                            for (let {position, tags} of data.pitches) { +                                tags = Array.isArray(tags) ? 
await this._expandTags(tags, dictionary) : []; +                                pitches2.push({position, tags}); +                            } +                            for (const {pitches, index: expressionIndex} of targets) { +                                pitches.push({index: pitches.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, pitches: pitches2}); +                            }                          }                          break;                  }              }          } + +        for (const definition of allDefinitions) { +            this._sortTermDefinitionMeta(definition); +        }      }      async _buildKanjiMeta(definitions, enabledDictionaryMap) { @@ -729,15 +731,20 @@ class Translator {          const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);          for (const {character, mode, data, dictionary, index} of metas) { +            const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);              switch (mode) {                  case 'freq':                      { -                        const frequencyData = this._getKanjiFrequencyData(character, dictionary, data); -                        definitions[index].frequencies.push(frequencyData); +                        const {frequencies} = definitions[index]; +                        frequencies.push({index: frequencies.length, dictionary, dictionaryPriority, character, frequency: data});                      }                      break;              }          } + +        for (const definition of definitions) { +            this._sortKanjiDefinitionMeta(definition); +        }      }      async _expandTags(names, dictionary) { @@ -806,32 +813,6 @@ class Translator {          return tagMetaList;      } -    _getTermFrequencyData(expression, reading, dictionary, data) { -        let frequency = data; -        const hasReading = (data !== null && typeof data === 'object'); -        
if (hasReading) { -            if (data.reading !== reading) { return null; } -            frequency = data.frequency; -        } -        return {dictionary, expression, reading, hasReading, frequency}; -    } - -    _getKanjiFrequencyData(character, dictionary, data) { -        return {dictionary, character, frequency: data}; -    } - -    async _getPitchData(expression, reading, dictionary, data) { -        if (data.reading !== reading) { return null; } - -        const pitches = []; -        for (let {position, tags} of data.pitches) { -            tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : []; -            pitches.push({position, tags}); -        } - -        return {expression, reading, dictionary, pitches}; -    } -      // Simple helpers      _scoreToTermFrequency(score) { @@ -1000,6 +981,17 @@ class Translator {          return result;      } +    _getAllDefinitions(definitions) { +        definitions = [...definitions]; +        for (let i = 0; i < definitions.length; ++i) { +            const childDefinitions = definitions[i].definitions; +            if (Array.isArray(childDefinitions)) { +                definitions.push(...childDefinitions); +            } +        } +        return definitions; +    } +      // Reduction functions      _getTermTagsScoreSum(termTags) { @@ -1334,6 +1326,45 @@ class Translator {          });      } +    _sortTermDefinitionMeta(definition) { +        const compareFunction = (v1, v2) => { +            // Sort by dictionary +            let i = v2.dictionaryPriority - v1.dictionaryPriority; +            if (i !== 0) { return i; } + +            // Sory by expression order +            i = v1.expressionIndex - v2.expressionIndex; +            if (i !== 0) { return i; } + +            // Default order +            i = v1.index - v2.index; +            return i; +        }; + +        const {expressions, frequencies: frequencies1, pitches: pitches1} = definition; +        
frequencies1.sort(compareFunction); +        pitches1.sort(compareFunction); +        for (const {frequencies: frequencies2, pitches: pitches2} of expressions) { +            frequencies2.sort(compareFunction); +            pitches2.sort(compareFunction); +        } +    } + +    _sortKanjiDefinitionMeta(definition) { +        const compareFunction = (v1, v2) => { +            // Sort by dictionary +            let i = v2.dictionaryPriority - v1.dictionaryPriority; +            if (i !== 0) { return i; } + +            // Default order +            i = v1.index - v2.index; +            return i; +        }; + +        const {frequencies} = definition; +        frequencies.sort(compareFunction); +    } +      // Regex functions      _applyTextReplacements(text, sourceMap, replacements) { |