From 858fe7ae11850eaafb3e024289faf0c78e083abf Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Thu, 14 Oct 2021 21:26:53 -0400 Subject: String frequency support (#1989) * Restore support for string frequency values * Add support for {value, displayValue} frequencies * Update test data * Improve number parsing of string frequencies * Improve reading detection * Expose a displayValue property for frequency information * Update docs * Expose displayValue to Anki note data * Fix translator * Update display generation * Update test data * Update counts --- .../dictionary-kanji-meta-bank-v3-schema.json | 26 ++++++++- .../dictionary-term-meta-bank-v3-schema.json | 28 +++++++++- ext/display-templates.html | 4 +- ext/js/data/sandbox/anki-note-data-creator.js | 12 ++--- ext/js/display/display-generator.js | 44 +++++++++++++--- ext/js/language/sandbox/dictionary-data-util.js | 14 ++--- ext/js/language/translator.js | 61 ++++++++++++++++------ 7 files changed, 147 insertions(+), 42 deletions(-) (limited to 'ext') diff --git a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json index 49f7c813..0864c9aa 100644 --- a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json @@ -1,5 +1,29 @@ { "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "frequency": { + "oneOf": [ + { + "type": ["string", "number"] + }, + { + "type": "object", + "additionalProperties": false, + "required": [ + "value" + ], + "properties": { + "value": { + "type": "number" + }, + "displayValue": { + "type": "string" + } + } + } + ] + } + }, "type": "array", "description": "Custom metadata for kanji characters.", "additionalItems": { @@ -17,7 +41,7 @@ "description": "Type of data. \"freq\" corresponds to frequency information." }, { - "type": ["number"], + "$ref": "#/definitions/frequency", "description": "Data for the character." } ] diff --git a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json index 206e7152..96f2e54b 100644 --- a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json @@ -1,5 +1,29 @@ { "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "frequency": { + "oneOf": [ + { + "type": ["string", "number"] + }, + { + "type": "object", + "additionalProperties": false, + "required": [ + "value" + ], + "properties": { + "value": { + "type": "number" + }, + "displayValue": { + "type": "string" + } + } + } + ] + } + }, "type": "array", "description": "Custom metadata for terms.", "additionalItems": { @@ -28,7 +52,7 @@ { "oneOf": [ { - "type": ["number"], + "$ref": "#/definitions/frequency", "description": "Frequency information for the term." }, { @@ -44,7 +68,7 @@ "description": "Reading for the term." }, "frequency": { - "type": ["number"], + "$ref": "#/definitions/frequency", "description": "Frequency information for the term." } } diff --git a/ext/display-templates.html b/ext/display-templates.html index 534ad704..c181a64e 100644 --- a/ext/display-templates.html +++ b/ext/display-templates.html @@ -71,13 +71,13 @@ - + diff --git a/ext/js/data/sandbox/anki-note-data-creator.js b/ext/js/data/sandbox/anki-note-data-creator.js index b7abc0c5..8d363134 100644 --- a/ext/js/data/sandbox/anki-note-data-creator.js +++ b/ext/js/data/sandbox/anki-note-data-creator.js @@ -243,7 +243,7 @@ class AnkiNoteDataCreator { _getKanjiFrequencies(dictionaryEntry) { const results = []; - for (const {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency} of dictionaryEntry.frequencies) { + for (const {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency, displayValue, displayValueParsed} of dictionaryEntry.frequencies) { results.push({ index, dictionary, @@ -252,7 +252,7 @@ class AnkiNoteDataCreator { priority: dictionaryPriority }, character, - frequency + frequency: displayValueParsed ? displayValue : frequency }); } return results; @@ -374,7 +374,7 @@ class AnkiNoteDataCreator { _getTermFrequencies(dictionaryEntry) { const results = []; const {headwords} = dictionaryEntry; - for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency} of dictionaryEntry.frequencies) { + for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency, displayValue, displayValueParsed} of dictionaryEntry.frequencies) { const {term, reading} = headwords[headwordIndex]; results.push({ index: results.length, @@ -387,7 +387,7 @@ class AnkiNoteDataCreator { expression: term, reading, hasReading, - frequency + frequency: displayValueParsed ? displayValue : frequency }); } return results; @@ -459,7 +459,7 @@ class AnkiNoteDataCreator { _getTermExpressionFrequencies(dictionaryEntry, i) { const results = []; const {headwords, frequencies} = dictionaryEntry; - for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency} of frequencies) { + for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency, displayValue, displayValueParsed} of frequencies) { if (headwordIndex !== i) { continue; } const {term, reading} = headwords[headwordIndex]; results.push({ @@ -473,7 +473,7 @@ class AnkiNoteDataCreator { expression: term, reading, hasReading, - frequency + frequency: displayValueParsed ? displayValue : frequency }); } return results; diff --git a/ext/js/display/display-generator.js b/ext/js/display/display-generator.js index fe899e53..fc377df1 100644 --- a/ext/js/display/display-generator.js +++ b/ext/js/display/display-generator.js @@ -548,18 +548,15 @@ class DisplayGenerator { this._setTextContent(node.querySelector('.tag-label-content'), dictionary); - const frequency = values.join(', '); - this._setTextContent(node.querySelector('.frequency-disambiguation-term'), term, 'ja'); this._setTextContent(node.querySelector('.frequency-disambiguation-reading'), (reading !== null ? reading : ''), 'ja'); - this._setTextContent(node.querySelector('.frequency-value'), frequency, 'ja'); + this._populateFrequencyValueList(node.querySelector('.frequency-value-list'), values); node.dataset.term = term; node.dataset.reading = reading; node.dataset.hasReading = `${reading !== null}`; node.dataset.readingIsSame = `${reading === term}`; node.dataset.dictionary = dictionary; - node.dataset.frequency = `${frequency}`; node.dataset.details = dictionary; return node; @@ -569,19 +566,50 @@ class DisplayGenerator { const {character, values} = details; const node = this._templates.instantiate('kanji-frequency-item'); - const frequency = values.join(', '); - this._setTextContent(node.querySelector('.tag-label-content'), dictionary); - this._setTextContent(node.querySelector('.frequency-value'), frequency, 'ja'); + this._populateFrequencyValueList(node.querySelector('.frequency-value-list'), values); node.dataset.character = character; node.dataset.dictionary = dictionary; - node.dataset.frequency = `${frequency}`; node.dataset.details = dictionary; return node; } + _populateFrequencyValueList(node, values) { + let fullFrequency = ''; + for (let i = 0, ii = values.length; i < ii; ++i) { + const {frequency, displayValue} = values[i]; + const frequencyString = `${frequency}`; + const text = displayValue !== null ? displayValue : frequency; + + if (i > 0) { + const node2 = document.createElement('span'); + node2.className = 'frequency-value'; + node2.dataset.frequency = `${frequency}`; + node2.textContent = ', '; + node.appendChild(node2); + fullFrequency += ', '; + } + + const node2 = document.createElement('span'); + node2.className = 'frequency-value'; + node2.dataset.frequency = frequencyString; + if (displayValue !== null) { + node2.dataset.displayValue = `${displayValue}`; + if (displayValue !== frequencyString) { + node2.title = frequencyString; + } + } + this._setTextContent(node2, text, 'ja'); + node.appendChild(node2); + + fullFrequency += text; + } + + node.dataset.frequency = fullFrequency; + } + _appendKanjiLinks(container, text) { const jp = this._japaneseUtil; let part = ''; diff --git a/ext/js/language/sandbox/dictionary-data-util.js b/ext/js/language/sandbox/dictionary-data-util.js index 68b15c48..83d94b9b 100644 --- a/ext/js/language/sandbox/dictionary-data-util.js +++ b/ext/js/language/sandbox/dictionary-data-util.js @@ -48,7 +48,7 @@ class DictionaryDataUtil { const {headwords, frequencies} = dictionaryEntry; const map1 = new Map(); - for (const {headwordIndex, dictionary, hasReading, frequency} of frequencies) { + for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of frequencies) { const {term, reading} = headwords[headwordIndex]; let map2 = map1.get(dictionary); @@ -61,18 +61,18 @@ class DictionaryDataUtil { const key = this._createMapKey([term, readingKey]); let frequencyData = map2.get(key); if (typeof frequencyData === 'undefined') { - frequencyData = {term, reading: readingKey, values: new Set()}; + frequencyData = {term, reading: readingKey, values: new Map()}; map2.set(key, frequencyData); } - frequencyData.values.add(frequency); + frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); } return this._createFrequencyGroupsFromMap(map1); } static groupKanjiFrequencies(frequencies) { const map1 = new Map(); - for (const {dictionary, character, frequency} of frequencies) { + for (const {dictionary, character, frequency, displayValue} of frequencies) { let map2 = map1.get(dictionary); if (typeof map2 === 'undefined') { map2 = new Map(); @@ -81,11 +81,11 @@ class DictionaryDataUtil { let frequencyData = map2.get(character); if (typeof frequencyData === 'undefined') { - frequencyData = {character, values: new Set()}; + frequencyData = {character, values: new Map()}; map2.set(character, frequencyData); } - frequencyData.values.add(frequency); + frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); } return this._createFrequencyGroupsFromMap(map1); } @@ -222,7 +222,7 @@ class DictionaryDataUtil { for (const [dictionary, map2] of map.entries()) { const frequencies = []; for (const frequencyData of map2.values()) { - frequencyData.values = [...frequencyData.values]; + frequencyData.values = [...frequencyData.values.values()]; frequencies.push(frequencyData); } results.push({dictionary, frequencies}); diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 056ff3a7..28e1cfcc 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -36,6 +36,7 @@ class Translator { this._deinflector = null; this._tagCache = new Map(); this._stringComparer = new Intl.Collator('en-US'); // Invariant locale + this._numberRegex = /[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/; } /** @@ -853,12 +854,15 @@ class Translator { case 'freq': { let frequency = data; - const hasReading = (data !== null && typeof data === 'object'); + const hasReading = (data !== null && typeof data === 'object' && typeof data.reading === 'string'); if (hasReading) { if (data.reading !== reading) { continue; } frequency = data.frequency; } for (const {frequencies, headwordIndex} of targets) { + let displayValue; + let displayValueParsed; + ({frequency, displayValue, displayValueParsed} = this._getFrequencyInfo(frequency)); frequencies.push(this._createTermFrequency( frequencies.length, headwordIndex, @@ -866,7 +870,9 @@ class Translator { dictionaryIndex, dictionaryPriority, hasReading, - this._convertFrequency(frequency) + frequency, + displayValue, + displayValueParsed )); } } @@ -914,13 +920,16 @@ class Translator { case 'freq': { const {frequencies} = dictionaryEntries[index]; + const {frequency, displayValue, displayValueParsed} = this._getFrequencyInfo(data); frequencies.push(this._createKanjiFrequency( frequencies.length, dictionary, dictionaryIndex, dictionaryPriority, character, - this._convertFrequency(data) + frequency, + displayValue, + displayValueParsed )); } break; @@ -971,16 +980,36 @@ class Translator { }); } - _convertFrequency(value) { - switch (typeof value) { - case 'number': - return value; - case 'string': - value = Number.parseFloat(value); - return Number.isFinite(value) ? value : 0; - default: - return 0; + _convertStringToNumber(value) { + const match = this._numberRegex.exec(value); + if (match === null) { return 0; } + value = Number.parseFloat(match[0]); + return Number.isFinite(value) ? value : 0; + } + + _getFrequencyInfo(frequency) { + let displayValue = null; + let displayValueParsed = false; + if (typeof frequency === 'object' && frequency !== null) { + ({value: frequency, displayValue} = frequency); + if (typeof frequency !== 'number') { frequency = 0; } + if (typeof displayValue !== 'string') { displayValue = null; } + } else { + switch (typeof frequency) { + case 'number': + // No change + break; + case 'string': + displayValue = frequency; + displayValueParsed = true; + frequency = this._convertStringToNumber(frequency); + break; + default: + frequency = 0; + break; + } } + return {frequency, displayValue, displayValueParsed}; } // Helpers @@ -1048,8 +1077,8 @@ class Translator { }; } - _createKanjiFrequency(index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency) { - return {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency}; + _createKanjiFrequency(index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency, displayValue, displayValueParsed) { + return {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency, displayValue, displayValueParsed}; } _createKanjiDictionaryEntry(character, dictionary, onyomi, kunyomi, tags, stats, definitions) { @@ -1114,8 +1143,8 @@ class Translator { return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches}; } - _createTermFrequency(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency) { - return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency}; + _createTermFrequency(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency, displayValue, displayValueParsed) { + return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency, displayValue, displayValueParsed}; } _createTermDictionaryEntry(isPrimary, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxTransformedTextLength, headwords, definitions) { -- cgit v1.2.3