about | summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-10-14 21:26:53 -0400
committer: GitHub <noreply@github.com> 2021-10-14 21:26:53 -0400
commit: 75aabd983da29023b8423bd44d565202aad6b664 (patch)
tree: 754fbe5457747cd72fcf13767fc3176fc0d8f280 /ext
parent: 11f7591a7f5fcbfc3a3e631eaac0addb228c988a (diff)
String frequency support (#1989)
* Restore support for string frequency values
* Add support for {value, displayValue} frequencies
* Update test data
* Improve number parsing of string frequencies
* Improve reading detection
* Expose a displayValue property for frequency information
* Update docs
* Expose displayValue to Anki note data
* Fix translator
* Update display generation
* Update test data
* Update counts
Diffstat (limited to 'ext')
-rw-r--r-- ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json 26
-rw-r--r-- ext/data/schemas/dictionary-term-meta-bank-v3-schema.json 28
-rw-r--r-- ext/display-templates.html 4
-rw-r--r-- ext/js/data/sandbox/anki-note-data-creator.js 12
-rw-r--r-- ext/js/display/display-generator.js 44
-rw-r--r-- ext/js/language/sandbox/dictionary-data-util.js 14
-rw-r--r-- ext/js/language/translator.js 61
7 files changed, 147 insertions, 42 deletions
diff --git a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
index 49f7c813..0864c9aa 100644
--- a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
@@ -1,5 +1,29 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
+ "definitions": {
+ "frequency": {
+ "oneOf": [
+ {
+ "type": ["string", "number"]
+ },
+ {
+ "type": "object",
+ "additionalProperties": false,
+ "required": [
+ "value"
+ ],
+ "properties": {
+ "value": {
+ "type": "number"
+ },
+ "displayValue": {
+ "type": "string"
+ }
+ }
+ }
+ ]
+ }
+ },
"type": "array",
"description": "Custom metadata for kanji characters.",
"additionalItems": {
@@ -17,7 +41,7 @@
"description": "Type of data. \"freq\" corresponds to frequency information."
},
{
- "type": ["number"],
+ "$ref": "#/definitions/frequency",
"description": "Data for the character."
}
]
diff --git a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
index 206e7152..96f2e54b 100644
--- a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
@@ -1,5 +1,29 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
+ "definitions": {
+ "frequency": {
+ "oneOf": [
+ {
+ "type": ["string", "number"]
+ },
+ {
+ "type": "object",
+ "additionalProperties": false,
+ "required": [
+ "value"
+ ],
+ "properties": {
+ "value": {
+ "type": "number"
+ },
+ "displayValue": {
+ "type": "string"
+ }
+ }
+ }
+ ]
+ }
+ },
"type": "array",
"description": "Custom metadata for terms.",
"additionalItems": {
@@ -28,7 +52,7 @@
{
"oneOf": [
{
- "type": ["number"],
+ "$ref": "#/definitions/frequency",
"description": "Frequency information for the term."
},
{
@@ -44,7 +68,7 @@
"description": "Reading for the term."
},
"frequency": {
- "type": ["number"],
+ "$ref": "#/definitions/frequency",
"description": "Frequency information for the term."
}
}
diff --git a/ext/display-templates.html b/ext/display-templates.html
index 534ad704..c181a64e 100644
--- a/ext/display-templates.html
+++ b/ext/display-templates.html
@@ -71,13 +71,13 @@
<rt class="frequency-disambiguation-reading"></rt>
</ruby></span>
<span class="frequency-separator"></span>
- <span class="frequency-value"></span>
+ <span class="frequency-value-list"></span>
</span></span>
</span></span></template>
<template id="kanji-frequency-item-template" data-remove-whitespace-text="true"><span class="frequency-item"><span class="tag tag-has-body frequency-tag" data-category="frequency" data-frequency-type="kanji">
<span class="tag-label"><span class="tag-label-content"></span></span>
<span class="tag-body"><span class="tag-body-content frequency-body">
- <span class="frequency-value"></span>
+ <span class="frequency-value-list"></span>
</span></span>
</span></span></template>
diff --git a/ext/js/data/sandbox/anki-note-data-creator.js b/ext/js/data/sandbox/anki-note-data-creator.js
index b7abc0c5..8d363134 100644
--- a/ext/js/data/sandbox/anki-note-data-creator.js
+++ b/ext/js/data/sandbox/anki-note-data-creator.js
@@ -243,7 +243,7 @@ class AnkiNoteDataCreator {
_getKanjiFrequencies(dictionaryEntry) {
const results = [];
- for (const {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency} of dictionaryEntry.frequencies) {
+ for (const {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency, displayValue, displayValueParsed} of dictionaryEntry.frequencies) {
results.push({
index,
dictionary,
@@ -252,7 +252,7 @@ class AnkiNoteDataCreator {
priority: dictionaryPriority
},
character,
- frequency
+ frequency: displayValueParsed ? displayValue : frequency
});
}
return results;
@@ -374,7 +374,7 @@ class AnkiNoteDataCreator {
_getTermFrequencies(dictionaryEntry) {
const results = [];
const {headwords} = dictionaryEntry;
- for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency} of dictionaryEntry.frequencies) {
+ for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency, displayValue, displayValueParsed} of dictionaryEntry.frequencies) {
const {term, reading} = headwords[headwordIndex];
results.push({
index: results.length,
@@ -387,7 +387,7 @@ class AnkiNoteDataCreator {
expression: term,
reading,
hasReading,
- frequency
+ frequency: displayValueParsed ? displayValue : frequency
});
}
return results;
@@ -459,7 +459,7 @@ class AnkiNoteDataCreator {
_getTermExpressionFrequencies(dictionaryEntry, i) {
const results = [];
const {headwords, frequencies} = dictionaryEntry;
- for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency} of frequencies) {
+ for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency, displayValue, displayValueParsed} of frequencies) {
if (headwordIndex !== i) { continue; }
const {term, reading} = headwords[headwordIndex];
results.push({
@@ -473,7 +473,7 @@ class AnkiNoteDataCreator {
expression: term,
reading,
hasReading,
- frequency
+ frequency: displayValueParsed ? displayValue : frequency
});
}
return results;
diff --git a/ext/js/display/display-generator.js b/ext/js/display/display-generator.js
index fe899e53..fc377df1 100644
--- a/ext/js/display/display-generator.js
+++ b/ext/js/display/display-generator.js
@@ -548,18 +548,15 @@ class DisplayGenerator {
this._setTextContent(node.querySelector('.tag-label-content'), dictionary);
- const frequency = values.join(', ');
-
this._setTextContent(node.querySelector('.frequency-disambiguation-term'), term, 'ja');
this._setTextContent(node.querySelector('.frequency-disambiguation-reading'), (reading !== null ? reading : ''), 'ja');
- this._setTextContent(node.querySelector('.frequency-value'), frequency, 'ja');
+ this._populateFrequencyValueList(node.querySelector('.frequency-value-list'), values);
node.dataset.term = term;
node.dataset.reading = reading;
node.dataset.hasReading = `${reading !== null}`;
node.dataset.readingIsSame = `${reading === term}`;
node.dataset.dictionary = dictionary;
- node.dataset.frequency = `${frequency}`;
node.dataset.details = dictionary;
return node;
@@ -569,19 +566,50 @@ class DisplayGenerator {
const {character, values} = details;
const node = this._templates.instantiate('kanji-frequency-item');
- const frequency = values.join(', ');
-
this._setTextContent(node.querySelector('.tag-label-content'), dictionary);
- this._setTextContent(node.querySelector('.frequency-value'), frequency, 'ja');
+ this._populateFrequencyValueList(node.querySelector('.frequency-value-list'), values);
node.dataset.character = character;
node.dataset.dictionary = dictionary;
- node.dataset.frequency = `${frequency}`;
node.dataset.details = dictionary;
return node;
}
+ _populateFrequencyValueList(node, values) {
+ let fullFrequency = '';
+ for (let i = 0, ii = values.length; i < ii; ++i) {
+ const {frequency, displayValue} = values[i];
+ const frequencyString = `${frequency}`;
+ const text = displayValue !== null ? displayValue : frequency;
+
+ if (i > 0) {
+ const node2 = document.createElement('span');
+ node2.className = 'frequency-value';
+ node2.dataset.frequency = `${frequency}`;
+ node2.textContent = ', ';
+ node.appendChild(node2);
+ fullFrequency += ', ';
+ }
+
+ const node2 = document.createElement('span');
+ node2.className = 'frequency-value';
+ node2.dataset.frequency = frequencyString;
+ if (displayValue !== null) {
+ node2.dataset.displayValue = `${displayValue}`;
+ if (displayValue !== frequencyString) {
+ node2.title = frequencyString;
+ }
+ }
+ this._setTextContent(node2, text, 'ja');
+ node.appendChild(node2);
+
+ fullFrequency += text;
+ }
+
+ node.dataset.frequency = fullFrequency;
+ }
+
_appendKanjiLinks(container, text) {
const jp = this._japaneseUtil;
let part = '';
diff --git a/ext/js/language/sandbox/dictionary-data-util.js b/ext/js/language/sandbox/dictionary-data-util.js
index 68b15c48..83d94b9b 100644
--- a/ext/js/language/sandbox/dictionary-data-util.js
+++ b/ext/js/language/sandbox/dictionary-data-util.js
@@ -48,7 +48,7 @@ class DictionaryDataUtil {
const {headwords, frequencies} = dictionaryEntry;
const map1 = new Map();
- for (const {headwordIndex, dictionary, hasReading, frequency} of frequencies) {
+ for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of frequencies) {
const {term, reading} = headwords[headwordIndex];
let map2 = map1.get(dictionary);
@@ -61,18 +61,18 @@ class DictionaryDataUtil {
const key = this._createMapKey([term, readingKey]);
let frequencyData = map2.get(key);
if (typeof frequencyData === 'undefined') {
- frequencyData = {term, reading: readingKey, values: new Set()};
+ frequencyData = {term, reading: readingKey, values: new Map()};
map2.set(key, frequencyData);
}
- frequencyData.values.add(frequency);
+ frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue});
}
return this._createFrequencyGroupsFromMap(map1);
}
static groupKanjiFrequencies(frequencies) {
const map1 = new Map();
- for (const {dictionary, character, frequency} of frequencies) {
+ for (const {dictionary, character, frequency, displayValue} of frequencies) {
let map2 = map1.get(dictionary);
if (typeof map2 === 'undefined') {
map2 = new Map();
@@ -81,11 +81,11 @@ class DictionaryDataUtil {
let frequencyData = map2.get(character);
if (typeof frequencyData === 'undefined') {
- frequencyData = {character, values: new Set()};
+ frequencyData = {character, values: new Map()};
map2.set(character, frequencyData);
}
- frequencyData.values.add(frequency);
+ frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue});
}
return this._createFrequencyGroupsFromMap(map1);
}
@@ -222,7 +222,7 @@ class DictionaryDataUtil {
for (const [dictionary, map2] of map.entries()) {
const frequencies = [];
for (const frequencyData of map2.values()) {
- frequencyData.values = [...frequencyData.values];
+ frequencyData.values = [...frequencyData.values.values()];
frequencies.push(frequencyData);
}
results.push({dictionary, frequencies});
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index 056ff3a7..28e1cfcc 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -36,6 +36,7 @@ class Translator {
this._deinflector = null;
this._tagCache = new Map();
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
+ this._numberRegex = /[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/;
}
/**
@@ -853,12 +854,15 @@ class Translator {
case 'freq':
{
let frequency = data;
- const hasReading = (data !== null && typeof data === 'object');
+ const hasReading = (data !== null && typeof data === 'object' && typeof data.reading === 'string');
if (hasReading) {
if (data.reading !== reading) { continue; }
frequency = data.frequency;
}
for (const {frequencies, headwordIndex} of targets) {
+ let displayValue;
+ let displayValueParsed;
+ ({frequency, displayValue, displayValueParsed} = this._getFrequencyInfo(frequency));
frequencies.push(this._createTermFrequency(
frequencies.length,
headwordIndex,
@@ -866,7 +870,9 @@ class Translator {
dictionaryIndex,
dictionaryPriority,
hasReading,
- this._convertFrequency(frequency)
+ frequency,
+ displayValue,
+ displayValueParsed
));
}
}
@@ -914,13 +920,16 @@ class Translator {
case 'freq':
{
const {frequencies} = dictionaryEntries[index];
+ const {frequency, displayValue, displayValueParsed} = this._getFrequencyInfo(data);
frequencies.push(this._createKanjiFrequency(
frequencies.length,
dictionary,
dictionaryIndex,
dictionaryPriority,
character,
- this._convertFrequency(data)
+ frequency,
+ displayValue,
+ displayValueParsed
));
}
break;
@@ -971,16 +980,36 @@ class Translator {
});
}
- _convertFrequency(value) {
- switch (typeof value) {
- case 'number':
- return value;
- case 'string':
- value = Number.parseFloat(value);
- return Number.isFinite(value) ? value : 0;
- default:
- return 0;
+ _convertStringToNumber(value) {
+ const match = this._numberRegex.exec(value);
+ if (match === null) { return 0; }
+ value = Number.parseFloat(match[0]);
+ return Number.isFinite(value) ? value : 0;
+ }
+
+ _getFrequencyInfo(frequency) {
+ let displayValue = null;
+ let displayValueParsed = false;
+ if (typeof frequency === 'object' && frequency !== null) {
+ ({value: frequency, displayValue} = frequency);
+ if (typeof frequency !== 'number') { frequency = 0; }
+ if (typeof displayValue !== 'string') { displayValue = null; }
+ } else {
+ switch (typeof frequency) {
+ case 'number':
+ // No change
+ break;
+ case 'string':
+ displayValue = frequency;
+ displayValueParsed = true;
+ frequency = this._convertStringToNumber(frequency);
+ break;
+ default:
+ frequency = 0;
+ break;
+ }
}
+ return {frequency, displayValue, displayValueParsed};
}
// Helpers
@@ -1048,8 +1077,8 @@ class Translator {
};
}
- _createKanjiFrequency(index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency) {
- return {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency};
+ _createKanjiFrequency(index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency, displayValue, displayValueParsed) {
+ return {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency, displayValue, displayValueParsed};
}
_createKanjiDictionaryEntry(character, dictionary, onyomi, kunyomi, tags, stats, definitions) {
@@ -1114,8 +1143,8 @@ class Translator {
return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches};
}
- _createTermFrequency(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency) {
- return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency};
+ _createTermFrequency(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency, displayValue, displayValueParsed) {
+ return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency, displayValue, displayValueParsed};
}
_createTermDictionaryEntry(isPrimary, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxTransformedTextLength, headwords, definitions) {