summary | refs | log | tree | commit | diff
path: root/ext/js
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-09-26 11:08:16 -0400
committer: GitHub <noreply@github.com> 2021-09-26 11:08:16 -0400
commit: 9899727d7d53caed4c5b5e68176f7ed7f90a9438 (patch)
tree: 3d764007cf8e86cee23be969a2065a644b27f73d /ext/js
parent: 88e71f82232781a1bc16701ce4719d770222ec4c (diff)
Frequency dictionary sort (#1938)
* Add sortDictionary/sortDictionaryOrder options
* Update options
* Add API.getTermFrequencies
* Add settings
* Implement frequency dictionary sorting
* Update test
* Update test data
* Fix handling of undefined rank-based frequencies
Diffstat (limited to 'ext/js')
-rw-r--r-- ext/js/background/backend.js 11
-rw-r--r-- ext/js/comm/api.js 4
-rw-r--r-- ext/js/data/options-util.js 14
-rw-r--r-- ext/js/language/translator.js 112
-rw-r--r-- ext/js/pages/settings/settings-main.js 4
-rw-r--r-- ext/js/pages/settings/sort-frequency-dictionary-controller.js 169
6 files changed, 309 insertions, 5 deletions
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js
index b9e1f51b..e76f4cfe 100644
--- a/ext/js/background/backend.js
+++ b/ext/js/background/backend.js
@@ -125,7 +125,8 @@ class Backend {
['triggerDatabaseUpdated', {async: false, contentScript: true, handler: this._onApiTriggerDatabaseUpdated.bind(this)}],
['testMecab', {async: true, contentScript: true, handler: this._onApiTestMecab.bind(this)}],
['textHasJapaneseCharacters', {async: false, contentScript: true, handler: this._onApiTextHasJapaneseCharacters.bind(this)}],
- ['documentStart', {async: false, contentScript: true, handler: this._onApiDocumentStart.bind(this)}]
+ ['documentStart', {async: false, contentScript: true, handler: this._onApiDocumentStart.bind(this)}],
+ ['getTermFrequencies', {async: true, contentScript: true, handler: this._onApiGetTermFrequencies.bind(this)}]
]);
this._messageHandlersWithProgress = new Map([
]);
@@ -748,6 +749,10 @@ class Backend {
this._updateTabAccessibility(url, tab, frameId);
}
+ async _onApiGetTermFrequencies({termReadingList, dictionaries}) {
+ return await this._translator.getTermFrequencies(termReadingList, dictionaries);
+ }
+
// Command handlers
async _onCommandOpenSearchPage(params) {
@@ -1953,7 +1958,7 @@ class Backend {
const {wildcard} = details;
const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
const {
- general: {mainDictionary},
+ general: {mainDictionary, sortFrequencyDictionary, sortFrequencyDictionaryOrder},
scanning: {alphanumeric},
translation: {
convertHalfWidthCharacters,
@@ -1979,6 +1984,8 @@ class Backend {
return {
wildcard,
mainDictionary,
+ sortFrequencyDictionary,
+ sortFrequencyDictionaryOrder,
removeNonJapaneseCharacters: !alphanumeric,
convertHalfWidthCharacters,
convertNumericCharacters,
diff --git a/ext/js/comm/api.js b/ext/js/comm/api.js
index 3fa7c92b..cb2fef85 100644
--- a/ext/js/comm/api.js
+++ b/ext/js/comm/api.js
@@ -168,6 +168,10 @@ class API {
return this._invoke('textHasJapaneseCharacters', {text});
}
+ getTermFrequencies(termReadingList, dictionaries) {
+ return this._invoke('getTermFrequencies', {termReadingList, dictionaries});
+ }
+
// Utilities
_createActionPort(timeout=5000) {
diff --git a/ext/js/data/options-util.js b/ext/js/data/options-util.js
index 30ffadb1..c8ab2d01 100644
--- a/ext/js/data/options-util.js
+++ b/ext/js/data/options-util.js
@@ -463,7 +463,8 @@ class OptionsUtil {
{async: false, update: this._updateVersion11.bind(this)},
{async: true, update: this._updateVersion12.bind(this)},
{async: true, update: this._updateVersion13.bind(this)},
- {async: false, update: this._updateVersion14.bind(this)}
+ {async: false, update: this._updateVersion14.bind(this)},
+ {async: false, update: this._updateVersion15.bind(this)}
];
if (typeof targetVersion === 'number' && targetVersion < result.length) {
result.splice(targetVersion);
@@ -876,4 +877,15 @@ class OptionsUtil {
}
return options;
}
+
+ _updateVersion15(options) {
+ // Version 15 changes:
+ // Added general.sortFrequencyDictionary.
+ // Added general.sortFrequencyDictionaryOrder.
+ for (const profile of options.profiles) {
+ profile.options.general.sortFrequencyDictionary = null;
+ profile.options.general.sortFrequencyDictionaryOrder = 'descending';
+ }
+ return options;
+ }
}
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index 641c9d57..1abf9f4e 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -64,6 +64,8 @@ class Translator {
* {
* wildcard: (enum: null, 'prefix', 'suffix'),
* mainDictionary: (string),
+ * sortFrequencyDictionary: (null or string),
+ * sortFrequencyDictionaryOrder: (enum: 'ascending', 'descending'),
* removeNonJapaneseCharacters: (boolean),
* convertHalfWidthCharacters: (enum: 'false', 'true', 'variant'),
* convertNumericCharacters: (enum: 'false', 'true', 'variant'),
@@ -92,7 +94,7 @@ class Translator {
* @returns An object of the structure `{dictionaryEntries, originalTextLength}`.
*/
async findTerms(mode, text, options) {
- const {enabledDictionaryMap, excludeDictionaryDefinitions} = options;
+ const {enabledDictionaryMap, excludeDictionaryDefinitions, sortFrequencyDictionary, sortFrequencyDictionaryOrder} = options;
let {dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, enabledDictionaryMap, options);
switch (mode) {
@@ -115,6 +117,9 @@ class Translator {
await this._expandTermTags(dictionaryEntries);
}
+ if (sortFrequencyDictionary !== null) {
+ this._updateSortFrequencies(dictionaryEntries, sortFrequencyDictionary, sortFrequencyDictionaryOrder === 'ascending');
+ }
if (dictionaryEntries.length > 1) {
this._sortTermDictionaryEntries(dictionaryEntries);
}
@@ -176,6 +181,48 @@ class Translator {
return dictionaryEntries;
}
+ /**
+ * Gets a list of frequency information for a given list of term-reading pairs
+ * and a list of dictionaries.
+ * @param termReadingList An array of `{term, reading}` pairs. If reading is null,
+ * the reading won't be compared.
+ * @param dictionaries An array of dictionary names.
+ * @returns An array of objects with the format
+ * `{term, reading, dictionary, hasReading, frequency}`.
+ */
+ async getTermFrequencies(termReadingList, dictionaries) {
+ const dictionarySet = new Set();
+ for (const dictionary of dictionaries) {
+ dictionarySet.add(dictionary);
+ }
+
+ const termList = termReadingList.map(({term}) => term);
+ const metas = await this._database.findTermMetaBulk(termList, dictionarySet);
+
+ const results = [];
+ for (const {mode, data, dictionary, index} of metas) {
+ if (mode !== 'freq') { continue; }
+ let {term, reading} = termReadingList[index];
+ let frequency = data;
+ const hasReading = (data !== null && typeof data === 'object');
+ if (hasReading) {
+ if (data.reading !== reading) {
+ if (reading !== null) { continue; }
+ reading = data.reading;
+ }
+ frequency = data.frequency;
+ }
+ results.push({
+ term,
+ reading,
+ dictionary,
+ hasReading,
+ frequency
+ });
+ }
+ return results;
+ }
+
// Find terms internal implementation
async _findTermsInternal(text, enabledDictionaryMap, options) {
@@ -1035,7 +1082,20 @@ class Translator {
}
_createTermDefinition(index, headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries) {
- return {index, headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries};
+ return {
+ index,
+ headwordIndices,
+ dictionary,
+ dictionaryIndex,
+ dictionaryPriority,
+ id,
+ score,
+ frequencyOrder: 0,
+ sequences,
+ isPrimary,
+ tags,
+ entries
+ };
}
_createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) {
@@ -1052,6 +1112,7 @@ class Translator {
isPrimary,
inflections,
score,
+ frequencyOrder: 0,
dictionaryIndex,
dictionaryPriority,
sourceTermExactMatchCount,
@@ -1314,6 +1375,10 @@ class Translator {
i = v2.dictionaryPriority - v1.dictionaryPriority;
if (i !== 0) { return i; }
+ // Sort by frequency order
+ i = v1.frequencyOrder - v2.frequencyOrder;
+ if (i !== 0) { return i; }
+
// Sort by term score
i = v2.score - v1.score;
if (i !== 0) { return i; }
@@ -1345,6 +1410,10 @@ class Translator {
let i = v2.dictionaryPriority - v1.dictionaryPriority;
if (i !== 0) { return i; }
+ // Sort by frequency order
+ i = v1.frequencyOrder - v2.frequencyOrder;
+ if (i !== 0) { return i; }
+
// Sort by term score
i = v2.score - v1.score;
if (i !== 0) { return i; }
@@ -1416,4 +1485,43 @@ class Translator {
frequencies.sort(compare);
}
}
+
+ _updateSortFrequencies(dictionaryEntries, dictionary, ascending) {
+ const frequencyMap = new Map();
+ for (const dictionaryEntry of dictionaryEntries) {
+ const {definitions, frequencies} = dictionaryEntry;
+ let frequencyMin = Number.MAX_SAFE_INTEGER;
+ let frequencyMax = Number.MIN_SAFE_INTEGER;
+ for (const item of frequencies) {
+ if (item.dictionary !== dictionary) { continue; }
+ const {headwordIndex, frequency} = item;
+ if (typeof frequency !== 'number') { continue; }
+ frequencyMap.set(headwordIndex, frequency);
+ frequencyMin = Math.min(frequencyMin, frequency);
+ frequencyMax = Math.max(frequencyMax, frequency);
+ }
+ dictionaryEntry.frequencyOrder = (
+ frequencyMin <= frequencyMax ?
+ (ascending ? frequencyMin : -frequencyMax) :
+ (ascending ? Number.MAX_SAFE_INTEGER : 0)
+ );
+ for (const definition of definitions) {
+ frequencyMin = Number.MAX_SAFE_INTEGER;
+ frequencyMax = Number.MIN_SAFE_INTEGER;
+ const {headwordIndices} = definition;
+ for (const headwordIndex of headwordIndices) {
+ const frequency = frequencyMap.get(headwordIndex);
+ if (typeof frequency !== 'number') { continue; }
+ frequencyMin = Math.min(frequencyMin, frequency);
+ frequencyMax = Math.max(frequencyMax, frequency);
+ }
+ definition.frequencyOrder = (
+ frequencyMin <= frequencyMax ?
+ (ascending ? frequencyMin : -frequencyMax) :
+ (ascending ? Number.MAX_SAFE_INTEGER : 0)
+ );
+ }
+ frequencyMap.clear();
+ }
+ }
}
diff --git a/ext/js/pages/settings/settings-main.js b/ext/js/pages/settings/settings-main.js
index e8092112..73b5c22c 100644
--- a/ext/js/pages/settings/settings-main.js
+++ b/ext/js/pages/settings/settings-main.js
@@ -42,6 +42,7 @@
* SentenceTerminationCharactersController
* SettingsController
* SettingsDisplayController
+ * SortFrequencyDictionaryController
* StatusFooter
* StorageController
* TranslationTextReplacementsController
@@ -167,6 +168,9 @@ async function setupGenericSettingsController(genericSettingController) {
const collapsibleDictionaryController = new CollapsibleDictionaryController(settingsController);
collapsibleDictionaryController.prepare();
+ const sortFrequencyDictionaryController = new SortFrequencyDictionaryController(settingsController);
+ sortFrequencyDictionaryController.prepare();
+
await Promise.all(preparePromises);
document.documentElement.dataset.loaded = 'true';
diff --git a/ext/js/pages/settings/sort-frequency-dictionary-controller.js b/ext/js/pages/settings/sort-frequency-dictionary-controller.js
new file mode 100644
index 00000000..9f167ec1
--- /dev/null
+++ b/ext/js/pages/settings/sort-frequency-dictionary-controller.js
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2021 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+class SortFrequencyDictionaryController {
+ constructor(settingsController) {
+ this._settingsController = settingsController;
+ this._sortFrequencyDictionarySelect = null;
+ this._sortFrequencyDictionaryOrderSelect = null;
+ this._sortFrequencyDictionaryOrderAutoButton = null;
+ this._sortFrequencyDictionaryOrderContainerNode = null;
+ this._getDictionaryInfoToken = null;
+ }
+
+ async prepare() {
+ this._sortFrequencyDictionarySelect = document.querySelector('#sort-frequency-dictionary');
+ this._sortFrequencyDictionaryOrderSelect = document.querySelector('#sort-frequency-dictionary-order');
+ this._sortFrequencyDictionaryOrderAutoButton = document.querySelector('#sort-frequency-dictionary-order-auto');
+ this._sortFrequencyDictionaryOrderContainerNode = document.querySelector('#sort-frequency-dictionary-order-container');
+
+ await this._onDatabaseUpdated();
+
+ yomichan.on('databaseUpdated', this._onDatabaseUpdated.bind(this));
+ this._settingsController.on('optionsChanged', this._onOptionsChanged.bind(this));
+ this._sortFrequencyDictionarySelect.addEventListener('change', this._onSortFrequencyDictionarySelectChange.bind(this));
+ this._sortFrequencyDictionaryOrderSelect.addEventListener('change', this._onSortFrequencyDictionaryOrderSelectChange.bind(this));
+ this._sortFrequencyDictionaryOrderAutoButton.addEventListener('click', this._onSortFrequencyDictionaryOrderAutoButtonClick.bind(this));
+ }
+
+ // Private
+
+ async _onDatabaseUpdated() {
+ const token = {};
+ this._getDictionaryInfoToken = token;
+ const dictionaries = await this._settingsController.getDictionaryInfo();
+ if (this._getDictionaryInfoToken !== token) { return; }
+ this._getDictionaryInfoToken = null;
+
+ this._updateDictionaryOptions(dictionaries);
+
+ const options = await this._settingsController.getOptions();
+ this._onOptionsChanged({options});
+ }
+
+ _onOptionsChanged({options}) {
+ const {sortFrequencyDictionary, sortFrequencyDictionaryOrder} = options.general;
+ this._sortFrequencyDictionarySelect.value = (sortFrequencyDictionary !== null ? sortFrequencyDictionary : '');
+ this._sortFrequencyDictionaryOrderSelect.value = sortFrequencyDictionaryOrder;
+ this._sortFrequencyDictionaryOrderContainerNode.hidden = (sortFrequencyDictionary === null);
+ }
+
+ _onSortFrequencyDictionarySelectChange() {
+ let {value} = this._sortFrequencyDictionarySelect;
+ if (value === '') { value = null; }
+ this._setSortFrequencyDictionaryValue(value);
+ }
+
+ _onSortFrequencyDictionaryOrderSelectChange() {
+ const {value} = this._sortFrequencyDictionaryOrderSelect;
+ this._setSortFrequencyDictionaryOrderValue(value);
+ }
+
+ _onSortFrequencyDictionaryOrderAutoButtonClick() {
+ const {value} = this._sortFrequencyDictionarySelect;
+ if (value === '') { return; }
+ this._autoUpdateOrder(value);
+ }
+
+ _updateDictionaryOptions(dictionaries) {
+ const fragment = document.createDocumentFragment();
+ let option = document.createElement('option');
+ option.value = '';
+ option.textContent = 'None';
+ fragment.appendChild(option);
+ for (const {title, counts} of dictionaries) {
+ if (this._dictionaryHasNoFrequencies(counts)) { continue; }
+ option = document.createElement('option');
+ option.value = title;
+ option.textContent = title;
+ fragment.appendChild(option);
+ }
+ this._sortFrequencyDictionarySelect.textContent = '';
+ this._sortFrequencyDictionarySelect.appendChild(fragment);
+ }
+
+ async _setSortFrequencyDictionaryValue(value) {
+ this._sortFrequencyDictionaryOrderContainerNode.hidden = (value === null);
+ await this._settingsController.setProfileSetting('general.sortFrequencyDictionary', value);
+ if (value !== null) {
+ await this._autoUpdateOrder(value);
+ }
+ }
+
+ async _setSortFrequencyDictionaryOrderValue(value) {
+ await this._settingsController.setProfileSetting('general.sortFrequencyDictionaryOrder', value);
+ }
+
+ async _autoUpdateOrder(dictionary) {
+ const order = await this._getFrequencyOrder(dictionary);
+ if (order === 0) { return; }
+ const value = (order > 0 ? 'descending' : 'ascending');
+ this._sortFrequencyDictionaryOrderSelect.value = value;
+ await this._setSortFrequencyDictionaryOrderValue(value);
+ }
+
+ async _getFrequencyOrder(dictionary) {
+ const moreCommonTerms = ['来る', '言う', '出る', '入る', '方', '男', '女', '今', '何', '時'];
+ const lessCommonTerms = ['行なう', '論じる', '過す', '行方', '人口', '猫', '犬', '滝', '理', '暁'];
+ const terms = [...moreCommonTerms, ...lessCommonTerms];
+
+ const frequencies = await yomichan.api.getTermFrequencies(
+ terms.map((term) => ({term, reading: null})),
+ [dictionary]
+ );
+
+ const termDetails = new Map();
+ const moreCommonTermDetails = [];
+ const lessCommonTermDetails = [];
+ for (const term of moreCommonTerms) {
+ const details = {hasValue: false, minValue: Number.MAX_SAFE_INTEGER, maxValue: Number.MIN_SAFE_INTEGER};
+ termDetails.set(term, details);
+ moreCommonTermDetails.push(details);
+ }
+ for (const term of lessCommonTerms) {
+ const details = {hasValue: false, minValue: Number.MAX_SAFE_INTEGER, maxValue: Number.MIN_SAFE_INTEGER};
+ termDetails.set(term, details);
+ lessCommonTermDetails.push(details);
+ }
+
+ for (const {term, frequency} of frequencies) {
+ if (typeof frequency !== 'number') { continue; }
+ const details = termDetails.get(term);
+ if (typeof details === 'undefined') { continue; }
+ details.minValue = Math.min(details.minValue, frequency);
+ details.maxValue = Math.max(details.maxValue, frequency);
+ details.hasValue = true;
+ }
+
+ let result = 0;
+ for (const details1 of moreCommonTermDetails) {
+ if (!details1.hasValue) { continue; }
+ for (const details2 of lessCommonTermDetails) {
+ if (!details2.hasValue) { continue; }
+ result += Math.sign(details1.maxValue - details2.minValue) + Math.sign(details1.minValue - details2.maxValue);
+ }
+ }
+ return Math.sign(result);
+ }
+
+ _dictionaryHasNoFrequencies(counts) {
+ if (typeof counts !== 'object' || counts === null) { return false; }
+ const {termMeta} = counts;
+ if (typeof termMeta !== 'object' || termMeta === null) { return false; }
+ return termMeta.freq <= 0;
+ }
+}