diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-09-26 11:08:16 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-26 11:08:16 -0400 |
commit | 9899727d7d53caed4c5b5e68176f7ed7f90a9438 (patch) | |
tree | 3d764007cf8e86cee23be969a2065a644b27f73d /ext | |
parent | 88e71f82232781a1bc16701ce4719d770222ec4c (diff) |
Frequency dictionary sort (#1938)
* Add sortDictionary/sortDictionaryOrder options
* Update options
* Add API.getTermFrequencies
* Add settings
* Implement frequency dictionary sorting
* Update test
* Update test data
* Fix handling of undefined rank-based frequencies
Diffstat (limited to 'ext')
-rw-r--r-- | ext/css/settings.css | 4 | ||||
-rw-r--r-- | ext/data/schemas/options-schema.json | 13 | ||||
-rw-r--r-- | ext/js/background/backend.js | 11 | ||||
-rw-r--r-- | ext/js/comm/api.js | 4 | ||||
-rw-r--r-- | ext/js/data/options-util.js | 14 | ||||
-rw-r--r-- | ext/js/language/translator.js | 112 | ||||
-rw-r--r-- | ext/js/pages/settings/settings-main.js | 4 | ||||
-rw-r--r-- | ext/js/pages/settings/sort-frequency-dictionary-controller.js | 169 | ||||
-rw-r--r-- | ext/settings.html | 68 |
9 files changed, 393 insertions, 6 deletions
diff --git a/ext/css/settings.css b/ext/css/settings.css index f05ab5e0..cedd9f40 100644 --- a/ext/css/settings.css +++ b/ext/css/settings.css @@ -2259,10 +2259,14 @@ input[type=number].dictionary-priority { } .horizontal-flex.horizontal-flex-nowrap { flex-wrap: nowrap; + margin-left: 0; } .horizontal-flex>* { margin-left: 0.375em; } +.horizontal-flex.horizontal-flex-nowrap>*:first-child { + margin-left: 0; +} .horizontal-flex-fill { flex-grow: 1; } diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json index d1fb28ad..2dd3981d 100644 --- a/ext/data/schemas/options-schema.json +++ b/ext/data/schemas/options-schema.json @@ -116,7 +116,9 @@ "popupActionBarVisibility", "popupActionBarLocation", "frequencyDisplayMode", - "termDisplayMode" + "termDisplayMode", + "sortFrequencyDictionary", + "sortFrequencyDictionaryOrder" ], "properties": { "enable": { @@ -284,6 +286,15 @@ "type": "string", "enum": ["ruby", "ruby-and-reading", "term-and-reading"], "default": "ruby" + }, + "sortFrequencyDictionary": { + "type": ["string", "null"], + "default": null + }, + "sortFrequencyDictionaryOrder": { + "type": "string", + "enum": ["ascending", "descending"], + "default": "descending" } } }, diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index b9e1f51b..e76f4cfe 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -125,7 +125,8 @@ class Backend { ['triggerDatabaseUpdated', {async: false, contentScript: true, handler: this._onApiTriggerDatabaseUpdated.bind(this)}], ['testMecab', {async: true, contentScript: true, handler: this._onApiTestMecab.bind(this)}], ['textHasJapaneseCharacters', {async: false, contentScript: true, handler: this._onApiTextHasJapaneseCharacters.bind(this)}], - ['documentStart', {async: false, contentScript: true, handler: this._onApiDocumentStart.bind(this)}] + ['documentStart', {async: false, contentScript: true, handler: this._onApiDocumentStart.bind(this)}], + ['getTermFrequencies', {async: true, contentScript: true, handler: this._onApiGetTermFrequencies.bind(this)}] ]); this._messageHandlersWithProgress = new Map([ ]); @@ -748,6 +749,10 @@ class Backend { this._updateTabAccessibility(url, tab, frameId); } + async _onApiGetTermFrequencies({termReadingList, dictionaries}) { + return await this._translator.getTermFrequencies(termReadingList, dictionaries); + } + // Command handlers async _onCommandOpenSearchPage(params) { @@ -1953,7 +1958,7 @@ class Backend { const {wildcard} = details; const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options); const { - general: {mainDictionary}, + general: {mainDictionary, sortFrequencyDictionary, sortFrequencyDictionaryOrder}, scanning: {alphanumeric}, translation: { convertHalfWidthCharacters, @@ -1979,6 +1984,8 @@ class Backend { return { wildcard, mainDictionary, + sortFrequencyDictionary, + sortFrequencyDictionaryOrder, removeNonJapaneseCharacters: !alphanumeric, convertHalfWidthCharacters, convertNumericCharacters, diff --git a/ext/js/comm/api.js b/ext/js/comm/api.js index 3fa7c92b..cb2fef85 100644 --- a/ext/js/comm/api.js +++ b/ext/js/comm/api.js @@ -168,6 +168,10 @@ class API { return this._invoke('textHasJapaneseCharacters', {text}); } + getTermFrequencies(termReadingList, dictionaries) { + return this._invoke('getTermFrequencies', {termReadingList, dictionaries}); + } + // Utilities _createActionPort(timeout=5000) { diff --git a/ext/js/data/options-util.js b/ext/js/data/options-util.js index 30ffadb1..c8ab2d01 100644 --- a/ext/js/data/options-util.js +++ b/ext/js/data/options-util.js @@ -463,7 +463,8 @@ class OptionsUtil { {async: false, update: this._updateVersion11.bind(this)}, {async: true, update: this._updateVersion12.bind(this)}, {async: true, update: this._updateVersion13.bind(this)}, - {async: false, update: this._updateVersion14.bind(this)} + {async: false, update: this._updateVersion14.bind(this)}, + {async: false, update: this._updateVersion15.bind(this)} ]; if (typeof targetVersion === 'number' && targetVersion < result.length) { result.splice(targetVersion); @@ -876,4 +877,15 @@ class OptionsUtil { } return options; } + + _updateVersion15(options) { + // Version 15 changes: + // Added general.sortFrequencyDictionary. + // Added general.sortFrequencyDictionaryOrder. + for (const profile of options.profiles) { + profile.options.general.sortFrequencyDictionary = null; + profile.options.general.sortFrequencyDictionaryOrder = 'descending'; + } + return options; + } } diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 641c9d57..1abf9f4e 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -64,6 +64,8 @@ class Translator { * { * wildcard: (enum: null, 'prefix', 'suffix'), * mainDictionary: (string), + * sortFrequencyDictionary: (null or string), + * sortFrequencyDictionaryOrder: (enum: 'ascending', 'descending'), * removeNonJapaneseCharacters: (boolean), * convertHalfWidthCharacters: (enum: 'false', 'true', 'variant'), * convertNumericCharacters: (enum: 'false', 'true', 'variant'), @@ -92,7 +94,7 @@ class Translator { * @returns An object of the structure `{dictionaryEntries, originalTextLength}`. */ async findTerms(mode, text, options) { - const {enabledDictionaryMap, excludeDictionaryDefinitions} = options; + const {enabledDictionaryMap, excludeDictionaryDefinitions, sortFrequencyDictionary, sortFrequencyDictionaryOrder} = options; let {dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, enabledDictionaryMap, options); switch (mode) { @@ -115,6 +117,9 @@ class Translator { await this._expandTermTags(dictionaryEntries); } + if (sortFrequencyDictionary !== null) { + this._updateSortFrequencies(dictionaryEntries, sortFrequencyDictionary, sortFrequencyDictionaryOrder === 'ascending'); + } if (dictionaryEntries.length > 1) { this._sortTermDictionaryEntries(dictionaryEntries); } @@ -176,6 +181,48 @@ class Translator { return dictionaryEntries; } + /** + * Gets a list of frequency information for a given list of term-reading pairs + * and a list of dictionaries. + * @param termReadingList An array of `{term, reading}` pairs. If reading is null, + * the reading won't be compared. + * @param dictionaries An array of dictionary names. + * @returns An array of objects with the format + * `{term, reading, dictionary, hasReading, frequency}`. + */ + async getTermFrequencies(termReadingList, dictionaries) { + const dictionarySet = new Set(); + for (const dictionary of dictionaries) { + dictionarySet.add(dictionary); + } + + const termList = termReadingList.map(({term}) => term); + const metas = await this._database.findTermMetaBulk(termList, dictionarySet); + + const results = []; + for (const {mode, data, dictionary, index} of metas) { + if (mode !== 'freq') { continue; } + let {term, reading} = termReadingList[index]; + let frequency = data; + const hasReading = (data !== null && typeof data === 'object'); + if (hasReading) { + if (data.reading !== reading) { + if (reading !== null) { continue; } + reading = data.reading; + } + frequency = data.frequency; + } + results.push({ + term, + reading, + dictionary, + hasReading, + frequency + }); + } + return results; + } + // Find terms internal implementation async _findTermsInternal(text, enabledDictionaryMap, options) { @@ -1035,7 +1082,20 @@ class Translator { } _createTermDefinition(index, headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries) { - return {index, headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries}; + return { + index, + headwordIndices, + dictionary, + dictionaryIndex, + dictionaryPriority, + id, + score, + frequencyOrder: 0, + sequences, + isPrimary, + tags, + entries + }; } _createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) { @@ -1052,6 +1112,7 @@ class Translator { isPrimary, inflections, score, + frequencyOrder: 0, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, @@ -1314,6 +1375,10 @@ class Translator { i = v2.dictionaryPriority - v1.dictionaryPriority; if (i !== 0) { return i; } + // Sort by frequency order + i = v1.frequencyOrder - v2.frequencyOrder; + if (i !== 0) { return i; } + // Sort by term score i = v2.score - v1.score; if (i !== 0) { return i; } @@ -1345,6 +1410,10 @@ class Translator { let i = v2.dictionaryPriority - v1.dictionaryPriority; if (i !== 0) { return i; } + // Sort by frequency order + i = v1.frequencyOrder - v2.frequencyOrder; + if (i !== 0) { return i; } + // Sort by term score i = v2.score - v1.score; if (i !== 0) { return i; } @@ -1416,4 +1485,43 @@ class Translator { frequencies.sort(compare); } } + + _updateSortFrequencies(dictionaryEntries, dictionary, ascending) { + const frequencyMap = new Map(); + for (const dictionaryEntry of dictionaryEntries) { + const {definitions, frequencies} = dictionaryEntry; + let frequencyMin = Number.MAX_SAFE_INTEGER; + let frequencyMax = Number.MIN_SAFE_INTEGER; + for (const item of frequencies) { + if (item.dictionary !== dictionary) { continue; } + const {headwordIndex, frequency} = item; + if (typeof frequency !== 'number') { continue; } + frequencyMap.set(headwordIndex, frequency); + frequencyMin = Math.min(frequencyMin, frequency); + frequencyMax = Math.max(frequencyMax, frequency); + } + dictionaryEntry.frequencyOrder = ( + frequencyMin <= frequencyMax ? + (ascending ? frequencyMin : -frequencyMax) : + (ascending ? Number.MAX_SAFE_INTEGER : 0) + ); + for (const definition of definitions) { + frequencyMin = Number.MAX_SAFE_INTEGER; + frequencyMax = Number.MIN_SAFE_INTEGER; + const {headwordIndices} = definition; + for (const headwordIndex of headwordIndices) { + const frequency = frequencyMap.get(headwordIndex); + if (typeof frequency !== 'number') { continue; } + frequencyMin = Math.min(frequencyMin, frequency); + frequencyMax = Math.max(frequencyMax, frequency); + } + definition.frequencyOrder = ( + frequencyMin <= frequencyMax ? + (ascending ? frequencyMin : -frequencyMax) : + (ascending ? Number.MAX_SAFE_INTEGER : 0) + ); + } + frequencyMap.clear(); + } + } } diff --git a/ext/js/pages/settings/settings-main.js b/ext/js/pages/settings/settings-main.js index e8092112..73b5c22c 100644 --- a/ext/js/pages/settings/settings-main.js +++ b/ext/js/pages/settings/settings-main.js @@ -42,6 +42,7 @@ * SentenceTerminationCharactersController * SettingsController * SettingsDisplayController + * SortFrequencyDictionaryController * StatusFooter * StorageController * TranslationTextReplacementsController @@ -167,6 +168,9 @@ async function setupGenericSettingsController(genericSettingController) { const collapsibleDictionaryController = new CollapsibleDictionaryController(settingsController); collapsibleDictionaryController.prepare(); + const sortFrequencyDictionaryController = new SortFrequencyDictionaryController(settingsController); + sortFrequencyDictionaryController.prepare(); + await Promise.all(preparePromises); document.documentElement.dataset.loaded = 'true'; diff --git a/ext/js/pages/settings/sort-frequency-dictionary-controller.js b/ext/js/pages/settings/sort-frequency-dictionary-controller.js new file mode 100644 index 00000000..9f167ec1 --- /dev/null +++ b/ext/js/pages/settings/sort-frequency-dictionary-controller.js @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2021 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +class SortFrequencyDictionaryController { + constructor(settingsController) { + this._settingsController = settingsController; + this._sortFrequencyDictionarySelect = null; + this._sortFrequencyDictionaryOrderSelect = null; + this._sortFrequencyDictionaryOrderAutoButton = null; + this._sortFrequencyDictionaryOrderContainerNode = null; + this._getDictionaryInfoToken = null; + } + + async prepare() { + this._sortFrequencyDictionarySelect = document.querySelector('#sort-frequency-dictionary'); + this._sortFrequencyDictionaryOrderSelect = document.querySelector('#sort-frequency-dictionary-order'); + this._sortFrequencyDictionaryOrderAutoButton = document.querySelector('#sort-frequency-dictionary-order-auto'); + this._sortFrequencyDictionaryOrderContainerNode = document.querySelector('#sort-frequency-dictionary-order-container'); + + await this._onDatabaseUpdated(); + + yomichan.on('databaseUpdated', this._onDatabaseUpdated.bind(this)); + this._settingsController.on('optionsChanged', this._onOptionsChanged.bind(this)); + this._sortFrequencyDictionarySelect.addEventListener('change', this._onSortFrequencyDictionarySelectChange.bind(this)); + this._sortFrequencyDictionaryOrderSelect.addEventListener('change', this._onSortFrequencyDictionaryOrderSelectChange.bind(this)); + this._sortFrequencyDictionaryOrderAutoButton.addEventListener('click', this._onSortFrequencyDictionaryOrderAutoButtonClick.bind(this)); + } + + // Private + + async _onDatabaseUpdated() { + const token = {}; + this._getDictionaryInfoToken = token; + const dictionaries = await this._settingsController.getDictionaryInfo(); + if (this._getDictionaryInfoToken !== token) { return; } + this._getDictionaryInfoToken = null; + + this._updateDictionaryOptions(dictionaries); + + const options = await this._settingsController.getOptions(); + this._onOptionsChanged({options}); + } + + _onOptionsChanged({options}) { + const {sortFrequencyDictionary, sortFrequencyDictionaryOrder} = options.general; + this._sortFrequencyDictionarySelect.value = (sortFrequencyDictionary !== null ? sortFrequencyDictionary : ''); + this._sortFrequencyDictionaryOrderSelect.value = sortFrequencyDictionaryOrder; + this._sortFrequencyDictionaryOrderContainerNode.hidden = (sortFrequencyDictionary === null); + } + + _onSortFrequencyDictionarySelectChange() { + let {value} = this._sortFrequencyDictionarySelect; + if (value === '') { value = null; } + this._setSortFrequencyDictionaryValue(value); + } + + _onSortFrequencyDictionaryOrderSelectChange() { + const {value} = this._sortFrequencyDictionaryOrderSelect; + this._setSortFrequencyDictionaryOrderValue(value); + } + + _onSortFrequencyDictionaryOrderAutoButtonClick() { + const {value} = this._sortFrequencyDictionarySelect; + if (value === '') { return; } + this._autoUpdateOrder(value); + } + + _updateDictionaryOptions(dictionaries) { + const fragment = document.createDocumentFragment(); + let option = document.createElement('option'); + option.value = ''; + option.textContent = 'None'; + fragment.appendChild(option); + for (const {title, counts} of dictionaries) { + if (this._dictionaryHasNoFrequencies(counts)) { continue; } + option = document.createElement('option'); + option.value = title; + option.textContent = title; + fragment.appendChild(option); + } + this._sortFrequencyDictionarySelect.textContent = ''; + this._sortFrequencyDictionarySelect.appendChild(fragment); + } + + async _setSortFrequencyDictionaryValue(value) { + this._sortFrequencyDictionaryOrderContainerNode.hidden = (value === null); + await this._settingsController.setProfileSetting('general.sortFrequencyDictionary', value); + if (value !== null) { + await this._autoUpdateOrder(value); + } + } + + async _setSortFrequencyDictionaryOrderValue(value) { + await this._settingsController.setProfileSetting('general.sortFrequencyDictionaryOrder', value); + } + + async _autoUpdateOrder(dictionary) { + const order = await this._getFrequencyOrder(dictionary); + if (order === 0) { return; } + const value = (order > 0 ? 'descending' : 'ascending'); + this._sortFrequencyDictionaryOrderSelect.value = value; + await this._setSortFrequencyDictionaryOrderValue(value); + } + + async _getFrequencyOrder(dictionary) { + const moreCommonTerms = ['来る', '言う', '出る', '入る', '方', '男', '女', '今', '何', '時']; + const lessCommonTerms = ['行なう', '論じる', '過す', '行方', '人口', '猫', '犬', '滝', '理', '暁']; + const terms = [...moreCommonTerms, ...lessCommonTerms]; + + const frequencies = await yomichan.api.getTermFrequencies( + terms.map((term) => ({term, reading: null})), + [dictionary] + ); + + const termDetails = new Map(); + const moreCommonTermDetails = []; + const lessCommonTermDetails = []; + for (const term of moreCommonTerms) { + const details = {hasValue: false, minValue: Number.MAX_SAFE_INTEGER, maxValue: Number.MIN_SAFE_INTEGER}; + termDetails.set(term, details); + moreCommonTermDetails.push(details); + } + for (const term of lessCommonTerms) { + const details = {hasValue: false, minValue: Number.MAX_SAFE_INTEGER, maxValue: Number.MIN_SAFE_INTEGER}; + termDetails.set(term, details); + lessCommonTermDetails.push(details); + } + + for (const {term, frequency} of frequencies) { + if (typeof frequency !== 'number') { continue; } + const details = termDetails.get(term); + if (typeof details === 'undefined') { continue; } + details.minValue = Math.min(details.minValue, frequency); + details.maxValue = Math.max(details.maxValue, frequency); + details.hasValue = true; + } + + let result = 0; + for (const details1 of moreCommonTermDetails) { + if (!details1.hasValue) { continue; } + for (const details2 of lessCommonTermDetails) { + if (!details2.hasValue) { continue; } + result += Math.sign(details1.maxValue - details2.minValue) + Math.sign(details1.minValue - details2.maxValue); + } + } + return Math.sign(result); + } + + _dictionaryHasNoFrequencies(counts) { + if (typeof counts !== 'object' || counts === null) { return false; } + const {termMeta} = counts; + if (typeof termMeta !== 'object' || termMeta === null) { return false; } + return termMeta.freq <= 0; + } +} diff --git a/ext/settings.html b/ext/settings.html index 12ea7629..2f9d3fb3 100644 --- a/ext/settings.html +++ b/ext/settings.html @@ -285,6 +285,73 @@ </div></div> </div> </div> + <div class="settings-item advanced-only"> + <div class="settings-item-inner settings-item-inner-wrappable"> + <div class="settings-item-left"> + <div class="settings-item-label">Frequency sorting dictionary</div> + <div class="settings-item-description"> + Sort results using a frequency dictionary. + <a tabindex="0" class="more-toggle more-only" data-parent-distance="4">More…</a> + </div> + </div> + <div class="settings-item-right"> + <select id="sort-frequency-dictionary"></select> + </div> + </div> + <div class="settings-item-children more" hidden> + <p> + Enabling this option will sort search results using a specific dictionary. + This can be beneficial when using multiple dictionaries which may not have + consistent sorting information. + </p> + <p> + <a tabindex="0" class="more-toggle" data-parent-distance="3">Less…</a> + </p> + </div> + <div class="settings-item-children settings-item-children-group" id="sort-frequency-dictionary-order-container" hidden> + <div class="settings-item"> + <div class="settings-item-inner settings-item-inner-wrappable"> + <div class="settings-item-left"> + <div class="settings-item-label"> + Frequency sorting mode + <a tabindex="0" class="more-toggle more-only" data-parent-distance="4">(?)</a> + </div> + </div> + <div class="settings-item-right"> + <div class="horizontal-flex horizontal-flex-nowrap"> + <button class="low-emphasis" id="sort-frequency-dictionary-order-auto">Auto</button> + <select id="sort-frequency-dictionary-order"> + <option value="descending">Occurrence-based</option> + <option value="ascending">Rank-based</option> + </select> + </div> + </div> + </div> + <div class="settings-item-children more" hidden> + <p> + Dictionary frequency data can be represented in one of two ways: + </p> + <ul> + <li> + <em>Occurrence-based</em>, where the frequency corresponds to a number of occurrences. + Large values indicate a more common term. + </li> + <li> + <em>Rank-based</em>, where the frequency value corresponds to a ranking index. + Smaller values indicate a more common term. + </li> + </ul> + <p> + The correct mode can be determined based on the contents of the dictionary; + the <em>Auto</em> button attempts to auto-detect the correct value. + </p> + <p> + <a tabindex="0" class="more-toggle" data-parent-distance="3">Less…</a> + </p> + </div> + </div> + </div> + </div> <div class="settings-item advanced-only"><div class="settings-item-inner settings-item-inner-wrappable"> <div class="settings-item-left"> <div class="settings-item-label">Maximum number of results</div> @@ -3516,6 +3583,7 @@ <script src="/js/pages/settings/sentence-termination-characters-controller.js"></script> <script src="/js/pages/settings/settings-controller.js"></script> <script src="/js/pages/settings/settings-display-controller.js"></script> +<script src="/js/pages/settings/sort-frequency-dictionary-controller.js"></script> <script src="/js/pages/settings/status-footer.js"></script> <script src="/js/pages/settings/storage-controller.js"></script> <script src="/js/pages/settings/translation-text-replacements-controller.js"></script> |