diff options
Diffstat (limited to 'ext/js/language/sandbox/dictionary-data-util.js')
-rw-r--r-- | ext/js/language/sandbox/dictionary-data-util.js | 391 |
1 files changed, 0 insertions, 391 deletions
diff --git a/ext/js/language/sandbox/dictionary-data-util.js b/ext/js/language/sandbox/dictionary-data-util.js deleted file mode 100644 index a54b043b..00000000 --- a/ext/js/language/sandbox/dictionary-data-util.js +++ /dev/null @@ -1,391 +0,0 @@ -/* - * Copyright (C) 2023 Yomitan Authors - * Copyright (C) 2020-2022 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -export class DictionaryDataUtil { - /** - * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry - * @returns {import('dictionary-data-util').TagGroup[]} - */ - static groupTermTags(dictionaryEntry) { - const {headwords} = dictionaryEntry; - const headwordCount = headwords.length; - const uniqueCheck = (headwordCount > 1); - const resultsIndexMap = new Map(); - const results = []; - for (let i = 0; i < headwordCount; ++i) { - const {tags} = headwords[i]; - for (const tag of tags) { - if (uniqueCheck) { - const {name, category, content, dictionaries} = tag; - const key = this._createMapKey([name, category, content, dictionaries]); - const index = resultsIndexMap.get(key); - if (typeof index !== 'undefined') { - const existingItem = results[index]; - existingItem.headwordIndices.push(i); - continue; - } - resultsIndexMap.set(key, results.length); - } - - const item = {tag, headwordIndices: [i]}; - results.push(item); - } - } - return results; - } - - /** - * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry - * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').TermFrequency>[]} - */ - static groupTermFrequencies(dictionaryEntry) { - const {headwords, frequencies: sourceFrequencies} = dictionaryEntry; - - /** @type {import('dictionary-data-util').TermFrequenciesMap1} */ - const map1 = new Map(); - for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of sourceFrequencies) { - const {term, reading} = headwords[headwordIndex]; - - let map2 = map1.get(dictionary); - if (typeof map2 === 'undefined') { - map2 = new Map(); - map1.set(dictionary, map2); - } - - const readingKey = hasReading ? reading : null; - const key = this._createMapKey([term, readingKey]); - let frequencyData = map2.get(key); - if (typeof frequencyData === 'undefined') { - frequencyData = {term, reading: readingKey, values: new Map()}; - map2.set(key, frequencyData); - } - - frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); - } - - const results = []; - for (const [dictionary, map2] of map1.entries()) { - const frequencies = []; - for (const {term, reading, values} of map2.values()) { - frequencies.push({ - term, - reading, - values: [...values.values()] - }); - } - results.push({dictionary, frequencies}); - } - return results; - } - - /** - * @param {import('dictionary').KanjiFrequency[]} sourceFrequencies - * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').KanjiFrequency>[]} - */ - static groupKanjiFrequencies(sourceFrequencies) { - /** @type {import('dictionary-data-util').KanjiFrequenciesMap1} */ - const map1 = new Map(); - for (const {dictionary, character, frequency, displayValue} of sourceFrequencies) { - let map2 = map1.get(dictionary); - if (typeof map2 === 'undefined') { - map2 = new Map(); - map1.set(dictionary, map2); - } - - let frequencyData = map2.get(character); - if (typeof frequencyData === 'undefined') { - frequencyData = {character, values: new Map()}; - map2.set(character, frequencyData); - } - - frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); - } - - const results = []; - for (const [dictionary, map2] of map1.entries()) { - const frequencies = []; - for (const {character, values} of map2.values()) { - frequencies.push({ - character, - values: [...values.values()] - }); - } - results.push({dictionary, frequencies}); - } - return results; - } - - /** - * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry - * @returns {import('dictionary-data-util').DictionaryGroupedPronunciations[]} - */ - static getGroupedPronunciations(dictionaryEntry) { - const {headwords, pronunciations} = dictionaryEntry; - - const allTerms = new Set(); - const allReadings = new Set(); - for (const {term, reading} of headwords) { - allTerms.add(term); - allReadings.add(reading); - } - - /** @type {Map<string, import('dictionary-data-util').GroupedPronunciationInternal[]>} */ - const groupedPronunciationsMap = new Map(); - for (const {headwordIndex, dictionary, pitches} of pronunciations) { - const {term, reading} = headwords[headwordIndex]; - let dictionaryGroupedPronunciationList = groupedPronunciationsMap.get(dictionary); - if (typeof dictionaryGroupedPronunciationList === 'undefined') { - dictionaryGroupedPronunciationList = []; - groupedPronunciationsMap.set(dictionary, dictionaryGroupedPronunciationList); - } - for (const {position, nasalPositions, devoicePositions, tags} of pitches) { - let groupedPronunciation = this._findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, dictionaryGroupedPronunciationList); - if (groupedPronunciation === null) { - groupedPronunciation = { - terms: new Set(), - reading, - position, - nasalPositions, - devoicePositions, - tags - }; - dictionaryGroupedPronunciationList.push(groupedPronunciation); - } - groupedPronunciation.terms.add(term); - } - } - - /** @type {import('dictionary-data-util').DictionaryGroupedPronunciations[]} */ - const results2 = []; - const multipleReadings = (allReadings.size > 1); - for (const [dictionary, dictionaryGroupedPronunciationList] of groupedPronunciationsMap.entries()) { - /** @type {import('dictionary-data-util').GroupedPronunciation[]} */ - const pronunciations2 = []; - for (const groupedPronunciation of dictionaryGroupedPronunciationList) { - const {terms, reading, position, nasalPositions, devoicePositions, tags} = groupedPronunciation; - const exclusiveTerms = !this._areSetsEqual(terms, allTerms) ? this._getSetIntersection(terms, allTerms) : []; - const exclusiveReadings = []; - if (multipleReadings) { - exclusiveReadings.push(reading); - } - pronunciations2.push({ - terms: [...terms], - reading, - position, - nasalPositions, - devoicePositions, - tags, - exclusiveTerms, - exclusiveReadings - }); - } - results2.push({dictionary, pronunciations: pronunciations2}); - } - return results2; - } - - /** - * @param {import('dictionary').Tag[]|import('anki-templates').Tag[]} termTags - * @returns {import('dictionary-data-util').TermFrequencyType} - */ - static getTermFrequency(termTags) { - let totalScore = 0; - for (const {score} of termTags) { - totalScore += score; - } - if (totalScore > 0) { - return 'popular'; - } else if (totalScore < 0) { - return 'rare'; - } else { - return 'normal'; - } - } - - /** - * @param {import('dictionary').TermHeadword[]} headwords - * @param {number[]} headwordIndices - * @param {Set<string>} allTermsSet - * @param {Set<string>} allReadingsSet - * @returns {string[]} - */ - static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) { - if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; } - - /** @type {Set<string>} */ - const terms = new Set(); - /** @type {Set<string>} */ - const readings = new Set(); - for (const headwordIndex of headwordIndices) { - const {term, reading} = headwords[headwordIndex]; - terms.add(term); - readings.add(reading); - } - - /** @type {string[]} */ - const disambiguations = []; - const addTerms = !this._areSetsEqual(terms, allTermsSet); - const addReadings = !this._areSetsEqual(readings, allReadingsSet); - if (addTerms) { - disambiguations.push(...this._getSetIntersection(terms, allTermsSet)); - } - if (addReadings) { - if (addTerms) { - for (const term of terms) { - readings.delete(term); - } - } - disambiguations.push(...this._getSetIntersection(readings, allReadingsSet)); - } - return disambiguations; - } - - /** - * @param {string[]} wordClasses - * @returns {boolean} - */ - static isNonNounVerbOrAdjective(wordClasses) { - let isVerbOrAdjective = false; - let isSuruVerb = false; - let isNoun = false; - for (const wordClass of wordClasses) { - switch (wordClass) { - case 'v1': - case 'v5': - case 'vk': - case 'vz': - case 'adj-i': - isVerbOrAdjective = true; - break; - case 'vs': - isVerbOrAdjective = true; - isSuruVerb = true; - break; - case 'n': - isNoun = true; - break; - } - } - return isVerbOrAdjective && !(isSuruVerb && isNoun); - } - - // Private - - /** - * @param {string} reading - * @param {number} position - * @param {number[]} nasalPositions - * @param {number[]} devoicePositions - * @param {import('dictionary').Tag[]} tags - * @param {import('dictionary-data-util').GroupedPronunciationInternal[]} groupedPronunciationList - * @returns {?import('dictionary-data-util').GroupedPronunciationInternal} - */ - static _findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, groupedPronunciationList) { - for (const pitchInfo of groupedPronunciationList) { - if ( - pitchInfo.reading === reading && - pitchInfo.position === position && - this._areArraysEqual(pitchInfo.nasalPositions, nasalPositions) && - this._areArraysEqual(pitchInfo.devoicePositions, devoicePositions) && - this._areTagListsEqual(pitchInfo.tags, tags) - ) { - return pitchInfo; - } - } - return null; - } - - /** - * @template [T=unknown] - * @param {T[]} array1 - * @param {T[]} array2 - * @returns {boolean} - */ - static _areArraysEqual(array1, array2) { - const ii = array1.length; - if (ii !== array2.length) { return false; } - for (let i = 0; i < ii; ++i) { - if (array1[i] !== array2[i]) { return false; } - } - return true; - } - - /** - * @param {import('dictionary').Tag[]} tagList1 - * @param {import('dictionary').Tag[]} tagList2 - * @returns {boolean} - */ - static _areTagListsEqual(tagList1, tagList2) { - const ii = tagList1.length; - if (tagList2.length !== ii) { return false; } - - for (let i = 0; i < ii; ++i) { - const tag1 = tagList1[i]; - const tag2 = tagList2[i]; - if (tag1.name !== tag2.name || !this._areArraysEqual(tag1.dictionaries, tag2.dictionaries)) { - return false; - } - } - - return true; - } - - /** - * @template [T=unknown] - * @param {Set<T>} set1 - * @param {Set<T>} set2 - * @returns {boolean} - */ - static _areSetsEqual(set1, set2) { - if (set1.size !== set2.size) { - return false; - } - - for (const value of set1) { - if (!set2.has(value)) { - return false; - } - } - - return true; - } - - /** - * @template [T=unknown] - * @param {Set<T>} set1 - * @param {Set<T>} set2 - * @returns {T[]} - */ - static _getSetIntersection(set1, set2) { - const result = []; - for (const value of set1) { - if (set2.has(value)) { - result.push(value); - } - } - return result; - } - - /** - * @param {unknown[]} array - * @returns {string} - */ - static _createMapKey(array) { - return JSON.stringify(array); - } -} |