diff options
Diffstat (limited to 'ext/js/language/dictionary-data-util.js')
-rw-r--r-- | ext/js/language/dictionary-data-util.js | 299 |
1 files changed, 0 insertions, 299 deletions
diff --git a/ext/js/language/dictionary-data-util.js b/ext/js/language/dictionary-data-util.js deleted file mode 100644 index 951e10ff..00000000 --- a/ext/js/language/dictionary-data-util.js +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright (C) 2020-2021 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -class DictionaryDataUtil { - static groupTermTags(dictionaryEntry) { - const {headwords} = dictionaryEntry; - const headwordCount = headwords.length; - const uniqueCheck = (headwordCount > 1); - const resultsIndexMap = new Map(); - const results = []; - for (let i = 0; i < headwordCount; ++i) { - const {tags} = headwords[i]; - for (const tag of tags) { - if (uniqueCheck) { - const {name, category, notes, dictionary} = tag; - const key = this._createMapKey([name, category, notes, dictionary]); - const index = resultsIndexMap.get(key); - if (typeof index !== 'undefined') { - const existingItem = results[index]; - existingItem.headwordIndices.push(i); - continue; - } - resultsIndexMap.set(key, results.length); - } - - const item = {tag, headwordIndices: [i]}; - results.push(item); - } - } - return results; - } - - static groupTermFrequencies(dictionaryEntry) { - const {headwords, frequencies} = dictionaryEntry; - - const map1 = new Map(); - for (const {headwordIndex, dictionary, hasReading, frequency} of frequencies) { - const {term, reading} = headwords[headwordIndex]; - - let map2 = map1.get(dictionary); - if (typeof map2 === 'undefined') { - map2 = new Map(); - map1.set(dictionary, map2); - } - - const readingKey = hasReading ? reading : null; - const key = this._createMapKey([term, readingKey]); - let frequencyData = map2.get(key); - if (typeof frequencyData === 'undefined') { - frequencyData = {term, reading: readingKey, values: new Set()}; - map2.set(key, frequencyData); - } - - frequencyData.values.add(frequency); - } - return this._createFrequencyGroupsFromMap(map1); - } - - static groupKanjiFrequencies(frequencies) { - const map1 = new Map(); - for (const {dictionary, character, frequency} of frequencies) { - let map2 = map1.get(dictionary); - if (typeof map2 === 'undefined') { - map2 = new Map(); - map1.set(dictionary, map2); - } - - let frequencyData = map2.get(character); - if (typeof frequencyData === 'undefined') { - frequencyData = {character, values: new Set()}; - map2.set(character, frequencyData); - } - - frequencyData.values.add(frequency); - } - return this._createFrequencyGroupsFromMap(map1); - } - - static getPitchAccentInfos(dictionaryEntry) { - const {headwords, pronunciations} = dictionaryEntry; - - const allTerms = new Set(); - const allReadings = new Set(); - for (const {term, reading} of headwords) { - allTerms.add(term); - allReadings.add(reading); - } - - const pitchAccentInfoMap = new Map(); - for (const {headwordIndex, dictionary, pitches} of pronunciations) { - const {term, reading} = headwords[headwordIndex]; - let dictionaryPitchAccentInfoList = pitchAccentInfoMap.get(dictionary); - if (typeof dictionaryPitchAccentInfoList === 'undefined') { - dictionaryPitchAccentInfoList = []; - pitchAccentInfoMap.set(dictionary, dictionaryPitchAccentInfoList); - } - for (const {position, nasalPositions, devoicePositions, tags} of pitches) { - let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, nasalPositions, devoicePositions, tags, dictionaryPitchAccentInfoList); - if (pitchAccentInfo === null) { - pitchAccentInfo = { - terms: new Set(), - reading, - position, - nasalPositions, - devoicePositions, - tags, - exclusiveTerms: [], - exclusiveReadings: [] - }; - dictionaryPitchAccentInfoList.push(pitchAccentInfo); - } - pitchAccentInfo.terms.add(term); - } - } - - const multipleReadings = (allReadings.size > 1); - for (const dictionaryPitchAccentInfoList of pitchAccentInfoMap.values()) { - for (const pitchAccentInfo of dictionaryPitchAccentInfoList) { - const {terms, reading, exclusiveTerms, exclusiveReadings} = pitchAccentInfo; - if (!this._areSetsEqual(terms, allTerms)) { - exclusiveTerms.push(...this._getSetIntersection(terms, allTerms)); - } - if (multipleReadings) { - exclusiveReadings.push(reading); - } - pitchAccentInfo.terms = [...terms]; - } - } - - const results2 = []; - for (const [dictionary, pitches] of pitchAccentInfoMap.entries()) { - results2.push({dictionary, pitches}); - } - return results2; - } - - static getTermFrequency(termTags) { - let totalScore = 0; - for (const {score} of termTags) { - totalScore += score; - } - if (totalScore > 0) { - return 'popular'; - } else if (totalScore < 0) { - return 'rare'; - } else { - return 'normal'; - } - } - - static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) { - if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; } - - const terms = new Set(); - const readings = new Set(); - for (const headwordIndex of headwordIndices) { - const {term, reading} = headwords[headwordIndex]; - terms.add(term); - readings.add(reading); - } - - const disambiguations = []; - const addTerms = !this._areSetsEqual(terms, allTermsSet); - const addReadings = !this._areSetsEqual(readings, allReadingsSet); - if (addTerms) { - disambiguations.push(...this._getSetIntersection(terms, allTermsSet)); - } - if (addReadings) { - if (addTerms) { - for (const term of terms) { - readings.delete(term); - } - } - disambiguations.push(...this._getSetIntersection(readings, allReadingsSet)); - } - return disambiguations; - } - - static isNonNounVerbOrAdjective(wordClasses) { - let isVerbOrAdjective = false; - let isSuruVerb = false; - let isNoun = false; - for (const wordClass of wordClasses) { - switch (wordClass) { - case 'v1': - case 'v5': - case 'vk': - case 'vz': - case 'adj-i': - isVerbOrAdjective = true; - break; - case 'vs': - isVerbOrAdjective = true; - isSuruVerb = true; - break; - case 'n': - isNoun = true; - break; - } - } - return isVerbOrAdjective && !(isSuruVerb && isNoun); - } - - // Private - - static _createFrequencyGroupsFromMap(map) { - const results = []; - for (const [dictionary, map2] of map.entries()) { - const frequencies = []; - for (const frequencyData of map2.values()) { - frequencyData.values = [...frequencyData.values]; - frequencies.push(frequencyData); - } - results.push({dictionary, frequencies}); - } - return results; - } - - static _findExistingPitchAccentInfo(reading, position, nasalPositions, devoicePositions, tags, pitchAccentInfoList) { - for (const pitchInfo of pitchAccentInfoList) { - if ( - pitchInfo.reading === reading && - pitchInfo.position === position && - this._areArraysEqual(pitchInfo.nasalPositions, nasalPositions) && - this._areArraysEqual(pitchInfo.devoicePositions, devoicePositions) && - this._areTagListsEqual(pitchInfo.tags, tags) - ) { - return pitchInfo; - } - } - return null; - } - - static _areArraysEqual(array1, array2) { - const ii = array1.length; - if (ii !== array2.length) { return false; } - for (let i = 0; i < ii; ++i) { - if (array1[i] !== array2[i]) { return false; } - } - return true; - } - - static _areTagListsEqual(tagList1, tagList2) { - const ii = tagList1.length; - if (tagList2.length !== ii) { return false; } - - for (let i = 0; i < ii; ++i) { - const tag1 = tagList1[i]; - const tag2 = tagList2[i]; - if (tag1.name !== tag2.name || tag1.dictionary !== tag2.dictionary) { - return false; - } - } - - return true; - } - - static _areSetsEqual(set1, set2) { - if (set1.size !== set2.size) { - return false; - } - - for (const value of set1) { - if (!set2.has(value)) { - return false; - } - } - - return true; - } - - static _getSetIntersection(set1, set2) { - const result = []; - for (const value of set1) { - if (set2.has(value)) { - result.push(value); - } - } - return result; - } - - static _createMapKey(array) { - return JSON.stringify(array); - } -} |