diff options
Diffstat (limited to 'ext/js/dictionary/dictionary-data-util.js')
-rw-r--r-- | ext/js/dictionary/dictionary-data-util.js | 696 |
1 files changed, 347 insertions, 349 deletions
diff --git a/ext/js/dictionary/dictionary-data-util.js b/ext/js/dictionary/dictionary-data-util.js index 9b49c7af..a2a106cc 100644 --- a/ext/js/dictionary/dictionary-data-util.js +++ b/ext/js/dictionary/dictionary-data-util.js @@ -16,411 +16,409 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -export class DictionaryDataUtil { - /** - * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry - * @returns {import('dictionary-data-util').TagGroup[]} - */ - static groupTermTags(dictionaryEntry) { - const {headwords} = dictionaryEntry; - const headwordCount = headwords.length; - const uniqueCheck = (headwordCount > 1); - const resultsIndexMap = new Map(); - const results = []; - for (let i = 0; i < headwordCount; ++i) { - const {tags} = headwords[i]; - for (const tag of tags) { - if (uniqueCheck) { - const {name, category, content, dictionaries} = tag; - const key = this._createMapKey([name, category, content, dictionaries]); - const index = resultsIndexMap.get(key); - if (typeof index !== 'undefined') { - const existingItem = results[index]; - existingItem.headwordIndices.push(i); - continue; - } - resultsIndexMap.set(key, results.length); +/** + * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry + * @returns {import('dictionary-data-util').TagGroup[]} + */ +export function groupTermTags(dictionaryEntry) { + const {headwords} = dictionaryEntry; + const headwordCount = headwords.length; + const uniqueCheck = (headwordCount > 1); + const resultsIndexMap = new Map(); + const results = []; + for (let i = 0; i < headwordCount; ++i) { + const {tags} = headwords[i]; + for (const tag of tags) { + if (uniqueCheck) { + const {name, category, content, dictionaries} = tag; + const key = createMapKey([name, category, content, dictionaries]); + const index = resultsIndexMap.get(key); + if (typeof index !== 'undefined') { + const existingItem = results[index]; + existingItem.headwordIndices.push(i); + continue; } - - const item = {tag, headwordIndices: [i]}; - results.push(item); + resultsIndexMap.set(key, results.length); } + + const item = {tag, headwordIndices: [i]}; + results.push(item); } - return results; } + return results; +} - /** - * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry - * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').TermFrequency>[]} - */ - static groupTermFrequencies(dictionaryEntry) { - const {headwords, frequencies: sourceFrequencies} = dictionaryEntry; - - /** @type {import('dictionary-data-util').TermFrequenciesMap1} */ - const map1 = new Map(); - for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of sourceFrequencies) { - const {term, reading} = headwords[headwordIndex]; - - let map2 = map1.get(dictionary); - if (typeof map2 === 'undefined') { - map2 = new Map(); - map1.set(dictionary, map2); - } - - const readingKey = hasReading ? reading : null; - const key = this._createMapKey([term, readingKey]); - let frequencyData = map2.get(key); - if (typeof frequencyData === 'undefined') { - frequencyData = {term, reading: readingKey, values: new Map()}; - map2.set(key, frequencyData); - } - - frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); +/** + * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry + * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').TermFrequency>[]} + */ +export function groupTermFrequencies(dictionaryEntry) { + const {headwords, frequencies: sourceFrequencies} = dictionaryEntry; + + /** @type {import('dictionary-data-util').TermFrequenciesMap1} */ + const map1 = new Map(); + for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of sourceFrequencies) { + const {term, reading} = headwords[headwordIndex]; + + let map2 = map1.get(dictionary); + if (typeof map2 === 'undefined') { + map2 = new Map(); + map1.set(dictionary, map2); } - const results = []; - for (const [dictionary, map2] of map1.entries()) { - const frequencies = []; - for (const {term, reading, values} of map2.values()) { - frequencies.push({ - term, - reading, - values: [...values.values()] - }); - } - results.push({dictionary, frequencies}); + const readingKey = hasReading ? reading : null; + const key = createMapKey([term, readingKey]); + let frequencyData = map2.get(key); + if (typeof frequencyData === 'undefined') { + frequencyData = {term, reading: readingKey, values: new Map()}; + map2.set(key, frequencyData); } - return results; - } - /** - * @param {import('dictionary').KanjiFrequency[]} sourceFrequencies - * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').KanjiFrequency>[]} - */ - static groupKanjiFrequencies(sourceFrequencies) { - /** @type {import('dictionary-data-util').KanjiFrequenciesMap1} */ - const map1 = new Map(); - for (const {dictionary, character, frequency, displayValue} of sourceFrequencies) { - let map2 = map1.get(dictionary); - if (typeof map2 === 'undefined') { - map2 = new Map(); - map1.set(dictionary, map2); - } - - let frequencyData = map2.get(character); - if (typeof frequencyData === 'undefined') { - frequencyData = {character, values: new Map()}; - map2.set(character, frequencyData); - } + frequencyData.values.set(createMapKey([frequency, displayValue]), {frequency, displayValue}); + } - frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue}); + const results = []; + for (const [dictionary, map2] of map1.entries()) { + const frequencies = []; + for (const {term, reading, values} of map2.values()) { + frequencies.push({ + term, + reading, + values: [...values.values()] + }); } - - const results = []; - for (const [dictionary, map2] of map1.entries()) { - const frequencies = []; - for (const {character, values} of map2.values()) { - frequencies.push({ - character, - values: [...values.values()] - }); - } - results.push({dictionary, frequencies}); - } - return results; + results.push({dictionary, frequencies}); } + return results; +} - /** - * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry - * @returns {import('dictionary-data-util').DictionaryGroupedPronunciations[]} - */ - static getGroupedPronunciations(dictionaryEntry) { - const {headwords, pronunciations: termPronunciations} = dictionaryEntry; - - const allTerms = new Set(); - const allReadings = new Set(); - for (const {term, reading} of headwords) { - allTerms.add(term); - allReadings.add(reading); +/** + * @param {import('dictionary').KanjiFrequency[]} sourceFrequencies + * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').KanjiFrequency>[]} + */ +export function groupKanjiFrequencies(sourceFrequencies) { + /** @type {import('dictionary-data-util').KanjiFrequenciesMap1} */ + const map1 = new Map(); + for (const {dictionary, character, frequency, displayValue} of sourceFrequencies) { + let map2 = map1.get(dictionary); + if (typeof map2 === 'undefined') { + map2 = new Map(); + map1.set(dictionary, map2); } - /** @type {Map<string, import('dictionary-data-util').GroupedPronunciationInternal[]>} */ - const groupedPronunciationsMap = new Map(); - for (const {headwordIndex, dictionary, pronunciations} of termPronunciations) { - const {term, reading} = headwords[headwordIndex]; - let dictionaryGroupedPronunciationList = groupedPronunciationsMap.get(dictionary); - if (typeof dictionaryGroupedPronunciationList === 'undefined') { - dictionaryGroupedPronunciationList = []; - groupedPronunciationsMap.set(dictionary, dictionaryGroupedPronunciationList); - } - for (const pronunciation of pronunciations) { - let groupedPronunciation = this._findExistingGroupedPronunciation(reading, pronunciation, dictionaryGroupedPronunciationList); - if (groupedPronunciation === null) { - groupedPronunciation = { - pronunciation, - terms: new Set(), - reading - }; - dictionaryGroupedPronunciationList.push(groupedPronunciation); - } - groupedPronunciation.terms.add(term); - } + let frequencyData = map2.get(character); + if (typeof frequencyData === 'undefined') { + frequencyData = {character, values: new Map()}; + map2.set(character, frequencyData); } - /** @type {import('dictionary-data-util').DictionaryGroupedPronunciations[]} */ - const results2 = []; - const multipleReadings = (allReadings.size > 1); - for (const [dictionary, dictionaryGroupedPronunciationList] of groupedPronunciationsMap.entries()) { - /** @type {import('dictionary-data-util').GroupedPronunciation[]} */ - const pronunciations2 = []; - for (const groupedPronunciation of dictionaryGroupedPronunciationList) { - const {pronunciation, terms, reading} = groupedPronunciation; - const exclusiveTerms = !this._areSetsEqual(terms, allTerms) ? this._getSetIntersection(terms, allTerms) : []; - const exclusiveReadings = []; - if (multipleReadings) { - exclusiveReadings.push(reading); - } - pronunciations2.push({ - pronunciation, - terms: [...terms], - reading, - exclusiveTerms, - exclusiveReadings - }); - } - - results2.push({dictionary, pronunciations: pronunciations2}); - } - return results2; + frequencyData.values.set(createMapKey([frequency, displayValue]), {frequency, displayValue}); } - /** - * @template {import('dictionary').PronunciationType} T - * @param {import('dictionary').Pronunciation[]} pronunciations - * @param {T} type - * @returns {import('dictionary').PronunciationGeneric<T>[]} - */ - static getPronunciationsOfType(pronunciations, type) { - /** @type {import('dictionary').PronunciationGeneric<T>[]} */ - const results = []; - for (const pronunciation of pronunciations) { - if (pronunciation.type !== type) { continue; } - // This is type safe, but for some reason the cast is needed. - results.push(/** @type {import('dictionary').PronunciationGeneric<T>} */ (pronunciation)); + const results = []; + for (const [dictionary, map2] of map1.entries()) { + const frequencies = []; + for (const {character, values} of map2.values()) { + frequencies.push({ + character, + values: [...values.values()] + }); } - return results; + results.push({dictionary, frequencies}); } + return results; +} - /** - * @param {import('dictionary').Tag[]|import('anki-templates').Tag[]} termTags - * @returns {import('dictionary-data-util').TermFrequencyType} - */ - static getTermFrequency(termTags) { - let totalScore = 0; - for (const {score} of termTags) { - totalScore += score; - } - if (totalScore > 0) { - return 'popular'; - } else if (totalScore < 0) { - return 'rare'; - } else { - return 'normal'; - } +/** + * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry + * @returns {import('dictionary-data-util').DictionaryGroupedPronunciations[]} + */ +export function getGroupedPronunciations(dictionaryEntry) { + const {headwords, pronunciations: termPronunciations} = dictionaryEntry; + + const allTerms = new Set(); + const allReadings = new Set(); + for (const {term, reading} of headwords) { + allTerms.add(term); + allReadings.add(reading); } - /** - * @param {import('dictionary').TermHeadword[]} headwords - * @param {number[]} headwordIndices - * @param {Set<string>} allTermsSet - * @param {Set<string>} allReadingsSet - * @returns {string[]} - */ - static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) { - if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; } - - /** @type {Set<string>} */ - const terms = new Set(); - /** @type {Set<string>} */ - const readings = new Set(); - for (const headwordIndex of headwordIndices) { - const {term, reading} = headwords[headwordIndex]; - terms.add(term); - readings.add(reading); + /** @type {Map<string, import('dictionary-data-util').GroupedPronunciationInternal[]>} */ + const groupedPronunciationsMap = new Map(); + for (const {headwordIndex, dictionary, pronunciations} of termPronunciations) { + const {term, reading} = headwords[headwordIndex]; + let dictionaryGroupedPronunciationList = groupedPronunciationsMap.get(dictionary); + if (typeof dictionaryGroupedPronunciationList === 'undefined') { + dictionaryGroupedPronunciationList = []; + groupedPronunciationsMap.set(dictionary, dictionaryGroupedPronunciationList); } - - /** @type {string[]} */ - const disambiguations = []; - const addTerms = !this._areSetsEqual(terms, allTermsSet); - const addReadings = !this._areSetsEqual(readings, allReadingsSet); - if (addTerms) { - disambiguations.push(...this._getSetIntersection(terms, allTermsSet)); - } - if (addReadings) { - if (addTerms) { - for (const term of terms) { - readings.delete(term); - } + for (const pronunciation of pronunciations) { + let groupedPronunciation = findExistingGroupedPronunciation(reading, pronunciation, dictionaryGroupedPronunciationList); + if (groupedPronunciation === null) { + groupedPronunciation = { + pronunciation, + terms: new Set(), + reading + }; + dictionaryGroupedPronunciationList.push(groupedPronunciation); } - disambiguations.push(...this._getSetIntersection(readings, allReadingsSet)); + groupedPronunciation.terms.add(term); } - return disambiguations; } - /** - * @param {string[]} wordClasses - * @returns {boolean} - */ - static isNonNounVerbOrAdjective(wordClasses) { - let isVerbOrAdjective = false; - let isSuruVerb = false; - let isNoun = false; - for (const wordClass of wordClasses) { - switch (wordClass) { - case 'v1': - case 'v5': - case 'vk': - case 'vz': - case 'adj-i': - isVerbOrAdjective = true; - break; - case 'vs': - isVerbOrAdjective = true; - isSuruVerb = true; - // falls through - case 'n': - isNoun = true; - break; + /** @type {import('dictionary-data-util').DictionaryGroupedPronunciations[]} */ + const results2 = []; + const multipleReadings = (allReadings.size > 1); + for (const [dictionary, dictionaryGroupedPronunciationList] of groupedPronunciationsMap.entries()) { + /** @type {import('dictionary-data-util').GroupedPronunciation[]} */ + const pronunciations2 = []; + for (const groupedPronunciation of dictionaryGroupedPronunciationList) { + const {pronunciation, terms, reading} = groupedPronunciation; + const exclusiveTerms = !areSetsEqual(terms, allTerms) ? getSetIntersection(terms, allTerms) : []; + const exclusiveReadings = []; + if (multipleReadings) { + exclusiveReadings.push(reading); } + pronunciations2.push({ + pronunciation, + terms: [...terms], + reading, + exclusiveTerms, + exclusiveReadings + }); } - return isVerbOrAdjective && !(isSuruVerb && isNoun); + + results2.push({dictionary, pronunciations: pronunciations2}); } + return results2; +} - // Private +/** + * @template {import('dictionary').PronunciationType} T + * @param {import('dictionary').Pronunciation[]} pronunciations + * @param {T} type + * @returns {import('dictionary').PronunciationGeneric<T>[]} + */ +export function getPronunciationsOfType(pronunciations, type) { + /** @type {import('dictionary').PronunciationGeneric<T>[]} */ + const results = []; + for (const pronunciation of pronunciations) { + if (pronunciation.type !== type) { continue; } + // This is type safe, but for some reason the cast is needed. + results.push(/** @type {import('dictionary').PronunciationGeneric<T>} */ (pronunciation)); + } + return results; +} - /** - * @param {string} reading - * @param {import('dictionary').Pronunciation} pronunciation - * @param {import('dictionary-data-util').GroupedPronunciationInternal[]} groupedPronunciationList - * @returns {?import('dictionary-data-util').GroupedPronunciationInternal} - */ - static _findExistingGroupedPronunciation(reading, pronunciation, groupedPronunciationList) { - const existingGroupedPronunciation = groupedPronunciationList.find((groupedPronunciation) => { - return groupedPronunciation.reading === reading && this._arePronunciationsEquivalent(groupedPronunciation, pronunciation); - }); +/** + * @param {import('dictionary').Tag[]|import('anki-templates').Tag[]} termTags + * @returns {import('dictionary-data-util').TermFrequencyType} + */ +export function getTermFrequency(termTags) { + let totalScore = 0; + for (const {score} of termTags) { + totalScore += score; + } + if (totalScore > 0) { + return 'popular'; + } else if (totalScore < 0) { + return 'rare'; + } else { + return 'normal'; + } +} - return existingGroupedPronunciation || null; +/** + * @param {import('dictionary').TermHeadword[]} headwords + * @param {number[]} headwordIndices + * @param {Set<string>} allTermsSet + * @param {Set<string>} allReadingsSet + * @returns {string[]} + */ +export function getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) { + if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; } + + /** @type {Set<string>} */ + const terms = new Set(); + /** @type {Set<string>} */ + const readings = new Set(); + for (const headwordIndex of headwordIndices) { + const {term, reading} = headwords[headwordIndex]; + terms.add(term); + readings.add(reading); } - /** - * @param {import('dictionary-data-util').GroupedPronunciationInternal} groupedPronunciation - * @param {import('dictionary').Pronunciation} pronunciation2 - * @returns {boolean} - */ - static _arePronunciationsEquivalent({pronunciation: pronunciation1}, pronunciation2) { - if ( - pronunciation1.type !== pronunciation2.type || - !this._areTagListsEqual(pronunciation1.tags, pronunciation2.tags) - ) { - return false; - } - switch (pronunciation1.type) { - case 'pitch-accent': - { - // This cast is valid based on the type check at the start of the function. - const pitchAccent2 = /** @type {import('dictionary').PitchAccent} */ (pronunciation2); - return ( - pronunciation1.position === pitchAccent2.position && - this._areArraysEqual(pronunciation1.nasalPositions, pitchAccent2.nasalPositions) && - this._areArraysEqual(pronunciation1.devoicePositions, pitchAccent2.devoicePositions) - ); - } - case 'phonetic-transcription': - { - // This cast is valid based on the type check at the start of the function. - const phoneticTranscription2 = /** @type {import('dictionary').PhoneticTranscription} */ (pronunciation2); - return pronunciation1.ipa === phoneticTranscription2.ipa; + /** @type {string[]} */ + const disambiguations = []; + const addTerms = !areSetsEqual(terms, allTermsSet); + const addReadings = !areSetsEqual(readings, allReadingsSet); + if (addTerms) { + disambiguations.push(...getSetIntersection(terms, allTermsSet)); + } + if (addReadings) { + if (addTerms) { + for (const term of terms) { + readings.delete(term); } } - return true; + disambiguations.push(...getSetIntersection(readings, allReadingsSet)); } + return disambiguations; +} - /** - * @template [T=unknown] - * @param {T[]} array1 - * @param {T[]} array2 - * @returns {boolean} - */ - static _areArraysEqual(array1, array2) { - const ii = array1.length; - if (ii !== array2.length) { return false; } - for (let i = 0; i < ii; ++i) { - if (array1[i] !== array2[i]) { return false; } +/** + * @param {string[]} wordClasses + * @returns {boolean} + */ +export function isNonNounVerbOrAdjective(wordClasses) { + let isVerbOrAdjective = false; + let isSuruVerb = false; + let isNoun = false; + for (const wordClass of wordClasses) { + switch (wordClass) { + case 'v1': + case 'v5': + case 'vk': + case 'vz': + case 'adj-i': + isVerbOrAdjective = true; + break; + case 'vs': + isVerbOrAdjective = true; + isSuruVerb = true; + // falls through + case 'n': + isNoun = true; + break; } - return true; } + return isVerbOrAdjective && !(isSuruVerb && isNoun); +} - /** - * @param {import('dictionary').Tag[]} tagList1 - * @param {import('dictionary').Tag[]} tagList2 - * @returns {boolean} - */ - static _areTagListsEqual(tagList1, tagList2) { - const ii = tagList1.length; - if (tagList2.length !== ii) { return false; } - - for (let i = 0; i < ii; ++i) { - const tag1 = tagList1[i]; - const tag2 = tagList2[i]; - if (tag1.name !== tag2.name || !this._areArraysEqual(tag1.dictionaries, tag2.dictionaries)) { - return false; - } +// Private + +/** + * @param {string} reading + * @param {import('dictionary').Pronunciation} pronunciation + * @param {import('dictionary-data-util').GroupedPronunciationInternal[]} groupedPronunciationList + * @returns {?import('dictionary-data-util').GroupedPronunciationInternal} + */ +function findExistingGroupedPronunciation(reading, pronunciation, groupedPronunciationList) { + const existingGroupedPronunciation = groupedPronunciationList.find((groupedPronunciation) => { + return groupedPronunciation.reading === reading && arePronunciationsEquivalent(groupedPronunciation, pronunciation); + }); + + return existingGroupedPronunciation || null; +} + +/** + * @param {import('dictionary-data-util').GroupedPronunciationInternal} groupedPronunciation + * @param {import('dictionary').Pronunciation} pronunciation2 + * @returns {boolean} + */ +function arePronunciationsEquivalent({pronunciation: pronunciation1}, pronunciation2) { + if ( + pronunciation1.type !== pronunciation2.type || + !areTagListsEqual(pronunciation1.tags, pronunciation2.tags) + ) { + return false; + } + switch (pronunciation1.type) { + case 'pitch-accent': + { + // This cast is valid based on the type check at the start of the function. + const pitchAccent2 = /** @type {import('dictionary').PitchAccent} */ (pronunciation2); + return ( + pronunciation1.position === pitchAccent2.position && + areArraysEqual(pronunciation1.nasalPositions, pitchAccent2.nasalPositions) && + areArraysEqual(pronunciation1.devoicePositions, pitchAccent2.devoicePositions) + ); + } + case 'phonetic-transcription': + { + // This cast is valid based on the type check at the start of the function. + const phoneticTranscription2 = /** @type {import('dictionary').PhoneticTranscription} */ (pronunciation2); + return pronunciation1.ipa === phoneticTranscription2.ipa; } + } + return true; +} - return true; +/** + * @template [T=unknown] + * @param {T[]} array1 + * @param {T[]} array2 + * @returns {boolean} + */ +function areArraysEqual(array1, array2) { + const ii = array1.length; + if (ii !== array2.length) { return false; } + for (let i = 0; i < ii; ++i) { + if (array1[i] !== array2[i]) { return false; } } + return true; +} - /** - * @template [T=unknown] - * @param {Set<T>} set1 - * @param {Set<T>} set2 - * @returns {boolean} - */ - static _areSetsEqual(set1, set2) { - if (set1.size !== set2.size) { +/** + * @param {import('dictionary').Tag[]} tagList1 + * @param {import('dictionary').Tag[]} tagList2 + * @returns {boolean} + */ +function areTagListsEqual(tagList1, tagList2) { + const ii = tagList1.length; + if (tagList2.length !== ii) { return false; } + + for (let i = 0; i < ii; ++i) { + const tag1 = tagList1[i]; + const tag2 = tagList2[i]; + if (tag1.name !== tag2.name || !areArraysEqual(tag1.dictionaries, tag2.dictionaries)) { return false; } + } - for (const value of set1) { - if (!set2.has(value)) { - return false; - } - } + return true; +} - return true; +/** + * @template [T=unknown] + * @param {Set<T>} set1 + * @param {Set<T>} set2 + * @returns {boolean} + */ +function areSetsEqual(set1, set2) { + if (set1.size !== set2.size) { + return false; } - /** - * @template [T=unknown] - * @param {Set<T>} set1 - * @param {Set<T>} set2 - * @returns {T[]} - */ - static _getSetIntersection(set1, set2) { - const result = []; - for (const value of set1) { - if (set2.has(value)) { - result.push(value); - } + for (const value of set1) { + if (!set2.has(value)) { + return false; } - return result; } - /** - * @param {unknown[]} array - * @returns {string} - */ - static _createMapKey(array) { - return JSON.stringify(array); + return true; +} + +/** + * @template [T=unknown] + * @param {Set<T>} set1 + * @param {Set<T>} set2 + * @returns {T[]} + */ +function getSetIntersection(set1, set2) { + const result = []; + for (const value of set1) { + if (set2.has(value)) { + result.push(value); + } } + return result; +} + +/** + * @param {unknown[]} array + * @returns {string} + */ +function createMapKey(array) { + return JSON.stringify(array); } |