/*
* Copyright (C) 2023 Yomitan Authors
* Copyright (C) 2020-2022 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
export class DictionaryDataUtil {
/**
* @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
* @returns {import('dictionary-data-util').TagGroup[]}
*/
static groupTermTags(dictionaryEntry) {
const {headwords} = dictionaryEntry;
const headwordCount = headwords.length;
const uniqueCheck = (headwordCount > 1);
const resultsIndexMap = new Map();
const results = [];
for (let i = 0; i < headwordCount; ++i) {
const {tags} = headwords[i];
for (const tag of tags) {
if (uniqueCheck) {
const {name, category, content, dictionaries} = tag;
const key = this._createMapKey([name, category, content, dictionaries]);
const index = resultsIndexMap.get(key);
if (typeof index !== 'undefined') {
const existingItem = results[index];
existingItem.headwordIndices.push(i);
continue;
}
resultsIndexMap.set(key, results.length);
}
const item = {tag, headwordIndices: [i]};
results.push(item);
}
}
return results;
}
/**
* @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
* @returns {import('dictionary-data-util').DictionaryFrequency[]}
*/
static groupTermFrequencies(dictionaryEntry) {
const {headwords, frequencies: sourceFrequencies} = dictionaryEntry;
/** @type {import('dictionary-data-util').TermFrequenciesMap1} */
const map1 = new Map();
for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of sourceFrequencies) {
const {term, reading} = headwords[headwordIndex];
let map2 = map1.get(dictionary);
if (typeof map2 === 'undefined') {
map2 = new Map();
map1.set(dictionary, map2);
}
const readingKey = hasReading ? reading : null;
const key = this._createMapKey([term, readingKey]);
let frequencyData = map2.get(key);
if (typeof frequencyData === 'undefined') {
frequencyData = {term, reading: readingKey, values: new Map()};
map2.set(key, frequencyData);
}
frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue});
}
const results = [];
for (const [dictionary, map2] of map1.entries()) {
const frequencies = [];
for (const {term, reading, values} of map2.values()) {
frequencies.push({
term,
reading,
values: [...values.values()]
});
}
results.push({dictionary, frequencies});
}
return results;
}
/**
* @param {import('dictionary').KanjiFrequency[]} sourceFrequencies
* @returns {import('dictionary-data-util').DictionaryFrequency[]}
*/
static groupKanjiFrequencies(sourceFrequencies) {
/** @type {import('dictionary-data-util').KanjiFrequenciesMap1} */
const map1 = new Map();
for (const {dictionary, character, frequency, displayValue} of sourceFrequencies) {
let map2 = map1.get(dictionary);
if (typeof map2 === 'undefined') {
map2 = new Map();
map1.set(dictionary, map2);
}
let frequencyData = map2.get(character);
if (typeof frequencyData === 'undefined') {
frequencyData = {character, values: new Map()};
map2.set(character, frequencyData);
}
frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue});
}
const results = [];
for (const [dictionary, map2] of map1.entries()) {
const frequencies = [];
for (const {character, values} of map2.values()) {
frequencies.push({
character,
values: [...values.values()]
});
}
results.push({dictionary, frequencies});
}
return results;
}
/**
* @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
* @returns {import('dictionary-data-util').DictionaryGroupedPronunciations[]}
*/
static getGroupedPronunciations(dictionaryEntry) {
const {headwords, pronunciations: termPronunciations} = dictionaryEntry;
const allTerms = new Set();
const allReadings = new Set();
for (const {term, reading} of headwords) {
allTerms.add(term);
allReadings.add(reading);
}
/** @type {Map} */
const groupedPronunciationsMap = new Map();
for (const {headwordIndex, dictionary, pronunciations} of termPronunciations) {
const {term, reading} = headwords[headwordIndex];
let dictionaryGroupedPronunciationList = groupedPronunciationsMap.get(dictionary);
if (typeof dictionaryGroupedPronunciationList === 'undefined') {
dictionaryGroupedPronunciationList = [];
groupedPronunciationsMap.set(dictionary, dictionaryGroupedPronunciationList);
}
for (const pronunciation of pronunciations) {
let groupedPronunciation = this._findExistingGroupedPronunciation(reading, pronunciation, dictionaryGroupedPronunciationList);
if (groupedPronunciation === null) {
groupedPronunciation = {
pronunciation,
terms: new Set(),
reading
};
dictionaryGroupedPronunciationList.push(groupedPronunciation);
}
groupedPronunciation.terms.add(term);
}
}
/** @type {import('dictionary-data-util').DictionaryGroupedPronunciations[]} */
const results2 = [];
const multipleReadings = (allReadings.size > 1);
for (const [dictionary, dictionaryGroupedPronunciationList] of groupedPronunciationsMap.entries()) {
/** @type {import('dictionary-data-util').GroupedPronunciation[]} */
const pronunciations2 = [];
for (const groupedPronunciation of dictionaryGroupedPronunciationList) {
const {pronunciation, terms, reading} = groupedPronunciation;
const exclusiveTerms = !this._areSetsEqual(terms, allTerms) ? this._getSetIntersection(terms, allTerms) : [];
const exclusiveReadings = [];
if (multipleReadings) {
exclusiveReadings.push(reading);
}
pronunciations2.push({
pronunciation,
terms: [...terms],
reading,
exclusiveTerms,
exclusiveReadings
});
}
results2.push({dictionary, pronunciations: pronunciations2});
}
return results2;
}
/**
* @template {import('dictionary').PronunciationType} T
* @param {import('dictionary').Pronunciation[]} pronunciations
* @param {T} type
* @returns {import('dictionary').PronunciationGeneric[]}
*/
static getPronunciationsOfType(pronunciations, type) {
/** @type {import('dictionary').PronunciationGeneric[]} */
const results = [];
for (const pronunciation of pronunciations) {
if (pronunciation.type !== type) { continue; }
// This is type safe, but for some reason the cast is needed.
results.push(/** @type {import('dictionary').PronunciationGeneric} */ (pronunciation));
}
return results;
}
/**
* @param {import('dictionary').Tag[]|import('anki-templates').Tag[]} termTags
* @returns {import('dictionary-data-util').TermFrequencyType}
*/
static getTermFrequency(termTags) {
let totalScore = 0;
for (const {score} of termTags) {
totalScore += score;
}
if (totalScore > 0) {
return 'popular';
} else if (totalScore < 0) {
return 'rare';
} else {
return 'normal';
}
}
/**
* @param {import('dictionary').TermHeadword[]} headwords
* @param {number[]} headwordIndices
* @param {Set} allTermsSet
* @param {Set} allReadingsSet
* @returns {string[]}
*/
static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) {
if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; }
/** @type {Set} */
const terms = new Set();
/** @type {Set} */
const readings = new Set();
for (const headwordIndex of headwordIndices) {
const {term, reading} = headwords[headwordIndex];
terms.add(term);
readings.add(reading);
}
/** @type {string[]} */
const disambiguations = [];
const addTerms = !this._areSetsEqual(terms, allTermsSet);
const addReadings = !this._areSetsEqual(readings, allReadingsSet);
if (addTerms) {
disambiguations.push(...this._getSetIntersection(terms, allTermsSet));
}
if (addReadings) {
if (addTerms) {
for (const term of terms) {
readings.delete(term);
}
}
disambiguations.push(...this._getSetIntersection(readings, allReadingsSet));
}
return disambiguations;
}
/**
* @param {string[]} wordClasses
* @returns {boolean}
*/
static isNonNounVerbOrAdjective(wordClasses) {
let isVerbOrAdjective = false;
let isSuruVerb = false;
let isNoun = false;
for (const wordClass of wordClasses) {
switch (wordClass) {
case 'v1':
case 'v5':
case 'vk':
case 'vz':
case 'adj-i':
isVerbOrAdjective = true;
break;
case 'vs':
isVerbOrAdjective = true;
isSuruVerb = true;
break;
case 'n':
isNoun = true;
break;
}
}
return isVerbOrAdjective && !(isSuruVerb && isNoun);
}
// Private
/**
* @param {string} reading
* @param {import('dictionary').Pronunciation} pronunciation
* @param {import('dictionary-data-util').GroupedPronunciationInternal[]} groupedPronunciationList
* @returns {?import('dictionary-data-util').GroupedPronunciationInternal}
*/
static _findExistingGroupedPronunciation(reading, pronunciation, groupedPronunciationList) {
const existingGroupedPronunciation = groupedPronunciationList.find((groupedPronunciation) => {
return groupedPronunciation.reading === reading && this._arePronunciationsEquivalent(groupedPronunciation, pronunciation);
});
return existingGroupedPronunciation || null;
}
/**
* @param {import('dictionary-data-util').GroupedPronunciationInternal} groupedPronunciation
* @param {import('dictionary').Pronunciation} pronunciation2
* @returns {boolean}
*/
static _arePronunciationsEquivalent({pronunciation: pronunciation1}, pronunciation2) {
if (
pronunciation1.type !== pronunciation2.type ||
!this._areTagListsEqual(pronunciation1.tags, pronunciation2.tags)
) {
return false;
}
switch (pronunciation1.type) {
case 'pitch-accent':
{
// This cast is valid based on the type check at the start of the function.
const pitchAccent2 = /** @type {import('dictionary').PitchAccent} */ (pronunciation2);
return (
pronunciation1.position === pitchAccent2.position &&
this._areArraysEqual(pronunciation1.nasalPositions, pitchAccent2.nasalPositions) &&
this._areArraysEqual(pronunciation1.devoicePositions, pitchAccent2.devoicePositions)
);
}
case 'phonetic-transcription':
{
// This cast is valid based on the type check at the start of the function.
const phoneticTranscription2 = /** @type {import('dictionary').PhoneticTranscription} */ (pronunciation2);
return pronunciation1.ipa === phoneticTranscription2.ipa;
}
}
return true;
}
/**
* @template [T=unknown]
* @param {T[]} array1
* @param {T[]} array2
* @returns {boolean}
*/
static _areArraysEqual(array1, array2) {
const ii = array1.length;
if (ii !== array2.length) { return false; }
for (let i = 0; i < ii; ++i) {
if (array1[i] !== array2[i]) { return false; }
}
return true;
}
/**
* @param {import('dictionary').Tag[]} tagList1
* @param {import('dictionary').Tag[]} tagList2
* @returns {boolean}
*/
static _areTagListsEqual(tagList1, tagList2) {
const ii = tagList1.length;
if (tagList2.length !== ii) { return false; }
for (let i = 0; i < ii; ++i) {
const tag1 = tagList1[i];
const tag2 = tagList2[i];
if (tag1.name !== tag2.name || !this._areArraysEqual(tag1.dictionaries, tag2.dictionaries)) {
return false;
}
}
return true;
}
/**
* @template [T=unknown]
* @param {Set} set1
* @param {Set} set2
* @returns {boolean}
*/
static _areSetsEqual(set1, set2) {
if (set1.size !== set2.size) {
return false;
}
for (const value of set1) {
if (!set2.has(value)) {
return false;
}
}
return true;
}
/**
* @template [T=unknown]
* @param {Set} set1
* @param {Set} set2
* @returns {T[]}
*/
static _getSetIntersection(set1, set2) {
const result = [];
for (const value of set1) {
if (set2.has(value)) {
result.push(value);
}
}
return result;
}
/**
* @param {unknown[]} array
* @returns {string}
*/
static _createMapKey(array) {
return JSON.stringify(array);
}
}