aboutsummaryrefslogtreecommitdiff
path: root/ext/js/dictionary/dictionary-data-util.js
diff options
context:
space:
mode:
authorStefanVukovic99 <stefanvukovic44@gmail.com>2023-12-23 11:29:48 +0100
committerGitHub <noreply@github.com>2023-12-23 10:29:48 +0000
commit6550495282780abaaa4c1aa4d33e3907fedf2927 (patch)
tree48d176ccac8a802e1bf03caf175d0f02c124cc97 /ext/js/dictionary/dictionary-data-util.js
parent76c52d2ea08d9764c8f044c1f3a8c1eaf6434828 (diff)
move dictionary files to dictionary folder (#429)
Diffstat (limited to 'ext/js/dictionary/dictionary-data-util.js')
-rw-r--r--ext/js/dictionary/dictionary-data-util.js391
1 files changed, 391 insertions, 0 deletions
diff --git a/ext/js/dictionary/dictionary-data-util.js b/ext/js/dictionary/dictionary-data-util.js
new file mode 100644
index 00000000..a54b043b
--- /dev/null
+++ b/ext/js/dictionary/dictionary-data-util.js
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2023 Yomitan Authors
+ * Copyright (C) 2020-2022 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+export class DictionaryDataUtil {
+ /**
+ * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
+ * @returns {import('dictionary-data-util').TagGroup[]}
+ */
+ static groupTermTags(dictionaryEntry) {
+ const {headwords} = dictionaryEntry;
+ const headwordCount = headwords.length;
+ const uniqueCheck = (headwordCount > 1);
+ const resultsIndexMap = new Map();
+ const results = [];
+ for (let i = 0; i < headwordCount; ++i) {
+ const {tags} = headwords[i];
+ for (const tag of tags) {
+ if (uniqueCheck) {
+ const {name, category, content, dictionaries} = tag;
+ const key = this._createMapKey([name, category, content, dictionaries]);
+ const index = resultsIndexMap.get(key);
+ if (typeof index !== 'undefined') {
+ const existingItem = results[index];
+ existingItem.headwordIndices.push(i);
+ continue;
+ }
+ resultsIndexMap.set(key, results.length);
+ }
+
+ const item = {tag, headwordIndices: [i]};
+ results.push(item);
+ }
+ }
+ return results;
+ }
+
+ /**
+ * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
+ * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').TermFrequency>[]}
+ */
+ static groupTermFrequencies(dictionaryEntry) {
+ const {headwords, frequencies: sourceFrequencies} = dictionaryEntry;
+
+ /** @type {import('dictionary-data-util').TermFrequenciesMap1} */
+ const map1 = new Map();
+ for (const {headwordIndex, dictionary, hasReading, frequency, displayValue} of sourceFrequencies) {
+ const {term, reading} = headwords[headwordIndex];
+
+ let map2 = map1.get(dictionary);
+ if (typeof map2 === 'undefined') {
+ map2 = new Map();
+ map1.set(dictionary, map2);
+ }
+
+ const readingKey = hasReading ? reading : null;
+ const key = this._createMapKey([term, readingKey]);
+ let frequencyData = map2.get(key);
+ if (typeof frequencyData === 'undefined') {
+ frequencyData = {term, reading: readingKey, values: new Map()};
+ map2.set(key, frequencyData);
+ }
+
+ frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue});
+ }
+
+ const results = [];
+ for (const [dictionary, map2] of map1.entries()) {
+ const frequencies = [];
+ for (const {term, reading, values} of map2.values()) {
+ frequencies.push({
+ term,
+ reading,
+ values: [...values.values()]
+ });
+ }
+ results.push({dictionary, frequencies});
+ }
+ return results;
+ }
+
+ /**
+ * @param {import('dictionary').KanjiFrequency[]} sourceFrequencies
+ * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').KanjiFrequency>[]}
+ */
+ static groupKanjiFrequencies(sourceFrequencies) {
+ /** @type {import('dictionary-data-util').KanjiFrequenciesMap1} */
+ const map1 = new Map();
+ for (const {dictionary, character, frequency, displayValue} of sourceFrequencies) {
+ let map2 = map1.get(dictionary);
+ if (typeof map2 === 'undefined') {
+ map2 = new Map();
+ map1.set(dictionary, map2);
+ }
+
+ let frequencyData = map2.get(character);
+ if (typeof frequencyData === 'undefined') {
+ frequencyData = {character, values: new Map()};
+ map2.set(character, frequencyData);
+ }
+
+ frequencyData.values.set(this._createMapKey([frequency, displayValue]), {frequency, displayValue});
+ }
+
+ const results = [];
+ for (const [dictionary, map2] of map1.entries()) {
+ const frequencies = [];
+ for (const {character, values} of map2.values()) {
+ frequencies.push({
+ character,
+ values: [...values.values()]
+ });
+ }
+ results.push({dictionary, frequencies});
+ }
+ return results;
+ }
+
+ /**
+ * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
+ * @returns {import('dictionary-data-util').DictionaryGroupedPronunciations[]}
+ */
+ static getGroupedPronunciations(dictionaryEntry) {
+ const {headwords, pronunciations} = dictionaryEntry;
+
+ const allTerms = new Set();
+ const allReadings = new Set();
+ for (const {term, reading} of headwords) {
+ allTerms.add(term);
+ allReadings.add(reading);
+ }
+
+ /** @type {Map<string, import('dictionary-data-util').GroupedPronunciationInternal[]>} */
+ const groupedPronunciationsMap = new Map();
+ for (const {headwordIndex, dictionary, pitches} of pronunciations) {
+ const {term, reading} = headwords[headwordIndex];
+ let dictionaryGroupedPronunciationList = groupedPronunciationsMap.get(dictionary);
+ if (typeof dictionaryGroupedPronunciationList === 'undefined') {
+ dictionaryGroupedPronunciationList = [];
+ groupedPronunciationsMap.set(dictionary, dictionaryGroupedPronunciationList);
+ }
+ for (const {position, nasalPositions, devoicePositions, tags} of pitches) {
+ let groupedPronunciation = this._findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, dictionaryGroupedPronunciationList);
+ if (groupedPronunciation === null) {
+ groupedPronunciation = {
+ terms: new Set(),
+ reading,
+ position,
+ nasalPositions,
+ devoicePositions,
+ tags
+ };
+ dictionaryGroupedPronunciationList.push(groupedPronunciation);
+ }
+ groupedPronunciation.terms.add(term);
+ }
+ }
+
+ /** @type {import('dictionary-data-util').DictionaryGroupedPronunciations[]} */
+ const results2 = [];
+ const multipleReadings = (allReadings.size > 1);
+ for (const [dictionary, dictionaryGroupedPronunciationList] of groupedPronunciationsMap.entries()) {
+ /** @type {import('dictionary-data-util').GroupedPronunciation[]} */
+ const pronunciations2 = [];
+ for (const groupedPronunciation of dictionaryGroupedPronunciationList) {
+ const {terms, reading, position, nasalPositions, devoicePositions, tags} = groupedPronunciation;
+ const exclusiveTerms = !this._areSetsEqual(terms, allTerms) ? this._getSetIntersection(terms, allTerms) : [];
+ const exclusiveReadings = [];
+ if (multipleReadings) {
+ exclusiveReadings.push(reading);
+ }
+ pronunciations2.push({
+ terms: [...terms],
+ reading,
+ position,
+ nasalPositions,
+ devoicePositions,
+ tags,
+ exclusiveTerms,
+ exclusiveReadings
+ });
+ }
+ results2.push({dictionary, pronunciations: pronunciations2});
+ }
+ return results2;
+ }
+
+ /**
+ * @param {import('dictionary').Tag[]|import('anki-templates').Tag[]} termTags
+ * @returns {import('dictionary-data-util').TermFrequencyType}
+ */
+ static getTermFrequency(termTags) {
+ let totalScore = 0;
+ for (const {score} of termTags) {
+ totalScore += score;
+ }
+ if (totalScore > 0) {
+ return 'popular';
+ } else if (totalScore < 0) {
+ return 'rare';
+ } else {
+ return 'normal';
+ }
+ }
+
+ /**
+ * @param {import('dictionary').TermHeadword[]} headwords
+ * @param {number[]} headwordIndices
+ * @param {Set<string>} allTermsSet
+ * @param {Set<string>} allReadingsSet
+ * @returns {string[]}
+ */
+ static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) {
+ if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; }
+
+ /** @type {Set<string>} */
+ const terms = new Set();
+ /** @type {Set<string>} */
+ const readings = new Set();
+ for (const headwordIndex of headwordIndices) {
+ const {term, reading} = headwords[headwordIndex];
+ terms.add(term);
+ readings.add(reading);
+ }
+
+ /** @type {string[]} */
+ const disambiguations = [];
+ const addTerms = !this._areSetsEqual(terms, allTermsSet);
+ const addReadings = !this._areSetsEqual(readings, allReadingsSet);
+ if (addTerms) {
+ disambiguations.push(...this._getSetIntersection(terms, allTermsSet));
+ }
+ if (addReadings) {
+ if (addTerms) {
+ for (const term of terms) {
+ readings.delete(term);
+ }
+ }
+ disambiguations.push(...this._getSetIntersection(readings, allReadingsSet));
+ }
+ return disambiguations;
+ }
+
+ /**
+ * @param {string[]} wordClasses
+ * @returns {boolean}
+ */
+ static isNonNounVerbOrAdjective(wordClasses) {
+ let isVerbOrAdjective = false;
+ let isSuruVerb = false;
+ let isNoun = false;
+ for (const wordClass of wordClasses) {
+ switch (wordClass) {
+ case 'v1':
+ case 'v5':
+ case 'vk':
+ case 'vz':
+ case 'adj-i':
+ isVerbOrAdjective = true;
+ break;
+ case 'vs':
+ isVerbOrAdjective = true;
+ isSuruVerb = true;
+ break;
+ case 'n':
+ isNoun = true;
+ break;
+ }
+ }
+ return isVerbOrAdjective && !(isSuruVerb && isNoun);
+ }
+
+ // Private
+
+ /**
+ * @param {string} reading
+ * @param {number} position
+ * @param {number[]} nasalPositions
+ * @param {number[]} devoicePositions
+ * @param {import('dictionary').Tag[]} tags
+ * @param {import('dictionary-data-util').GroupedPronunciationInternal[]} groupedPronunciationList
+ * @returns {?import('dictionary-data-util').GroupedPronunciationInternal}
+ */
+ static _findExistingGroupedPronunciation(reading, position, nasalPositions, devoicePositions, tags, groupedPronunciationList) {
+ for (const pitchInfo of groupedPronunciationList) {
+ if (
+ pitchInfo.reading === reading &&
+ pitchInfo.position === position &&
+ this._areArraysEqual(pitchInfo.nasalPositions, nasalPositions) &&
+ this._areArraysEqual(pitchInfo.devoicePositions, devoicePositions) &&
+ this._areTagListsEqual(pitchInfo.tags, tags)
+ ) {
+ return pitchInfo;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * @template [T=unknown]
+ * @param {T[]} array1
+ * @param {T[]} array2
+ * @returns {boolean}
+ */
+ static _areArraysEqual(array1, array2) {
+ const ii = array1.length;
+ if (ii !== array2.length) { return false; }
+ for (let i = 0; i < ii; ++i) {
+ if (array1[i] !== array2[i]) { return false; }
+ }
+ return true;
+ }
+
+ /**
+ * @param {import('dictionary').Tag[]} tagList1
+ * @param {import('dictionary').Tag[]} tagList2
+ * @returns {boolean}
+ */
+ static _areTagListsEqual(tagList1, tagList2) {
+ const ii = tagList1.length;
+ if (tagList2.length !== ii) { return false; }
+
+ for (let i = 0; i < ii; ++i) {
+ const tag1 = tagList1[i];
+ const tag2 = tagList2[i];
+ if (tag1.name !== tag2.name || !this._areArraysEqual(tag1.dictionaries, tag2.dictionaries)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * @template [T=unknown]
+ * @param {Set<T>} set1
+ * @param {Set<T>} set2
+ * @returns {boolean}
+ */
+ static _areSetsEqual(set1, set2) {
+ if (set1.size !== set2.size) {
+ return false;
+ }
+
+ for (const value of set1) {
+ if (!set2.has(value)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * @template [T=unknown]
+ * @param {Set<T>} set1
+ * @param {Set<T>} set2
+ * @returns {T[]}
+ */
+ static _getSetIntersection(set1, set2) {
+ const result = [];
+ for (const value of set1) {
+ if (set2.has(value)) {
+ result.push(value);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * @param {unknown[]} array
+ * @returns {string}
+ */
+ static _createMapKey(array) {
+ return JSON.stringify(array);
+ }
+}