1 files changed, 299 insertions, 0 deletions
diff --git a/ext/js/language/sandbox/dictionary-data-util.js b/ext/js/language/sandbox/dictionary-data-util.js
new file mode 100644
index 00000000..951e10ff
--- /dev/null
+++ b/ext/js/language/sandbox/dictionary-data-util.js
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2020-2021  Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+class DictionaryDataUtil {
+    static groupTermTags(dictionaryEntry) {
+        const {headwords} = dictionaryEntry;
+        const headwordCount = headwords.length;
+        const uniqueCheck = (headwordCount > 1);
+        const resultsIndexMap = new Map();
+        const results = [];
+        for (let i = 0; i < headwordCount; ++i) {
+            const {tags} = headwords[i];
+            for (const tag of tags) {
+                if (uniqueCheck) {
+                    const {name, category, notes, dictionary} = tag;
+                    const key = this._createMapKey([name, category, notes, dictionary]);
+                    const index = resultsIndexMap.get(key);
+                    if (typeof index !== 'undefined') {
+                        const existingItem = results[index];
+                        existingItem.headwordIndices.push(i);
+                        continue;
+                    }
+                    resultsIndexMap.set(key, results.length);
+                }
+
+                const item = {tag, headwordIndices: [i]};
+                results.push(item);
+            }
+        }
+        return results;
+    }
+
+    static groupTermFrequencies(dictionaryEntry) {
+        const {headwords, frequencies} = dictionaryEntry;
+
+        const map1 = new Map();
+        for (const {headwordIndex, dictionary, hasReading, frequency} of frequencies) {
+            const {term, reading} = headwords[headwordIndex];
+
+            let map2 = map1.get(dictionary);
+            if (typeof map2 === 'undefined') {
+                map2 = new Map();
+                map1.set(dictionary, map2);
+            }
+
+            const readingKey = hasReading ? reading : null;
+            const key = this._createMapKey([term, readingKey]);
+            let frequencyData = map2.get(key);
+            if (typeof frequencyData === 'undefined') {
+                frequencyData = {term, reading: readingKey, values: new Set()};
+                map2.set(key, frequencyData);
+            }
+
+            frequencyData.values.add(frequency);
+        }
+        return this._createFrequencyGroupsFromMap(map1);
+    }
+
+    static groupKanjiFrequencies(frequencies) {
+        const map1 = new Map();
+        for (const {dictionary, character, frequency} of frequencies) {
+            let map2 = map1.get(dictionary);
+            if (typeof map2 === 'undefined') {
+                map2 = new Map();
+                map1.set(dictionary, map2);
+            }
+
+            let frequencyData = map2.get(character);
+            if (typeof frequencyData === 'undefined') {
+                frequencyData = {character, values: new Set()};
+                map2.set(character, frequencyData);
+            }
+
+            frequencyData.values.add(frequency);
+        }
+        return this._createFrequencyGroupsFromMap(map1);
+    }
+
+    static getPitchAccentInfos(dictionaryEntry) {
+        const {headwords, pronunciations} = dictionaryEntry;
+
+        const allTerms = new Set();
+        const allReadings = new Set();
+        for (const {term, reading} of headwords) {
+            allTerms.add(term);
+            allReadings.add(reading);
+        }
+
+        const pitchAccentInfoMap = new Map();
+        for (const {headwordIndex, dictionary, pitches} of pronunciations) {
+            const {term, reading} = headwords[headwordIndex];
+            let dictionaryPitchAccentInfoList = pitchAccentInfoMap.get(dictionary);
+            if (typeof dictionaryPitchAccentInfoList === 'undefined') {
+                dictionaryPitchAccentInfoList = [];
+                pitchAccentInfoMap.set(dictionary, dictionaryPitchAccentInfoList);
+            }
+            for (const {position, nasalPositions, devoicePositions, tags} of pitches) {
+                let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, nasalPositions, devoicePositions, tags, dictionaryPitchAccentInfoList);
+                if (pitchAccentInfo === null) {
+                    pitchAccentInfo = {
+                        terms: new Set(),
+                        reading,
+                        position,
+                        nasalPositions,
+                        devoicePositions,
+                        tags,
+                        exclusiveTerms: [],
+                        exclusiveReadings: []
+                    };
+                    dictionaryPitchAccentInfoList.push(pitchAccentInfo);
+                }
+                pitchAccentInfo.terms.add(term);
+            }
+        }
+
+        const multipleReadings = (allReadings.size > 1);
+        for (const dictionaryPitchAccentInfoList of pitchAccentInfoMap.values()) {
+            for (const pitchAccentInfo of dictionaryPitchAccentInfoList) {
+                const {terms, reading, exclusiveTerms, exclusiveReadings} = pitchAccentInfo;
+                if (!this._areSetsEqual(terms, allTerms)) {
+                    exclusiveTerms.push(...this._getSetIntersection(terms, allTerms));
+                }
+                if (multipleReadings) {
+                    exclusiveReadings.push(reading);
+                }
+                pitchAccentInfo.terms = [...terms];
+            }
+        }
+
+        const results2 = [];
+        for (const [dictionary, pitches] of pitchAccentInfoMap.entries()) {
+            results2.push({dictionary, pitches});
+        }
+        return results2;
+    }
+
+    static getTermFrequency(termTags) {
+        let totalScore = 0;
+        for (const {score} of termTags) {
+            totalScore += score;
+        }
+        if (totalScore > 0) {
+            return 'popular';
+        } else if (totalScore < 0) {
+            return 'rare';
+        } else {
+            return 'normal';
+        }
+    }
+
+    static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) {
+        if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; }
+
+        const terms = new Set();
+        const readings = new Set();
+        for (const headwordIndex of headwordIndices) {
+            const {term, reading} = headwords[headwordIndex];
+            terms.add(term);
+            readings.add(reading);
+        }
+
+        const disambiguations = [];
+        const addTerms = !this._areSetsEqual(terms, allTermsSet);
+        const addReadings = !this._areSetsEqual(readings, allReadingsSet);
+        if (addTerms) {
+            disambiguations.push(...this._getSetIntersection(terms, allTermsSet));
+        }
+        if (addReadings) {
+            if (addTerms) {
+                for (const term of terms) {
+                    readings.delete(term);
+                }
+            }
+            disambiguations.push(...this._getSetIntersection(readings, allReadingsSet));
+        }
+        return disambiguations;
+    }
+
+    static isNonNounVerbOrAdjective(wordClasses) {
+        let isVerbOrAdjective = false;
+        let isSuruVerb = false;
+        let isNoun = false;
+        for (const wordClass of wordClasses) {
+            switch (wordClass) {
+                case 'v1':
+                case 'v5':
+                case 'vk':
+                case 'vz':
+                case 'adj-i':
+                    isVerbOrAdjective = true;
+                    break;
+                case 'vs':
+                    isVerbOrAdjective = true;
+                    isSuruVerb = true;
+                    break;
+                case 'n':
+                    isNoun = true;
+                    break;
+            }
+        }
+        return isVerbOrAdjective && !(isSuruVerb && isNoun);
+    }
+
+    // Private
+
+    static _createFrequencyGroupsFromMap(map) {
+        const results = [];
+        for (const [dictionary, map2] of map.entries()) {
+            const frequencies = [];
+            for (const frequencyData of map2.values()) {
+                frequencyData.values = [...frequencyData.values];
+                frequencies.push(frequencyData);
+            }
+            results.push({dictionary, frequencies});
+        }
+        return results;
+    }
+
+    static _findExistingPitchAccentInfo(reading, position, nasalPositions, devoicePositions, tags, pitchAccentInfoList) {
+        for (const pitchInfo of pitchAccentInfoList) {
+            if (
+                pitchInfo.reading === reading &&
+                pitchInfo.position === position &&
+                this._areArraysEqual(pitchInfo.nasalPositions, nasalPositions) &&
+                this._areArraysEqual(pitchInfo.devoicePositions, devoicePositions) &&
+                this._areTagListsEqual(pitchInfo.tags, tags)
+            ) {
+                return pitchInfo;
+            }
+        }
+        return null;
+    }
+
+    static _areArraysEqual(array1, array2) {
+        const ii = array1.length;
+        if (ii !== array2.length) { return false; }
+        for (let i = 0; i < ii; ++i) {
+            if (array1[i] !== array2[i]) { return false; }
+        }
+        return true;
+    }
+
+    static _areTagListsEqual(tagList1, tagList2) {
+        const ii = tagList1.length;
+        if (tagList2.length !== ii) { return false; }
+
+        for (let i = 0; i < ii; ++i) {
+            const tag1 = tagList1[i];
+            const tag2 = tagList2[i];
+            if (tag1.name !== tag2.name || tag1.dictionary !== tag2.dictionary) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    static _areSetsEqual(set1, set2) {
+        if (set1.size !== set2.size) {
+            return false;
+        }
+
+        for (const value of set1) {
+            if (!set2.has(value)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    static _getSetIntersection(set1, set2) {
+        const result = [];
+        for (const value of set1) {
+            if (set2.has(value)) {
+                result.push(value);
+            }
+        }
+        return result;
+    }
+
+    static _createMapKey(array) {
+        return JSON.stringify(array);
+    }
+}