summary refs log tree commit diff
path: root/ext/js/language
diff options
context:
space:
mode:
Diffstat (limited to 'ext/js/language')
-rw-r--r-- ext/js/language/deinflector.js 96
-rw-r--r-- ext/js/language/dictionary-database.js 484
-rw-r--r-- ext/js/language/dictionary-importer.js 407
-rw-r--r-- ext/js/language/translator.js 1397
4 files changed, 2384 insertions, 0 deletions
diff --git a/ext/js/language/deinflector.js b/ext/js/language/deinflector.js
new file mode 100644
index 00000000..8fee3f01
--- /dev/null
+++ b/ext/js/language/deinflector.js
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2016-2021 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * Produces candidate base forms of a term by repeatedly undoing inflection rules.
+ */
+class Deinflector {
+    /**
+     * @param reasons Raw deinflection data: an object mapping each reason name to a list of
+     *   {kanaIn, kanaOut, rulesIn, rulesOut} variants.
+     */
+    constructor(reasons) {
+        this.reasons = Deinflector.normalizeReasons(reasons);
+    }
+
+    /**
+     * Expands a term into every form reachable by applying the known variants.
+     * @param source The text to deinflect; it is also the first candidate term.
+     * @param rawSource Copied unchanged onto every returned candidate.
+     * @returns An array of candidates; the first one is always the unmodified source.
+     */
+    deinflect(source, rawSource) {
+        const candidates = [{
+            source,
+            rawSource,
+            term: source,
+            rules: 0,
+            reasons: [],
+            databaseDefinitions: []
+        }];
+
+        // The list grows while it is being walked, so each derived candidate is itself expanded.
+        let position = 0;
+        while (position < candidates.length) {
+            const {rules, term, reasons} = candidates[position];
+            ++position;
+
+            for (const [reason, variants] of this.reasons) {
+                for (const [kanaIn, kanaOut, rulesIn, rulesOut] of variants) {
+                    // A candidate without rule flags (the original source) accepts any variant.
+                    const rulesCompatible = (rules === 0 || (rules & rulesIn) !== 0);
+                    if (!rulesCompatible || !term.endsWith(kanaIn)) { continue; }
+
+                    // Skip variants that would leave an empty term.
+                    const stemLength = term.length - kanaIn.length;
+                    if (stemLength + kanaOut.length <= 0) { continue; }
+
+                    candidates.push({
+                        source,
+                        rawSource,
+                        term: term.substring(0, stemLength) + kanaOut,
+                        rules: rulesOut,
+                        reasons: [reason, ...reasons],
+                        databaseDefinitions: []
+                    });
+                }
+            }
+        }
+        return candidates;
+    }
+
+    /**
+     * Converts the raw reasons object into a list of [reason, variants] pairs, where each
+     * variant is [kanaIn, kanaOut, rulesInFlags, rulesOutFlags].
+     */
+    static normalizeReasons(reasons) {
+        return Object.entries(reasons).map(([reason, reasonInfo]) => {
+            const variants = Array.from(reasonInfo, ({kanaIn, kanaOut, rulesIn, rulesOut}) => [
+                kanaIn,
+                kanaOut,
+                Deinflector.rulesToRuleFlags(rulesIn),
+                Deinflector.rulesToRuleFlags(rulesOut)
+            ]);
+            return [reason, variants];
+        });
+    }
+
+    /**
+     * Combines rule names into a single bit mask using Deinflector.ruleTypes.
+     * Names that are not present in the table contribute nothing.
+     */
+    static rulesToRuleFlags(rules) {
+        const {ruleTypes} = Deinflector;
+        let flags = 0;
+        for (const rule of rules) {
+            const bits = ruleTypes.get(rule);
+            if (typeof bits !== 'undefined') {
+                flags |= bits;
+            }
+        }
+        return flags;
+    }
+}
+
+// Maps each rule name to its own bit so that a set of rules can be stored as one integer mask
+// (see Deinflector.rulesToRuleFlags, which ORs these values together).
+Deinflector.ruleTypes = new Map([
+ ['v1', 0b00000001], // Verb ichidan
+ ['v5', 0b00000010], // Verb godan
+ ['vs', 0b00000100], // Verb suru
+ ['vk', 0b00001000], // Verb kuru
+ ['vz', 0b00010000], // Verb zuru
+ ['adj-i', 0b00100000], // Adjective i
+ ['iru', 0b01000000] // Intermediate -iru endings for progressive or perfect tense
+]);
diff --git a/ext/js/language/dictionary-database.js b/ext/js/language/dictionary-database.js
new file mode 100644
index 00000000..b363ed25
--- /dev/null
+++ b/ext/js/language/dictionary-database.js
@@ -0,0 +1,484 @@
+/*
+ * Copyright (C) 2016-2021 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/* global
+ * Database
+ */
+
+class DictionaryDatabase {
+ constructor() {
+ // Database instance through which every method of this class accesses storage.
+ this._db = new Database();
+ // Name of the database opened by prepare() and deleted by purge().
+ this._dbName = 'dict';
+ // NOTE(review): never read in the visible part of this class - confirm whether it is still needed.
+ this._schemas = new Map();
+ }
+
+ // Public
+
+ /**
+ * Opens the database named this._dbName, passing 60 and a list of per-version
+ * object store definitions to this._db.open().
+ * NOTE(review): 60 is presumably the target schema version, matching the last entry below - confirm.
+ * Each entry lists the object stores (primary key and indices) declared for that version;
+ * a store that appears again in a later entry is declared with an extended index list.
+ */
+ async prepare() {
+ await this._db.open(
+ this._dbName,
+ 60,
+ [
+ {
+ version: 20,
+ stores: {
+ terms: {
+ primaryKey: {keyPath: 'id', autoIncrement: true},
+ indices: ['dictionary', 'expression', 'reading']
+ },
+ kanji: {
+ primaryKey: {autoIncrement: true},
+ indices: ['dictionary', 'character']
+ },
+ tagMeta: {
+ primaryKey: {autoIncrement: true},
+ indices: ['dictionary']
+ },
+ dictionaries: {
+ primaryKey: {autoIncrement: true},
+ indices: ['title', 'version']
+ }
+ }
+ },
+ {
+ version: 30,
+ stores: {
+ termMeta: {
+ primaryKey: {autoIncrement: true},
+ indices: ['dictionary', 'expression']
+ },
+ kanjiMeta: {
+ primaryKey: {autoIncrement: true},
+ indices: ['dictionary', 'character']
+ },
+ // tagMeta gains the 'name' index, which findTagForTitle() queries.
+ tagMeta: {
+ primaryKey: {autoIncrement: true},
+ indices: ['dictionary', 'name']
+ }
+ }
+ },
+ {
+ version: 40,
+ stores: {
+ // terms gains the 'sequence' index, which findTermsBySequenceBulk() queries.
+ terms: {
+ primaryKey: {keyPath: 'id', autoIncrement: true},
+ indices: ['dictionary', 'expression', 'reading', 'sequence']
+ }
+ }
+ },
+ {
+ version: 50,
+ stores: {
+ // terms gains the reversed-text indices that findTermsBulk() uses for 'prefix' wildcards.
+ terms: {
+ primaryKey: {keyPath: 'id', autoIncrement: true},
+ indices: ['dictionary', 'expression', 'reading', 'sequence', 'expressionReverse', 'readingReverse']
+ }
+ }
+ },
+ {
+ version: 60,
+ stores: {
+ media: {
+ primaryKey: {keyPath: 'id', autoIncrement: true},
+ indices: ['dictionary', 'path']
+ }
+ }
+ }
+ ]
+ );
+ }
+
+ /**
+ * Calls close() on the underlying Database instance.
+ * Declared async, so callers always receive a Promise.
+ */
+ async close() {
+ this._db.close();
+ }
+
+ /**
+ * Reports whether the underlying database is open.
+ * @returns The value returned by this._db.isOpen().
+ */
+ isPrepared() {
+ return this._db.isOpen();
+ }
+
+ /**
+ * Deletes the whole database and then opens it again through prepare().
+ * @returns true if the database was deleted, false if the deletion failed. A deletion
+ * failure is reported through yomichan.logError instead of being thrown.
+ * @throws Error if the database is in the middle of being opened. Errors raised by
+ * prepare() are not caught here.
+ */
+ async purge() {
+ if (this._db.isOpening()) {
+ throw new Error('Cannot purge database while opening');
+ }
+ if (this._db.isOpen()) {
+ this._db.close();
+ }
+ let result = false;
+ try {
+ await Database.deleteDatabase(this._dbName);
+ result = true;
+ } catch (e) {
+ yomichan.logError(e);
+ }
+ // The database is reopened even when the deletion failed.
+ await this.prepare();
+ return result;
+ }
+
+ /**
+ * Removes every record belonging to one dictionary from all object stores.
+ * @param dictionaryName Title of the dictionary to remove.
+ * @param progressSettings Object whose `rate` is the number of processed records between
+ *   progress notifications.
+ * @param onProgress Called with the shared progress object ({count, processed, storeCount,
+ *   storesProcesed}); the same object instance is passed on every call.
+ */
+ async deleteDictionary(dictionaryName, progressSettings, onProgress) {
+     // Pairs of [object store name, index that holds the dictionary title].
+     const targets = [
+         ['dictionaries', 'title'],
+         ['kanji', 'dictionary'],
+         ['kanjiMeta', 'dictionary'],
+         ['terms', 'dictionary'],
+         ['termMeta', 'dictionary'],
+         ['tagMeta', 'dictionary'],
+         ['media', 'dictionary']
+     ];
+
+     const {rate} = progressSettings;
+     const progressData = {
+         count: 0,
+         processed: 0,
+         storeCount: targets.length,
+         storesProcesed: 0
+     };
+
+     // Invoked once per store with the keys about to be deleted; the keys pass through unchanged.
+     const onKeysCollected = (keys) => {
+         progressData.storesProcesed += 1;
+         progressData.count += keys.length;
+         onProgress(progressData);
+         return keys;
+     };
+
+     // Invoked per processed record; notifies every `rate` records and once everything known is done.
+     const onRecordProcessed = () => {
+         progressData.processed += 1;
+         const {processed, count} = progressData;
+         if (processed === count || (processed % rate) === 0) {
+             onProgress(progressData);
+         }
+     };
+
+     const deletions = targets.map(([objectStoreName, indexName]) => (
+         this._db.bulkDelete(
+             objectStoreName,
+             indexName,
+             IDBKeyRange.only(dictionaryName),
+             onKeysCollected,
+             onRecordProcessed
+         )
+     ));
+     await Promise.all(deletions);
+ }
+
+ /**
+ * Looks up every term of termList in both the expression and the reading index.
+ * @param termList Array of strings to look up.
+ * @param dictionaries Collection queried with .has(dictionaryName); rows from other dictionaries are dropped.
+ * @param wildcard Falsy for exact matches. Any truthy value switches to a range query, and
+ * the value 'prefix' additionally reverses each term and searches the reversed-text indices.
+ * @returns A Promise resolved with the objects built by _createTerm; each row id appears at most once.
+ */
+ findTermsBulk(termList, dictionaries, wildcard) {
+ return new Promise((resolve, reject) => {
+ const results = [];
+ const count = termList.length;
+ if (count === 0) {
+ resolve(results);
+ return;
+ }
+
+ // Row ids already emitted; the same row can be returned by both index lookups.
+ const visited = new Set();
+ const useWildcard = !!wildcard;
+ const prefixWildcard = wildcard === 'prefix';
+
+ const transaction = this._db.transaction(['terms'], 'readonly');
+ const terms = transaction.objectStore('terms');
+ const index1 = terms.index(prefixWildcard ? 'expressionReverse' : 'expression');
+ const index2 = terms.index(prefixWildcard ? 'readingReverse' : 'reading');
+
+ // Two lookups are issued per term, so completion is counted against twice the term count.
+ const count2 = count * 2;
+ let completeCount = 0;
+ for (let i = 0; i < count; ++i) {
+ const inputIndex = i;
+ const term = prefixWildcard ? stringReverse(termList[i]) : termList[i];
+ // The wildcard query covers the inclusive key range from term to term followed by '\uffff'.
+ const query = useWildcard ? IDBKeyRange.bound(term, `${term}\uffff`, false, false) : IDBKeyRange.only(term);
+
+ const onGetAll = (rows) => {
+ for (const row of rows) {
+ if (dictionaries.has(row.dictionary) && !visited.has(row.id)) {
+ visited.add(row.id);
+ results.push(this._createTerm(row, inputIndex));
+ }
+ }
+ if (++completeCount >= count2) {
+ resolve(results);
+ }
+ };
+
+ // reject is handed to getAll as its last argument - presumably the error callback; confirm against Database.getAll.
+ this._db.getAll(index1, query, onGetAll, reject);
+ this._db.getAll(index2, query, onGetAll, reject);
+ }
+ });
+ }
+
+ /**
+ * Looks up terms by exact expression and keeps only rows whose reading also matches.
+ * @param termList Array of expressions to look up.
+ * @param readingList Array of readings; readingList[i] is compared against rows found for termList[i].
+ * @param dictionaries Collection queried with .has(dictionaryName); rows from other dictionaries are dropped.
+ * @returns A Promise resolved with the objects built by _createTerm.
+ */
+ findTermsExactBulk(termList, readingList, dictionaries) {
+ return new Promise((resolve, reject) => {
+ const results = [];
+ const count = termList.length;
+ if (count === 0) {
+ resolve(results);
+ return;
+ }
+
+ const transaction = this._db.transaction(['terms'], 'readonly');
+ const terms = transaction.objectStore('terms');
+ const index = terms.index('expression');
+
+ // Number of lookups that have delivered their rows; the Promise resolves after the last one.
+ let completeCount = 0;
+ for (let i = 0; i < count; ++i) {
+ const inputIndex = i;
+ const reading = readingList[i];
+ const query = IDBKeyRange.only(termList[i]);
+
+ const onGetAll = (rows) => {
+ for (const row of rows) {
+ if (row.reading === reading && dictionaries.has(row.dictionary)) {
+ results.push(this._createTerm(row, inputIndex));
+ }
+ }
+ if (++completeCount >= count) {
+ resolve(results);
+ }
+ };
+
+ this._db.getAll(index, query, onGetAll, reject);
+ }
+ });
+ }
+
+ /**
+ * Looks up terms through the 'sequence' index, restricted to a single dictionary.
+ * @param sequenceList Array of sequence values to look up.
+ * @param mainDictionary Dictionary title; rows belonging to any other dictionary are dropped.
+ * @returns A Promise resolved with the objects built by _createTerm.
+ */
+ findTermsBySequenceBulk(sequenceList, mainDictionary) {
+ return new Promise((resolve, reject) => {
+ const results = [];
+ const count = sequenceList.length;
+ if (count === 0) {
+ resolve(results);
+ return;
+ }
+
+ const transaction = this._db.transaction(['terms'], 'readonly');
+ const terms = transaction.objectStore('terms');
+ const index = terms.index('sequence');
+
+ // Number of lookups that have delivered their rows; the Promise resolves after the last one.
+ let completeCount = 0;
+ for (let i = 0; i < count; ++i) {
+ const inputIndex = i;
+ const query = IDBKeyRange.only(sequenceList[i]);
+
+ const onGetAll = (rows) => {
+ for (const row of rows) {
+ if (row.dictionary === mainDictionary) {
+ results.push(this._createTerm(row, inputIndex));
+ }
+ }
+ if (++completeCount >= count) {
+ resolve(results);
+ }
+ };
+
+ this._db.getAll(index, query, onGetAll, reject);
+ }
+ });
+ }
+
+ /**
+ * Finds term metadata rows whose expression matches one of the given terms.
+ * @returns A Promise resolved with the objects built by _createTermMeta.
+ */
+ findTermMetaBulk(termList, dictionaries) {
+     const createResult = (row, index) => this._createTermMeta(row, index);
+     return this._findGenericBulk('termMeta', 'expression', termList, dictionaries, createResult);
+ }
+
+ /**
+ * Finds kanji rows whose character matches one of the given characters.
+ * @returns A Promise resolved with the objects built by _createKanji.
+ */
+ findKanjiBulk(kanjiList, dictionaries) {
+     const createResult = (row, index) => this._createKanji(row, index);
+     return this._findGenericBulk('kanji', 'character', kanjiList, dictionaries, createResult);
+ }
+
+ /**
+ * Finds kanji metadata rows whose character matches one of the given characters.
+ * @returns A Promise resolved with the objects built by _createKanjiMeta.
+ */
+ findKanjiMetaBulk(kanjiList, dictionaries) {
+     const createResult = (row, index) => this._createKanjiMeta(row, index);
+     return this._findGenericBulk('kanjiMeta', 'character', kanjiList, dictionaries, createResult);
+ }
+
+ /**
+ * Searches the tagMeta store, through its 'name' index, for a tag of the given dictionary.
+ * @param name Tag name to look up.
+ * @param title Title of the dictionary the tag must belong to.
+ * @returns Whatever this._db.find() returns for the query.
+ */
+ findTagForTitle(name, title) {
+     const belongsToDictionary = (row) => (row.dictionary === title);
+     return this._db.find('tagMeta', 'name', IDBKeyRange.only(name), belongsToDictionary, null);
+ }
+
+ /**
+ * Fetches media records by path.
+ * @param targets Array of {path, dictionaryName} objects.
+ * @returns A Promise resolved with an array of the same length as targets; entry i is the
+ * object built by _createMedia for targets[i], or null when no row matched.
+ */
+ getMedia(targets) {
+ return new Promise((resolve, reject) => {
+ const count = targets.length;
+ const results = new Array(count).fill(null);
+ if (count === 0) {
+ resolve(results);
+ return;
+ }
+
+ let completeCount = 0;
+ const transaction = this._db.transaction(['media'], 'readonly');
+ const objectStore = transaction.objectStore('media');
+ const index = objectStore.index('path');
+
+ for (let i = 0; i < count; ++i) {
+ const inputIndex = i;
+ const {path, dictionaryName} = targets[i];
+ const query = IDBKeyRange.only(path);
+
+ const onGetAll = (rows) => {
+ // The same path may exist in several dictionaries; when several rows match the
+ // requested dictionary, the last one wins.
+ for (const row of rows) {
+ if (row.dictionary !== dictionaryName) { continue; }
+ results[inputIndex] = this._createMedia(row, inputIndex);
+ }
+ if (++completeCount >= count) {
+ resolve(results);
+ }
+ };
+
+ this._db.getAll(index, query, onGetAll, reject);
+ }
+ });
+ }
+
+ /**
+ * Reads the 'dictionaries' object store without a key filter (the query argument is null).
+ * @returns A Promise resolved with whatever this._db.getAll() passes to its completion callback.
+ */
+ getDictionaryInfo() {
+ return new Promise((resolve, reject) => {
+ const transaction = this._db.transaction(['dictionaries'], 'readonly');
+ const objectStore = transaction.objectStore('dictionaries');
+ this._db.getAll(objectStore, null, resolve, reject);
+ });
+ }
+
+ /**
+ * Counts the records of each object store, per dictionary and optionally overall.
+ * @param dictionaryNames Iterable of dictionary titles to count records for.
+ * @param getTotal When truthy, an unfiltered count of every store is requested as well.
+ * @returns A Promise resolved with {total, counts}: counts holds one object per dictionary
+ * (in input order) keyed by object store name, and total is the same kind of object for
+ * the whole database, or null when getTotal is falsy.
+ */
+ getDictionaryCounts(dictionaryNames, getTotal) {
+ return new Promise((resolve, reject) => {
+ const targets = [
+ ['kanji', 'dictionary'],
+ ['kanjiMeta', 'dictionary'],
+ ['terms', 'dictionary'],
+ ['termMeta', 'dictionary'],
+ ['tagMeta', 'dictionary'],
+ ['media', 'dictionary']
+ ];
+ const objectStoreNames = targets.map(([objectStoreName]) => objectStoreName);
+ const transaction = this._db.transaction(objectStoreNames, 'readonly');
+ const databaseTargets = targets.map(([objectStoreName, indexName]) => {
+ const objectStore = transaction.objectStore(objectStoreName);
+ const index = objectStore.index(indexName);
+ return {objectStore, index};
+ });
+
+ // Count requests are laid out in groups of targets.length: the optional totals group
+ // first, then one group per dictionary.
+ const countTargets = [];
+ if (getTotal) {
+ for (const {objectStore} of databaseTargets) {
+ countTargets.push([objectStore, null]);
+ }
+ }
+ for (const dictionaryName of dictionaryNames) {
+ const query = IDBKeyRange.only(dictionaryName);
+ for (const {index} of databaseTargets) {
+ countTargets.push([index, query]);
+ }
+ }
+
+ const onCountComplete = (results) => {
+ // Regroup the flat result list; this assumes results follow the order of countTargets.
+ const resultCount = results.length;
+ const targetCount = targets.length;
+ const counts = [];
+ for (let i = 0; i < resultCount; i += targetCount) {
+ const countGroup = {};
+ for (let j = 0; j < targetCount; ++j) {
+ countGroup[targets[j][0]] = results[i + j];
+ }
+ counts.push(countGroup);
+ }
+ // When totals were requested they form the first group, so it is split off here.
+ const total = getTotal ? counts.shift() : null;
+ resolve({total, counts});
+ };
+
+ this._db.bulkCount(countTargets, onCountComplete, reject);
+ });
+ }
+
+ /**
+ * Checks whether a dictionary with the given title is stored in the database.
+ * @returns A Promise resolved with true unless the lookup yields undefined.
+ */
+ async dictionaryExists(title) {
+     const match = await this._db.find('dictionaries', 'title', IDBKeyRange.only(title));
+     return match !== void 0;
+ }
+
+ /**
+ * Forwards to this._db.bulkAdd() with the same arguments.
+ * NOTE(review): start and count presumably select the slice of items to add - confirm against Database.bulkAdd.
+ * @returns The value returned by this._db.bulkAdd().
+ */
+ bulkAdd(objectStoreName, items, start, count) {
+ return this._db.bulkAdd(objectStoreName, items, start, count);
+ }
+
+ // Private
+
+ /**
+ * Shared implementation of the exact-match bulk lookups.
+ * @param objectStoreName Object store to read from.
+ * @param indexName Index of that store to query.
+ * @param indexValueList Values to look up; one exact-match query is issued per value.
+ * @param dictionaries Collection queried with .has(dictionaryName); other rows are dropped.
+ * @param createResult Called as createResult(row, inputIndex) to build each result.
+ * @returns A Promise resolved with all created results once every query has completed.
+ */
+ async _findGenericBulk(objectStoreName, indexName, indexValueList, dictionaries, createResult) {
+     return new Promise((resolve, reject) => {
+         const collected = [];
+         const queryCount = indexValueList.length;
+         if (queryCount === 0) {
+             resolve(collected);
+             return;
+         }
+
+         const index = this._db
+             .transaction([objectStoreName], 'readonly')
+             .objectStore(objectStoreName)
+             .index(indexName);
+
+         let finishedQueries = 0;
+         // Builds the completion callback for the query issued for position inputIndex.
+         const createRowHandler = (inputIndex) => (rows) => {
+             for (const row of rows) {
+                 if (!dictionaries.has(row.dictionary)) { continue; }
+                 collected.push(createResult(row, inputIndex));
+             }
+             finishedQueries += 1;
+             if (finishedQueries >= queryCount) {
+                 resolve(collected);
+             }
+         };
+
+         for (let inputIndex = 0; inputIndex < queryCount; ++inputIndex) {
+             const query = IDBKeyRange.only(indexValueList[inputIndex]);
+             this._db.getAll(index, query, createRowHandler(inputIndex), reject);
+         }
+     });
+ }
+
+ /**
+ * Converts a stored term row into the term object returned by the lookup methods.
+ * @param row Row read from the 'terms' object store.
+ * @param index Position of the originating query in the caller's input list.
+ */
+ _createTerm(row, index) {
+     const {expression, reading, glossary, score, dictionary, id, sequence} = row;
+     // Rows may carry their definition tags under `definitionTags` or `tags`.
+     const definitionTags = this._splitField(row.definitionTags || row.tags || '');
+     const termTags = this._splitField(row.termTags || '');
+     const rules = this._splitField(row.rules);
+     return {
+         index,
+         expression,
+         reading,
+         definitionTags,
+         termTags,
+         rules,
+         glossary,
+         score,
+         dictionary,
+         id,
+         // Rows without a sequence are reported with -1.
+         sequence: (typeof sequence === 'undefined' ? -1 : sequence)
+     };
+ }
+
+ /**
+ * Converts a stored kanji row into the kanji object returned by findKanjiBulk.
+ * @param row Row read from the 'kanji' object store.
+ * @param index Position of the originating query in the caller's input list.
+ */
+ _createKanji(row, index) {
+     const {character, meanings, stats, dictionary} = row;
+     const onyomi = this._splitField(row.onyomi);
+     const kunyomi = this._splitField(row.kunyomi);
+     const tags = this._splitField(row.tags);
+     // The stored `meanings` field is exposed under the name `glossary`.
+     return {index, character, onyomi, kunyomi, tags, glossary: meanings, stats, dictionary};
+ }
+
+ /**
+ * Picks the term metadata fields from a row and attaches the input index.
+ */
+ _createTermMeta(row, index) {
+     const {expression, mode, data, dictionary} = row;
+     return {expression, mode, data, dictionary, index};
+ }
+
+ /**
+ * Picks the kanji metadata fields from a row and attaches the input index.
+ */
+ _createKanjiMeta(row, index) {
+     const {character, mode, data, dictionary} = row;
+     return {character, mode, data, dictionary, index};
+ }
+
+ /**
+ * Returns a shallow copy of the media row with an added `index` property.
+ * The row itself is not modified.
+ */
+ _createMedia(row, index) {
+ return Object.assign({}, row, {index});
+ }
+
+ /**
+ * Splits a space-separated string into its parts; an empty string yields an empty array.
+ */
+ _splitField(field) {
+     if (field.length > 0) {
+         return field.split(' ');
+     }
+     return [];
+ }
+}
diff --git a/ext/js/language/dictionary-importer.js b/ext/js/language/dictionary-importer.js
new file mode 100644
index 00000000..4cb608db
--- /dev/null
+++ b/ext/js/language/dictionary-importer.js
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2020-2021 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/* global
+ * JSZip
+ * JsonSchemaValidator
+ * MediaUtility
+ */
+
+class DictionaryImporter {
+ constructor() {
+ // Cache used by _getSchema(): schema file name -> Promise returned by _fetchJsonAsset().
+ this._schemas = new Map();
+ // Used by _validateJsonSchema() to check archive contents.
+ this._jsonSchemaValidator = new JsonSchemaValidator();
+ // Used to derive an image's media type from its file name.
+ this._mediaUtility = new MediaUtility();
+ }
+
+ /**
+ * Imports a dictionary archive into the database.
+ * @param dictionaryDatabase Object providing isPrepared(), dictionaryExists() and bulkAdd().
+ * @param archiveSource Archive content; it is handed to JSZip.loadAsync().
+ * @param details Object whose `prefixWildcardsSupported` flag decides whether reversed
+ *   expression/reading fields are generated for the terms.
+ * @param onProgress Optional function called as onProgress(total, loadedCount) after each
+ *   batch of records has been processed.
+ * @returns An object {result, errors}: result is the stored dictionary summary and errors
+ *   lists the errors raised while adding batches of data.
+ * @throws Error if the database is missing or not prepared, if the archive has no valid
+ *   index, if any file fails schema validation, if the dictionary is already imported, or
+ *   if the dictionary summary cannot be stored.
+ */
+ async importDictionary(dictionaryDatabase, archiveSource, details, onProgress) {
+     if (!dictionaryDatabase) {
+         throw new Error('Invalid database');
+     }
+     if (!dictionaryDatabase.isPrepared()) {
+         throw new Error('Database is not ready');
+     }
+
+     const hasOnProgress = (typeof onProgress === 'function');
+
+     // Read archive
+     const archive = await JSZip.loadAsync(archiveSource);
+
+     // Read and validate index
+     const indexFileName = 'index.json';
+     const indexFile = archive.files[indexFileName];
+     if (!indexFile) {
+         throw new Error('No dictionary index found in archive');
+     }
+
+     const index = JSON.parse(await indexFile.async('string'));
+
+     const indexSchema = await this._getSchema('/data/schemas/dictionary-index-schema.json');
+     this._validateJsonSchema(index, indexSchema, indexFileName);
+
+     const dictionaryTitle = index.title;
+     const version = index.format || index.version;
+
+     if (!dictionaryTitle || !index.revision) {
+         throw new Error('Unrecognized dictionary format');
+     }
+
+     // Verify database is not already imported
+     if (await dictionaryDatabase.dictionaryExists(dictionaryTitle)) {
+         throw new Error('Dictionary is already imported');
+     }
+
+     // Data format converters: each turns one positional bank entry into a named record.
+     const convertTermBankEntry = (entry) => {
+         if (version === 1) {
+             const [expression, reading, definitionTags, rules, score, ...glossary] = entry;
+             return {expression, reading, definitionTags, rules, score, glossary};
+         } else {
+             const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry;
+             return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags};
+         }
+     };
+
+     const convertTermMetaBankEntry = (entry) => {
+         const [expression, mode, data] = entry;
+         return {expression, mode, data};
+     };
+
+     const convertKanjiBankEntry = (entry) => {
+         if (version === 1) {
+             const [character, onyomi, kunyomi, tags, ...meanings] = entry;
+             return {character, onyomi, kunyomi, tags, meanings};
+         } else {
+             const [character, onyomi, kunyomi, tags, meanings, stats] = entry;
+             return {character, onyomi, kunyomi, tags, meanings, stats};
+         }
+     };
+
+     const convertKanjiMetaBankEntry = (entry) => {
+         const [character, mode, data] = entry;
+         return {character, mode, data};
+     };
+
+     const convertTagBankEntry = (entry) => {
+         const [name, category, order, notes, score] = entry;
+         return {name, category, order, notes, score};
+     };
+
+     // Archive file reading: banks are numbered from 1 and read until the first missing file.
+     const readFileSequence = async (fileNameFormat, convertEntry, schema) => {
+         const results = [];
+         for (let i = 1; true; ++i) {
+             const fileName = fileNameFormat.replace(/\?/, `${i}`);
+             const file = archive.files[fileName];
+             if (!file) { break; }
+
+             const entries = JSON.parse(await file.async('string'));
+             this._validateJsonSchema(entries, schema, fileName);
+
+             for (let entry of entries) {
+                 entry = convertEntry(entry);
+                 entry.dictionary = dictionaryTitle;
+                 results.push(entry);
+             }
+         }
+         return results;
+     };
+
+     // Load schemas
+     const dataBankSchemaPaths = this._getDataBankSchemaPaths(version);
+     const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path)));
+
+     // Load data
+     const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]);
+     const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]);
+     const kanjiList = await readFileSequence('kanji_bank_?.json', convertKanjiBankEntry, dataBankSchemas[2]);
+     const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]);
+     const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]);
+
+     // Old tags: tag metadata may also be embedded directly in the index.
+     const indexTagMeta = index.tagMeta;
+     if (typeof indexTagMeta === 'object' && indexTagMeta !== null) {
+         for (const name of Object.keys(indexTagMeta)) {
+             const {category, order, notes, score} = indexTagMeta[name];
+             tagList.push({name, category, order, notes, score});
+         }
+     }
+
+     // Prefix wildcard support
+     const prefixWildcardsSupported = !!details.prefixWildcardsSupported;
+     if (prefixWildcardsSupported) {
+         for (const entry of termList) {
+             entry.expressionReverse = stringReverse(entry.expression);
+             entry.readingReverse = stringReverse(entry.reading);
+         }
+     }
+
+     // Extended data support: object glossary items are normalized in place.
+     const extendedDataContext = {
+         archive,
+         media: new Map()
+     };
+     for (const entry of termList) {
+         const glossaryList = entry.glossary;
+         for (let i = 0, ii = glossaryList.length; i < ii; ++i) {
+             const glossary = glossaryList[i];
+             if (typeof glossary !== 'object' || glossary === null) { continue; }
+             glossaryList[i] = await this._formatDictionaryTermGlossaryObject(glossary, extendedDataContext, entry);
+         }
+     }
+
+     const media = [...extendedDataContext.media.values()];
+
+     // Add dictionary
+     const summary = this._createSummary(dictionaryTitle, version, index, {prefixWildcardsSupported});
+
+     // Awaited so that a failure to store the summary rejects the import instead of
+     // surfacing as an unhandled rejection while the import reports success.
+     await dictionaryDatabase.bulkAdd('dictionaries', [summary], 0, 1);
+
+     // Add data
+     const errors = [];
+     // Media records are added below as well, so they are part of the reported total.
+     const total = (
+         termList.length +
+         termMetaList.length +
+         kanjiList.length +
+         kanjiMetaList.length +
+         tagList.length +
+         media.length
+     );
+     let loadedCount = 0;
+     const maxTransactionLength = 1000;
+
+     const bulkAdd = async (objectStoreName, entries) => {
+         const ii = entries.length;
+         for (let i = 0; i < ii; i += maxTransactionLength) {
+             const count = Math.min(maxTransactionLength, ii - i);
+
+             // A failed batch is recorded and the import continues with the next batch.
+             try {
+                 await dictionaryDatabase.bulkAdd(objectStoreName, entries, i, count);
+             } catch (e) {
+                 errors.push(e);
+             }
+
+             loadedCount += count;
+             if (hasOnProgress) {
+                 onProgress(total, loadedCount);
+             }
+         }
+     };
+
+     await bulkAdd('terms', termList);
+     await bulkAdd('termMeta', termMetaList);
+     await bulkAdd('kanji', kanjiList);
+     await bulkAdd('kanjiMeta', kanjiMetaList);
+     await bulkAdd('tagMeta', tagList);
+     await bulkAdd('media', media);
+
+     return {result: summary, errors};
+ }
+
+ /**
+ * Builds the summary record describing an imported dictionary.
+ * @param dictionaryTitle Title stored as `title`.
+ * @param version Format version stored as `version`.
+ * @param index Parsed index object; `revision` and `sequenced` are always copied, while
+ *   `author`, `url`, `description` and `attribution` are copied only when they are strings.
+ * @param details Additional properties merged into the summary last.
+ */
+ _createSummary(dictionaryTitle, version, index, details) {
+     const {revision, sequenced} = index;
+     const summary = {title: dictionaryTitle, revision, sequenced, version};
+
+     for (const key of ['author', 'url', 'description', 'attribution']) {
+         const value = index[key];
+         if (typeof value === 'string') {
+             summary[key] = value;
+         }
+     }
+
+     return Object.assign(summary, details);
+ }
+
+ /**
+ * Loads a JSON schema, reusing the in-flight or completed request for the same file.
+ * @param fileName Path of the schema asset.
+ * @returns A Promise resolved with the parsed schema.
+ */
+ async _getSchema(fileName) {
+     let schemaPromise = this._schemas.get(fileName);
+     if (typeof schemaPromise === 'undefined') {
+         schemaPromise = this._fetchJsonAsset(fileName);
+         this._schemas.set(fileName, schemaPromise);
+
+         // Do not keep a failed request in the cache; otherwise a single transient failure
+         // would make every later import fail with the same stale rejection.
+         schemaPromise.catch(() => {
+             if (this._schemas.get(fileName) === schemaPromise) {
+                 this._schemas.delete(fileName);
+             }
+         });
+     }
+     return schemaPromise;
+ }
+
+ /**
+ * Validates a value against a schema.
+ * @param value The parsed JSON value to check.
+ * @param schema The schema to validate against.
+ * @param fileName Name of the archive file the value came from; used in the error message.
+ * @throws The error produced by _formatSchemaError() when validation throws.
+ */
+ _validateJsonSchema(value, schema, fileName) {
+ try {
+ this._jsonSchemaValidator.validate(value, schema);
+ } catch (e) {
+ throw this._formatSchemaError(e, fileName);
+ }
+ }
+
+ /**
+ * Wraps a validation error in a new Error whose message names the file, the location of
+ * the offending value and the location in the schema.
+ * @param e The caught error; it must have an `info` object with `valuePath` and `schemaPath`.
+ * @param fileName Name of the file that was being validated.
+ * @returns The new Error, with the original error attached as its `data` property.
+ */
+ _formatSchemaError(e, fileName) {
+ const valuePathString = this._getSchemaErrorPathString(e.info.valuePath, 'dictionary');
+ const schemaPathString = this._getSchemaErrorPathString(e.info.schemaPath, 'schema');
+
+ const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`);
+ e2.data = e;
+
+ return e2;
+ }
+
+ /**
+ * Renders a path as a string such as `base.key[0].other`.
+ * @param infoList Iterable of arrays whose first element is a path part: strings are
+ *   appended as `.name` (without the dot when nothing precedes them), numbers as `[n]`,
+ *   and parts of any other type are ignored.
+ * @param base Text the path starts with.
+ */
+ _getSchemaErrorPathString(infoList, base='') {
+     let path = base;
+     for (const [segment] of infoList) {
+         if (typeof segment === 'number') {
+             path += `[${segment}]`;
+         } else if (typeof segment === 'string') {
+             if (path.length > 0) {
+                 path += '.';
+             }
+             path += segment;
+         }
+     }
+     return path;
+ }
+
+ /**
+ * Returns the schema paths for the data banks, in the order
+ * [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank].
+ * Only the term and kanji banks have a dedicated schema for version 1.
+ */
+ _getDataBankSchemaPaths(version) {
+     const isVersion1 = (version === 1);
+     return [
+         isVersion1 ?
+             '/data/schemas/dictionary-term-bank-v1-schema.json' :
+             '/data/schemas/dictionary-term-bank-v3-schema.json',
+         '/data/schemas/dictionary-term-meta-bank-v3-schema.json',
+         isVersion1 ?
+             '/data/schemas/dictionary-kanji-bank-v1-schema.json' :
+             '/data/schemas/dictionary-kanji-bank-v3-schema.json',
+         '/data/schemas/dictionary-kanji-meta-bank-v3-schema.json',
+         '/data/schemas/dictionary-tag-bank-v3-schema.json'
+     ];
+ }
+
+ /**
+ * Normalizes one object-valued glossary item according to its `type`.
+ * @param data The glossary object.
+ * @param context Shared import context, passed on to the image formatter.
+ * @param entry The term entry that owns the glossary item.
+ * @returns The plain text for 'text' items, or the formatted image data for 'image' items.
+ * @throws Error for any other type.
+ */
+ async _formatDictionaryTermGlossaryObject(data, context, entry) {
+ switch (data.type) {
+ case 'text':
+ return data.text;
+ case 'image':
+ return await this._formatDictionaryTermGlossaryImage(data, context, entry);
+ default:
+ throw new Error(`Unhandled data type: ${data.type}`);
+ }
+ }
+
+ /**
+ * Normalizes an image glossary item and registers the image's media record.
+ * The image file is read and decoded only the first time its path is seen; later
+ * references to the same path reuse the cached record but are normalized the same way.
+ * @param data The image glossary object ({path, width, height, title, description, pixelated}).
+ * @param context Import context holding the `archive` and the `media` map (path -> media record).
+ * @param entry The term entry that owns the glossary item.
+ * @returns The normalized image data: the actual image size as width/height, and the
+ *   requested size as preferredWidth/preferredHeight when given.
+ * @throws Error if the file is missing, its media type is unknown, or the image cannot be loaded.
+ */
+ async _formatDictionaryTermGlossaryImage(data, context, entry) {
+     const {path, width: preferredWidth, height: preferredHeight, title, description, pixelated} = data;
+
+     let mediaData = context.media.get(path);
+     if (typeof mediaData === 'undefined') {
+         let errorSource = entry.expression;
+         if (entry.reading.length > 0) {
+             errorSource += ` (${entry.reading})`;
+         }
+
+         const file = context.archive.file(path);
+         if (file === null) {
+             throw new Error(`Could not find image at path ${JSON.stringify(path)} for ${errorSource}`);
+         }
+
+         const content = await file.async('base64');
+         const mediaType = this._mediaUtility.getImageMediaTypeFromFileName(path);
+         if (mediaType === null) {
+             throw new Error(`Could not determine media type for image at path ${JSON.stringify(path)} for ${errorSource}`);
+         }
+
+         let image;
+         try {
+             image = await this._loadImageBase64(mediaType, content);
+         } catch (e) {
+             const e2 = new Error(`Could not load image at path ${JSON.stringify(path)} for ${errorSource}`);
+             // Keep the underlying failure available, as _formatSchemaError does.
+             e2.data = e;
+             throw e2;
+         }
+
+         // Create image data
+         mediaData = {
+             dictionary: entry.dictionary,
+             path,
+             mediaType,
+             width: image.naturalWidth,
+             height: image.naturalHeight,
+             content
+         };
+         context.media.set(path, mediaData);
+     }
+
+     // Create new data
+     const newData = {
+         type: 'image',
+         path,
+         width: mediaData.width,
+         height: mediaData.height
+     };
+     if (typeof preferredWidth === 'number') { newData.preferredWidth = preferredWidth; }
+     if (typeof preferredHeight === 'number') { newData.preferredHeight = preferredHeight; }
+     if (typeof title === 'string') { newData.title = title; }
+     if (typeof description === 'string') { newData.description = description; }
+     if (typeof pixelated === 'boolean') { newData.pixelated = pixelated; }
+
+     return newData;
+ }
+
+ /**
+ * Fetches a JSON file after resolving its path with chrome.runtime.getURL().
+ * @param url Path of the asset.
+ * @returns The parsed JSON content.
+ * @throws Error if the response is not ok.
+ */
+ async _fetchJsonAsset(url) {
+ const response = await fetch(chrome.runtime.getURL(url), {
+ method: 'GET',
+ mode: 'no-cors',
+ cache: 'default',
+ credentials: 'omit',
+ redirect: 'follow',
+ referrerPolicy: 'no-referrer'
+ });
+ if (!response.ok) {
+ throw new Error(`Failed to fetch ${url}: ${response.status}`);
+ }
+ return await response.json();
+ }
+
+ /**
+ * Decodes base64 image content by loading it into an image element through a data URL.
+ * @param mediaType The media type for the image content.
+ * @param content The binary content for the image, encoded in base64.
+ * @returns A Promise which resolves with the loaded Image instance, or rejects with an
+ *   Error if the image fails to load.
+ */
+ _loadImageBase64(mediaType, content) {
+     return new Promise((resolve, reject) => {
+         const image = new Image();
+         const listeners = new EventListenerCollection();
+
+         // Whichever event fires first detaches both listeners before settling the Promise.
+         const onLoad = () => {
+             listeners.removeAllEventListeners();
+             resolve(image);
+         };
+         const onError = () => {
+             listeners.removeAllEventListeners();
+             reject(new Error('Image failed to load'));
+         };
+
+         listeners.addEventListener(image, 'load', onLoad, false);
+         listeners.addEventListener(image, 'error', onError, false);
+         image.src = `data:${mediaType};base64,${content}`;
+     });
+ }
+}
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
new file mode 100644
index 00000000..729c8294
--- /dev/null
+++ b/ext/js/language/translator.js
@@ -0,0 +1,1397 @@
+/*
+ * Copyright (C) 2016-2021 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/* global
+ * Deinflector
+ * TextSourceMap
+ */
+
+/**
+ * Class which finds term and kanji definitions for text.
+ */
+class Translator {
+ /**
+ * Creates a new Translator instance.
+ * The single argument is an object with the following properties:
+ * @param japaneseUtil The Japanese text utility object. It is stored and used by this class
+ * for text conversions and for distributing furigana.
+ * @param database An instance of DictionaryDatabase.
+ */
+ constructor({japaneseUtil, database}) {
+ this._japaneseUtil = japaneseUtil;
+ this._database = database;
+ // Assigned by prepare(); null until then.
+ this._deinflector = null;
+ // Map of dictionary name => Map of tag base name => tag metadata (see _getTagMetaList).
+ this._tagCache = new Map();
+ this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
+ }
+
+ /**
+ * Initializes the instance for use. The public API should not be used until
+ * this function has been called.
+ * @param deinflectionReasons The raw deinflection reasons data which is passed to the
+ * Deinflector constructor.
+ */
+ prepare(deinflectionReasons) {
+ this._deinflector = new Deinflector(deinflectionReasons);
+ }
+
+ /**
+ * Clears the database tag cache, i.e. the per-dictionary tag metadata which
+ * _getTagMetaList() stores after querying the database.
+ * This should be executed if the database is changed.
+ */
+ clearDatabaseCaches() {
+ this._tagCache.clear();
+ }
+
+ /**
+ * Finds term definitions for the given text.
+ * @param mode The mode to use for finding terms, which determines the format of the resulting array.
+ * One of: 'group', 'merge', 'split', 'simple'
+ * @param text The text to find terms for.
+ * @param options An object using the following structure:
+ * {
+ * wildcard: (enum: null, 'prefix', 'suffix'),
+ * mainDictionary: (string),
+ * alphanumeric: (boolean),
+ * convertHalfWidthCharacters: (enum: 'false', 'true', 'variant'),
+ * convertNumericCharacters: (enum: 'false', 'true', 'variant'),
+ * convertAlphabeticCharacters: (enum: 'false', 'true', 'variant'),
+ * convertHiraganaToKatakana: (enum: 'false', 'true', 'variant'),
+ * convertKatakanaToHiragana: (enum: 'false', 'true', 'variant'),
+ * collapseEmphaticSequences: (enum: 'false', 'true', 'full'),
+ * textReplacements: [
+ * (null or [
+ * {pattern: (RegExp), replacement: (string)}
+ * ...
+ * ])
+ * ...
+ * ],
+ * enabledDictionaryMap: (Map of [
+ * (string),
+ * {
+ * priority: (number),
+ * allowSecondarySearches: (boolean)
+ * }
+ * ])
+ * }
+ * @returns An array of [definitions, textLength]. The structure of each definition depends on the
+ * mode parameter, see the _create?TermDefinition?() functions for structure details.
+ */
+ async findTerms(mode, text, options) {
+ switch (mode) {
+ case 'group':
+ return await this._findTermsGrouped(text, options);
+ case 'merge':
+ return await this._findTermsMerged(text, options);
+ case 'split':
+ return await this._findTermsSplit(text, options);
+ case 'simple':
+ return await this._findTermsSimple(text, options);
+ default:
+ return [[], 0];
+ }
+ }
+
+ /**
+ * Finds kanji definitions for the given text.
+ * @param text The text to find kanji definitions for. This string can be of any length,
+ * but is typically just one character, which is a single kanji. If the string is multiple
+ * characters long, each character will be searched in the database.
+ * @param options An object using the following structure:
+ * {
+ * enabledDictionaryMap: (Map of [
+ * (string),
+ * {
+ * priority: (number)
+ * }
+ * ])
+ * }
+ * @returns An array of definitions. See the _createKanjiDefinition() function for structure details.
+ */
+ async findKanji(text, options) {
+ const {enabledDictionaryMap} = options;
+ const kanjiUnique = new Set();
+ for (const c of text) {
+ kanjiUnique.add(c);
+ }
+
+ const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], enabledDictionaryMap);
+ if (databaseDefinitions.length === 0) { return []; }
+
+ this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
+
+ const definitions = [];
+ for (const {character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
+ const expandedStats = await this._expandStats(stats, dictionary);
+ const expandedTags = await this._expandTags(tags, dictionary);
+ this._sortTags(expandedTags);
+
+ const definition = this._createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, expandedTags, expandedStats);
+ definitions.push(definition);
+ }
+
+ await this._buildKanjiMeta(definitions, enabledDictionaryMap);
+
+ return definitions;
+ }
+
+ // Find terms core functions
+
+ async _findTermsSimple(text, options) {
+ const {enabledDictionaryMap} = options;
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
+ this._sortDefinitions(definitions, false);
+ return [definitions, length];
+ }
+
+ async _findTermsSplit(text, options) {
+ const {enabledDictionaryMap} = options;
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
+ await this._buildTermMeta(definitions, enabledDictionaryMap);
+ this._sortDefinitions(definitions, true);
+ return [definitions, length];
+ }
+
+ async _findTermsGrouped(text, options) {
+ const {enabledDictionaryMap} = options;
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
+
+ const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
+ await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
+ this._sortDefinitions(groupedDefinitions, false);
+
+ for (const definition of groupedDefinitions) {
+ this._flagRedundantDefinitionTags(definition.definitions);
+ }
+
+ return [groupedDefinitions, length];
+ }
+
+ /**
+ * Implements the 'merge' mode of findTerms().
+ * Definitions which belong to the main dictionary and have a sequence >= 0 are merged per
+ * sequence (see _getSequencedDefinitions() and _getMergedDefinition()). The remaining
+ * definitions which no merged definition consumed are grouped and wrapped in the same
+ * merged definition structure.
+ * @param text The text to find terms for.
+ * @param options The options object passed to findTerms(); mainDictionary and
+ * enabledDictionaryMap are read here.
+ * @returns An array of [mergedDefinitions, textLength].
+ */
+ async _findTermsMerged(text, options) {
+ const {mainDictionary, enabledDictionaryMap} = options;
+ // Only the dictionaries which have allowSecondarySearches set.
+ const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
+
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
+ const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
+ const definitionsMerged = [];
+ // Filled by _getMergedDefinition() with the unsequenced/secondary definitions it consumed.
+ const usedDefinitions = new Set();
+
+ for (const {sourceDefinitions, relatedDefinitions} of sequencedDefinitions) {
+ const result = await this._getMergedDefinition(
+ sourceDefinitions,
+ relatedDefinitions,
+ unsequencedDefinitions,
+ secondarySearchDictionaryMap,
+ usedDefinitions
+ );
+ definitionsMerged.push(result);
+ }
+
+ // Unsequenced definitions which were not consumed above are grouped and converted
+ // into merged definitions so that every result has the same structure.
+ const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
+ for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
+ const {reasons, score, expression, reading, source, rawSource, sourceTerm, furiganaSegments, termTags, definitions: definitions2} = groupedDefinition;
+ const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
+ const compatibilityDefinition = this._createMergedTermDefinition(
+ source,
+ rawSource,
+ this._convertTermDefinitionsToMergedGlossaryTermDefinitions(definitions2),
+ [expression],
+ [reading],
+ termDetailsList,
+ reasons,
+ score
+ );
+ definitionsMerged.push(compatibilityDefinition);
+ }
+
+ await this._buildTermMeta(definitionsMerged, enabledDictionaryMap);
+ this._sortDefinitions(definitionsMerged, false);
+
+ for (const definition of definitionsMerged) {
+ this._flagRedundantDefinitionTags(definition.definitions);
+ }
+
+ return [definitionsMerged, length];
+ }
+
+ // Find terms internal implementation
+
+ async _findTermsInternal(text, enabledDictionaryMap, options) {
+ const {alphanumeric, wildcard} = options;
+ text = this._getSearchableText(text, alphanumeric);
+ if (text.length === 0) {
+ return [[], 0];
+ }
+
+ const deinflections = (
+ wildcard ?
+ await this._findTermWildcard(text, enabledDictionaryMap, wildcard) :
+ await this._findTermDeinflections(text, enabledDictionaryMap, options)
+ );
+
+ let maxLength = 0;
+ const definitions = [];
+ for (const {databaseDefinitions, source, rawSource, term, reasons} of deinflections) {
+ if (databaseDefinitions.length === 0) { continue; }
+ maxLength = Math.max(maxLength, rawSource.length);
+ for (const databaseDefinition of databaseDefinitions) {
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, term, reasons, enabledDictionaryMap);
+ definitions.push(definition);
+ }
+ }
+
+ this._removeDuplicateDefinitions(definitions);
+ return [definitions, maxLength];
+ }
+
+ async _findTermWildcard(text, enabledDictionaryMap, wildcard) {
+ const databaseDefinitions = await this._database.findTermsBulk([text], enabledDictionaryMap, wildcard);
+ if (databaseDefinitions.length === 0) {
+ return [];
+ }
+
+ return [{
+ source: text,
+ rawSource: text,
+ term: text,
+ rules: 0,
+ reasons: [],
+ databaseDefinitions
+ }];
+ }
+
+ /**
+ * Generates every deinflection of the text, looks the distinct deinflected terms up in
+ * the database in one bulk query, and attaches each database match to the deinflections
+ * it is compatible with.
+ * @param text The text to find terms for.
+ * @param enabledDictionaryMap The map of enabled dictionaries.
+ * @param options The options object passed to findTerms().
+ * @returns The array of deinflections; matches are pushed onto each deinflection's
+ * databaseDefinitions array. Deinflections without any match are still included.
+ */
+ async _findTermDeinflections(text, enabledDictionaryMap, options) {
+ const deinflections = this._getAllDeinflections(text, options);
+
+ if (deinflections.length === 0) {
+ return [];
+ }
+
+ // Bucket the deinflections by term so that each distinct term is queried only once.
+ // uniqueDeinflectionTerms[i] and uniqueDeinflectionArrays[i] describe the same term.
+ const uniqueDeinflectionTerms = [];
+ const uniqueDeinflectionArrays = [];
+ const uniqueDeinflectionsMap = new Map();
+ for (const deinflection of deinflections) {
+ const term = deinflection.term;
+ let deinflectionArray = uniqueDeinflectionsMap.get(term);
+ if (typeof deinflectionArray === 'undefined') {
+ deinflectionArray = [];
+ uniqueDeinflectionTerms.push(term);
+ uniqueDeinflectionArrays.push(deinflectionArray);
+ uniqueDeinflectionsMap.set(term, deinflectionArray);
+ }
+ deinflectionArray.push(deinflection);
+ }
+
+ const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, null);
+
+ for (const databaseDefinition of databaseDefinitions) {
+ const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules);
+ // NOTE(review): databaseDefinition.index is used as the position of the matched term
+ // in uniqueDeinflectionTerms - confirm against DictionaryDatabase.findTermsBulk().
+ for (const deinflection of uniqueDeinflectionArrays[databaseDefinition.index]) {
+ const deinflectionRules = deinflection.rules;
+ // A deinflection with no rule flags accepts any definition; otherwise the
+ // definition must share at least one rule flag with the deinflection.
+ if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) {
+ deinflection.databaseDefinitions.push(databaseDefinition);
+ }
+ }
+ }
+
+ return deinflections;
+ }
+
+ /**
+ * Produces the deinflections for every enabled text-transformation variant of the text
+ * and for every prefix of each transformed text.
+ * @param text The text to deinflect.
+ * @param options The options object passed to findTerms(); the textReplacements,
+ * convert* and collapseEmphaticSequences entries are read here.
+ * @returns An array of deinflection objects as returned by Deinflector.deinflect().
+ */
+ _getAllDeinflections(text, options) {
+ // One list of candidate values per transformation; the order must match the
+ // destructuring order of the loop below.
+ const textOptionVariantArray = [
+ this._getTextReplacementsVariants(options),
+ this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
+ this._getTextOptionEntryVariants(options.convertNumericCharacters),
+ this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
+ this._getTextOptionEntryVariants(options.convertHiraganaToKatakana),
+ this._getTextOptionEntryVariants(options.convertKatakanaToHiragana),
+ this._getCollapseEmphaticOptions(options)
+ ];
+
+ const jp = this._japaneseUtil;
+ const deinflections = [];
+ // Transformed prefixes which have already been deinflected.
+ const used = new Set();
+ // Every combination of the candidate values above is tried.
+ for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
+ let text2 = text;
+ // Passed to the transformations which accept it; used below to map a length in
+ // the transformed text back to a length in the original text.
+ const sourceMap = new TextSourceMap(text2);
+ if (textReplacements !== null) {
+ text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
+ }
+ if (halfWidth) {
+ text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
+ }
+ if (numeric) {
+ text2 = jp.convertNumericToFullWidth(text2);
+ }
+ if (alphabetic) {
+ text2 = jp.convertAlphabeticToKana(text2, sourceMap);
+ }
+ if (katakana) {
+ text2 = jp.convertHiraganaToKatakana(text2);
+ }
+ if (hiragana) {
+ text2 = jp.convertKatakanaToHiragana(text2);
+ }
+ if (collapseEmphatic) {
+ text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
+ }
+
+ // Try each prefix, longest first.
+ for (let i = text2.length; i > 0; --i) {
+ const text2Substring = text2.substring(0, i);
+ // Once a prefix has been seen, every shorter prefix of it has been seen as
+ // well, so the rest of this variant can be skipped.
+ if (used.has(text2Substring)) { break; }
+ used.add(text2Substring);
+ const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
+ for (const deinflection of this._deinflector.deinflect(text2Substring, rawSource)) {
+ deinflections.push(deinflection);
+ }
+ }
+ }
+ return deinflections;
+ }
+
+ /**
+ * Splits definitions into those which belong to the main dictionary and have a
+ * sequence >= 0, grouped by sequence, and all others. Each sequence group is then
+ * extended with the other main dictionary entries which share its sequence.
+ * @param definitions The term definitions to partition.
+ * @param mainDictionary The name of the main dictionary.
+ * @param enabledDictionaryMap The map of enabled dictionaries.
+ * @returns An object of the form {sequencedDefinitions, unsequencedDefinitions}, where
+ * each sequencedDefinitions entry has the form
+ * {sourceDefinitions, relatedDefinitions, relatedDefinitionIds}.
+ */
+ async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) {
+ // sequenceList[i] is the sequence number of sequencedDefinitions[i].
+ const sequenceList = [];
+ const sequencedDefinitionMap = new Map();
+ const sequencedDefinitions = [];
+ const unsequencedDefinitions = [];
+ for (const definition of definitions) {
+ const {sequence, dictionary} = definition;
+ if (mainDictionary === dictionary && sequence >= 0) {
+ let sequencedDefinition = sequencedDefinitionMap.get(sequence);
+ if (typeof sequencedDefinition === 'undefined') {
+ sequencedDefinition = {
+ sourceDefinitions: [],
+ relatedDefinitions: [],
+ relatedDefinitionIds: new Set()
+ };
+ sequencedDefinitionMap.set(sequence, sequencedDefinition);
+ sequencedDefinitions.push(sequencedDefinition);
+ sequenceList.push(sequence);
+ }
+ sequencedDefinition.sourceDefinitions.push(definition);
+ sequencedDefinition.relatedDefinitions.push(definition);
+ sequencedDefinition.relatedDefinitionIds.add(definition.id);
+ } else {
+ unsequencedDefinitions.push(definition);
+ }
+ }
+
+ if (sequenceList.length > 0) {
+ const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
+ for (const databaseDefinition of databaseDefinitions) {
+ // NOTE(review): databaseDefinition.index is used as the position of the matched
+ // sequence in sequenceList - confirm against DictionaryDatabase.
+ const {relatedDefinitions, relatedDefinitionIds} = sequencedDefinitions[databaseDefinition.index];
+ const {id} = databaseDefinition;
+ // Entries which were already found by the text search are not added again.
+ if (relatedDefinitionIds.has(id)) { continue; }
+
+ // The new entry takes its source information from the group's first definition
+ // and is created with an empty reasons list.
+ const {source, rawSource, sourceTerm} = relatedDefinitions[0];
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, [], enabledDictionaryMap);
+ relatedDefinitions.push(definition);
+ }
+ }
+
+ for (const {relatedDefinitions} of sequencedDefinitions) {
+ this._sortDefinitionsById(relatedDefinitions);
+ }
+
+ return {sequencedDefinitions, unsequencedDefinitions};
+ }
+
+ async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) {
+ if (secondarySearchDictionaryMap.size === 0) {
+ return [];
+ }
+
+ const expressionList = [];
+ const readingList = [];
+ for (const [expression, readingMap] of expressionsMap.entries()) {
+ for (const reading of readingMap.keys()) {
+ expressionList.push(expression);
+ readingList.push(reading);
+ }
+ }
+
+ const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap);
+ this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
+
+ const definitions = [];
+ for (const databaseDefinition of databaseDefinitions) {
+ const source = expressionList[databaseDefinition.index];
+ const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap);
+ definitions.push(definition);
+ }
+
+ return definitions;
+ }
+
+ /**
+ * Builds one merged term definition for a single sequence group.
+ * @param sourceDefinitions The definitions of the group which were found by the text search;
+ * the first one supplies reasons, source and rawSource.
+ * @param relatedDefinitions All definitions of the group.
+ * @param unsequencedDefinitions The definitions which are not part of any sequence group.
+ * @param secondarySearchDictionaryMap The map of dictionaries used for secondary searches.
+ * @param usedDefinitions A Set to which the unsequenced/secondary definitions consumed by
+ * this merged definition are added.
+ * @returns The merged term definition created by _createMergedTermDefinition().
+ */
+ async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
+ const {reasons, source, rawSource} = sourceDefinitions[0];
+ const score = this._getMaxDefinitionScore(sourceDefinitions);
+ // Map of expression => Map of reading => term info, filled by _addUniqueTermInfos().
+ const termInfoMap = new Map();
+ const glossaryDefinitions = [];
+ const glossaryDefinitionGroupMap = new Map();
+
+ this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap);
+ this._addUniqueTermInfos(relatedDefinitions, termInfoMap);
+
+ let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap);
+ secondaryDefinitions = [...unsequencedDefinitions, ...secondaryDefinitions];
+
+ // Keeps only the candidates whose expression/reading pair is part of this group and
+ // records them in usedDefinitions; both calls modify secondaryDefinitions in place.
+ this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions);
+ this._removeDuplicateDefinitions(secondaryDefinitions);
+
+ this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
+
+ // Collect every expression and reading which occurs in any glossary group.
+ const allExpressions = new Set();
+ const allReadings = new Set();
+ for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) {
+ for (const expression of expressions) { allExpressions.add(expression); }
+ for (const reading of readings) { allReadings.add(reading); }
+ }
+
+ for (const {expressions, readings, definitions} of glossaryDefinitionGroupMap.values()) {
+ const glossaryDefinition = this._createMergedGlossaryTermDefinition(
+ source,
+ rawSource,
+ definitions,
+ expressions,
+ readings,
+ allExpressions,
+ allReadings
+ );
+ glossaryDefinitions.push(glossaryDefinition);
+ }
+
+ this._sortDefinitions(glossaryDefinitions, true);
+
+ const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
+
+ return this._createMergedTermDefinition(
+ source,
+ rawSource,
+ glossaryDefinitions,
+ [...allExpressions],
+ [...allReadings],
+ termDetailsList,
+ reasons,
+ score
+ );
+ }
+
+ _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) {
+ for (let i = 0, ii = definitions.length; i < ii; ++i) {
+ const definition = definitions[i];
+ const {expression, reading} = definition;
+ const expressionMap = termInfoMap.get(expression);
+ if (
+ typeof expressionMap !== 'undefined' &&
+ typeof expressionMap.get(reading) !== 'undefined'
+ ) {
+ usedDefinitions.add(definition);
+ } else {
+ definitions.splice(i, 1);
+ --i;
+ --ii;
+ }
+ }
+ }
+
+ _getUniqueDefinitionTags(definitions) {
+ const definitionTagsMap = new Map();
+ for (const {definitionTags} of definitions) {
+ for (const tag of definitionTags) {
+ const {name} = tag;
+ if (definitionTagsMap.has(name)) { continue; }
+ definitionTagsMap.set(name, this._cloneTag(tag));
+ }
+ }
+ return [...definitionTagsMap.values()];
+ }
+
+ _removeDuplicateDefinitions(definitions) {
+ const definitionGroups = new Map();
+ for (let i = 0, ii = definitions.length; i < ii; ++i) {
+ const definition = definitions[i];
+ const {id} = definition;
+ const existing = definitionGroups.get(id);
+ if (typeof existing === 'undefined') {
+ definitionGroups.set(id, [i, definition]);
+ continue;
+ }
+
+ let removeIndex = i;
+ if (definition.source.length > existing[1].source.length) {
+ definitionGroups.set(id, [i, definition]);
+ removeIndex = existing[0];
+ }
+
+ definitions.splice(removeIndex, 1);
+ --i;
+ --ii;
+ }
+ }
+
+ _flagRedundantDefinitionTags(definitions) {
+ let lastDictionary = null;
+ let lastPartOfSpeech = '';
+ const removeCategoriesSet = new Set();
+
+ for (const {dictionary, definitionTags} of definitions) {
+ const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech'));
+
+ if (lastDictionary !== dictionary) {
+ lastDictionary = dictionary;
+ lastPartOfSpeech = '';
+ }
+
+ if (lastPartOfSpeech === partOfSpeech) {
+ removeCategoriesSet.add('partOfSpeech');
+ } else {
+ lastPartOfSpeech = partOfSpeech;
+ }
+
+ if (removeCategoriesSet.size > 0) {
+ this._flagTagsWithCategoryAsRedundant(definitionTags, removeCategoriesSet);
+ removeCategoriesSet.clear();
+ }
+ }
+ }
+
+ _groupTerms(definitions) {
+ const groups = new Map();
+ for (const definition of definitions) {
+ const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]);
+ let groupDefinitions = groups.get(key);
+ if (typeof groupDefinitions === 'undefined') {
+ groupDefinitions = [];
+ groups.set(key, groupDefinitions);
+ }
+
+ groupDefinitions.push(definition);
+ }
+
+ const results = [];
+ for (const groupDefinitions of groups.values()) {
+ this._sortDefinitions(groupDefinitions, true);
+ const definition = this._createGroupedTermDefinition(groupDefinitions);
+ results.push(definition);
+ }
+
+ return results;
+ }
+
+ _mergeByGlossary(definitions, glossaryDefinitionGroupMap) {
+ for (const definition of definitions) {
+ const {expression, reading, dictionary, glossary, id} = definition;
+
+ const key = this._createMapKey([dictionary, ...glossary]);
+ let group = glossaryDefinitionGroupMap.get(key);
+ if (typeof group === 'undefined') {
+ group = {
+ expressions: new Set(),
+ readings: new Set(),
+ definitions: [],
+ definitionIds: new Set()
+ };
+ glossaryDefinitionGroupMap.set(key, group);
+ }
+
+ const {definitionIds} = group;
+ if (definitionIds.has(id)) { continue; }
+ definitionIds.add(id);
+ group.expressions.add(expression);
+ group.readings.add(reading);
+ group.definitions.push(definition);
+ }
+ }
+
+ _addUniqueTermInfos(definitions, termInfoMap) {
+ for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
+ let readingMap = termInfoMap.get(expression);
+ if (typeof readingMap === 'undefined') {
+ readingMap = new Map();
+ termInfoMap.set(expression, readingMap);
+ }
+
+ let termInfo = readingMap.get(reading);
+ if (typeof termInfo === 'undefined') {
+ termInfo = {
+ sourceTerm,
+ furiganaSegments,
+ termTagsMap: new Map()
+ };
+ readingMap.set(reading, termInfo);
+ }
+
+ const {termTagsMap} = termInfo;
+ for (const tag of termTags) {
+ const {name} = tag;
+ if (termTagsMap.has(name)) { continue; }
+ termTagsMap.set(name, this._cloneTag(tag));
+ }
+ }
+ }
+
+ _convertTermDefinitionsToMergedGlossaryTermDefinitions(definitions) {
+ const convertedDefinitions = [];
+ for (const definition of definitions) {
+ const {source, rawSource, expression, reading} = definition;
+ const expressions = new Set([expression]);
+ const readings = new Set([reading]);
+ const convertedDefinition = this._createMergedGlossaryTermDefinition(source, rawSource, [definition], expressions, readings, expressions, readings);
+ convertedDefinitions.push(convertedDefinition);
+ }
+ return convertedDefinitions;
+ }
+
+ // Metadata building
+
+ /**
+ * Looks up term metadata (frequencies and pitch accents) for the definitions and pushes
+ * it onto the frequencies/pitches arrays of the matching objects, in place.
+ * @param definitions The top-level definitions; nested definitions are reached through
+ * their definitions arrays.
+ * @param enabledDictionaryMap The map of enabled dictionaries.
+ */
+ async _buildTermMeta(definitions, enabledDictionaryMap) {
+ // Registers target, together with its parents, under target's expression and reading.
+ // targetMap1 is a Map of expression => Map of reading => Set of objects to update.
+ const addMetadataTargetInfo = (targetMap1, target, parents) => {
+ let {expression, reading} = target;
+ // An empty reading is treated as being the same as the expression.
+ if (!reading) { reading = expression; }
+
+ let targetMap2 = targetMap1.get(expression);
+ if (typeof targetMap2 === 'undefined') {
+ targetMap2 = new Map();
+ targetMap1.set(expression, targetMap2);
+ }
+
+ let targets = targetMap2.get(reading);
+ if (typeof targets === 'undefined') {
+ targets = new Set([target, ...parents]);
+ targetMap2.set(reading, targets);
+ } else {
+ targets.add(target);
+ for (const parent of parents) {
+ targets.add(parent);
+ }
+ }
+ };
+
+ const targetMap = new Map();
+ // Breadth-first walk over the definition tree, tracking the ancestors of each entry.
+ const definitionsQueue = definitions.map((definition) => ({definition, parents: []}));
+ while (definitionsQueue.length > 0) {
+ const {definition, parents} = definitionsQueue.shift();
+ const childDefinitions = definition.definitions;
+ if (Array.isArray(childDefinitions)) {
+ for (const definition2 of childDefinitions) {
+ definitionsQueue.push({definition: definition2, parents: [...parents, definition]});
+ }
+ } else {
+ // Leaf definitions are registered along with all of their ancestors.
+ addMetadataTargetInfo(targetMap, definition, parents);
+ }
+
+ // NOTE(review): assumes every definition object has an iterable expressions
+ // property - confirm against the _create*TermDefinition() functions.
+ for (const target of definition.expressions) {
+ addMetadataTargetInfo(targetMap, target, []);
+ }
+ }
+ const targetMapEntries = [...targetMap.entries()];
+ const uniqueExpressions = targetMapEntries.map(([expression]) => expression);
+
+ const metas = await this._database.findTermMetaBulk(uniqueExpressions, enabledDictionaryMap);
+ for (const {expression, mode, data, dictionary, index} of metas) {
+ // NOTE(review): index is used as the position of the expression in
+ // uniqueExpressions - confirm against DictionaryDatabase.findTermMetaBulk().
+ const targetMap2 = targetMapEntries[index][1];
+ for (const [reading, targets] of targetMap2) {
+ switch (mode) {
+ case 'freq':
+ {
+ const frequencyData = this._getTermFrequencyData(expression, reading, dictionary, data);
+ // null means that the metadata is for a different reading.
+ if (frequencyData === null) { continue; }
+ for (const {frequencies} of targets) { frequencies.push(frequencyData); }
+ }
+ break;
+ case 'pitch':
+ {
+ const pitchData = await this._getPitchData(expression, reading, dictionary, data);
+ // null means that the metadata is for a different reading.
+ if (pitchData === null) { continue; }
+ for (const {pitches} of targets) { pitches.push(pitchData); }
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ async _buildKanjiMeta(definitions, enabledDictionaryMap) {
+ const kanjiList = [];
+ for (const {character} of definitions) {
+ kanjiList.push(character);
+ }
+
+ const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
+ for (const {character, mode, data, dictionary, index} of metas) {
+ switch (mode) {
+ case 'freq':
+ {
+ const frequencyData = this._getKanjiFrequencyData(character, dictionary, data);
+ definitions[index].frequencies.push(frequencyData);
+ }
+ break;
+ }
+ }
+ }
+
+ async _expandTags(names, dictionary) {
+ const tagMetaList = await this._getTagMetaList(names, dictionary);
+ const results = [];
+ for (let i = 0, ii = tagMetaList.length; i < ii; ++i) {
+ const meta = tagMetaList[i];
+ const name = names[i];
+ const {category, notes, order, score} = (meta !== null ? meta : {});
+ const tag = this._createTag(name, category, notes, order, score, dictionary, false);
+ results.push(tag);
+ }
+ return results;
+ }
+
+ async _expandStats(items, dictionary) {
+ const names = Object.keys(items);
+ const tagMetaList = await this._getTagMetaList(names, dictionary);
+
+ const statsGroups = new Map();
+ for (let i = 0; i < names.length; ++i) {
+ const name = names[i];
+ const meta = tagMetaList[i];
+ if (meta === null) { continue; }
+
+ const {category, notes, order, score} = meta;
+ let group = statsGroups.get(category);
+ if (typeof group === 'undefined') {
+ group = [];
+ statsGroups.set(category, group);
+ }
+
+ const value = items[name];
+ const stat = this._createKanjiStat(name, category, notes, order, score, dictionary, value);
+ group.push(stat);
+ }
+
+ const stats = {};
+ for (const [category, group] of statsGroups.entries()) {
+ this._sortKanjiStats(group);
+ stats[category] = group;
+ }
+ return stats;
+ }
+
+ async _getTagMetaList(names, dictionary) {
+ const tagMetaList = [];
+ let cache = this._tagCache.get(dictionary);
+ if (typeof cache === 'undefined') {
+ cache = new Map();
+ this._tagCache.set(dictionary, cache);
+ }
+
+ for (const name of names) {
+ const base = this._getNameBase(name);
+
+ let tagMeta = cache.get(base);
+ if (typeof tagMeta === 'undefined') {
+ tagMeta = await this._database.findTagForTitle(base, dictionary);
+ cache.set(base, tagMeta);
+ }
+
+ tagMetaList.push(tagMeta);
+ }
+
+ return tagMetaList;
+ }
+
+ _getTermFrequencyData(expression, reading, dictionary, data) {
+ let frequency = data;
+ const hasReading = (data !== null && typeof data === 'object');
+ if (hasReading) {
+ if (data.reading !== reading) { return null; }
+ frequency = data.frequency;
+ }
+ return {dictionary, expression, reading, hasReading, frequency};
+ }
+
+ /**
+ * Creates a frequency data object from a kanji metadata entry.
+ * @param character The kanji character.
+ * @param dictionary The name of the dictionary which the metadata comes from.
+ * @param data The raw metadata; it is used as the frequency value without modification.
+ * @returns An object of the form {dictionary, character, frequency}.
+ */
+ _getKanjiFrequencyData(character, dictionary, data) {
+ return {dictionary, character, frequency: data};
+ }
+
+ async _getPitchData(expression, reading, dictionary, data) {
+ if (data.reading !== reading) { return null; }
+
+ const pitches = [];
+ for (let {position, tags} of data.pitches) {
+ tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
+ pitches.push({position, tags});
+ }
+
+ return {expression, reading, dictionary, pitches};
+ }
+
+ // Simple helpers
+
+ _scoreToTermFrequency(score) {
+ if (score > 0) {
+ return 'popular';
+ } else if (score < 0) {
+ return 'rare';
+ } else {
+ return 'normal';
+ }
+ }
+
+ _getNameBase(name) {
+ const pos = name.indexOf(':');
+ return (pos >= 0 ? name.substring(0, pos) : name);
+ }
+
+ _getSearchableText(text, allowAlphanumericCharacters) {
+ if (allowAlphanumericCharacters) {
+ return text;
+ }
+
+ const jp = this._japaneseUtil;
+ let newText = '';
+ for (const c of text) {
+ if (!jp.isCodePointJapanese(c.codePointAt(0))) {
+ break;
+ }
+ newText += c;
+ }
+ return newText;
+ }
+
+ _getTextOptionEntryVariants(value) {
+ switch (value) {
+ case 'true': return [true];
+ case 'variant': return [false, true];
+ default: return [false];
+ }
+ }
+
+ _getCollapseEmphaticOptions(options) {
+ const collapseEmphaticOptions = [[false, false]];
+ switch (options.collapseEmphaticSequences) {
+ case 'true':
+ collapseEmphaticOptions.push([true, false]);
+ break;
+ case 'full':
+ collapseEmphaticOptions.push([true, false], [true, true]);
+ break;
+ }
+ return collapseEmphaticOptions;
+ }
+
+ /**
+ * Gets the list of text replacement variants to try.
+ * @param options The options object; textReplacements is read.
+ * @returns options.textReplacements itself, without copying. As documented for findTerms(),
+ * each entry is either null or an array of {pattern, replacement} objects.
+ */
+ _getTextReplacementsVariants(options) {
+ return options.textReplacements;
+ }
+
+ _getSecondarySearchDictionaryMap(enabledDictionaryMap) {
+ const secondarySearchDictionaryMap = new Map();
+ for (const [dictionary, details] of enabledDictionaryMap.entries()) {
+ if (!details.allowSecondarySearches) { continue; }
+ secondarySearchDictionaryMap.set(dictionary, details);
+ }
+ return secondarySearchDictionaryMap;
+ }
+
+ _getDictionaryPriority(dictionary, enabledDictionaryMap) {
+ const info = enabledDictionaryMap.get(dictionary);
+ return typeof info !== 'undefined' ? info.priority : 0;
+ }
+
+ _getTagNamesWithCategory(tags, category) {
+ const results = [];
+ for (const tag of tags) {
+ if (tag.category !== category) { continue; }
+ results.push(tag.name);
+ }
+ results.sort();
+ return results;
+ }
+
+ _flagTagsWithCategoryAsRedundant(tags, removeCategoriesSet) {
+ for (const tag of tags) {
+ if (removeCategoriesSet.has(tag.category)) {
+ tag.redundant = true;
+ }
+ }
+ }
+
+ _getUniqueDictionaryNames(definitions) {
+ const uniqueDictionaryNames = new Set();
+ for (const {dictionaryNames} of definitions) {
+ for (const dictionaryName of dictionaryNames) {
+ uniqueDictionaryNames.add(dictionaryName);
+ }
+ }
+ return [...uniqueDictionaryNames];
+ }
+
+ _getUniqueTermTags(definitions) {
+ const newTermTags = [];
+ if (definitions.length <= 1) {
+ for (const {termTags} of definitions) {
+ for (const tag of termTags) {
+ newTermTags.push(this._cloneTag(tag));
+ }
+ }
+ } else {
+ const tagsSet = new Set();
+ let checkTagsMap = false;
+ for (const {termTags} of definitions) {
+ for (const tag of termTags) {
+ const key = this._getTagMapKey(tag);
+ if (checkTagsMap && tagsSet.has(key)) { continue; }
+ tagsSet.add(key);
+ newTermTags.push(this._cloneTag(tag));
+ }
+ checkTagsMap = true;
+ }
+ }
+ return newTermTags;
+ }
+
+ *_getArrayVariants(arrayVariants) {
+ const ii = arrayVariants.length;
+
+ let total = 1;
+ for (let i = 0; i < ii; ++i) {
+ total *= arrayVariants[i].length;
+ }
+
+ for (let a = 0; a < total; ++a) {
+ const variant = [];
+ let index = a;
+ for (let i = 0; i < ii; ++i) {
+ const entryVariants = arrayVariants[i];
+ variant.push(entryVariants[index % entryVariants.length]);
+ index = Math.floor(index / entryVariants.length);
+ }
+ yield variant;
+ }
+ }
+
+ _areSetsEqual(set1, set2) {
+ if (set1.size !== set2.size) {
+ return false;
+ }
+
+ for (const value of set1) {
+ if (!set2.has(value)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ _getSetIntersection(set1, set2) {
+ const result = [];
+ for (const value of set1) {
+ if (set2.has(value)) {
+ result.push(value);
+ }
+ }
+ return result;
+ }
+
+ // Reduction functions
+
+ _getTermTagsScoreSum(termTags) {
+ let result = 0;
+ for (const {score} of termTags) {
+ result += score;
+ }
+ return result;
+ }
+
+ _getSourceTermMatchCountSum(definitions) {
+ let result = 0;
+ for (const {sourceTermExactMatchCount} of definitions) {
+ result += sourceTermExactMatchCount;
+ }
+ return result;
+ }
+
+ _getMaxDefinitionScore(definitions) {
+ let result = Number.MIN_SAFE_INTEGER;
+ for (const {score} of definitions) {
+ if (score > result) { result = score; }
+ }
+ return result;
+ }
+
+ _getMaxDictionaryPriority(definitions) {
+ let result = Number.MIN_SAFE_INTEGER;
+ for (const {dictionaryPriority} of definitions) {
+ if (dictionaryPriority > result) { result = dictionaryPriority; }
+ }
+ return result;
+ }
+
+ // Common data creation and cloning functions
+
+ _cloneTag(tag) {
+ const {name, category, notes, order, score, dictionary, redundant} = tag;
+ return this._createTag(name, category, notes, order, score, dictionary, redundant);
+ }
+
+ _getTagMapKey(tag) {
+ const {name, category, notes} = tag;
+ return this._createMapKey([name, category, notes]);
+ }
+
+ _createMapKey(array) {
+ return JSON.stringify(array);
+ }
+
+ _createTag(name, category, notes, order, score, dictionary, redundant) {
+ return {
+ name,
+ category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
+ notes: (typeof notes === 'string' ? notes : ''),
+ order: (typeof order === 'number' ? order : 0),
+ score: (typeof score === 'number' ? score : 0),
+ dictionary: (typeof dictionary === 'string' ? dictionary : null),
+ redundant
+ };
+ }
+
+ _createKanjiStat(name, category, notes, order, score, dictionary, value) {
+ return {
+ name,
+ category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
+ notes: (typeof notes === 'string' ? notes : ''),
+ order: (typeof order === 'number' ? order : 0),
+ score: (typeof score === 'number' ? score : 0),
+ dictionary: (typeof dictionary === 'string' ? dictionary : null),
+ value
+ };
+ }
+
+ _createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, tags, stats) {
+ return {
+ type: 'kanji',
+ character,
+ dictionary,
+ onyomi,
+ kunyomi,
+ glossary,
+ tags,
+ stats,
+ frequencies: []
+ };
+ }
+
+ async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, reasons, enabledDictionaryMap) {
+ const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
+ const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
+ const termTagsExpanded = await this._expandTags(termTags, dictionary);
+ const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary);
+
+ this._sortTags(definitionTagsExpanded);
+ this._sortTags(termTagsExpanded);
+
+ const furiganaSegments = this._japaneseUtil.distributeFurigana(expression, reading);
+ const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTagsExpanded)];
+ const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0);
+
+ return {
+ type: 'term',
+ id,
+ source,
+ rawSource,
+ sourceTerm,
+ reasons,
+ score,
+ sequence,
+ dictionary,
+ dictionaryPriority,
+ dictionaryNames: [dictionary],
+ expression,
+ reading,
+ expressions: termDetailsList,
+ furiganaSegments,
+ glossary,
+ definitionTags: definitionTagsExpanded,
+ termTags: termTagsExpanded,
+ // definitions
+ frequencies: [],
+ pitches: [],
+ // only
+ sourceTermExactMatchCount
+ };
+ }
+
+ _createGroupedTermDefinition(definitions) {
+ const {expression, reading, furiganaSegments, reasons, source, rawSource, sourceTerm} = definitions[0];
+ const score = this._getMaxDefinitionScore(definitions);
+ const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
+ const dictionaryNames = this._getUniqueDictionaryNames(definitions);
+ const termTags = this._getUniqueTermTags(definitions);
+ const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
+ const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0);
+ return {
+ type: 'termGrouped',
+ // id
+ source,
+ rawSource,
+ sourceTerm,
+ reasons: [...reasons],
+ score,
+ // sequence
+ dictionary: dictionaryNames[0],
+ dictionaryPriority,
+ dictionaryNames,
+ expression,
+ reading,
+ expressions: termDetailsList,
+ furiganaSegments, // Contains duplicate data
+ // glossary
+ // definitionTags
+ termTags,
+ definitions, // type: 'term'
+ frequencies: [],
+ pitches: [],
+ // only
+ sourceTermExactMatchCount
+ };
+ }
+
+ _createMergedTermDefinition(source, rawSource, definitions, expressions, readings, termDetailsList, reasons, score) {
+ const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
+ const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions);
+ const dictionaryNames = this._getUniqueDictionaryNames(definitions);
+ return {
+ type: 'termMerged',
+ // id
+ source,
+ rawSource,
+ // sourceTerm
+ reasons,
+ score,
+ // sequence
+ dictionary: dictionaryNames[0],
+ dictionaryPriority,
+ dictionaryNames,
+ expression: expressions,
+ reading: readings,
+ expressions: termDetailsList,
+ // furiganaSegments
+ // glossary
+ // definitionTags
+ // termTags
+ definitions, // type: 'termMergedByGlossary'
+ frequencies: [],
+ pitches: [],
+ // only
+ sourceTermExactMatchCount
+ };
+ }
+
+ _createMergedGlossaryTermDefinition(source, rawSource, definitions, expressions, readings, allExpressions, allReadings) {
+ const only = [];
+ if (!this._areSetsEqual(expressions, allExpressions)) {
+ only.push(...this._getSetIntersection(expressions, allExpressions));
+ }
+ if (!this._areSetsEqual(readings, allReadings)) {
+ only.push(...this._getSetIntersection(readings, allReadings));
+ }
+
+ const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions);
+ const dictionaryNames = this._getUniqueDictionaryNames(definitions);
+
+ const termInfoMap = new Map();
+ this._addUniqueTermInfos(definitions, termInfoMap);
+ const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
+
+ const definitionTags = this._getUniqueDefinitionTags(definitions);
+ this._sortTags(definitionTags);
+
+ const {glossary} = definitions[0];
+ const score = this._getMaxDefinitionScore(definitions);
+ const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
+ return {
+ type: 'termMergedByGlossary',
+ // id
+ source,
+ rawSource,
+ // sourceTerm
+ reasons: [],
+ score,
+ // sequence
+ dictionary: dictionaryNames[0],
+ dictionaryPriority,
+ dictionaryNames,
+ expression: [...expressions],
+ reading: [...readings],
+ expressions: termDetailsList,
+ // furiganaSegments
+ glossary: [...glossary],
+ definitionTags,
+ // termTags
+ definitions, // type: 'term'; contains duplicate data
+ frequencies: [],
+ pitches: [],
+ only,
+ sourceTermExactMatchCount
+ };
+ }
+
+ _createTermDetailsListFromTermInfoMap(termInfoMap) {
+ const termDetailsList = [];
+ for (const [expression, readingMap] of termInfoMap.entries()) {
+ for (const [reading, {termTagsMap, sourceTerm, furiganaSegments}] of readingMap.entries()) {
+ const termTags = [...termTagsMap.values()];
+ this._sortTags(termTags);
+ termDetailsList.push(this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags));
+ }
+ }
+ return termDetailsList;
+ }
+
+ _createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags) {
+ const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
+ return {
+ sourceTerm,
+ expression,
+ reading,
+ furiganaSegments, // Contains duplicate data
+ termTags,
+ termFrequency,
+ frequencies: [],
+ pitches: []
+ };
+ }
+
+ // Sorting functions
+
+ _sortTags(tags) {
+ if (tags.length <= 1) { return; }
+ const stringComparer = this._stringComparer;
+ tags.sort((v1, v2) => {
+ const i = v1.order - v2.order;
+ if (i !== 0) { return i; }
+
+ return stringComparer.compare(v1.name, v2.name);
+ });
+ }
+
+ _sortDefinitions(definitions, useDictionaryPriority) {
+ if (definitions.length <= 1) { return; }
+ const stringComparer = this._stringComparer;
+ const compareFunction1 = (v1, v2) => {
+ let i = v2.source.length - v1.source.length;
+ if (i !== 0) { return i; }
+
+ i = v1.reasons.length - v2.reasons.length;
+ if (i !== 0) { return i; }
+
+ i = v2.sourceTermExactMatchCount - v1.sourceTermExactMatchCount;
+ if (i !== 0) { return i; }
+
+ i = v2.score - v1.score;
+ if (i !== 0) { return i; }
+
+ const expression1 = v1.expression;
+ const expression2 = v2.expression;
+ if (typeof expression1 !== 'string' || typeof expression2 !== 'string') { return 0; } // Skip if either is not a string (array)
+
+ i = expression2.length - expression1.length;
+ if (i !== 0) { return i; }
+
+ return stringComparer.compare(expression1, expression2);
+ };
+ const compareFunction2 = (v1, v2) => {
+ const i = v2.dictionaryPriority - v1.dictionaryPriority;
+ return (i !== 0) ? i : compareFunction1(v1, v2);
+ };
+ definitions.sort(useDictionaryPriority ? compareFunction2 : compareFunction1);
+ }
+
+ _sortDatabaseDefinitionsByIndex(definitions) {
+ if (definitions.length <= 1) { return; }
+ definitions.sort((a, b) => a.index - b.index);
+ }
+
+ _sortDefinitionsById(definitions) {
+ if (definitions.length <= 1) { return; }
+ definitions.sort((a, b) => a.id - b.id);
+ }
+
+ _sortKanjiStats(stats) {
+ if (stats.length <= 1) { return; }
+ const stringComparer = this._stringComparer;
+ stats.sort((v1, v2) => {
+ const i = v1.order - v2.order;
+ if (i !== 0) { return i; }
+
+ return stringComparer.compare(v1.notes, v2.notes);
+ });
+ }
+
+ // Regex functions
+
+ _applyTextReplacements(text, sourceMap, replacements) {
+ for (const {pattern, replacement} of replacements) {
+ text = this._applyTextReplacement(text, sourceMap, pattern, replacement);
+ }
+ return text;
+ }
+
+ /**
+  * Replaces matches of `pattern` in `text` with the expansion of
+  * `replacement` (see `_applyMatchReplacement`) and reports every edit to
+  * `sourceMap`. A global pattern is applied repeatedly until it no longer
+  * matches; any other pattern is applied at most once.
+  * @param {string} text The text to transform.
+  * @param {object} sourceMap Receives `combine` and `insert` calls describing each edit.
+  *   NOTE(review): the exact semantics of those calls are defined elsewhere; confirm there.
+  * @param {RegExp} pattern The pattern to search for; its `lastIndex` is modified when global.
+  * @param {string} replacement The replacement template.
+  * @returns {string} The transformed text.
+  */
+ _applyTextReplacement(text, sourceMap, pattern, replacement) {
+ const isGlobal = pattern.global;
+ // Start scanning from the beginning regardless of any earlier use of the pattern.
+ if (isGlobal) { pattern.lastIndex = 0; }
+ // `loop` becomes `isGlobal` after the first pass, so a non-global pattern runs once.
+ for (let loop = true; loop; loop = isGlobal) {
+ const match = pattern.exec(text);
+ if (match === null) { break; }
+
+ const matchText = match[0];
+ const index = match.index;
+ const actualReplacement = this._applyMatchReplacement(replacement, match);
+ const actualReplacementLength = actualReplacement.length;
+ // Length change caused by this edit. For an empty match the subtracted value is -1,
+ // which adds one extra to the shift, presumably so the scan moves past the empty match.
+ const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1);
+
+ text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`;
+ // Shift the resume position by the same amount the text changed.
+ pattern.lastIndex += delta;
+
+ if (actualReplacementLength > 0) {
+ // Non-empty replacement: combine starting one position before the match (clamped to 0),
+ // then insert one zero entry per replacement character at the match position.
+ sourceMap.combine(Math.max(0, index - 1), matchText.length);
+ sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0)));
+ } else {
+ // Empty replacement (pure deletion): only combine, starting at the match position.
+ sourceMap.combine(index, matchText.length);
+ }
+ }
+ return text;
+ }
+
+ _applyMatchReplacement(replacement, match) {
+ const pattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g;
+ return replacement.replace(pattern, (g0, g1, g2) => {
+ if (typeof g1 !== 'undefined') {
+ const matchIndex = Number.parseInt(g1, 10);
+ if (matchIndex >= 1 && matchIndex <= match.length) {
+ return match[matchIndex];
+ }
+ } else if (typeof g2 !== 'undefined') {
+ const {groups} = match;
+ if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) {
+ return groups[g2];
+ }
+ } else {
+ switch (g0) {
+ case '$': return '$';
+ case '&': return match[0];
+ case '`': return replacement.substring(0, match.index);
+ case '\'': return replacement.substring(match.index + g0.length);
+ }
+ }
+ return g0;
+ });
+ }
+}