diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-03-25 19:55:31 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-25 19:55:31 -0400 |
commit | 4be5c8fd9f7860e701d0b7d3c8c0ee934bc60a4f (patch) | |
tree | dcd78316afdf00bbb67d3d1aa6555a9c8ea3efec /ext | |
parent | e7035dcff41d94f20c0bc8865d413412afc7c229 (diff) |
Refactor Translator and dictionary entry format (#1553)
* Update test data
* Move translator.js
* Create new version of Translator
* Update Backend
* Update DictionaryDataUtil
* Update DisplayGenerator
* Create AnkiNoteDataCreator
* Replace AnkiNoteData with AnkiNoteDataCreator
* Update tests
* Remove AnkiNoteData
* Update test data
* Remove translator-old.js
* Add TypeScript interface definitions for the new translator data format
Diffstat (limited to 'ext')
-rw-r--r-- | ext/js/background/backend.js | 28 | ||||
-rw-r--r-- | ext/js/data/anki-note-data-creator.js | 598 | ||||
-rw-r--r-- | ext/js/data/anki-note-data.js | 299 | ||||
-rw-r--r-- | ext/js/display/display-generator.js | 141 | ||||
-rw-r--r-- | ext/js/language/dictionary-data-util.js | 136 | ||||
-rw-r--r-- | ext/js/language/translator.js | 1572 | ||||
-rw-r--r-- | ext/js/templates/template-renderer-frame-main.js | 5 | ||||
-rw-r--r-- | ext/template-renderer.html | 2 |
8 files changed, 1519 insertions, 1262 deletions
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index 5cbb65cb..82457a7e 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -414,9 +414,9 @@ class Backend { const options = this._getProfileOptions(optionsContext); const {general: {resultOutputMode: mode, maxResults}} = options; const findTermsOptions = this._getTranslatorFindTermsOptions(details, options); - const [definitions, length] = await this._translator.findTerms(mode, text, findTermsOptions); - definitions.splice(maxResults); - return {length, definitions}; + const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(mode, text, findTermsOptions); + dictionaryEntries.splice(maxResults); + return {length: originalTextLength, definitions: dictionaryEntries}; } async _onApiTextParse({text, optionsContext}) { @@ -1050,7 +1050,7 @@ class Backend { let i = 0; const ii = text.length; while (i < ii) { - const [definitions, sourceLength] = await this._translator.findTerms( + const {dictionaryEntries, originalTextLength} = await this._translator.findTerms( 'simple', text.substring(i, i + scanningLength), findTermsOptions @@ -1058,20 +1058,20 @@ class Backend { const codePoint = text.codePointAt(i); const character = String.fromCodePoint(codePoint); if ( - definitions.length > 0 && - sourceLength > 0 && - (sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint)) + dictionaryEntries.length > 0 && + originalTextLength > 0 && + (originalTextLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint)) ) { previousUngroupedSegment = null; - const {expression, reading} = definitions[0]; - const source = text.substring(i, i + sourceLength); - const term = []; - for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { + const {headwords: [{term, reading}]} = dictionaryEntries[0]; + const source = text.substring(i, i + originalTextLength); + const textSegments = []; + for (const {text: text2, furigana} of jp.distributeFuriganaInflected(term, reading, source)) { const reading2 = jp.convertReading(text2, furigana, readingMode); - term.push({text: text2, reading: reading2}); + textSegments.push({text: text2, reading: reading2}); } - results.push(term); - i += sourceLength; + results.push(textSegments); + i += originalTextLength; } else { if (previousUngroupedSegment === null) { previousUngroupedSegment = {text: character, reading: ''}; diff --git a/ext/js/data/anki-note-data-creator.js b/ext/js/data/anki-note-data-creator.js new file mode 100644 index 00000000..c7047633 --- /dev/null +++ b/ext/js/data/anki-note-data-creator.js @@ -0,0 +1,598 @@ +/* + * Copyright (C) 2021 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/* global + * DictionaryDataUtil + */ + +/** + * This class is used to convert the internal dictionary entry format to the + * format used by Anki, for backwards compatibility. + */ +class AnkiNoteDataCreator { + /** + * Creates a new instance. + * @param japaneseUtil An instance of `JapaneseUtil`. + */ + constructor(japaneseUtil) { + this._japaneseUtil = japaneseUtil; + } + + /** + * Creates a compatibility representation of the specified data. + * @param marker The marker that is being used for template rendering. + * @returns An object used for rendering Anki templates. + */ + create(marker, { + definition: dictionaryEntry, + resultOutputMode, + mode, + glossaryLayoutMode, + compactTags, + context, + injectedMedia=null + }) { + const self = this; + const definition = this.createCachedValue(this._getDefinition.bind(this, dictionaryEntry, injectedMedia, context, resultOutputMode)); + const uniqueExpressions = this.createCachedValue(this._getUniqueExpressions.bind(this, dictionaryEntry)); + const uniqueReadings = this.createCachedValue(this._getUniqueReadings.bind(this, dictionaryEntry)); + const context2 = this.createCachedValue(this._getPublicContext.bind(this, context)); + const pitches = this.createCachedValue(this._getPitches.bind(this, dictionaryEntry)); + const pitchCount = this.createCachedValue(this._getPitchCount.bind(this, pitches)); + return { + marker, + get definition() { return self.getCachedValue(definition); }, + glossaryLayoutMode, + compactTags, + group: (resultOutputMode === 'group'), + merge: (resultOutputMode === 'merge'), + modeTermKanji: (mode === 'term-kanji'), + modeTermKana: (mode === 'term-kana'), + modeKanji: (mode === 'kanji'), + compactGlossaries: (glossaryLayoutMode === 'compact'), + get uniqueExpressions() { return self.getCachedValue(uniqueExpressions); }, + get uniqueReadings() { return self.getCachedValue(uniqueReadings); }, + get pitches() { return self.getCachedValue(pitches); }, + get pitchCount() { return self.getCachedValue(pitchCount); }, + get context() { return self.getCachedValue(context2); } + }; + } + + /** + * Creates a deferred-evaluation value. + * @param getter The function to invoke to get the return value. + * @returns An object which can be passed into `getCachedValue`. + */ + createCachedValue(getter) { + return {getter, hasValue: false, value: void 0}; + } + + /** + * Gets the value of a cached object. + * @param item An object that was returned from `createCachedValue`. + * @returns The result of evaluating the getter, which is cached after the first invocation. + */ + getCachedValue(item) { + if (item.hasValue) { return item.value; } + const value = item.getter(); + item.value = value; + item.hasValue = true; + return value; + } + + // Private + + _asObject(value) { + return (typeof value === 'object' && value !== null ? value : {}); + } + + _getPrimarySource(dictionaryEntry) { + for (const headword of dictionaryEntry.headwords) { + for (const source of headword.sources) { + if (source.isPrimary) { return source; } + } + } + return null; + } + + _getUniqueExpressions(dictionaryEntry) { + if (dictionaryEntry.type === 'term') { + const results = new Set(); + for (const {term} of dictionaryEntry.headwords) { + results.add(term); + } + return [...results]; + } else { + return []; + } + } + + _getUniqueReadings(dictionaryEntry) { + if (dictionaryEntry.type === 'term') { + const results = new Set(); + for (const {reading} of dictionaryEntry.headwords) { + results.add(reading); + } + return [...results]; + } else { + return []; + } + } + + _getPublicContext(context) { + let {documentTitle} = this._asObject(context); + if (typeof documentTitle !== 'string') { documentTitle = ''; } + return { + document: { + title: documentTitle + } + }; + } + + _getPitches(dictionaryEntry) { + const results = []; + if (dictionaryEntry.type === 'term') { + for (const {dictionary, pitches} of DictionaryDataUtil.getPitchAccentInfos(dictionaryEntry)) { + const pitches2 = []; + for (const {terms, reading, position, tags, exclusiveTerms, exclusiveReadings} of pitches) { + pitches2.push({ + expressions: terms, + reading, + position, + tags, + exclusiveExpressions: exclusiveTerms, + exclusiveReadings + }); + } + results.push({dictionary, pitches: pitches2}); + } + } + return results; + } + + _getPitchCount(cachedPitches) { + const pitches = this.getCachedValue(cachedPitches); + return pitches.reduce((i, v) => i + v.pitches.length, 0); + } + + _getDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode) { + switch (dictionaryEntry.type) { + case 'term': + return this._getTermDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode); + case 'kanji': + return this._getKanjiDefinition(dictionaryEntry, injectedMedia, context); + default: + return {}; + } + } + + _getKanjiDefinition(dictionaryEntry, injectedMedia, context) { + const self = this; + + const {character, dictionary, onyomi, kunyomi, definitions} = dictionaryEntry; + + const { + screenshotFileName=null, + clipboardImageFileName=null, + clipboardText=null, + audioFileName=null + } = this._asObject(injectedMedia); + + let {url} = this._asObject(context); + if (typeof url !== 'string') { url = ''; } + + const stats = this.createCachedValue(this._getKanjiStats.bind(this, dictionaryEntry)); + const tags = this.createCachedValue(this._convertTags.bind(this, dictionaryEntry.tags)); + const frequencies = this.createCachedValue(this._getKanjiFrequencies.bind(this, dictionaryEntry)); + const cloze = this.createCachedValue(this._getCloze.bind(this, dictionaryEntry, context)); + + return { + type: 'kanji', + character, + dictionary, + onyomi, + kunyomi, + glossary: definitions, + get tags() { return self.getCachedValue(tags); }, + get stats() { return self.getCachedValue(stats); }, + get frequencies() { return self.getCachedValue(frequencies); }, + screenshotFileName, + clipboardImageFileName, + clipboardText, + audioFileName, + url, + get cloze() { return self.getCachedValue(cloze); } + }; + } + + _getKanjiStats(dictionaryEntry) { + const results = {}; + for (const [key, value] of Object.entries(dictionaryEntry.stats)) { + results[key] = value.map(this._convertKanjiStat.bind(this)); + } + return results; + } + + _convertKanjiStat({name, category, content, order, score, dictionary, value}) { + return { + name, + category, + notes: content, + order, + score, + dictionary, + value + }; + } + + _getKanjiFrequencies(dictionaryEntry) { + const results = []; + for (const {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency} of dictionaryEntry.frequencies) { + results.push({ + index, + dictionary, + dictionaryOrder: { + index: dictionaryIndex, + priority: dictionaryPriority + }, + character, + frequency + }); + } + return results; + } + + _getTermDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode) { + const self = this; + + let type = 'term'; + switch (resultOutputMode) { + case 'group': type = 'termGrouped'; break; + case 'merge': type = 'termMerged'; break; + } + + const {id, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount} = dictionaryEntry; + + const { + screenshotFileName=null, + clipboardImageFileName=null, + clipboardText=null, + audioFileName=null + } = this._asObject(injectedMedia); + + let {url} = this._asObject(context); + if (typeof url !== 'string') { url = ''; } + + const primarySource = this._getPrimarySource(dictionaryEntry); + + const dictionaryNames = this.createCachedValue(this._getTermDictionaryNames.bind(this, dictionaryEntry)); + const commonInfo = this.createCachedValue(this._getTermDictionaryEntryCommonInfo.bind(this, dictionaryEntry, type)); + const termTags = this.createCachedValue(this._getTermTags.bind(this, dictionaryEntry, type)); + const expressions = this.createCachedValue(this._getTermExpressions.bind(this, dictionaryEntry)); + const frequencies = this.createCachedValue(this._getTermFrequencies.bind(this, dictionaryEntry)); + const pitches = this.createCachedValue(this._getTermPitches.bind(this, dictionaryEntry)); + const glossary = this.createCachedValue(this._getTermGlossaryArray.bind(this, dictionaryEntry, type)); + const cloze = this.createCachedValue(this._getCloze.bind(this, dictionaryEntry, context)); + const furiganaSegments = this.createCachedValue(this._getTermFuriganaSegments.bind(this, dictionaryEntry, type)); + + return { + type, + id: (type === 'term' ? id : void 0), + source: (primarySource !== null ? primarySource.transformedText : null), + rawSource: (primarySource !== null ? primarySource.originalText : null), + sourceTerm: (type !== 'termMerged' ? (primarySource !== null ? primarySource.deinflectedText : null) : void 0), + reasons: inflections, + score, + isPrimary: (type === 'term' ? dictionaryEntry.isPrimary : void 0), + sequence: (type === 'term' ? dictionaryEntry.sequence : void 0), + get dictionary() { return self.getCachedValue(dictionaryNames)[0]; }, + dictionaryOrder: { + index: dictionaryIndex, + priority: dictionaryPriority + }, + get dictionaryNames() { return self.getCachedValue(dictionaryNames); }, + get expression() { + const {uniqueTerms} = self.getCachedValue(commonInfo); + return (type === 'term' || type === 'termGrouped' ? uniqueTerms[0] : uniqueTerms); + }, + get reading() { + const {uniqueReadings} = self.getCachedValue(commonInfo); + return (type === 'term' || type === 'termGrouped' ? uniqueReadings[0] : uniqueReadings); + }, + get expressions() { return self.getCachedValue(expressions); }, + get glossary() { return self.getCachedValue(glossary); }, + get definitionTags() { return type === 'term' ? self.getCachedValue(commonInfo).definitionTags : void 0; }, + get termTags() { return self.getCachedValue(termTags); }, + get definitions() { return self.getCachedValue(commonInfo).definitions; }, + get frequencies() { return self.getCachedValue(frequencies); }, + get pitches() { return self.getCachedValue(pitches); }, + sourceTermExactMatchCount, + screenshotFileName, + clipboardImageFileName, + clipboardText, + audioFileName, + url, + get cloze() { return self.getCachedValue(cloze); }, + get furiganaSegments() { return self.getCachedValue(furiganaSegments); } + }; + } + + _getTermDictionaryNames(dictionaryEntry) { + const dictionaryNames = new Set(); + for (const {dictionary} of dictionaryEntry.definitions) { + dictionaryNames.add(dictionary); + } + return [...dictionaryNames]; + } + + _getTermDictionaryEntryCommonInfo(dictionaryEntry, type) { + const merged = (type === 'termMerged'); + const hasDefinitions = (type !== 'term'); + + const allTermsSet = new Set(); + const allReadingsSet = new Set(); + for (const {term, reading} of dictionaryEntry.headwords) { + allTermsSet.add(term); + allReadingsSet.add(reading); + } + const uniqueTerms = [...allTermsSet]; + const uniqueReadings = [...allReadingsSet]; + + const definitions = []; + const definitionTags = []; + for (const {tags, headwordIndices, entries, dictionary} of dictionaryEntry.definitions) { + const definitionTags2 = []; + for (const tag of tags) { + definitionTags.push(this._convertTag(tag)); + definitionTags2.push(this._convertTag(tag)); + } + if (!hasDefinitions) { continue; } + const only = merged ? DictionaryDataUtil.getDisambiguations(dictionaryEntry.headwords, headwordIndices, allTermsSet, allReadingsSet) : void 0; + definitions.push({ + dictionary, + glossary: entries, + definitionTags: definitionTags2, + only + }); + } + + return { + uniqueTerms, + uniqueReadings, + definitionTags, + definitions: hasDefinitions ? definitions : void 0 + }; + } + + _getTermFrequencies(dictionaryEntry) { + const results = []; + const {headwords} = dictionaryEntry; + for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency} of dictionaryEntry.frequencies) { + const {term, reading} = headwords[headwordIndex]; + results.push({ + index: results.length, + expressionIndex: headwordIndex, + dictionary, + dictionaryOrder: { + index: dictionaryIndex, + priority: dictionaryPriority + }, + expression: term, + reading, + hasReading, + frequency + }); + } + return results; + } + + _getTermPitches(dictionaryEntry) { + const self = this; + const results = []; + const {headwords} = dictionaryEntry; + for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches} of dictionaryEntry.pronunciations) { + const {term, reading} = headwords[headwordIndex]; + const cachedPitches = this.createCachedValue(this._getTermPitchesInner.bind(this, pitches)); + results.push({ + index: results.length, + expressionIndex: headwordIndex, + dictionary, + dictionaryOrder: { + index: dictionaryIndex, + priority: dictionaryPriority + }, + expression: term, + reading, + get pitches() { return self.getCachedValue(cachedPitches); } + }); + } + return results; + } + + _getTermPitchesInner(pitches) { + const self = this; + const results = []; + for (const {position, tags} of pitches) { + const cachedTags = this.createCachedValue(this._convertTags.bind(this, tags)); + results.push({ + position, + get tags() { return self.getCachedValue(cachedTags); } + }); + } + return results; + } + + _getTermExpressions(dictionaryEntry) { + const self = this; + const results = []; + const {headwords} = dictionaryEntry; + for (let i = 0, ii = headwords.length; i < ii; ++i) { + const {term, reading, tags, sources: [{deinflectedText}]} = headwords[i]; + const termTags = this.createCachedValue(this._convertTags.bind(this, tags)); + const frequencies = this.createCachedValue(this._getTermExpressionFrequencies.bind(this, dictionaryEntry, i)); + const pitches = this.createCachedValue(this._getTermExpressionPitches.bind(this, dictionaryEntry, i)); + const termFrequency = this.createCachedValue(this._getTermExpressionTermFrequency.bind(this, termTags)); + const furiganaSegments = this.createCachedValue(this._getTermHeadwordFuriganaSegments.bind(this, term, reading)); + const item = { + sourceTerm: deinflectedText, + expression: term, + reading, + get termTags() { return self.getCachedValue(termTags); }, + get frequencies() { return self.getCachedValue(frequencies); }, + get pitches() { return self.getCachedValue(pitches); }, + get furiganaSegments() { return self.getCachedValue(furiganaSegments); }, + get termFrequency() { return self.getCachedValue(termFrequency); } + }; + results.push(item); + } + return results; + } + + _getTermExpressionFrequencies(dictionaryEntry, i) { + const results = []; + const {headwords, frequencies} = dictionaryEntry; + for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency} of frequencies) { + if (headwordIndex !== i) { continue; } + const {term, reading} = headwords[headwordIndex]; + results.push({ + index: results.length, + expressionIndex: headwordIndex, + dictionary, + dictionaryOrder: { + index: dictionaryIndex, + priority: dictionaryPriority + }, + expression: term, + reading, + hasReading, + frequency + }); + } + return results; + } + + _getTermExpressionPitches(dictionaryEntry, i) { + const self = this; + const results = []; + const {headwords, pronunciations} = dictionaryEntry; + for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches} of pronunciations) { + if (headwordIndex !== i) { continue; } + const {term, reading} = headwords[headwordIndex]; + const cachedPitches = this.createCachedValue(this._getTermPitchesInner.bind(this, pitches)); + results.push({ + index: results.length, + expressionIndex: headwordIndex, + dictionary, + dictionaryOrder: { + index: dictionaryIndex, + priority: dictionaryPriority + }, + expression: term, + reading, + get pitches() { return self.getCachedValue(cachedPitches); } + }); + } + return results; + } + + _getTermExpressionTermFrequency(cachedTermTags) { + const termTags = this.getCachedValue(cachedTermTags); + return DictionaryDataUtil.getTermFrequency(termTags); + } + + _getTermGlossaryArray(dictionaryEntry, type) { + if (type === 'term') { + const results = []; + for (const {entries} of dictionaryEntry.definitions) { + results.push(...entries); + } + return results; + } + return void 0; + } + + _getTermTags(dictionaryEntry, type) { + if (type !== 'termMerged') { + const results = []; + for (const {tag} of DictionaryDataUtil.groupTermTags(dictionaryEntry)) { + results.push(this._convertTag(tag)); + } + return results; + } + return void 0; + } + + _convertTags(tags) { + const results = []; + for (const tag of tags) { + results.push(this._convertTag(tag)); + } + return results; + } + + _convertTag({name, category, content, order, score, dictionaries, redundant}) { + return { + name, + category, + notes: (content.length > 0 ? content[0] : ''), + order, + score, + dictionary: (dictionaries.length > 0 ? dictionaries[0] : ''), + redundant + }; + } + + _getCloze(dictionaryEntry, context) { + let originalText = ''; + switch (dictionaryEntry.type) { + case 'term': + { + const primarySource = this._getPrimarySource(dictionaryEntry); + if (primarySource !== null) { originalText = primarySource.originalText; } + } + break; + case 'kanji': + originalText = dictionaryEntry.character; + break; + } + + const {sentence} = this._asObject(context); + let {text, offset} = this._asObject(sentence); + if (typeof text !== 'string') { text = ''; } + if (typeof offset !== 'number') { offset = 0; } + + return { + sentence: text, + prefix: text.substring(0, offset), + body: text.substring(offset, offset + originalText.length), + suffix: text.substring(offset + originalText.length) + }; + } + + _getTermFuriganaSegments(dictionaryEntry, type) { + if (type === 'term') { + for (const {term, reading} of dictionaryEntry.headwords) { + return this._getTermHeadwordFuriganaSegments(term, reading); + } + } + return void 0; + } + + _getTermHeadwordFuriganaSegments(term, reading) { + return this._japaneseUtil.distributeFurigana(term, reading); + } +} diff --git a/ext/js/data/anki-note-data.js b/ext/js/data/anki-note-data.js deleted file mode 100644 index f7f4c641..00000000 --- a/ext/js/data/anki-note-data.js +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright (C) 2021 Yomichan Authors - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -/* global - * DictionaryDataUtil - */ - -/** - * This class represents the data that is exposed to the Anki template renderer. - * The public properties and data should be backwards compatible. - */ -class AnkiNoteData { - constructor(japaneseUtil, marker, { - definition, - resultOutputMode, - mode, - glossaryLayoutMode, - compactTags, - context, - injectedMedia=null - }) { - this._japaneseUtil = japaneseUtil; - this._definition = definition; - this._resultOutputMode = resultOutputMode; - this._mode = mode; - this._glossaryLayoutMode = glossaryLayoutMode; - this._compactTags = compactTags; - this._context = context; - this._marker = marker; - this._injectedMedia = injectedMedia; - this._pitches = null; - this._pitchCount = null; - this._uniqueExpressions = null; - this._uniqueReadings = null; - this._publicContext = null; - this._cloze = null; - this._furiganaSegmentsCache = null; - - this._prepareDefinition(definition, injectedMedia, context); - } - - get marker() { - return this._marker; - } - - set marker(value) { - this._marker = value; - } - - get definition() { - return this._definition; - } - - get uniqueExpressions() { - if (this._uniqueExpressions === null) { - this._uniqueExpressions = this._getUniqueExpressions(); - } - return this._uniqueExpressions; - } - - get uniqueReadings() { - if (this._uniqueReadings === null) { - this._uniqueReadings = this._getUniqueReadings(); - } - return this._uniqueReadings; - } - - get pitches() { - if (this._pitches === null) { - this._pitches = DictionaryDataUtil.getPitchAccentInfos(this._definition); - } - return this._pitches; - } - - get pitchCount() { - if (this._pitchCount === null) { - this._pitchCount = this.pitches.reduce((i, v) => i + v.pitches.length, 0); - } - return this._pitchCount; - } - - get group() { - return this._resultOutputMode === 'group'; - } - - get merge() { - return this._resultOutputMode === 'merge'; - } - - get modeTermKanji() { - return this._mode === 'term-kanji'; - } - - get modeTermKana() { - return this._mode === 'term-kana'; - } - - get modeKanji() { - return this._mode === 'kanji'; - } - - get compactGlossaries() { - return this._glossaryLayoutMode === 'compact'; - } - - get glossaryLayoutMode() { - return this._glossaryLayoutMode; - } - - get compactTags() { - return this._compactTags; - } - - get context() { - if (this._publicContext === null) { - this._publicContext = this._getPublicContext(); - } - return this._publicContext; - } - - createPublic() { - const self = this; - return { - get marker() { return self.marker; }, - set marker(value) { self.marker = value; }, - get definition() { return self.definition; }, - get glossaryLayoutMode() { return self.glossaryLayoutMode; }, - get compactTags() { return self.compactTags; }, - get group() { return self.group; }, - get merge() { return self.merge; }, - get modeTermKanji() { return self.modeTermKanji; }, - get modeTermKana() { return self.modeTermKana; }, - get modeKanji() { return self.modeKanji; }, - get compactGlossaries() { return self.compactGlossaries; }, - get uniqueExpressions() { return self.uniqueExpressions; }, - get uniqueReadings() { return self.uniqueReadings; }, - get pitches() { return self.pitches; }, - get pitchCount() { return self.pitchCount; }, - get context() { return self.context; } - }; - } - - // Private - - _asObject(value) { - return (typeof value === 'object' && value !== null ? value : {}); - } - - _getUniqueExpressions() { - const results = new Set(); - const definition = this._definition; - if (definition.type !== 'kanji') { - for (const {expression} of definition.expressions) { - results.add(expression); - } - } - return [...results]; - } - - _getUniqueReadings() { - const results = new Set(); - const definition = this._definition; - if (definition.type !== 'kanji') { - for (const {reading} of definition.expressions) { - results.add(reading); - } - } - return [...results]; - } - - _getPublicContext() { - let {documentTitle} = this._asObject(this._context); - if (typeof documentTitle !== 'string') { documentTitle = ''; } - - return { - document: { - title: documentTitle - } - }; - } - - _getCloze() { - const {sentence} = this._asObject(this._context); - let {text, offset} = this._asObject(sentence); - if (typeof text !== 'string') { text = ''; } - if (typeof offset !== 'number') { offset = 0; } - - const definition = this._definition; - const source = definition.type === 'kanji' ? definition.character : definition.rawSource; - - return { - sentence: text, - prefix: text.substring(0, offset), - body: text.substring(offset, offset + source.length), - suffix: text.substring(offset + source.length) - }; - } - - _getClozeCached() { - if (this._cloze === null) { - this._cloze = this._getCloze(); - } - return this._cloze; - } - - _prepareDefinition(definition, injectedMedia, context) { - const { - screenshotFileName=null, - clipboardImageFileName=null, - clipboardText=null, - audioFileName=null - } = this._asObject(injectedMedia); - - let {url} = this._asObject(context); - if (typeof url !== 'string') { url = ''; } - - definition.screenshotFileName = screenshotFileName; - definition.clipboardImageFileName = clipboardImageFileName; - definition.clipboardText = clipboardText; - definition.audioFileName = audioFileName; - definition.url = url; - Object.defineProperty(definition, 'cloze', { - configurable: true, - enumerable: true, - get: this._getClozeCached.bind(this) - }); - - for (const definition2 of this._getAllDefinitions(definition)) { - if (definition2.type === 'term') { - this._defineFuriganaSegments(definition2); - } - if (definition2.type === 'kanji') { continue; } - for (const expression of definition2.expressions) { - this._defineFuriganaSegments(expression); - this._defineTermFrequency(expression); - } - } - } - - _defineFuriganaSegments(object) { - Object.defineProperty(object, 'furiganaSegments', { - configurable: true, - enumerable: true, - get: this._getFuriganaSegments.bind(this, object) - }); - } - - _defineTermFrequency(object) { - Object.defineProperty(object, 'termFrequency', { - configurable: true, - enumerable: true, - get: this._getTermFrequency.bind(this, object) - }); - } - - _getFuriganaSegments(object) { - if (this._furiganaSegmentsCache !== null) { - const cachedResult = this._furiganaSegmentsCache.get(object); - if (typeof cachedResult !== 'undefined') { return cachedResult; } - } else { - this._furiganaSegmentsCache = new Map(); - } - - const {expression, reading} = object; - const result = this._japaneseUtil.distributeFurigana(expression, reading); - this._furiganaSegmentsCache.set(object, result); - return result; - } - - _getTermFrequency(object) { - const {termTags} = object; - return DictionaryDataUtil.getTermFrequency(termTags); - } - - _getAllDefinitions(definition) { - const definitions = [definition]; - for (let i = 0; i < definitions.length; ++i) { - const childDefinitions = definitions[i].definitions; - if (Array.isArray(childDefinitions)) { - definitions.push(...childDefinitions); - } - } - return definitions; - } -} diff --git a/ext/js/display/display-generator.js b/ext/js/display/display-generator.js index 3977815b..be5d5e66 100644 --- a/ext/js/display/display-generator.js +++ b/ext/js/display/display-generator.js @@ -60,23 +60,20 @@ class DisplayGenerator { const definitionsContainer = node.querySelector('.definition-list'); const termTagsContainer = node.querySelector('.expression-list-tag-list'); - const {expressions, type, reasons, frequencies} = details; - const definitions = (type === 'term' ? [details] : details.definitions); - const merged = (type === 'termMerged' || type === 'termMergedByGlossary'); + const {headwords: expressions, type, inflections: reasons, definitions, frequencies, pronunciations} = details; const pitches = DictionaryDataUtil.getPitchAccentInfos(details); const pitchCount = pitches.reduce((i, v) => i + v.pitches.length, 0); - const groupedFrequencies = DictionaryDataUtil.groupTermFrequencies(frequencies); + const groupedFrequencies = DictionaryDataUtil.groupTermFrequencies(details); const termTags = DictionaryDataUtil.groupTermTags(details); const uniqueExpressions = new Set(); const uniqueReadings = new Set(); - for (const {expression, reading} of expressions) { + for (const {term: expression, reading} of expressions) { uniqueExpressions.add(expression); uniqueReadings.add(reading); } node.dataset.format = type; - node.dataset.expressionMulti = `${merged}`; node.dataset.expressionCount = `${expressions.length}`; node.dataset.definitionCount = `${definitions.length}`; node.dataset.pitchAccentDictionaryCount = `${pitches.length}`; @@ -86,7 +83,13 @@ class DisplayGenerator { node.dataset.frequencyCount = `${frequencies.length}`; node.dataset.groupedFrequencyCount = `${groupedFrequencies.length}`; - this._appendMultiple(expressionsContainer, this._createTermExpression.bind(this), expressions); + for (let i = 0, ii = expressions.length; i < ii; ++i) { + const node2 = this._createTermExpression(expressions[i], i, pronunciations); + node2.dataset.index = `${i}`; + expressionsContainer.appendChild(node2); + } + expressionsContainer.dataset.count = `${expressions.length}`; + this._appendMultiple(reasonsContainer, this._createTermReason.bind(this), reasons); this._appendMultiple(frequencyGroupListContainer, this._createFrequencyGroup.bind(this), groupedFrequencies, false); this._appendMultiple(pitchesContainer, this._createPitches.bind(this), pitches); @@ -114,7 +117,7 @@ class DisplayGenerator { dictionaryTag.name = dictionary; } - const node2 = this._createTermDefinitionItem(definition, dictionaryTag); + const node2 = this._createTermDefinitionItem(definition, dictionaryTag, expressions, uniqueExpressions, uniqueReadings); node2.dataset.index = `${i}`; definitionsContainer.appendChild(node2); } @@ -144,7 +147,7 @@ class DisplayGenerator { this._appendMultiple(frequencyGroupListContainer, this._createFrequencyGroup.bind(this), groupedFrequencies, true); this._appendMultiple(tagContainer, this._createTag.bind(this), [...details.tags, dictionaryTag]); - this._appendMultiple(glossaryContainer, this._createKanjiGlossaryItem.bind(this), details.glossary); + this._appendMultiple(glossaryContainer, this._createKanjiGlossaryItem.bind(this), details.definitions); this._appendMultiple(chineseReadingsContainer, this._createKanjiReading.bind(this), details.onyomi); this._appendMultiple(japaneseReadingsContainer, this._createKanjiReading.bind(this), details.kunyomi); @@ -229,8 +232,8 @@ class DisplayGenerator { // Private - _createTermExpression(details) { - const {expression, reading, termTags, pitches} = details; + _createTermExpression(headword, headwordIndex, pronunciations) { + const {term: expression, reading, tags: termTags} = headword; const searchQueries = []; if (expression) { searchQueries.push(expression); } @@ -244,7 +247,7 @@ class DisplayGenerator { node.dataset.readingIsSame = `${reading === expression}`; node.dataset.frequency = DictionaryDataUtil.getTermFrequency(termTags); - const pitchAccentCategories = this._getPitchAccentCategories(pitches); + const pitchAccentCategories = this._getPitchAccentCategories(reading, pronunciations, headwordIndex); if (pitchAccentCategories !== null) { node.dataset.pitchAccentCategories = pitchAccentCategories; } @@ -266,19 +269,21 @@ class DisplayGenerator { return fragment; } - _createTermDefinitionItem(details, dictionaryTag) { + _createTermDefinitionItem(details, dictionaryTag, headwords, uniqueTerms, uniqueReadings) { + const {dictionary, tags, headwordIndices, entries} = details; + const disambiguations = DictionaryDataUtil.getDisambiguations(headwords, headwordIndices, uniqueTerms, uniqueReadings); + const node = this._templates.instantiate('definition-item'); const tagListContainer = node.querySelector('.definition-tag-list'); const onlyListContainer = node.querySelector('.definition-disambiguation-list'); const glossaryContainer = node.querySelector('.glossary-list'); - const {dictionary, definitionTags} = details; node.dataset.dictionary = dictionary; - this._appendMultiple(tagListContainer, this._createTag.bind(this), [...definitionTags, dictionaryTag]); - this._appendMultiple(onlyListContainer, this._createTermDisambiguation.bind(this), details.only); - this._appendMultiple(glossaryContainer, this._createTermGlossaryItem.bind(this), details.glossary, dictionary); + this._appendMultiple(tagListContainer, this._createTag.bind(this), [...tags, dictionaryTag]); + this._appendMultiple(onlyListContainer, this._createTermDisambiguation.bind(this), disambiguations); + this._appendMultiple(glossaryContainer, this._createTermGlossaryItem.bind(this), entries, dictionary); return node; } @@ -406,11 +411,12 @@ class DisplayGenerator { } _createKanjiInfoTableItem(details) { + const {content, name, value} = details; const node = this._templates.instantiate('kanji-info-table-item'); const nameNode = node.querySelector('.kanji-info-table-item-header'); const valueNode = node.querySelector('.kanji-info-table-item-value'); - this._setTextContent(nameNode, details.notes || details.name); - this._setTextContent(valueNode, details.value); + this._setTextContent(nameNode, content.length > 0 ? content : name); + this._setTextContent(valueNode, value); return node; } @@ -419,37 +425,46 @@ class DisplayGenerator { } _createTag(details) { - const {notes, name, category, redundant} = details; + const {content, name, category, redundant} = details; const node = this._templates.instantiate('tag'); const inner = node.querySelector('.tag-label-content'); - node.title = notes; + const contentString = content.join('\n'); + + node.title = contentString; this._setTextContent(inner, name); - node.dataset.details = notes || name; + node.dataset.details = contentString.length > 0 ? contentString : name; node.dataset.category = category; if (redundant) { node.dataset.redundant = 'true'; } return node; } - _createTermTag(details, totalExpressionCount) { - const {tag, expressions} = details; + _createTermTag(details, totalHeadwordCount) { + const {tag, headwordIndices} = details; const node = this._createTag(tag); - node.dataset.disambiguation = `${JSON.stringify(expressions)}`; - node.dataset.totalExpressionCount = `${totalExpressionCount}`; - node.dataset.matchedExpressionCount = `${expressions.length}`; - node.dataset.unmatchedExpressionCount = `${Math.max(0, totalExpressionCount - expressions.length)}`; + node.dataset.headwords = headwordIndices.join(' '); + node.dataset.totalExpressionCount = `${totalHeadwordCount}`; + node.dataset.matchedExpressionCount = `${headwordIndices.length}`; + node.dataset.unmatchedExpressionCount = `${Math.max(0, totalHeadwordCount - headwordIndices.length)}`; return node; } - _createSearchTag(text) { - return this._createTag({ - notes: '', - name: text, - category: 'search', + _createTagData(name, category) { + return { + name, + category, + order: 0, + score: 0, + content: [], + dictionaries: [], redundant: false - }); + }; + } + + _createSearchTag(text) { + return this._createTag(this._createTagData(text, 'search')); } _createPitches(details) { @@ -462,7 +477,7 @@ class DisplayGenerator { node.dataset.pitchesMulti = 'true'; node.dataset.pitchesCount = `${pitches.length}`; - const tag = this._createTag({notes: '', name: dictionary, category: 'pitch-accent-dictionary'}); + const tag = this._createTag(this._createTagData(dictionary, 'pitch-accent-dictionary')); node.querySelector('.pitch-accent-group-tag-list').appendChild(tag); let hasTags = false; @@ -482,7 +497,7 @@ class DisplayGenerator { _createPitch(details) { const jp = this._japaneseUtil; - const {reading, position, tags, exclusiveExpressions, exclusiveReadings} = details; + const {reading, position, tags, exclusiveTerms, exclusiveReadings} = details; const morae = jp.getKanaMorae(reading); const node = this._templates.instantiate('pitch-accent'); @@ -497,7 +512,7 @@ class DisplayGenerator { this._appendMultiple(n, this._createTag.bind(this), tags); n = node.querySelector('.pitch-accent-disambiguation-list'); - this._createPitchAccentDisambiguations(n, exclusiveExpressions, exclusiveReadings); + this._createPitchAccentDisambiguations(n, exclusiveTerms, exclusiveReadings); n = node.querySelector('.pitch-accent-characters'); for (let i = 0, ii = morae.length; i < ii; ++i) { @@ -523,9 +538,9 @@ class DisplayGenerator { return node; } - _createPitchAccentDisambiguations(container, exclusiveExpressions, exclusiveReadings) { + _createPitchAccentDisambiguations(container, exclusiveTerms, exclusiveReadings) { const templateName = 'pitch-accent-disambiguation'; - for (const exclusiveExpression of exclusiveExpressions) { + for (const exclusiveExpression of exclusiveTerms) { const node = this._templates.instantiate(templateName); node.dataset.type = 'expression'; this._setTextContent(node, exclusiveExpression, 'ja'); @@ -539,8 +554,8 @@ class DisplayGenerator { container.appendChild(node); } - container.dataset.count = `${exclusiveExpressions.length + exclusiveReadings.length}`; - container.dataset.expressionCount = `${exclusiveExpressions.length}`; + container.dataset.count = `${exclusiveTerms.length + exclusiveReadings.length}`; + container.dataset.expressionCount = `${exclusiveTerms.length}`; container.dataset.readingCount = `${exclusiveReadings.length}`; } @@ -586,7 +601,7 @@ class DisplayGenerator { } _createFrequencyGroup(details, kanji) { - const {dictionary, frequencyData} = details; + const {dictionary, frequencies} = details; const node = this._templates.instantiate('frequency-group-item'); const body = node.querySelector('.tag-body-content'); @@ -594,36 +609,37 @@ class DisplayGenerator { this._setTextContent(node.querySelector('.tag-label-content'), dictionary); node.dataset.details = dictionary; - for (let i = 0, ii = frequencyData.length; i < ii; ++i) { - const item = frequencyData[i]; + const ii = frequencies.length; + for (let i = 0; i < ii; ++i) { + const item = frequencies[i]; const itemNode = (kanji ? this._createKanjiFrequency(item, dictionary) : this._createTermFrequency(item, dictionary)); itemNode.dataset.index = `${i}`; body.appendChild(itemNode); } - body.dataset.count = `${frequencyData.length}`; - node.dataset.count = `${frequencyData.length}`; + body.dataset.count = `${ii}`; + node.dataset.count = `${ii}`; node.dataset.details = dictionary; return node; } _createTermFrequency(details, dictionary) { - const {expression, reading, frequencies} = details; + const {term, reading, values} = details; const node = this._templates.instantiate('term-frequency-item'); this._setTextContent(node.querySelector('.tag-label-content'), dictionary); - const frequency = frequencies.join(', '); + const frequency = values.join(', '); - this._setTextContent(node.querySelector('.frequency-disambiguation-expression'), expression, 'ja'); + this._setTextContent(node.querySelector('.frequency-disambiguation-expression'), term, 'ja'); this._setTextContent(node.querySelector('.frequency-disambiguation-reading'), (reading !== null ? reading : ''), 'ja'); this._setTextContent(node.querySelector('.frequency-value'), frequency, 'ja'); - node.dataset.expression = expression; + node.dataset.expression = term; node.dataset.reading = reading; node.dataset.hasReading = `${reading !== null}`; - node.dataset.readingIsSame = `${reading === expression}`; + node.dataset.readingIsSame = `${reading === term}`; node.dataset.dictionary = dictionary; node.dataset.frequency = `${frequency}`; node.dataset.details = dictionary; @@ -632,10 +648,10 @@ class DisplayGenerator { } _createKanjiFrequency(details, dictionary) { - const {character, frequencies} = details; + const {character, values} = details; const node = this._templates.instantiate('kanji-frequency-item'); - const frequency = frequencies.join(', '); + const frequency = values.join(', '); this._setTextContent(node.querySelector('.tag-label-content'), dictionary); this._setTextContent(node.querySelector('.frequency-value'), frequency, 'ja'); @@ -707,15 +723,7 @@ class DisplayGenerator { } _createDictionaryTag(dictionary) { - return { - name: dictionary, - category: 'dictionary', - notes: '', - order: 100, - score: 0, - dictionary, - redundant: false - }; + return this._createTagData(dictionary, 'dictionary'); } _setTextContent(node, value, language) { @@ -751,11 +759,12 @@ class DisplayGenerator { } } - _getPitchAccentCategories(pitches) { - if (pitches.length === 0) { return null; } + _getPitchAccentCategories(reading, pronunciations, headwordIndex) { + if (pronunciations.length === 0) { return null; } const categories = new Set(); - for (const {reading, pitches: pitches2} of pitches) { - for (const {position} of pitches2) { + for (const pronunciation of pronunciations) { + if (pronunciation.headwordIndex !== headwordIndex) { continue; } + for (const {position} of pronunciation.pitches) { const category = this._japaneseUtil.getPitchCategory(reading, position, false); if (category !== null) { categories.add(category); diff --git a/ext/js/language/dictionary-data-util.js b/ext/js/language/dictionary-data-util.js index dff9d212..f44b81c5 100644 --- a/ext/js/language/dictionary-data-util.js +++ b/ext/js/language/dictionary-data-util.js @@ -16,40 +16,41 @@ */ class DictionaryDataUtil { - static groupTermTags(definition) { - const {expressions} = definition; - const expressionsLength = expressions.length; - const uniqueCheck = (expressionsLength > 1); - const resultsMap = new Map(); + static groupTermTags(dictionaryEntry) { + const {headwords} = dictionaryEntry; + const headwordCount = headwords.length; + const uniqueCheck = (headwordCount > 1); + const resultsIndexMap = new Map(); const results = []; - for (let i = 0; i < expressionsLength; ++i) { - const {termTags, expression, reading} = expressions[i]; - for (const tag of termTags) { + for (let i = 0; i < headwordCount; ++i) { + const {tags} = headwords[i]; + for (const tag of tags) { if (uniqueCheck) { const {name, category, notes, dictionary} = tag; const key = this._createMapKey([name, category, notes, dictionary]); - const index = resultsMap.get(key); + const index = resultsIndexMap.get(key); if (typeof index !== 'undefined') { const existingItem = results[index]; - existingItem.expressions.push({index: i, expression, reading}); + existingItem.headwordIndices.push(i); continue; } - resultsMap.set(key, results.length); + resultsIndexMap.set(key, results.length); } - const item = { - tag, - expressions: [{index: i, expression, reading}] - }; + const item = {tag, headwordIndices: [i]}; results.push(item); } } return results; } - static groupTermFrequencies(frequencies) { + static groupTermFrequencies(dictionaryEntry) { + const {headwords, frequencies} = dictionaryEntry; + const map1 = new Map(); - for (const {dictionary, expression, reading, hasReading, frequency} of frequencies) { + for (const {headwordIndex, dictionary, hasReading, frequency} of frequencies) { + const {term, reading} = headwords[headwordIndex]; + let map2 = map1.get(dictionary); if (typeof map2 === 'undefined') { map2 = new Map(); @@ -57,14 +58,14 @@ class DictionaryDataUtil { } const readingKey = hasReading ? reading : null; - const key = this._createMapKey([expression, readingKey]); + const key = this._createMapKey([term, readingKey]); let frequencyData = map2.get(key); if (typeof frequencyData === 'undefined') { - frequencyData = {expression, reading: readingKey, frequencies: new Set()}; + frequencyData = {term, reading: readingKey, values: new Set()}; map2.set(key, frequencyData); } - frequencyData.frequencies.add(frequency); + frequencyData.values.add(frequency); } return this._createFrequencyGroupsFromMap(map1); } @@ -80,64 +81,66 @@ class DictionaryDataUtil { let frequencyData = map2.get(character); if (typeof frequencyData === 'undefined') { - frequencyData = {character, frequencies: new Set()}; + frequencyData = {character, values: new Set()}; map2.set(character, frequencyData); } - frequencyData.frequencies.add(frequency); + frequencyData.values.add(frequency); } return this._createFrequencyGroupsFromMap(map1); } - static getPitchAccentInfos(definition) { - if (definition.type === 'kanji') { return []; } + static getPitchAccentInfos(dictionaryEntry) { + const {headwords, pronunciations} = dictionaryEntry; - const results = new Map(); const allExpressions = new Set(); const allReadings = new Set(); - - for (const {expression, reading, pitches: expressionPitches} of definition.expressions) { - allExpressions.add(expression); + for (const {term, reading} of headwords) { + allExpressions.add(term); allReadings.add(reading); + } - for (const {pitches, dictionary} of expressionPitches) { - let dictionaryResults = results.get(dictionary); - if (typeof dictionaryResults === 'undefined') { - dictionaryResults = []; - results.set(dictionary, dictionaryResults); - } - - for (const {position, tags} of pitches) { - let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, tags, dictionaryResults); - if (pitchAccentInfo === null) { - pitchAccentInfo = {expressions: new Set(), reading, position, tags}; - dictionaryResults.push(pitchAccentInfo); - } - pitchAccentInfo.expressions.add(expression); + const pitchAccentInfoMap = new Map(); + for (const {headwordIndex, dictionary, pitches} of pronunciations) { + const {term, reading} = headwords[headwordIndex]; + let dictionaryPitchAccentInfoList = pitchAccentInfoMap.get(dictionary); + if (typeof dictionaryPitchAccentInfoList === 'undefined') { + dictionaryPitchAccentInfoList = []; + pitchAccentInfoMap.set(dictionary, dictionaryPitchAccentInfoList); + } + for (const {position, tags} of pitches) { + let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, tags, dictionaryPitchAccentInfoList); + if (pitchAccentInfo === null) { + pitchAccentInfo = { + terms: new Set(), + reading, + position, + tags, + exclusiveTerms: [], + exclusiveReadings: [] + }; + dictionaryPitchAccentInfoList.push(pitchAccentInfo); } + pitchAccentInfo.terms.add(term); } } const multipleReadings = (allReadings.size > 1); - for (const dictionaryResults of results.values()) { - for (const result of dictionaryResults) { - const exclusiveExpressions = []; - const exclusiveReadings = []; - const resultExpressions = result.expressions; - if (!this._areSetsEqual(resultExpressions, allExpressions)) { - exclusiveExpressions.push(...this._getSetIntersection(resultExpressions, allExpressions)); + for (const dictionaryPitchAccentInfoList of pitchAccentInfoMap.values()) { + for (const pitchAccentInfo of dictionaryPitchAccentInfoList) { + const {terms, reading, exclusiveTerms, exclusiveReadings} = pitchAccentInfo; + if (!this._areSetsEqual(terms, allExpressions)) { + exclusiveTerms.push(...this._getSetIntersection(terms, allExpressions)); } if (multipleReadings) { - exclusiveReadings.push(result.reading); + exclusiveReadings.push(reading); } - result.expressions = [...resultExpressions]; - result.exclusiveExpressions = exclusiveExpressions; - result.exclusiveReadings = exclusiveReadings; + pitchAccentInfo.terms = [...terms]; } } const results2 = []; - for (const [dictionary, pitches] of results.entries()) { + for (const [dictionary, pitches] of pitchAccentInfoMap.entries()) { results2.push({dictionary, pitches}); } return results2; @@ -157,17 +160,34 @@ class DictionaryDataUtil { } } + static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) { + if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; } + + const terms = new Set(); + const readings = new Set(); + for (const headwordIndex of headwordIndices) { + const {term, reading} = headwords[headwordIndex]; + terms.add(term); + readings.add(reading); + } + + const disambiguations = []; + if (!this._areSetsEqual(terms, allTermsSet)) { disambiguations.push(...this._getSetIntersection(terms, allTermsSet)); } + if (!this._areSetsEqual(readings, allReadingsSet)) { disambiguations.push(...this._getSetIntersection(readings, allReadingsSet)); } + return disambiguations; + } + // Private static _createFrequencyGroupsFromMap(map) { const results = []; for (const [dictionary, map2] of map.entries()) { - const frequencyDataArray = []; + const frequencies = []; for (const frequencyData of map2.values()) { - frequencyData.frequencies = [...frequencyData.frequencies]; - frequencyDataArray.push(frequencyData); + frequencyData.values = [...frequencyData.values]; + frequencies.push(frequencyData); } - results.push({dictionary, frequencyData: frequencyDataArray}); + results.push({dictionary, frequencies}); } return results; } diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 151b1172..934c8e4a 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -22,7 +22,7 @@ */ /** - * Class which finds term and kanji definitions for text. + * Class which finds term and kanji dictionary entries for text. */ class Translator { /** @@ -59,6 +59,7 @@ class Translator { * One of: 'group', 'merge', 'split', 'simple' * @param text The text to find terms for. * @param options An object using the following structure: + * ``` * { * wildcard: (enum: null, 'prefix', 'suffix'), * mainDictionary: (string), @@ -85,22 +86,35 @@ class Translator { * } * ]) * } - * @returns An array of [definitions, textLength]. The structure of each definition depends on the - * mode parameter, see the _create?TermDefinition?() functions for structure details. + * ``` + * @returns An object of the structure `{dictionaryEntries, originalTextLength}`. */ async findTerms(mode, text, options) { + const {enabledDictionaryMap} = options; + let {dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, enabledDictionaryMap, options); + switch (mode) { case 'group': - return await this._findTermsGrouped(text, options); + dictionaryEntries = this._groupDictionaryEntriesByHeadword(dictionaryEntries); + break; case 'merge': - return await this._findTermsMerged(text, options); - case 'split': - return await this._findTermsSplit(text, options); - case 'simple': - return await this._findTermsSimple(text, options); - default: - return [[], 0]; + dictionaryEntries = await this._getRelatedDictionaryEntries(dictionaryEntries, options.mainDictionary, enabledDictionaryMap); + break; } + + if (dictionaryEntries.length > 1) { + this._sortTermDictionaryEntries(dictionaryEntries); + } + + if (mode === 'simple') { + this._clearTermTags(dictionaryEntries); + } else { + await this._addTermMeta(dictionaryEntries, enabledDictionaryMap); + await this._expandTermTags(dictionaryEntries); + this._sortTermDictionaryEntryData(dictionaryEntries); + } + + return {dictionaryEntries, originalTextLength}; } /** @@ -127,93 +141,28 @@ class Translator { kanjiUnique.add(c); } - const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], enabledDictionaryMap); - if (databaseDefinitions.length === 0) { return []; } - - this._sortDatabaseDefinitionsByIndex(databaseDefinitions); - - const definitions = []; - for (const {character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) { - const expandedStats = await this._expandStats(stats, dictionary); - const expandedTags = await this._expandTags(tags, dictionary); - this._sortTags(expandedTags); - - const definition = this._createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, expandedTags, expandedStats); - definitions.push(definition); - } - - await this._buildKanjiMeta(definitions, enabledDictionaryMap); - - return definitions; - } - - // Find terms core functions - - async _findTermsSimple(text, options) { - const {enabledDictionaryMap} = options; - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); - this._sortDefinitions(definitions); - return [definitions, length]; - } - - async _findTermsSplit(text, options) { - const {enabledDictionaryMap} = options; - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); - await this._buildTermMeta(definitions, enabledDictionaryMap); - this._sortDefinitions(definitions); - return [definitions, length]; - } - - async _findTermsGrouped(text, options) { - const {enabledDictionaryMap} = options; - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); - - const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap); - await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap); - this._sortDefinitions(groupedDefinitions); + const databaseEntries = await this._database.findKanjiBulk([...kanjiUnique], enabledDictionaryMap); + if (databaseEntries.length === 0) { return []; } - for (const definition of groupedDefinitions) { - this._flagRedundantDefinitionTags(definition.definitions); - } + this._sortDatabaseEntriesByIndex(databaseEntries); - return [groupedDefinitions, length]; - } + const dictionaryEntries = []; + for (const {character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseEntries) { + const expandedStats = await this._expandKanjiStats(stats, dictionary); - async _findTermsMerged(text, options) { - const {mainDictionary, enabledDictionaryMap} = options; - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); - const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap); - const definitionsMerged = []; + const tagGroups = []; + if (tags.length > 0) { tagGroups.push(this._createTagGroup(dictionary, tags)); } - for (const {relatedDefinitions, secondaryDefinitions} of sequencedDefinitions) { - const mergedDefinition = this._getMergedDefinition(relatedDefinitions, secondaryDefinitions); - definitionsMerged.push(mergedDefinition); + const dictionaryEntry = this._createKanjiDictionaryEntry(character, dictionary, onyomi, kunyomi, tagGroups, expandedStats, glossary); + dictionaryEntries.push(dictionaryEntry); } - for (const groupedDefinition of this._groupTerms(unsequencedDefinitions, enabledDictionaryMap)) { - const {reasons, score, expression, reading, source, rawSource, definitions: definitions2} = groupedDefinition; - const termDetailsList = this._createTermDetailsList(definitions2); - const compatibilityDefinition = this._createMergedTermDefinition( - source, - rawSource, - this._convertTermDefinitionsToMergedGlossaryTermDefinitions(definitions2), - [expression], - [reading], - termDetailsList, - reasons, - score - ); - definitionsMerged.push(compatibilityDefinition); - } - - await this._buildTermMeta(definitionsMerged, enabledDictionaryMap); - this._sortDefinitions(definitionsMerged); + await this._addKanjiMeta(dictionaryEntries, enabledDictionaryMap); + await this._expandKanjiTags(dictionaryEntries); - for (const definition of definitionsMerged) { - this._flagRedundantDefinitionTags(definition.definitions); - } + this._sortKanjiDictionaryEntryData(dictionaryEntries); - return [definitionsMerged, length]; + return dictionaryEntries; } // Find terms internal implementation @@ -225,33 +174,33 @@ class Translator { return [[], 0]; } - const deinflections = ( + const deinflections = await ( wildcard ? - await this._findTermWildcard(text, enabledDictionaryMap, wildcard) : - await this._findTermDeinflections(text, enabledDictionaryMap, options) + this._findTermsWildcard(text, enabledDictionaryMap, wildcard) : + this._findTermDeinflections(text, enabledDictionaryMap, options) ); - let maxLength = 0; - const definitions = []; - const definitionIds = new Set(); - for (const {databaseDefinitions, source, rawSource, term, reasons} of deinflections) { - if (databaseDefinitions.length === 0) { continue; } - maxLength = Math.max(maxLength, rawSource.length); - for (const databaseDefinition of databaseDefinitions) { - const {id} = databaseDefinition; - if (definitionIds.has(id)) { continue; } - const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, term, reasons, true, enabledDictionaryMap); - definitions.push(definition); - definitionIds.add(id); + let originalTextLength = 0; + const dictionaryEntries = []; + const ids = new Set(); + for (const {databaseEntries, originalText, transformedText, deinflectedText, reasons} of deinflections) { + if (databaseEntries.length === 0) { continue; } + originalTextLength = Math.max(originalTextLength, originalText.length); + for (const databaseEntry of databaseEntries) { + const {id} = databaseEntry; + if (ids.has(id)) { continue; } + const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, reasons, true, enabledDictionaryMap); + dictionaryEntries.push(dictionaryEntry); + ids.add(id); } } - return [definitions, maxLength]; + return {dictionaryEntries, originalTextLength}; } - async _findTermWildcard(text, enabledDictionaryMap, wildcard) { - const databaseDefinitions = await this._database.findTermsBulk([text], enabledDictionaryMap, wildcard); - return databaseDefinitions.length > 0 ? [this._createDeinflection(text, text, text, 0, [], databaseDefinitions)] : []; + async _findTermsWildcard(text, enabledDictionaryMap, wildcard) { + const databaseEntries = await this._database.findTermsBulk([text], enabledDictionaryMap, wildcard); + return databaseEntries.length > 0 ? [this._createDeinflection(text, text, text, 0, [], databaseEntries)] : []; } async _findTermDeinflections(text, enabledDictionaryMap, options) { @@ -265,7 +214,7 @@ class Translator { const uniqueDeinflectionArrays = []; const uniqueDeinflectionsMap = new Map(); for (const deinflection of deinflections) { - const term = deinflection.term; + const term = deinflection.deinflectedText; let deinflectionArray = uniqueDeinflectionsMap.get(term); if (typeof deinflectionArray === 'undefined') { deinflectionArray = []; @@ -276,14 +225,14 @@ class Translator { deinflectionArray.push(deinflection); } - const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, null); + const databaseEntries = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, null); - for (const databaseDefinition of databaseDefinitions) { - const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules); - for (const deinflection of uniqueDeinflectionArrays[databaseDefinition.index]) { + for (const databaseEntry of databaseEntries) { + const definitionRules = Deinflector.rulesToRuleFlags(databaseEntry.rules); + for (const deinflection of uniqueDeinflectionArrays[databaseEntry.index]) { const deinflectionRules = deinflection.rules; if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) { - deinflection.databaseDefinitions.push(databaseDefinition); + deinflection.databaseEntries.push(databaseEntry); } } } @@ -291,6 +240,8 @@ class Translator { return deinflections; } + // Deinflections and text transformations + _getAllDeinflections(text, options) { const textOptionVariantArray = [ this._getTextReplacementsVariants(options), @@ -336,120 +287,159 @@ class Translator { used.add(source); const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); for (const {term, rules, reasons} of this._deinflector.deinflect(source)) { - deinflections.push(this._createDeinflection(source, rawSource, term, rules, reasons, [])); + deinflections.push(this._createDeinflection(rawSource, source, term, rules, reasons, [])); } } } return deinflections; } - _createDeinflection(source, rawSource, term, rules, reasons, databaseDefinitions) { - return {source, rawSource, term, rules, reasons, databaseDefinitions}; + _applyTextReplacements(text, sourceMap, replacements) { + for (const {pattern, replacement} of replacements) { + text = RegexUtil.applyTextReplacement(text, sourceMap, pattern, replacement); + } + return text; + } + + _getSearchableText(text, allowAlphanumericCharacters) { + if (allowAlphanumericCharacters) { return text; } + const jp = this._japaneseUtil; + let length = 0; + for (const c of text) { + if (!jp.isCodePointJapanese(c.codePointAt(0))) { break; } + length += c.length; + } + return length >= text.length ? text : text.substring(0, length); } - /** - * @param definitions An array of 'term' definitions. - * @param mainDictionary The name of the main dictionary. - * @param enabledDictionaryMap The map of enabled dictionaries and their settings. - */ - async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) { - const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap); + _getTextOptionEntryVariants(value) { + switch (value) { + case 'true': return [true]; + case 'variant': return [false, true]; + default: return [false]; + } + } + + _getCollapseEmphaticOptions(options) { + const collapseEmphaticOptions = [[false, false]]; + switch (options.collapseEmphaticSequences) { + case 'true': + collapseEmphaticOptions.push([true, false]); + break; + case 'full': + collapseEmphaticOptions.push([true, false], [true, true]); + break; + } + return collapseEmphaticOptions; + } + + _getTextReplacementsVariants(options) { + return options.textReplacements; + } + + _createDeinflection(originalText, transformedText, deinflectedText, rules, reasons, databaseEntries) { + return {originalText, transformedText, deinflectedText, rules, reasons, databaseEntries}; + } + + // Term dictionary entry grouping + + async _getRelatedDictionaryEntries(dictionaryEntries, mainDictionary, enabledDictionaryMap) { const sequenceList = []; - const sequencedDefinitionMap = new Map(); - const sequencedDefinitions = []; - const unsequencedDefinitions = new Map(); - for (const definition of definitions) { - const {sequence, dictionary, id} = definition; + const groupedDictionaryEntries = []; + const groupedDictionaryEntriesMap = new Map(); + const ungroupedDictionaryEntriesMap = new Map(); + for (const dictionaryEntry of dictionaryEntries) { + const {id, sequence, definitions: [{dictionary}]} = dictionaryEntry; if (mainDictionary === dictionary && sequence >= 0) { - let sequencedDefinition = sequencedDefinitionMap.get(sequence); - if (typeof sequencedDefinition === 'undefined') { - sequencedDefinition = { - relatedDefinitions: [], - definitionIds: new Set(), - secondaryDefinitions: [] - }; - sequencedDefinitionMap.set(sequence, sequencedDefinition); - sequencedDefinitions.push(sequencedDefinition); - sequenceList.push(sequence); + let group = groupedDictionaryEntriesMap.get(sequence); + if (typeof group === 'undefined') { + group = {ids: new Set(), dictionaryEntries: []}; + sequenceList.push({query: sequence, dictionary}); + groupedDictionaryEntries.push(group); + groupedDictionaryEntriesMap.set(sequence, group); } - sequencedDefinition.relatedDefinitions.push(definition); - sequencedDefinition.definitionIds.add(id); + group.dictionaryEntries.push(dictionaryEntry); + group.ids.add(id); } else { - unsequencedDefinitions.set(id, definition); + ungroupedDictionaryEntriesMap.set(id, dictionaryEntry); } } if (sequenceList.length > 0) { - await this._addRelatedDefinitions(sequencedDefinitions, unsequencedDefinitions, sequenceList, mainDictionary, enabledDictionaryMap); - await this._addSecondaryDefinitions(sequencedDefinitions, unsequencedDefinitions, enabledDictionaryMap, secondarySearchDictionaryMap); + const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap); + await this._addRelatedDictionaryEntries(groupedDictionaryEntries, ungroupedDictionaryEntriesMap, sequenceList, mainDictionary, enabledDictionaryMap); + for (const group of groupedDictionaryEntries) { + this._sortTermDictionaryEntriesById(group.dictionaryEntries); + } + if (ungroupedDictionaryEntriesMap.size !== 0 || secondarySearchDictionaryMap.size !== 0) { + await this._addSecondaryRelatedDictionaryEntries(groupedDictionaryEntries, ungroupedDictionaryEntriesMap, enabledDictionaryMap, secondarySearchDictionaryMap); + } } - for (const {relatedDefinitions} of sequencedDefinitions) { - this._sortDefinitionsById(relatedDefinitions); + const newDictionaryEntries = []; + for (const group of groupedDictionaryEntries) { + newDictionaryEntries.push(this._createGroupedDictionaryEntry(group.dictionaryEntries, true)); } - - return {sequencedDefinitions, unsequencedDefinitions: [...unsequencedDefinitions.values()]}; - } - - async _addRelatedDefinitions(sequencedDefinitions, unsequencedDefinitions, sequenceList, mainDictionary, enabledDictionaryMap) { - const items = sequenceList.map((query) => ({query, dictionary: mainDictionary})); - const databaseDefinitions = await this._database.findTermsBySequenceBulk(items); - for (const databaseDefinition of databaseDefinitions) { - const {relatedDefinitions, definitionIds} = sequencedDefinitions[databaseDefinition.index]; - const {id} = databaseDefinition; - if (definitionIds.has(id)) { continue; } - - const {source, rawSource, sourceTerm} = relatedDefinitions[0]; - const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, [], false, enabledDictionaryMap); - relatedDefinitions.push(definition); - definitionIds.add(id); - unsequencedDefinitions.delete(id); + newDictionaryEntries.push(...this._groupDictionaryEntriesByHeadword(ungroupedDictionaryEntriesMap.values())); + return newDictionaryEntries; + } + + async _addRelatedDictionaryEntries(groupedDictionaryEntries, ungroupedDictionaryEntriesMap, sequenceList, mainDictionary, enabledDictionaryMap) { + const databaseEntries = await this._database.findTermsBySequenceBulk(sequenceList); + for (const databaseEntry of databaseEntries) { + const {dictionaryEntries, ids} = groupedDictionaryEntries[databaseEntry.index]; + const {id} = databaseEntry; + if (ids.has(id)) { continue; } + + const sourceText = databaseEntry.expression; + const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, sourceText, sourceText, sourceText, [], false, enabledDictionaryMap); + dictionaryEntries.push(dictionaryEntry); + ids.add(id); + ungroupedDictionaryEntriesMap.delete(id); } } - async _addSecondaryDefinitions(sequencedDefinitions, unsequencedDefinitions, enabledDictionaryMap, secondarySearchDictionaryMap) { - if (unsequencedDefinitions.length === 0 && secondarySearchDictionaryMap.size === 0) { return; } - + async _addSecondaryRelatedDictionaryEntries(groupedDictionaryEntries, ungroupedDictionaryEntriesMap, enabledDictionaryMap, secondarySearchDictionaryMap) { // Prepare grouping info const termList = []; const targetList = []; const targetMap = new Map(); - for (const sequencedDefinition of sequencedDefinitions) { - const {relatedDefinitions} = sequencedDefinition; - for (const definition of relatedDefinitions) { - const {expressions: [{expression, reading}]} = definition; - const key = this._createMapKey([expression, reading]); + for (const group of groupedDictionaryEntries) { + const {dictionaryEntries} = group; + for (const dictionaryEntry of dictionaryEntries) { + const {term, reading} = dictionaryEntry.headwords[0]; + const key = this._createMapKey([term, reading]); let target = targetMap.get(key); if (typeof target === 'undefined') { target = { - sequencedDefinitions: [], + groups: [], searchSecondary: false }; targetMap.set(key, target); } - target.sequencedDefinitions.push(sequencedDefinition); - if (!definition.isPrimary && !target.searchSecondary) { + target.groups.push(group); + if (!dictionaryEntry.isPrimary && !target.searchSecondary) { target.searchSecondary = true; - termList.push({expression, reading}); + termList.push({expression: term, reading}); targetList.push(target); } } } - // Group unsequenced definitions with sequenced definitions that have a matching [expression, reading]. - for (const [id, definition] of unsequencedDefinitions.entries()) { - const {expressions: [{expression, reading}]} = definition; - const key = this._createMapKey([expression, reading]); + // Group unsequenced dictionary entries with sequenced entries that have a matching [expression, reading]. + for (const [id, dictionaryEntry] of ungroupedDictionaryEntriesMap.entries()) { + const {term, reading} = dictionaryEntry.headwords[0]; + const key = this._createMapKey([term, reading]); const target = targetMap.get(key); if (typeof target === 'undefined') { continue; } - for (const {definitionIds, secondaryDefinitions} of target.sequencedDefinitions) { - if (definitionIds.has(id)) { continue; } + for (const {ids, dictionaryEntries} of target.groups) { + if (ids.has(id)) { continue; } - secondaryDefinitions.push(definition); - definitionIds.add(id); - unsequencedDefinitions.delete(id); + dictionaryEntries.push(dictionaryEntry); + ids.add(id); + ungroupedDictionaryEntriesMap.delete(id); break; } } @@ -457,102 +447,200 @@ class Translator { // Search database for additional secondary terms if (termList.length === 0 || secondarySearchDictionaryMap.size === 0) { return; } - const databaseDefinitions = await this._database.findTermsExactBulk(termList, secondarySearchDictionaryMap); - this._sortDatabaseDefinitionsByIndex(databaseDefinitions); + const databaseEntries = await this._database.findTermsExactBulk(termList, secondarySearchDictionaryMap); + this._sortDatabaseEntriesByIndex(databaseEntries); - for (const databaseDefinition of databaseDefinitions) { - const {index, id} = databaseDefinition; - const source = termList[index].expression; + for (const databaseEntry of databaseEntries) { + const {index, id} = databaseEntry; + const sourceText = termList[index].expression; const target = targetList[index]; - for (const {definitionIds, secondaryDefinitions} of target.sequencedDefinitions) { - if (definitionIds.has(id)) { continue; } + for (const {ids, dictionaryEntries} of target.groups) { + if (ids.has(id)) { continue; } - const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], false, enabledDictionaryMap); - secondaryDefinitions.push(definition); - definitionIds.add(id); - unsequencedDefinitions.delete(id); + const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, sourceText, sourceText, sourceText, [], false, enabledDictionaryMap); + dictionaryEntries.push(dictionaryEntry); + ids.add(id); + ungroupedDictionaryEntriesMap.delete(id); } } } - _getMergedDefinition(relatedDefinitions, secondaryDefinitions) { - const {reasons, source, rawSource} = relatedDefinitions[0]; - const allDefinitions = secondaryDefinitions.length > 0 ? [...relatedDefinitions, ...secondaryDefinitions] : relatedDefinitions; - const score = this._getMaxPrimaryDefinitionScore(allDefinitions); + _groupDictionaryEntriesByHeadword(dictionaryEntries) { + const groups = new Map(); + for (const dictionaryEntry of dictionaryEntries) { + const {inflections, headwords: [{term, reading}]} = dictionaryEntry; + const key = this._createMapKey([term, reading, ...inflections]); + let dictionaryEntries2 = groups.get(key); + if (typeof dictionaryEntries2 === 'undefined') { + dictionaryEntries2 = []; + groups.set(key, dictionaryEntries2); + } + dictionaryEntries2.push(dictionaryEntry); + } - // Merge by glossary - const allExpressions = new Set(); - const allReadings = new Set(); - const glossaryDefinitionGroupMap = new Map(); - for (const definition of allDefinitions) { - const {dictionary, glossary, expressions: [{expression, reading}]} = definition; + const results = []; + for (const dictionaryEntries2 of groups.values()) { + const dictionaryEntry = this._createGroupedDictionaryEntry(dictionaryEntries2, false); + results.push(dictionaryEntry); + } + return results; + } - const key = this._createMapKey([dictionary, ...glossary]); - let group = glossaryDefinitionGroupMap.get(key); - if (typeof group === 'undefined') { - group = { - expressions: new Set(), - readings: new Set(), - definitions: [] - }; - glossaryDefinitionGroupMap.set(key, group); + // Tags + + _getTermTagTargets(dictionaryEntries) { + const tagTargets = []; + for (const {headwords, definitions, pronunciations} of dictionaryEntries) { + this._addTagExpansionTargets(tagTargets, headwords); + this._addTagExpansionTargets(tagTargets, definitions); + for (const {pitches} of pronunciations) { + this._addTagExpansionTargets(tagTargets, pitches); } + } + return tagTargets; + } + + _clearTermTags(dictionaryEntries) { + this._getTermTagTargets(dictionaryEntries); + } + + async _expandTermTags(dictionaryEntries) { + const tagTargets = this._getTermTagTargets(dictionaryEntries); + await this._expandTagGroups(tagTargets); + this._groupTags(tagTargets); + } - allExpressions.add(expression); - allReadings.add(reading); - group.expressions.add(expression); - group.readings.add(reading); - group.definitions.push(definition); + async _expandKanjiTags(dictionaryEntries) { + const tagTargets = []; + this._addTagExpansionTargets(tagTargets, dictionaryEntries); + await this._expandTagGroups(tagTargets); + this._groupTags(tagTargets); + } + + async _expandTagGroups(tagTargets) { + const allItems = []; + const targetMap = new Map(); + for (const {tagGroups, tags} of tagTargets) { + for (const {dictionary, tagNames} of tagGroups) { + let dictionaryItems = targetMap.get(dictionary); + if (typeof dictionaryItems === 'undefined') { + dictionaryItems = new Map(); + targetMap.set(dictionary, dictionaryItems); + } + for (const tagName of tagNames) { + let item = dictionaryItems.get(tagName); + if (typeof item === 'undefined') { + const query = this._getNameBase(tagName); + item = {query, dictionary, tagName, cache: null, databaseTag: null, targets: []}; + dictionaryItems.set(tagName, item); + allItems.push(item); + } + item.targets.push(tags); + } + } } - const glossaryDefinitions = []; - for (const {expressions, readings, definitions} of glossaryDefinitionGroupMap.values()) { - const glossaryDefinition = this._createMergedGlossaryTermDefinition( - source, - rawSource, - definitions, - expressions, - readings, - allExpressions, - allReadings - ); - glossaryDefinitions.push(glossaryDefinition); + const nonCachedItems = []; + const tagCache = this._tagCache; + for (const [dictionary, dictionaryItems] of targetMap.entries()) { + let cache = tagCache.get(dictionary); + if (typeof cache === 'undefined') { + cache = new Map(); + tagCache.set(dictionary, cache); + } + for (const item of dictionaryItems.values()) { + const databaseTag = cache.get(item.query); + if (typeof databaseTag !== 'undefined') { + item.databaseTag = databaseTag; + } else { + item.cache = cache; + nonCachedItems.push(item); + } + } } - this._sortDefinitions(glossaryDefinitions, false); - const termDetailsList = this._createTermDetailsList(allDefinitions); + const nonCachedItemCount = nonCachedItems.length; + if (nonCachedItemCount > 0) { + const databaseTags = await this._database.findTagMetaBulk(nonCachedItems); + for (let i = 0; i < nonCachedItemCount; ++i) { + const item = nonCachedItems[i]; + let databaseTag = databaseTags[i]; + if (typeof databaseTag === 'undefined') { databaseTag = null; } + item.databaseTag = databaseTag; + item.cache.set(item.query, databaseTag); + } + } - return this._createMergedTermDefinition( - source, - rawSource, - glossaryDefinitions, - [...allExpressions], - [...allReadings], - termDetailsList, - reasons, - score - ); + for (const {dictionary, tagName, databaseTag, targets} of allItems) { + for (const tags of targets) { + tags.push(this._createTag(databaseTag, tagName, dictionary)); + } + } } - _getUniqueDefinitionTags(definitions) { - const definitionTagsMap = new Map(); - for (const {definitionTags} of definitions) { - for (const tag of definitionTags) { - const {name} = tag; - if (definitionTagsMap.has(name)) { continue; } - definitionTagsMap.set(name, this._cloneTag(tag)); + _groupTags(tagTargets) { + const stringComparer = this._stringComparer; + const compare = (v1, v2) => { + const i = v1.order - v2.order; + return i !== 0 ? i : stringComparer.compare(v1.name, v2.name); + }; + + for (const {tags} of tagTargets) { + if (tags.length <= 1) { continue; } + this._mergeSimilarTags(tags); + tags.sort(compare); + } + } + + _addTagExpansionTargets(tagTargets, objects) { + for (const value of objects) { + const tagGroups = value.tags; + if (tagGroups.length === 0) { continue; } + const tags = []; + value.tags = tags; + tagTargets.push({tagGroups, tags}); + } + } + + _mergeSimilarTags(tags) { + let tagCount = tags.length; + for (let i = 0; i < tagCount; ++i) { + const tag1 = tags[i]; + const {category, name} = tag1; + for (let j = i + 1; j < tagCount; ++j) { + const tag2 = tags[j]; + if (tag2.name !== name || tag2.category !== category) { continue; } + // Merge tag + tag1.order = Math.min(tag1.order, tag2.order); + tag1.score = Math.max(tag1.score, tag2.score); + tag1.dictionaries.push(...tag2.dictionaries); + this._addUniqueStrings(tag1.content, tag2.content); + tags.splice(j, 1); + --tagCount; + --j; } } - return [...definitionTagsMap.values()]; + } + + _getTagNamesWithCategory(tags, category) { + const results = []; + for (const tag of tags) { + if (tag.category !== category) { continue; } + results.push(tag.name); + } + results.sort(); + return results; } _flagRedundantDefinitionTags(definitions) { + if (definitions.length === 0) { return; } + let lastDictionary = null; let lastPartOfSpeech = ''; const removeCategoriesSet = new Set(); - for (const {dictionary, definitionTags} of definitions) { - const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech')); + for (const {dictionary, tags} of definitions) { + const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(tags, 'partOfSpeech')); if (lastDictionary !== dictionary) { lastDictionary = dictionary; @@ -566,87 +654,46 @@ class Translator { } if (removeCategoriesSet.size > 0) { - this._flagTagsWithCategoryAsRedundant(definitionTags, removeCategoriesSet); + for (const tag of tags) { + if (removeCategoriesSet.has(tag.category)) { + tag.redundant = true; + } + } removeCategoriesSet.clear(); } } } - /** - * Groups definitions with the same [source, expression, reading, reasons]. - * @param definitions An array of 'term' definitions. - * @returns An array of 'termGrouped' definitions. - */ - _groupTerms(definitions) { - const groups = new Map(); - for (const definition of definitions) { - const {source, reasons, expressions: [{expression, reading}]} = definition; - const key = this._createMapKey([source, expression, reading, ...reasons]); - let groupDefinitions = groups.get(key); - if (typeof groupDefinitions === 'undefined') { - groupDefinitions = []; - groups.set(key, groupDefinitions); - } - - groupDefinitions.push(definition); - } - - const results = []; - for (const groupDefinitions of groups.values()) { - this._sortDefinitions(groupDefinitions, false); - const definition = this._createGroupedTermDefinition(groupDefinitions); - results.push(definition); - } - - return results; - } - - _convertTermDefinitionsToMergedGlossaryTermDefinitions(definitions) { - const convertedDefinitions = []; - for (const definition of definitions) { - const {source, rawSource, expression, reading} = definition; - const expressions = new Set([expression]); - const readings = new Set([reading]); - const convertedDefinition = this._createMergedGlossaryTermDefinition(source, rawSource, [definition], expressions, readings, expressions, readings); - convertedDefinitions.push(convertedDefinition); - } - return convertedDefinitions; - } - - // Metadata building + // Metadata - async _buildTermMeta(definitions, enabledDictionaryMap) { - const allDefinitions = this._getAllDefinitions(definitions); - const expressionMap = new Map(); - const expressionValues = []; - const expressionKeys = []; + async _addTermMeta(dictionaryEntries, enabledDictionaryMap) { + const headwordMap = new Map(); + const headwordMapKeys = []; + const headwordReadingMaps = []; - for (const {expressions, frequencies: frequencies1, pitches: pitches1} of allDefinitions) { - for (let i = 0, ii = expressions.length; i < ii; ++i) { - const {expression, reading, frequencies: frequencies2, pitches: pitches2} = expressions[i]; - let readingMap = expressionMap.get(expression); + for (const {headwords, pronunciations, frequencies} of dictionaryEntries) { + for (let i = 0, ii = headwords.length; i < ii; ++i) { + const {term, reading} = headwords[i]; + let readingMap = headwordMap.get(term); if (typeof readingMap === 'undefined') { readingMap = new Map(); - expressionMap.set(expression, readingMap); - expressionValues.push(readingMap); - expressionKeys.push(expression); + headwordMap.set(term, readingMap); + headwordMapKeys.push(term); + headwordReadingMaps.push(readingMap); } let targets = readingMap.get(reading); if (typeof targets === 'undefined') { targets = []; readingMap.set(reading, targets); } - targets.push( - {frequencies: frequencies1, pitches: pitches1, index: i}, - {frequencies: frequencies2, pitches: pitches2, index: i} - ); + targets.push({headwordIndex: i, pronunciations, frequencies}); } } - const metas = await this._database.findTermMetaBulk(expressionKeys, enabledDictionaryMap); - for (const {expression, mode, data, dictionary, index} of metas) { - const dictionaryOrder = this._getDictionaryOrder(dictionary, enabledDictionaryMap); - const map2 = expressionValues[index]; + const metas = await this._database.findTermMetaBulk(headwordMapKeys, enabledDictionaryMap); + for (const {mode, data, dictionary, index} of metas) { + const {index: dictionaryIndex, priority: dictionaryPriority} = this._getDictionaryOrder(dictionary, enabledDictionaryMap); + const map2 = headwordReadingMaps[index]; for (const [reading, targets] of map2.entries()) { switch (mode) { case 'freq': @@ -657,171 +704,124 @@ class Translator { if (data.reading !== reading) { continue; } frequency = data.frequency; } - for (const {frequencies, index: expressionIndex} of targets) { - frequencies.push({index: frequencies.length, expressionIndex, dictionary, dictionaryOrder, expression, reading, hasReading, frequency}); + for (const {frequencies, headwordIndex} of targets) { + frequencies.push(this._createTermFrequency( + frequencies.length, + headwordIndex, + dictionary, + dictionaryIndex, + dictionaryPriority, + hasReading, + frequency + )); } } break; case 'pitch': { if (data.reading !== reading) { continue; } - const pitches2 = []; - for (let {position, tags} of data.pitches) { - tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : []; - pitches2.push({position, tags}); + const pitches = []; + for (const {position, tags} of data.pitches) { + const tags2 = []; + if (Array.isArray(tags) && tags.length > 0) { + tags2.push(this._createTagGroup(dictionary, tags)); + } + pitches.push({position, tags: tags2}); } - for (const {pitches, index: expressionIndex} of targets) { - pitches.push({index: pitches.length, expressionIndex, dictionary, dictionaryOrder, expression, reading, pitches: pitches2}); + for (const {pronunciations, headwordIndex} of targets) { + pronunciations.push(this._createTermPronunciation( + pronunciations.length, + headwordIndex, + dictionary, + dictionaryIndex, + dictionaryPriority, + pitches + )); } } break; } } } - - for (const definition of allDefinitions) { - this._sortTermDefinitionMeta(definition); - } } - async _buildKanjiMeta(definitions, enabledDictionaryMap) { + async _addKanjiMeta(dictionaryEntries, enabledDictionaryMap) { const kanjiList = []; - for (const {character} of definitions) { + for (const {character} of dictionaryEntries) { kanjiList.push(character); } const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap); for (const {character, mode, data, dictionary, index} of metas) { - const dictionaryOrder = this._getDictionaryOrder(dictionary, enabledDictionaryMap); + const {index: dictionaryIndex, priority: dictionaryPriority} = this._getDictionaryOrder(dictionary, enabledDictionaryMap); switch (mode) { case 'freq': { - const {frequencies} = definitions[index]; - frequencies.push({index: frequencies.length, dictionary, dictionaryOrder, character, frequency: data}); + const {frequencies} = dictionaryEntries[index]; + frequencies.push(this._createKanjiFrequency( + frequencies.length, + dictionary, + dictionaryIndex, + dictionaryPriority, + character, + data + )); } break; } } - - for (const definition of definitions) { - this._sortKanjiDefinitionMeta(definition); - } } - async _expandTags(names, dictionary) { - const tagMetaList = await this._getTagMetaList(names, dictionary); - const results = []; - for (let i = 0, ii = tagMetaList.length; i < ii; ++i) { - const meta = tagMetaList[i]; - const name = names[i]; - const {category, notes, order, score} = (meta !== null ? meta : {}); - const tag = this._createTag(name, category, notes, order, score, dictionary, false); - results.push(tag); + async _expandKanjiStats(stats, dictionary) { + const statsEntries = Object.entries(stats); + const items = []; + for (const [name] of statsEntries) { + const query = this._getNameBase(name); + items.push({query, dictionary}); } - return results; - } - async _expandStats(items, dictionary) { - const names = Object.keys(items); - const tagMetaList = await this._getTagMetaList(names, dictionary); + const databaseInfos = await this._database.findTagMetaBulk(items); const statsGroups = new Map(); - for (let i = 0; i < names.length; ++i) { - const name = names[i]; - const meta = tagMetaList[i]; - if (meta === null) { continue; } + for (let i = 0, ii = statsEntries.length; i < ii; ++i) { + const databaseInfo = databaseInfos[i]; + if (databaseInfo === null) { continue; } - const {category, notes, order, score} = meta; + const [name, value] = statsEntries[i]; + const {category} = databaseInfo; let group = statsGroups.get(category); if (typeof group === 'undefined') { group = []; statsGroups.set(category, group); } - const value = items[name]; - const stat = this._createKanjiStat(name, category, notes, order, score, dictionary, value); - group.push(stat); + group.push(this._createKanjiStat(name, value, databaseInfo, dictionary)); } - const stats = {}; + const groupedStats = {}; for (const [category, group] of statsGroups.entries()) { this._sortKanjiStats(group); - stats[category] = group; + groupedStats[category] = group; } - return stats; + return groupedStats; } - async _getTagMetaList(names, dictionary) { - const tagMetaList = []; - let cache = this._tagCache.get(dictionary); - if (typeof cache === 'undefined') { - cache = new Map(); - this._tagCache.set(dictionary, cache); - } - - for (const name of names) { - const base = this._getNameBase(name); - - let tagMeta = cache.get(base); - if (typeof tagMeta === 'undefined') { - tagMeta = await this._database.findTagForTitle(base, dictionary); - cache.set(base, tagMeta); - } - - tagMetaList.push(tagMeta); - } - - return tagMetaList; + _sortKanjiStats(stats) { + if (stats.length <= 1) { return; } + const stringComparer = this._stringComparer; + stats.sort((v1, v2) => { + const i = v1.order - v2.order; + return (i !== 0) ? i : stringComparer.compare(v1.content, v2.content); + }); } - // Simple helpers + // Helpers _getNameBase(name) { const pos = name.indexOf(':'); return (pos >= 0 ? name.substring(0, pos) : name); } - _getSearchableText(text, allowAlphanumericCharacters) { - if (allowAlphanumericCharacters) { - return text; - } - - const jp = this._japaneseUtil; - let newText = ''; - for (const c of text) { - if (!jp.isCodePointJapanese(c.codePointAt(0))) { - break; - } - newText += c; - } - return newText; - } - - _getTextOptionEntryVariants(value) { - switch (value) { - case 'true': return [true]; - case 'variant': return [false, true]; - default: return [false]; - } - } - - _getCollapseEmphaticOptions(options) { - const collapseEmphaticOptions = [[false, false]]; - switch (options.collapseEmphaticSequences) { - case 'true': - collapseEmphaticOptions.push([true, false]); - break; - case 'full': - collapseEmphaticOptions.push([true, false], [true, true]); - break; - } - return collapseEmphaticOptions; - } - - _getTextReplacementsVariants(options) { - return options.textReplacements; - } - _getSecondarySearchDictionaryMap(enabledDictionaryMap) { const secondarySearchDictionaryMap = new Map(); for (const [dictionary, details] of enabledDictionaryMap.entries()) { @@ -837,58 +837,6 @@ class Translator { return {index, priority}; } - _getTagNamesWithCategory(tags, category) { - const results = []; - for (const tag of tags) { - if (tag.category !== category) { continue; } - results.push(tag.name); - } - results.sort(); - return results; - } - - _flagTagsWithCategoryAsRedundant(tags, removeCategoriesSet) { - for (const tag of tags) { - if (removeCategoriesSet.has(tag.category)) { - tag.redundant = true; - } - } - } - - _getUniqueDictionaryNames(definitions) { - const uniqueDictionaryNames = new Set(); - for (const {dictionaryNames} of definitions) { - for (const dictionaryName of dictionaryNames) { - uniqueDictionaryNames.add(dictionaryName); - } - } - return [...uniqueDictionaryNames]; - } - - _getUniqueTermTags(definitions) { - const newTermTags = []; - if (definitions.length <= 1) { - for (const {termTags} of definitions) { - for (const tag of termTags) { - newTermTags.push(this._cloneTag(tag)); - } - } - } else { - const tagsSet = new Set(); - let checkTagsMap = false; - for (const {termTags} of definitions) { - for (const tag of termTags) { - const key = this._getTagMapKey(tag); - if (checkTagsMap && tagsSet.has(key)) { continue; } - tagsSet.add(key); - newTermTags.push(this._cloneTag(tag)); - } - checkTagsMap = true; - } - } - return newTermTags; - } - *_getArrayVariants(arrayVariants) { const ii = arrayVariants.length; @@ -909,110 +857,18 @@ class Translator { } } - _areSetsEqual(set1, set2) { - if (set1.size !== set2.size) { - return false; - } - - for (const value of set1) { - if (!set2.has(value)) { - return false; - } - } - - return true; - } - - _getSetIntersection(set1, set2) { - const result = []; - for (const value of set1) { - if (set2.has(value)) { - result.push(value); - } - } - return result; - } - - _getAllDefinitions(definitions) { - definitions = [...definitions]; - for (let i = 0; i < definitions.length; ++i) { - const childDefinitions = definitions[i].definitions; - if (Array.isArray(childDefinitions)) { - definitions.push(...childDefinitions); - } - } - return definitions; - } - - // Reduction functions - - _getSourceTermMatchCountSum(definitions) { - let result = 0; - for (const {sourceTermExactMatchCount} of definitions) { - result += sourceTermExactMatchCount; - } - return result; - } - - _getMaxDefinitionScore(definitions) { - let result = Number.MIN_SAFE_INTEGER; - for (const {score} of definitions) { - if (score > result) { result = score; } - } - return result; - } - - _getMaxPrimaryDefinitionScore(definitions) { - let result = Number.MIN_SAFE_INTEGER; - for (const {isPrimary, score} of definitions) { - if (isPrimary && score > result) { result = score; } - } - return result; - } - - _getBestDictionaryOrder(definitions) { - let index = Number.MAX_SAFE_INTEGER; - let priority = Number.MIN_SAFE_INTEGER; - for (const {dictionaryOrder: {index: index2, priority: priority2}} of definitions) { - if (index2 < index) { index = index2; } - if (priority2 > priority) { priority = priority2; } - } - return {index, priority}; - } - - // Common data creation and cloning functions - - _cloneTag(tag) { - const {name, category, notes, order, score, dictionary, redundant} = tag; - return this._createTag(name, category, notes, order, score, dictionary, redundant); - } - - _getTagMapKey(tag) { - const {name, category, notes} = tag; - return this._createMapKey([name, category, notes]); - } - _createMapKey(array) { return JSON.stringify(array); } - _createTag(name, category, notes, order, score, dictionary, redundant) { - return { - name, - category: (typeof category === 'string' && category.length > 0 ? category : 'default'), - notes: (typeof notes === 'string' ? notes : ''), - order: (typeof order === 'number' ? order : 0), - score: (typeof score === 'number' ? score : 0), - dictionary: (typeof dictionary === 'string' ? dictionary : null), - redundant - }; - } + // Kanji data - _createKanjiStat(name, category, notes, order, score, dictionary, value) { + _createKanjiStat(name, value, databaseInfo, dictionary) { + const {category, notes, order, score} = databaseInfo; return { name, category: (typeof category === 'string' && category.length > 0 ? category : 'default'), - notes: (typeof notes === 'string' ? notes : ''), + content: (typeof notes === 'string' ? notes : ''), order: (typeof order === 'number' ? order : 0), score: (typeof score === 'number' ? score : 0), dictionary: (typeof dictionary === 'string' ? dictionary : null), @@ -1020,322 +876,404 @@ class Translator { }; } - _createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, tags, stats) { + _createKanjiFrequency(index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency) { + return {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency}; + } + + _createKanjiDictionaryEntry(character, dictionary, onyomi, kunyomi, tags, stats, definitions) { return { type: 'kanji', character, dictionary, onyomi, kunyomi, - glossary, tags, stats, + definitions, frequencies: [] }; } - async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, reasons, isPrimary, enabledDictionaryMap) { - const {expression, reading: rawReading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition; - const reading = (rawReading.length > 0 ? rawReading : expression); - const dictionaryOrder = this._getDictionaryOrder(dictionary, enabledDictionaryMap); - const termTagsExpanded = await this._expandTags(termTags, dictionary); - const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary); + // Term data - this._sortTags(definitionTagsExpanded); - this._sortTags(termTagsExpanded); + _createTag(databaseTag, name, dictionary) { + const {category, notes, order, score} = (databaseTag !== null ? databaseTag : {}); + return { + name, + category: (typeof category === 'string' && category.length > 0 ? category : 'default'), + order: (typeof order === 'number' ? order : 0), + score: (typeof score === 'number' ? score : 0), + content: (typeof notes === 'string' && notes.length > 0 ? [notes] : []), + dictionaries: [dictionary], + redundant: false + }; + } - const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, termTagsExpanded)]; - const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0); + _createTagGroup(dictionary, tagNames) { + return {dictionary, tagNames}; + } + + _createSource(originalText, transformedText, deinflectedText, isPrimary) { + return {originalText, transformedText, deinflectedText, isPrimary}; + } + + _createTermHeadword(index, term, reading, sources, tags) { + return {index, term, reading, sources, tags}; + } + + _createTermDefinition(index, headwordIndices, dictionary, tags, entries) { + return {index, headwordIndices, dictionary, tags, entries}; + } + _createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) { + return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches}; + } + + _createTermFrequency(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency) { + return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency}; + } + + _createTermDictionaryEntry(id, isPrimary, sequence, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxDeinflectedTextLength, headwords, definitions) { return { type: 'term', id, - source, - rawSource, - sourceTerm, - reasons, - score, isPrimary, sequence, - dictionary, - dictionaryOrder, - dictionaryNames: [dictionary], - expression, - reading, - expressions: termDetailsList, - glossary, - definitionTags: definitionTagsExpanded, - termTags: termTagsExpanded, - // definitions - frequencies: [], - pitches: [], - // only - sourceTermExactMatchCount - }; - } - - /** - * Creates a grouped definition from an array of 'term' definitions. - * @param definitions An array of 'term' definitions. - * @returns A single 'termGrouped' definition. - */ - _createGroupedTermDefinition(definitions) { - const {reasons, source, rawSource, sourceTerm, expressions: [{expression, reading}]} = definitions[0]; - const score = this._getMaxDefinitionScore(definitions); - const dictionaryOrder = this._getBestDictionaryOrder(definitions); - const dictionaryNames = this._getUniqueDictionaryNames(definitions); - const termTags = this._getUniqueTermTags(definitions); - const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, termTags)]; - const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0); - return { - type: 'termGrouped', - // id - source, - rawSource, - sourceTerm, - reasons: [...reasons], + inflections, score, - // isPrimary - // sequence - dictionary: dictionaryNames[0], - dictionaryOrder, - dictionaryNames, - expression, - reading, - expressions: termDetailsList, - // glossary - // definitionTags - termTags, - definitions, // type: 'term' - frequencies: [], - pitches: [], - // only - sourceTermExactMatchCount + dictionaryIndex, + dictionaryPriority, + sourceTermExactMatchCount, + maxDeinflectedTextLength, + headwords, + definitions, + pronunciations: [], + frequencies: [] }; } - _createMergedTermDefinition(source, rawSource, definitions, expressions, readings, termDetailsList, reasons, score) { - const dictionaryOrder = this._getBestDictionaryOrder(definitions); - const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions); - const dictionaryNames = this._getUniqueDictionaryNames(definitions); - return { - type: 'termMerged', - // id - source, - rawSource, - // sourceTerm + _createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, reasons, isPrimary, enabledDictionaryMap) { + const {expression, reading: rawReading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseEntry; + const reading = (rawReading.length > 0 ? rawReading : expression); + const {index: dictionaryIndex, priority: dictionaryPriority} = this._getDictionaryOrder(dictionary, enabledDictionaryMap); + const sourceTermExactMatchCount = (isPrimary && deinflectedText === expression ? 1 : 0); + const source = this._createSource(originalText, transformedText, deinflectedText, isPrimary); + const maxDeinflectedTextLength = deinflectedText.length; + + const headwordTagGroups = []; + const definitionTagGroups = []; + if (termTags.length > 0) { headwordTagGroups.push(this._createTagGroup(dictionary, termTags)); } + if (definitionTags.length > 0) { definitionTagGroups.push(this._createTagGroup(dictionary, definitionTags)); } + + return this._createTermDictionaryEntry( + id, + isPrimary, + sequence, reasons, score, - // isPrimary - // sequence - dictionary: dictionaryNames[0], - dictionaryOrder, - dictionaryNames, - expression: expressions, - reading: readings, - expressions: termDetailsList, - // glossary - // definitionTags - // termTags - definitions, // type: 'termMergedByGlossary' - frequencies: [], - pitches: [], - // only - sourceTermExactMatchCount - }; + dictionaryIndex, + dictionaryPriority, + sourceTermExactMatchCount, + maxDeinflectedTextLength, + [this._createTermHeadword(0, expression, reading, [source], headwordTagGroups)], + [this._createTermDefinition(0, [0], dictionary, definitionTagGroups, glossary)] + ); } - _createMergedGlossaryTermDefinition(source, rawSource, definitions, expressions, readings, allExpressions, allReadings) { - const only = []; - if (!this._areSetsEqual(expressions, allExpressions)) { - only.push(...this._getSetIntersection(expressions, allExpressions)); + _createGroupedDictionaryEntry(dictionaryEntries, checkDuplicateDefinitions) { + // Headwords are generated before sorting, so that the order of dictionaryEntries can be maintained + const definitionEntries = []; + const headwords = new Map(); + for (const dictionaryEntry of dictionaryEntries) { + const headwordIndexMap = this._addTermHeadwords(headwords, dictionaryEntry.headwords); + definitionEntries.push({index: definitionEntries.length, dictionaryEntry, headwordIndexMap}); } - if (!this._areSetsEqual(readings, allReadings)) { - only.push(...this._getSetIntersection(readings, allReadings)); + + // Sort + if (definitionEntries.length > 1) { + this._sortTermDefinitionEntries(definitionEntries); + } else { + checkDuplicateDefinitions = false; } - const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions); - const dictionaryNames = this._getUniqueDictionaryNames(definitions); + // Merge dictionary entry data + let score = Number.MIN_SAFE_INTEGER; + let dictionaryIndex = Number.MAX_SAFE_INTEGER; + let dictionaryPriority = Number.MIN_SAFE_INTEGER; + let maxDeinflectedTextLength = 0; + let sourceTermExactMatchCount = 0; + let isPrimary = false; + const definitions = []; + const definitionsMap = checkDuplicateDefinitions ? new Map() : null; + let inflections = null; + + for (const {dictionaryEntry, headwordIndexMap} of definitionEntries) { + score = Math.max(score, dictionaryEntry.score); + dictionaryIndex = Math.min(dictionaryIndex, dictionaryEntry.dictionaryIndex); + dictionaryPriority = Math.max(dictionaryPriority, dictionaryEntry.dictionaryPriority); + if (dictionaryEntry.isPrimary) { + isPrimary = true; + maxDeinflectedTextLength = Math.max(maxDeinflectedTextLength, dictionaryEntry.maxDeinflectedTextLength); + sourceTermExactMatchCount += dictionaryEntry.sourceTermExactMatchCount; + const dictionaryEntryInflections = dictionaryEntry.inflections; + if (inflections === null || dictionaryEntryInflections.length < inflections.length) { + inflections = dictionaryEntryInflections; + } + } + if (checkDuplicateDefinitions) { + this._addTermDefinitions2(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap); + } else { + this._addTermDefinitions(definitions, dictionaryEntry.definitions, headwordIndexMap); + } + } - const termDetailsList = this._createTermDetailsList(definitions); + return this._createTermDictionaryEntry( + -1, + isPrimary, + -1, + inflections !== null ? inflections : [], + score, + dictionaryIndex, + dictionaryPriority, + sourceTermExactMatchCount, + maxDeinflectedTextLength, + [...headwords.values()], + definitions + ); + } - const definitionTags = this._getUniqueDefinitionTags(definitions); - this._sortTags(definitionTags); + // Data collection addition functions - const {glossary} = definitions[0]; - const score = this._getMaxDefinitionScore(definitions); - const dictionaryOrder = this._getBestDictionaryOrder(definitions); - return { - type: 'termMergedByGlossary', - // id - source, - rawSource, - // sourceTerm - reasons: [], - score, - // isPrimary - // sequence - dictionary: dictionaryNames[0], - dictionaryOrder, - dictionaryNames, - expression: [...expressions], - reading: [...readings], - expressions: termDetailsList, - glossary: [...glossary], - definitionTags, - // termTags - definitions, // type: 'term'; contains duplicate data - frequencies: [], - pitches: [], - only, - sourceTermExactMatchCount - }; + _addUniqueStrings(list, newItems) { + for (const item of newItems) { + if (!list.includes(item)) { + list.push(item); + } + } } - /** - * Creates a list of term details from an array of 'term' definitions. - * @param definitions An array of 'term' definitions. - * @returns An array of term details. - */ - _createTermDetailsList(definitions) { - const termInfoMap = new Map(); - for (const {expression, reading, sourceTerm, termTags} of definitions) { - let readingMap = termInfoMap.get(expression); - if (typeof readingMap === 'undefined') { - readingMap = new Map(); - termInfoMap.set(expression, readingMap); + _addUniqueSources(sources, newSources) { + if (newSources.length === 0) { return; } + if (sources.length === 0) { + sources.push(...newSources); + return; + } + for (const newSource of newSources) { + const {originalText, transformedText, deinflectedText, isPrimary} = newSource; + let has = false; + for (const source of sources) { + if ( + source.deinflectedText === deinflectedText && + source.transformedText === transformedText && + source.originalText === originalText + ) { + if (isPrimary) { source.isPrimary = true; } + has = true; + break; + } } + if (!has) { + sources.push(newSource); + } + } + } - let termInfo = readingMap.get(reading); - if (typeof termInfo === 'undefined') { - termInfo = { - sourceTerm, - termTagsMap: new Map() - }; - readingMap.set(reading, termInfo); + _addUniqueTagGroups(tagGroups, newTagGroups) { + if (newTagGroups.length === 0) { return; } + for (const newTagGroup of newTagGroups) { + const {dictionary} = newTagGroup; + const ii = tagGroups.length; + if (ii > 0) { + let i = 0; + for (; i < ii; ++i) { + const tagGroup = tagGroups[i]; + if (tagGroup.dictionary === dictionary) { + this._addUniqueStrings(tagGroup.tagNames, newTagGroup.tagNames); + break; + } + } + if (i < ii) { continue; } } + tagGroups.push(newTagGroup); + } + } - const {termTagsMap} = termInfo; - for (const tag of termTags) { - const {name} = tag; - if (termTagsMap.has(name)) { continue; } - termTagsMap.set(name, this._cloneTag(tag)); + _addTermHeadwords(headwordsMap, headwords) { + const headwordIndexMap = []; + for (const {term, reading, sources, tags} of headwords) { + const key = this._createMapKey([term, reading]); + let headword = headwordsMap.get(key); + if (typeof headword === 'undefined') { + headword = this._createTermHeadword(headwordsMap.size, term, reading, [], []); + headwordsMap.set(key, headword); } + this._addUniqueSources(headword.sources, sources); + this._addUniqueTagGroups(headword.tags, tags); + headwordIndexMap.push(headword.index); + } + return headwordIndexMap; + } + + _addUniqueTermHeadwordIndex(headwordIndices, headwordIndex) { + let end = headwordIndices.length; + if (end === 0) { + headwordIndices.push(headwordIndex); + return; } - const termDetailsList = []; - for (const [expression, readingMap] of termInfoMap.entries()) { - for (const [reading, {termTagsMap, sourceTerm}] of readingMap.entries()) { - const termTags = [...termTagsMap.values()]; - this._sortTags(termTags); - termDetailsList.push(this._createTermDetails(sourceTerm, expression, reading, termTags)); + let start = 0; + while (start < end) { + const mid = Math.floor((start + end) / 2); + const value = headwordIndices[mid]; + if (headwordIndex === value) { return; } + if (headwordIndex > value) { + start = mid + 1; + } else { + end = mid; } } - return termDetailsList; + + if (headwordIndex === headwordIndices[start]) { return; } + headwordIndices.splice(start, 0, headwordIndex); } - _createTermDetails(sourceTerm, expression, reading, termTags) { - return { - sourceTerm, - expression, - reading, - termTags, - frequencies: [], - pitches: [] - }; + _addTermDefinitions(definitions, newDefinitions, headwordIndexMap) { + for (const {headwordIndices, dictionary, tags, entries} of newDefinitions) { + const headwordIndicesNew = []; + for (const headwordIndex of headwordIndices) { + headwordIndicesNew.push(headwordIndexMap[headwordIndex]); + } + definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, tags, entries)); + } } - // Sorting functions + _addTermDefinitions2(definitions, definitionsMap, newDefinitions, headwordIndexMap) { + for (const {headwordIndices, dictionary, tags, entries} of newDefinitions) { + const key = this._createMapKey([dictionary, ...entries]); + let definition = definitionsMap.get(key); + if (typeof definition === 'undefined') { + definition = this._createTermDefinition(definitions.length, [], dictionary, [], [...entries]); + definitions.push(definition); + definitionsMap.set(key, definition); + } - _sortTags(tags) { - if (tags.length <= 1) { return; } - const stringComparer = this._stringComparer; - tags.sort((v1, v2) => { - const i = v1.order - v2.order; - if (i !== 0) { return i; } + const newHeadwordIndices = definition.headwordIndices; + for (const headwordIndex of headwordIndices) { + this._addUniqueTermHeadwordIndex(newHeadwordIndices, headwordIndexMap[headwordIndex]); + } + this._addUniqueTagGroups(definition.tags, tags); + } + } - return stringComparer.compare(v1.name, v2.name); - }); + // Sorting functions + + _sortDatabaseEntriesByIndex(databaseEntries) { + if (databaseEntries.length <= 1) { return; } + databaseEntries.sort((a, b) => a.index - b.index); } - _sortDefinitions(definitions, topLevel=true) { - if (definitions.length <= 1) { return; } + _sortTermDictionaryEntries(dictionaryEntries) { const stringComparer = this._stringComparer; const compareFunction = (v1, v2) => { - let i; - if (topLevel) { - // Sort by length of source term - i = v2.source.length - v1.source.length; - if (i !== 0) { return i; } + // Sort by length of source term + let i = v2.maxDeinflectedTextLength - v1.maxDeinflectedTextLength; + if (i !== 0) { return i; } - // Sort by the number of inflection reasons - i = v1.reasons.length - v2.reasons.length; - if (i !== 0) { return i; } + // Sort by the number of inflection reasons + i = v1.inflections.length - v2.inflections.length; + if (i !== 0) { return i; } - // Sort by how many terms exactly match the source (e.g. for exact kana prioritization) - i = v2.sourceTermExactMatchCount - v1.sourceTermExactMatchCount; - if (i !== 0) { return i; } - } + // Sort by how many terms exactly match the source (e.g. for exact kana prioritization) + i = v2.sourceTermExactMatchCount - v1.sourceTermExactMatchCount; + if (i !== 0) { return i; } // Sort by dictionary priority - i = v2.dictionaryOrder.priority - v1.dictionaryOrder.priority; + i = v2.dictionaryPriority - v1.dictionaryPriority; if (i !== 0) { return i; } // Sort by term score i = v2.score - v1.score; if (i !== 0) { return i; } - // Sort by expression string comparison (skip if either expression is not a string, e.g. array) - const expression1 = v1.expression; - const expression2 = v2.expression; - if (typeof expression1 === 'string' && typeof expression2 === 'string') { - i = expression2.length - expression1.length; + // Sort by expression text + const headwords1 = v1.headwords; + const headwords2 = v2.headwords; + for (let j = 0, jj = Math.min(headwords1.length, headwords2.length); j < jj; ++j) { + const term1 = headwords1[j].term; + const term2 = headwords2[j].term; + + i = term2.length - term1.length; if (i !== 0) { return i; } - i = stringComparer.compare(expression1, expression2); + i = stringComparer.compare(term1, term2); if (i !== 0) { return i; } } // Sort by dictionary order - i = v1.dictionaryOrder.index - v2.dictionaryOrder.index; + i = v1.dictionaryIndex - v2.dictionaryIndex; return i; }; - definitions.sort(compareFunction); + dictionaryEntries.sort(compareFunction); } - _sortDatabaseDefinitionsByIndex(definitions) { - if (definitions.length <= 1) { return; } - definitions.sort((a, b) => a.index - b.index); - } + _sortTermDefinitionEntries(definitionEntries) { + const compareFunction = (e1, e2) => { + const v1 = e1.dictionaryEntry; + const v2 = e2.dictionaryEntry; - _sortDefinitionsById(definitions) { - if (definitions.length <= 1) { return; } - definitions.sort((a, b) => a.id - b.id); - } + // Sort by dictionary priority + let i = v2.dictionaryPriority - v1.dictionaryPriority; + if (i !== 0) { return i; } - _sortKanjiStats(stats) { - if (stats.length <= 1) { return; } - const stringComparer = this._stringComparer; - stats.sort((v1, v2) => { - const i = v1.order - v2.order; + // Sort by term score + i = v2.score - v1.score; if (i !== 0) { return i; } - return stringComparer.compare(v1.notes, v2.notes); - }); + // Sort by definition headword index + const definitions1 = v1.definitions; + const definitions2 = v2.definitions; + const headwordIndexMap1 = e1.headwordIndexMap; + const headwordIndexMap2 = e2.headwordIndexMap; + for (let j = 0, jj = Math.min(definitions1.length, definitions2.length); j < jj; ++j) { + const headwordIndices1 = definitions1[j].headwordIndices; + const headwordIndices2 = definitions2[j].headwordIndices; + const kk = headwordIndices1.length; + i = headwordIndices2.length - kk; + if (i !== 0) { return i; } + for (let k = 0; k < kk; ++k) { + i = headwordIndexMap1[headwordIndices1[k]] - headwordIndexMap2[headwordIndices2[k]]; + if (i !== 0) { return i; } + } + } + + // Sort by dictionary order + i = v1.dictionaryIndex - v2.dictionaryIndex; + if (i !== 0) { return i; } + + // Sort by original order + i = e1.index - e2.index; + return i; + }; + definitionEntries.sort(compareFunction); } - _sortTermDefinitionMeta(definition) { - const compareFunction = (v1, v2) => { + _sortTermDictionaryEntriesById(dictionaryEntries) { + if (dictionaryEntries.length <= 1) { return; } + dictionaryEntries.sort((a, b) => a.id - b.id); + } + + _sortTermDictionaryEntryData(dictionaryEntries) { + const compare = (v1, v2) => { // Sort by dictionary priority - let i = v2.dictionaryOrder.priority - v1.dictionaryOrder.priority; + let i = v2.dictionaryPriority - v1.dictionaryPriority; if (i !== 0) { return i; } // Sory by expression order - i = v1.expressionIndex - v2.expressionIndex; + i = v1.headwordIndex - v2.headwordIndex; if (i !== 0) { return i; } // Sort by dictionary order - i = v1.dictionaryOrder.index - v2.dictionaryOrder.index; + i = v1.dictionaryIndex - v2.dictionaryIndex; if (i !== 0) { return i; } // Default order @@ -1343,23 +1281,21 @@ class Translator { return i; }; - const {expressions, frequencies: frequencies1, pitches: pitches1} = definition; - frequencies1.sort(compareFunction); - pitches1.sort(compareFunction); - for (const {frequencies: frequencies2, pitches: pitches2} of expressions) { - frequencies2.sort(compareFunction); - pitches2.sort(compareFunction); + for (const {definitions, frequencies, pronunciations} of dictionaryEntries) { + this._flagRedundantDefinitionTags(definitions); + frequencies.sort(compare); + pronunciations.sort(compare); } } - _sortKanjiDefinitionMeta(definition) { - const compareFunction = (v1, v2) => { + _sortKanjiDictionaryEntryData(dictionaryEntries) { + const compare = (v1, v2) => { // Sort by dictionary priority - let i = v2.dictionaryOrder.priority - v1.dictionaryOrder.priority; + let i = v2.dictionaryPriority - v1.dictionaryPriority; if (i !== 0) { return i; } // Sort by dictionary order - i = v1.dictionaryOrder.index - v2.dictionaryOrder.index; + i = v1.dictionaryIndex - v2.dictionaryIndex; if (i !== 0) { return i; } // Default order @@ -1367,16 +1303,8 @@ class Translator { return i; }; - const {frequencies} = definition; - frequencies.sort(compareFunction); - } - - // Regex functions - - _applyTextReplacements(text, sourceMap, replacements) { - for (const {pattern, replacement} of replacements) { - text = RegexUtil.applyTextReplacement(text, sourceMap, pattern, replacement); + for (const {frequencies} of dictionaryEntries) { + frequencies.sort(compare); } - return text; } } diff --git a/ext/js/templates/template-renderer-frame-main.js b/ext/js/templates/template-renderer-frame-main.js index bfa18c82..c915d6b0 100644 --- a/ext/js/templates/template-renderer-frame-main.js +++ b/ext/js/templates/template-renderer-frame-main.js @@ -16,7 +16,7 @@ */ /* globals - * AnkiNoteData + * AnkiNoteDataCreator * JapaneseUtil * TemplateRenderer * TemplateRendererFrameApi @@ -25,8 +25,9 @@ (() => { const japaneseUtil = new JapaneseUtil(null); const templateRenderer = new TemplateRenderer(japaneseUtil); + const ankiNoteDataCreator = new AnkiNoteDataCreator(japaneseUtil); templateRenderer.registerDataType('ankiNote', { - modifier: ({data, marker}) => new AnkiNoteData(japaneseUtil, marker, data).createPublic() + modifier: ({data, marker}) => ankiNoteDataCreator.create(marker, data) }); const templateRendererFrameApi = new TemplateRendererFrameApi(templateRenderer); templateRendererFrameApi.prepare(); diff --git a/ext/template-renderer.html b/ext/template-renderer.html index eb3695e1..74167551 100644 --- a/ext/template-renderer.html +++ b/ext/template-renderer.html @@ -17,7 +17,7 @@ <!-- Scripts --> <script src="/lib/handlebars.min.js"></script> -<script src="/js/data/anki-note-data.js"></script> +<script src="/js/data/anki-note-data-creator.js"></script> <script src="/js/language/dictionary-data-util.js"></script> <script src="/js/language/japanese-util.js"></script> <script src="/js/templates/template-renderer.js"></script> |