diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-08-09 13:21:14 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-09 13:21:14 -0400 |
commit | 480e0e15e3109165d077c18985893d7cca79959e (patch) | |
tree | a4f4712ba27796b72136fc19b982b6e864074d17 /ext/bg/js/translator.js | |
parent | 427cf99b9fb08ac8066fdbcefe5f9069e87aa972 (diff) |
Translator refactor (#720)
* Move simple find sort into translator.js
* Remove unused
* Use direct reference to database
* Make field private
* Remove statics
* Convert functions to private
* Organize by public/private
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r-- | ext/bg/js/translator.js | 244 |
1 files changed, 124 insertions, 120 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index a1f30bd2..7005b4b5 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -33,21 +33,70 @@ class Translator { constructor(database) { - this.database = database; - this.deinflector = null; - this.tagCache = new Map(); + this._database = database; + this._deinflector = null; + this._tagCache = new Map(); } async prepare() { const reasons = await this._fetchJsonAsset('/bg/lang/deinflect.json'); - this.deinflector = new Deinflector(reasons); + this._deinflector = new Deinflector(reasons); } clearDatabaseCaches() { - this.tagCache.clear(); + this._tagCache.clear(); } - async getSequencedDefinitions(definitions, mainDictionary) { + async findTerms(mode, text, details, options) { + switch (mode) { + case 'group': + return await this._findTermsGrouped(text, details, options); + case 'merge': + return await this._findTermsMerged(text, details, options); + case 'split': + return await this._findTermsSplit(text, details, options); + case 'simple': + return await this._findTermsSimple(text, details, options); + default: + return [[], 0]; + } + } + + async findKanji(text, options) { + const dictionaries = dictEnabledSet(options); + const kanjiUnique = new Set(); + for (const c of text) { + kanjiUnique.add(c); + } + + const definitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries); + if (definitions.length === 0) { + return definitions; + } + + if (definitions.length > 1) { + definitions.sort((a, b) => a.index - b.index); + } + + for (const definition of definitions) { + const tags = await this._expandTags(definition.tags, definition.dictionary); + tags.push(dictTagBuildSource(definition.dictionary)); + dictTagsSort(tags); + + const stats = await this._expandStats(definition.stats, definition.dictionary); + + definition.tags = tags; + definition.stats = stats; + } + + await this._buildKanjiMeta(definitions, dictionaries); + + return definitions; + } + + // Private + + async _getSequencedDefinitions(definitions, mainDictionary) { const [definitionsBySequence, defaultDefinitions] = dictTermsMergeBySequence(definitions, mainDictionary); const sequenceList = []; @@ -57,14 +106,14 @@ class Translator { sequencedDefinitions.push({definitions: value, rawDefinitions: []}); } - for (const definition of await this.database.findTermsBySequenceBulk(sequenceList, mainDictionary)) { + for (const definition of await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary)) { sequencedDefinitions[definition.index].rawDefinitions.push(definition); } return {sequencedDefinitions, defaultDefinitions}; } - async getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) { + async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) { if (secondarySearchDictionaries.size === 0) { return []; } @@ -79,12 +128,12 @@ class Translator { } } - const definitions = await this.database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries); + const definitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries); for (const definition of definitions) { - const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); + const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary); definitionTags.push(dictTagBuildSource(definition.dictionary)); definition.definitionTags = definitionTags; - const termTags = await this.expandTags(definition.termTags, definition.dictionary); + const termTags = await this._expandTags(definition.termTags, definition.dictionary); definition.termTags = termTags; } @@ -95,20 +144,20 @@ class Translator { return definitions; } - async getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) { + async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) { const result = sequencedDefinition.definitions; const rawDefinitionsBySequence = sequencedDefinition.rawDefinitions; for (const definition of rawDefinitionsBySequence) { - const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); + const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary); definitionTags.push(dictTagBuildSource(definition.dictionary)); definition.definitionTags = definitionTags; - const termTags = await this.expandTags(definition.termTags, definition.dictionary); + const termTags = await this._expandTags(definition.termTags, definition.dictionary); definition.termTags = termTags; } const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence); - const secondarySearchResults = await this.getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries); + const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries); dictTermsMergeByGloss(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices); @@ -124,7 +173,7 @@ class Translator { for (const [reading, termTagsMap] of readingMap.entries()) { const termTags = [...termTagsMap.values()]; const score = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0); - expressions.push(Translator.createExpression(expression, reading, dictTagsSort(termTags), Translator.scoreToTermFrequency(score))); + expressions.push(this._createExpression(expression, reading, dictTagsSort(termTags), this._scoreToTermFrequency(score))); } } @@ -135,27 +184,12 @@ class Translator { return result; } - async findTerms(mode, text, details, options) { - switch (mode) { - case 'group': - return await this.findTermsGrouped(text, details, options); - case 'merge': - return await this.findTermsMerged(text, details, options); - case 'split': - return await this.findTermsSplit(text, details, options); - case 'simple': - return await this.findTermsSimple(text, details, options); - default: - return [[], 0]; - } - } - - async findTermsGrouped(text, details, options) { + async _findTermsGrouped(text, details, options) { const dictionaries = dictEnabledSet(options); - const [definitions, length] = await this.findTermsInternal(text, dictionaries, details, options); + const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); const definitionsGrouped = dictTermsGroup(definitions, dictionaries); - await this.buildTermMeta(definitionsGrouped, dictionaries); + await this._buildTermMeta(definitionsGrouped, dictionaries); if (options.general.compactTags) { for (const definition of definitionsGrouped) { @@ -166,7 +200,7 @@ class Translator { return [definitionsGrouped, length]; } - async findTermsMerged(text, details, options) { + async _findTermsMerged(text, details, options) { const dictionaries = dictEnabledSet(options); const secondarySearchDictionaries = new Map(); for (const [title, dictionary] of dictionaries.entries()) { @@ -174,13 +208,13 @@ class Translator { secondarySearchDictionaries.set(title, dictionary); } - const [definitions, length] = await this.findTermsInternal(text, dictionaries, details, options); - const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary); + const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); + const {sequencedDefinitions, defaultDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary); const definitionsMerged = []; const mergedByTermIndices = new Set(); for (const sequencedDefinition of sequencedDefinitions) { - const result = await this.getMergedDefinition( + const result = await this._getMergedDefinition( text, dictionaries, sequencedDefinition, @@ -200,7 +234,7 @@ class Translator { score, expression: [expression], reading: [reading], - expressions: [Translator.createExpression(groupedDefinition.expression, groupedDefinition.reading)], + expressions: [this._createExpression(groupedDefinition.expression, groupedDefinition.reading)], source, dictionary, definitions: groupedDefinition.definitions @@ -208,7 +242,7 @@ class Translator { definitionsMerged.push(compatibilityDefinition); } - await this.buildTermMeta(definitionsMerged, dictionaries); + await this._buildTermMeta(definitionsMerged, dictionaries); if (options.general.compactTags) { for (const definition of definitionsMerged) { @@ -219,38 +253,40 @@ class Translator { return [dictTermsSort(definitionsMerged), length]; } - async findTermsSplit(text, details, options) { + async _findTermsSplit(text, details, options) { const dictionaries = dictEnabledSet(options); - const [definitions, length] = await this.findTermsInternal(text, dictionaries, details, options); + const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); - await this.buildTermMeta(definitions, dictionaries); + await this._buildTermMeta(definitions, dictionaries); return [definitions, length]; } - async findTermsSimple(text, details, options) { + async _findTermsSimple(text, details, options) { const dictionaries = dictEnabledSet(options); - return await this.findTermsInternal(text, dictionaries, details, options); + const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); + dictTermsSort(definitions); + return [definitions, length]; } - async findTermsInternal(text, dictionaries, details, options) { - text = Translator.getSearchableText(text, options); + async _findTermsInternal(text, dictionaries, details, options) { + text = this._getSearchableText(text, options); if (text.length === 0) { return [[], 0]; } const deinflections = ( details.wildcard ? - await this.findTermWildcard(text, dictionaries, details.wildcard) : - await this.findTermDeinflections(text, dictionaries, options) + await this._findTermWildcard(text, dictionaries, details.wildcard) : + await this._findTermDeinflections(text, dictionaries, options) ); let definitions = []; for (const deinflection of deinflections) { for (const definition of deinflection.definitions) { - const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary); + const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary); definitionTags.push(dictTagBuildSource(definition.dictionary)); - const termTags = await this.expandTags(definition.termTags, definition.dictionary); + const termTags = await this._expandTags(definition.termTags, definition.dictionary); const {expression, reading} = definition; const furiganaSegments = jp.distributeFurigana(expression, reading); @@ -284,8 +320,8 @@ class Translator { return [definitions, length]; } - async findTermWildcard(text, dictionaries, wildcard) { - const definitions = await this.database.findTermsBulk([text], dictionaries, wildcard); + async _findTermWildcard(text, dictionaries, wildcard) { + const definitions = await this._database.findTermsBulk([text], dictionaries, wildcard); if (definitions.length === 0) { return []; } @@ -300,8 +336,8 @@ class Translator { }]; } - async findTermDeinflections(text, dictionaries, options) { - const deinflections = this.getAllDeinflections(text, options); + async _findTermDeinflections(text, dictionaries, options) { + const deinflections = this._getAllDeinflections(text, options); if (deinflections.length === 0) { return []; @@ -322,7 +358,7 @@ class Translator { deinflectionArray.push(deinflection); } - const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null); + const definitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null); for (const definition of definitions) { const definitionRules = Deinflector.rulesToRuleFlags(definition.rules); @@ -337,7 +373,7 @@ class Translator { return deinflections.filter((e) => e.definitions.length > 0); } - getAllDeinflections(text, options) { + _getAllDeinflections(text, options) { const translationOptions = options.translation; const collapseEmphaticOptions = [[false, false]]; switch (translationOptions.collapseEmphaticSequences) { @@ -349,17 +385,17 @@ class Translator { break; } const textOptionVariantArray = [ - Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters), - Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters), - Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters), - Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), - Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana), + this._getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters), + this._getTextOptionEntryVariants(translationOptions.convertNumericCharacters), + this._getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters), + this._getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), + this._getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana), collapseEmphaticOptions ]; const deinflections = []; const used = new Set(); - for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) { + for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) { let text2 = text; const sourceMap = new TextSourceMap(text2); if (halfWidth) { @@ -385,7 +421,7 @@ class Translator { const text2Substring = text2.substring(0, i); if (used.has(text2Substring)) { break; } used.add(text2Substring); - for (const deinflection of this.deinflector.deinflect(text2Substring)) { + for (const deinflection of this._deinflector.deinflect(text2Substring)) { deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); deinflections.push(deinflection); } @@ -394,7 +430,7 @@ class Translator { return deinflections; } - static getTextOptionEntryVariants(value) { + _getTextOptionEntryVariants(value) { switch (value) { case 'true': return [true]; case 'variant': return [false, true]; @@ -402,39 +438,7 @@ class Translator { } } - async findKanji(text, options) { - const dictionaries = dictEnabledSet(options); - const kanjiUnique = new Set(); - for (const c of text) { - kanjiUnique.add(c); - } - - const definitions = await this.database.findKanjiBulk([...kanjiUnique], dictionaries); - if (definitions.length === 0) { - return definitions; - } - - if (definitions.length > 1) { - definitions.sort((a, b) => a.index - b.index); - } - - for (const definition of definitions) { - const tags = await this.expandTags(definition.tags, definition.dictionary); - tags.push(dictTagBuildSource(definition.dictionary)); - dictTagsSort(tags); - - const stats = await this.expandStats(definition.stats, definition.dictionary); - - definition.tags = tags; - definition.stats = stats; - } - - await this.buildKanjiMeta(definitions, dictionaries); - - return definitions; - } - - async buildTermMeta(definitions, dictionaries) { + async _buildTermMeta(definitions, dictionaries) { const terms = []; for (const definition of definitions) { if (definition.expressions) { @@ -469,19 +473,19 @@ class Translator { term.pitches = []; } - const metas = await this.database.findTermMetaBulk(expressionsUnique, dictionaries); + const metas = await this._database.findTermMetaBulk(expressionsUnique, dictionaries); for (const {expression, mode, data, dictionary, index} of metas) { switch (mode) { case 'freq': for (const term of termsUnique[index]) { - const frequencyData = this.getFrequencyData(expression, data, dictionary, term); + const frequencyData = this._getFrequencyData(expression, data, dictionary, term); if (frequencyData === null) { continue; } term.frequencies.push(frequencyData); } break; case 'pitch': for (const term of termsUnique[index]) { - const pitchData = await this.getPitchData(expression, data, dictionary, term); + const pitchData = await this._getPitchData(expression, data, dictionary, term); if (pitchData === null) { continue; } term.pitches.push(pitchData); } @@ -490,14 +494,14 @@ class Translator { } } - async buildKanjiMeta(definitions, dictionaries) { + async _buildKanjiMeta(definitions, dictionaries) { const kanjiList = []; for (const definition of definitions) { kanjiList.push(definition.character); definition.frequencies = []; } - const metas = await this.database.findKanjiMetaBulk(kanjiList, dictionaries); + const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries); for (const {character, mode, data, dictionary, index} of metas) { switch (mode) { case 'freq': @@ -507,8 +511,8 @@ class Translator { } } - async expandTags(names, title) { - const tagMetaList = await this.getTagMetaList(names, title); + async _expandTags(names, title) { + const tagMetaList = await this._getTagMetaList(names, title); return tagMetaList.map((meta, index) => { const name = names[index]; const tag = dictTagSanitize(Object.assign({}, meta !== null ? meta : {}, {name})); @@ -516,9 +520,9 @@ class Translator { }); } - async expandStats(items, title) { + async _expandStats(items, title) { const names = Object.keys(items); - const tagMetaList = await this.getTagMetaList(names, title); + const tagMetaList = await this._getTagMetaList(names, title); const statsGroups = new Map(); for (let i = 0; i < names.length; ++i) { @@ -546,20 +550,20 @@ class Translator { return stats; } - async getTagMetaList(names, title) { + async _getTagMetaList(names, title) { const tagMetaList = []; - let cache = this.tagCache.get(title); + let cache = this._tagCache.get(title); if (typeof cache === 'undefined') { cache = new Map(); - this.tagCache.set(title, cache); + this._tagCache.set(title, cache); } for (const name of names) { - const base = Translator.getNameBase(name); + const base = this._getNameBase(name); let tagMeta = cache.get(base); if (typeof tagMeta === 'undefined') { - tagMeta = await this.database.findTagForTitle(base, title); + tagMeta = await this._database.findTagForTitle(base, title); cache.set(base, tagMeta); } @@ -569,7 +573,7 @@ class Translator { return tagMetaList; } - getFrequencyData(expression, data, dictionary, term) { + _getFrequencyData(expression, data, dictionary, term) { if (data !== null && typeof data === 'object') { const {frequency, reading} = data; @@ -581,21 +585,21 @@ class Translator { return {expression, frequency: data, dictionary}; } - async getPitchData(expression, data, dictionary, term) { + async _getPitchData(expression, data, dictionary, term) { const reading = data.reading; const termReading = term.reading || expression; if (reading !== termReading) { return null; } const pitches = []; for (let {position, tags} of data.pitches) { - tags = Array.isArray(tags) ? await this.getTagMetaList(tags, dictionary) : []; + tags = Array.isArray(tags) ? await this._getTagMetaList(tags, dictionary) : []; pitches.push({position, tags}); } return {reading, pitches, dictionary}; } - static createExpression(expression, reading, termTags=null, termFrequency=null) { + _createExpression(expression, reading, termTags=null, termFrequency=null) { const furiganaSegments = jp.distributeFurigana(expression, reading); return { expression, @@ -606,7 +610,7 @@ class Translator { }; } - static scoreToTermFrequency(score) { + _scoreToTermFrequency(score) { if (score > 0) { return 'popular'; } else if (score < 0) { @@ -616,12 +620,12 @@ class Translator { } } - static getNameBase(name) { + _getNameBase(name) { const pos = name.indexOf(':'); return (pos >= 0 ? name.substring(0, pos) : name); } - static *getArrayVariants(arrayVariants) { + *_getArrayVariants(arrayVariants) { const ii = arrayVariants.length; let total = 1; @@ -641,7 +645,7 @@ class Translator { } } - static getSearchableText(text, options) { + _getSearchableText(text, options) { if (!options.scanning.alphanumeric) { let newText = ''; for (const c of text) { |