aboutsummaryrefslogtreecommitdiff
path: root/ext/bg/js/translator.js
diff options
context:
space:
mode:
authortoasted-nutbread <toasted-nutbread@users.noreply.github.com>2020-08-09 13:21:14 -0400
committerGitHub <noreply@github.com>2020-08-09 13:21:14 -0400
commit480e0e15e3109165d077c18985893d7cca79959e (patch)
treea4f4712ba27796b72136fc19b982b6e864074d17 /ext/bg/js/translator.js
parent427cf99b9fb08ac8066fdbcefe5f9069e87aa972 (diff)
Translator refactor (#720)
* Move simple find sort into translator.js * Remove unused * Use direct reference to database * Make field private * Remove statics * Convert functions to private * Organize by public/private
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r--ext/bg/js/translator.js244
1 files changed, 124 insertions, 120 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index a1f30bd2..7005b4b5 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -33,21 +33,70 @@
class Translator {
constructor(database) {
- this.database = database;
- this.deinflector = null;
- this.tagCache = new Map();
+ this._database = database;
+ this._deinflector = null;
+ this._tagCache = new Map();
}
async prepare() {
const reasons = await this._fetchJsonAsset('/bg/lang/deinflect.json');
- this.deinflector = new Deinflector(reasons);
+ this._deinflector = new Deinflector(reasons);
}
clearDatabaseCaches() {
- this.tagCache.clear();
+ this._tagCache.clear();
}
- async getSequencedDefinitions(definitions, mainDictionary) {
+ async findTerms(mode, text, details, options) {
+ switch (mode) {
+ case 'group':
+ return await this._findTermsGrouped(text, details, options);
+ case 'merge':
+ return await this._findTermsMerged(text, details, options);
+ case 'split':
+ return await this._findTermsSplit(text, details, options);
+ case 'simple':
+ return await this._findTermsSimple(text, details, options);
+ default:
+ return [[], 0];
+ }
+ }
+
+ async findKanji(text, options) {
+ const dictionaries = dictEnabledSet(options);
+ const kanjiUnique = new Set();
+ for (const c of text) {
+ kanjiUnique.add(c);
+ }
+
+ const definitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries);
+ if (definitions.length === 0) {
+ return definitions;
+ }
+
+ if (definitions.length > 1) {
+ definitions.sort((a, b) => a.index - b.index);
+ }
+
+ for (const definition of definitions) {
+ const tags = await this._expandTags(definition.tags, definition.dictionary);
+ tags.push(dictTagBuildSource(definition.dictionary));
+ dictTagsSort(tags);
+
+ const stats = await this._expandStats(definition.stats, definition.dictionary);
+
+ definition.tags = tags;
+ definition.stats = stats;
+ }
+
+ await this._buildKanjiMeta(definitions, dictionaries);
+
+ return definitions;
+ }
+
+ // Private
+
+ async _getSequencedDefinitions(definitions, mainDictionary) {
const [definitionsBySequence, defaultDefinitions] = dictTermsMergeBySequence(definitions, mainDictionary);
const sequenceList = [];
@@ -57,14 +106,14 @@ class Translator {
sequencedDefinitions.push({definitions: value, rawDefinitions: []});
}
- for (const definition of await this.database.findTermsBySequenceBulk(sequenceList, mainDictionary)) {
+ for (const definition of await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary)) {
sequencedDefinitions[definition.index].rawDefinitions.push(definition);
}
return {sequencedDefinitions, defaultDefinitions};
}
- async getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) {
+ async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) {
if (secondarySearchDictionaries.size === 0) {
return [];
}
@@ -79,12 +128,12 @@ class Translator {
}
}
- const definitions = await this.database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
+ const definitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
for (const definition of definitions) {
- const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
+ const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
definitionTags.push(dictTagBuildSource(definition.dictionary));
definition.definitionTags = definitionTags;
- const termTags = await this.expandTags(definition.termTags, definition.dictionary);
+ const termTags = await this._expandTags(definition.termTags, definition.dictionary);
definition.termTags = termTags;
}
@@ -95,20 +144,20 @@ class Translator {
return definitions;
}
- async getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) {
+ async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) {
const result = sequencedDefinition.definitions;
const rawDefinitionsBySequence = sequencedDefinition.rawDefinitions;
for (const definition of rawDefinitionsBySequence) {
- const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
+ const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
definitionTags.push(dictTagBuildSource(definition.dictionary));
definition.definitionTags = definitionTags;
- const termTags = await this.expandTags(definition.termTags, definition.dictionary);
+ const termTags = await this._expandTags(definition.termTags, definition.dictionary);
definition.termTags = termTags;
}
const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence);
- const secondarySearchResults = await this.getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
+ const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
dictTermsMergeByGloss(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
@@ -124,7 +173,7 @@ class Translator {
for (const [reading, termTagsMap] of readingMap.entries()) {
const termTags = [...termTagsMap.values()];
const score = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
- expressions.push(Translator.createExpression(expression, reading, dictTagsSort(termTags), Translator.scoreToTermFrequency(score)));
+ expressions.push(this._createExpression(expression, reading, dictTagsSort(termTags), this._scoreToTermFrequency(score)));
}
}
@@ -135,27 +184,12 @@ class Translator {
return result;
}
- async findTerms(mode, text, details, options) {
- switch (mode) {
- case 'group':
- return await this.findTermsGrouped(text, details, options);
- case 'merge':
- return await this.findTermsMerged(text, details, options);
- case 'split':
- return await this.findTermsSplit(text, details, options);
- case 'simple':
- return await this.findTermsSimple(text, details, options);
- default:
- return [[], 0];
- }
- }
-
- async findTermsGrouped(text, details, options) {
+ async _findTermsGrouped(text, details, options) {
const dictionaries = dictEnabledSet(options);
- const [definitions, length] = await this.findTermsInternal(text, dictionaries, details, options);
+ const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
- await this.buildTermMeta(definitionsGrouped, dictionaries);
+ await this._buildTermMeta(definitionsGrouped, dictionaries);
if (options.general.compactTags) {
for (const definition of definitionsGrouped) {
@@ -166,7 +200,7 @@ class Translator {
return [definitionsGrouped, length];
}
- async findTermsMerged(text, details, options) {
+ async _findTermsMerged(text, details, options) {
const dictionaries = dictEnabledSet(options);
const secondarySearchDictionaries = new Map();
for (const [title, dictionary] of dictionaries.entries()) {
@@ -174,13 +208,13 @@ class Translator {
secondarySearchDictionaries.set(title, dictionary);
}
- const [definitions, length] = await this.findTermsInternal(text, dictionaries, details, options);
- const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary);
+ const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
+ const {sequencedDefinitions, defaultDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary);
const definitionsMerged = [];
const mergedByTermIndices = new Set();
for (const sequencedDefinition of sequencedDefinitions) {
- const result = await this.getMergedDefinition(
+ const result = await this._getMergedDefinition(
text,
dictionaries,
sequencedDefinition,
@@ -200,7 +234,7 @@ class Translator {
score,
expression: [expression],
reading: [reading],
- expressions: [Translator.createExpression(groupedDefinition.expression, groupedDefinition.reading)],
+ expressions: [this._createExpression(groupedDefinition.expression, groupedDefinition.reading)],
source,
dictionary,
definitions: groupedDefinition.definitions
@@ -208,7 +242,7 @@ class Translator {
definitionsMerged.push(compatibilityDefinition);
}
- await this.buildTermMeta(definitionsMerged, dictionaries);
+ await this._buildTermMeta(definitionsMerged, dictionaries);
if (options.general.compactTags) {
for (const definition of definitionsMerged) {
@@ -219,38 +253,40 @@ class Translator {
return [dictTermsSort(definitionsMerged), length];
}
- async findTermsSplit(text, details, options) {
+ async _findTermsSplit(text, details, options) {
const dictionaries = dictEnabledSet(options);
- const [definitions, length] = await this.findTermsInternal(text, dictionaries, details, options);
+ const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
- await this.buildTermMeta(definitions, dictionaries);
+ await this._buildTermMeta(definitions, dictionaries);
return [definitions, length];
}
- async findTermsSimple(text, details, options) {
+ async _findTermsSimple(text, details, options) {
const dictionaries = dictEnabledSet(options);
- return await this.findTermsInternal(text, dictionaries, details, options);
+ const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
+ dictTermsSort(definitions);
+ return [definitions, length];
}
- async findTermsInternal(text, dictionaries, details, options) {
- text = Translator.getSearchableText(text, options);
+ async _findTermsInternal(text, dictionaries, details, options) {
+ text = this._getSearchableText(text, options);
if (text.length === 0) {
return [[], 0];
}
const deinflections = (
details.wildcard ?
- await this.findTermWildcard(text, dictionaries, details.wildcard) :
- await this.findTermDeinflections(text, dictionaries, options)
+ await this._findTermWildcard(text, dictionaries, details.wildcard) :
+ await this._findTermDeinflections(text, dictionaries, options)
);
let definitions = [];
for (const deinflection of deinflections) {
for (const definition of deinflection.definitions) {
- const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
+ const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
definitionTags.push(dictTagBuildSource(definition.dictionary));
- const termTags = await this.expandTags(definition.termTags, definition.dictionary);
+ const termTags = await this._expandTags(definition.termTags, definition.dictionary);
const {expression, reading} = definition;
const furiganaSegments = jp.distributeFurigana(expression, reading);
@@ -284,8 +320,8 @@ class Translator {
return [definitions, length];
}
- async findTermWildcard(text, dictionaries, wildcard) {
- const definitions = await this.database.findTermsBulk([text], dictionaries, wildcard);
+ async _findTermWildcard(text, dictionaries, wildcard) {
+ const definitions = await this._database.findTermsBulk([text], dictionaries, wildcard);
if (definitions.length === 0) {
return [];
}
@@ -300,8 +336,8 @@ class Translator {
}];
}
- async findTermDeinflections(text, dictionaries, options) {
- const deinflections = this.getAllDeinflections(text, options);
+ async _findTermDeinflections(text, dictionaries, options) {
+ const deinflections = this._getAllDeinflections(text, options);
if (deinflections.length === 0) {
return [];
@@ -322,7 +358,7 @@ class Translator {
deinflectionArray.push(deinflection);
}
- const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null);
+ const definitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null);
for (const definition of definitions) {
const definitionRules = Deinflector.rulesToRuleFlags(definition.rules);
@@ -337,7 +373,7 @@ class Translator {
return deinflections.filter((e) => e.definitions.length > 0);
}
- getAllDeinflections(text, options) {
+ _getAllDeinflections(text, options) {
const translationOptions = options.translation;
const collapseEmphaticOptions = [[false, false]];
switch (translationOptions.collapseEmphaticSequences) {
@@ -349,17 +385,17 @@ class Translator {
break;
}
const textOptionVariantArray = [
- Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
- Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
- Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
- Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
- Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
+ this._getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
+ this._getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
+ this._getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
+ this._getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
+ this._getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
collapseEmphaticOptions
];
const deinflections = [];
const used = new Set();
- for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) {
+ for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
let text2 = text;
const sourceMap = new TextSourceMap(text2);
if (halfWidth) {
@@ -385,7 +421,7 @@ class Translator {
const text2Substring = text2.substring(0, i);
if (used.has(text2Substring)) { break; }
used.add(text2Substring);
- for (const deinflection of this.deinflector.deinflect(text2Substring)) {
+ for (const deinflection of this._deinflector.deinflect(text2Substring)) {
deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
deinflections.push(deinflection);
}
@@ -394,7 +430,7 @@ class Translator {
return deinflections;
}
- static getTextOptionEntryVariants(value) {
+ _getTextOptionEntryVariants(value) {
switch (value) {
case 'true': return [true];
case 'variant': return [false, true];
@@ -402,39 +438,7 @@ class Translator {
}
}
- async findKanji(text, options) {
- const dictionaries = dictEnabledSet(options);
- const kanjiUnique = new Set();
- for (const c of text) {
- kanjiUnique.add(c);
- }
-
- const definitions = await this.database.findKanjiBulk([...kanjiUnique], dictionaries);
- if (definitions.length === 0) {
- return definitions;
- }
-
- if (definitions.length > 1) {
- definitions.sort((a, b) => a.index - b.index);
- }
-
- for (const definition of definitions) {
- const tags = await this.expandTags(definition.tags, definition.dictionary);
- tags.push(dictTagBuildSource(definition.dictionary));
- dictTagsSort(tags);
-
- const stats = await this.expandStats(definition.stats, definition.dictionary);
-
- definition.tags = tags;
- definition.stats = stats;
- }
-
- await this.buildKanjiMeta(definitions, dictionaries);
-
- return definitions;
- }
-
- async buildTermMeta(definitions, dictionaries) {
+ async _buildTermMeta(definitions, dictionaries) {
const terms = [];
for (const definition of definitions) {
if (definition.expressions) {
@@ -469,19 +473,19 @@ class Translator {
term.pitches = [];
}
- const metas = await this.database.findTermMetaBulk(expressionsUnique, dictionaries);
+ const metas = await this._database.findTermMetaBulk(expressionsUnique, dictionaries);
for (const {expression, mode, data, dictionary, index} of metas) {
switch (mode) {
case 'freq':
for (const term of termsUnique[index]) {
- const frequencyData = this.getFrequencyData(expression, data, dictionary, term);
+ const frequencyData = this._getFrequencyData(expression, data, dictionary, term);
if (frequencyData === null) { continue; }
term.frequencies.push(frequencyData);
}
break;
case 'pitch':
for (const term of termsUnique[index]) {
- const pitchData = await this.getPitchData(expression, data, dictionary, term);
+ const pitchData = await this._getPitchData(expression, data, dictionary, term);
if (pitchData === null) { continue; }
term.pitches.push(pitchData);
}
@@ -490,14 +494,14 @@ class Translator {
}
}
- async buildKanjiMeta(definitions, dictionaries) {
+ async _buildKanjiMeta(definitions, dictionaries) {
const kanjiList = [];
for (const definition of definitions) {
kanjiList.push(definition.character);
definition.frequencies = [];
}
- const metas = await this.database.findKanjiMetaBulk(kanjiList, dictionaries);
+ const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries);
for (const {character, mode, data, dictionary, index} of metas) {
switch (mode) {
case 'freq':
@@ -507,8 +511,8 @@ class Translator {
}
}
- async expandTags(names, title) {
- const tagMetaList = await this.getTagMetaList(names, title);
+ async _expandTags(names, title) {
+ const tagMetaList = await this._getTagMetaList(names, title);
return tagMetaList.map((meta, index) => {
const name = names[index];
const tag = dictTagSanitize(Object.assign({}, meta !== null ? meta : {}, {name}));
@@ -516,9 +520,9 @@ class Translator {
});
}
- async expandStats(items, title) {
+ async _expandStats(items, title) {
const names = Object.keys(items);
- const tagMetaList = await this.getTagMetaList(names, title);
+ const tagMetaList = await this._getTagMetaList(names, title);
const statsGroups = new Map();
for (let i = 0; i < names.length; ++i) {
@@ -546,20 +550,20 @@ class Translator {
return stats;
}
- async getTagMetaList(names, title) {
+ async _getTagMetaList(names, title) {
const tagMetaList = [];
- let cache = this.tagCache.get(title);
+ let cache = this._tagCache.get(title);
if (typeof cache === 'undefined') {
cache = new Map();
- this.tagCache.set(title, cache);
+ this._tagCache.set(title, cache);
}
for (const name of names) {
- const base = Translator.getNameBase(name);
+ const base = this._getNameBase(name);
let tagMeta = cache.get(base);
if (typeof tagMeta === 'undefined') {
- tagMeta = await this.database.findTagForTitle(base, title);
+ tagMeta = await this._database.findTagForTitle(base, title);
cache.set(base, tagMeta);
}
@@ -569,7 +573,7 @@ class Translator {
return tagMetaList;
}
- getFrequencyData(expression, data, dictionary, term) {
+ _getFrequencyData(expression, data, dictionary, term) {
if (data !== null && typeof data === 'object') {
const {frequency, reading} = data;
@@ -581,21 +585,21 @@ class Translator {
return {expression, frequency: data, dictionary};
}
- async getPitchData(expression, data, dictionary, term) {
+ async _getPitchData(expression, data, dictionary, term) {
const reading = data.reading;
const termReading = term.reading || expression;
if (reading !== termReading) { return null; }
const pitches = [];
for (let {position, tags} of data.pitches) {
- tags = Array.isArray(tags) ? await this.getTagMetaList(tags, dictionary) : [];
+ tags = Array.isArray(tags) ? await this._getTagMetaList(tags, dictionary) : [];
pitches.push({position, tags});
}
return {reading, pitches, dictionary};
}
- static createExpression(expression, reading, termTags=null, termFrequency=null) {
+ _createExpression(expression, reading, termTags=null, termFrequency=null) {
const furiganaSegments = jp.distributeFurigana(expression, reading);
return {
expression,
@@ -606,7 +610,7 @@ class Translator {
};
}
- static scoreToTermFrequency(score) {
+ _scoreToTermFrequency(score) {
if (score > 0) {
return 'popular';
} else if (score < 0) {
@@ -616,12 +620,12 @@ class Translator {
}
}
- static getNameBase(name) {
+ _getNameBase(name) {
const pos = name.indexOf(':');
return (pos >= 0 ? name.substring(0, pos) : name);
}
- static *getArrayVariants(arrayVariants) {
+ *_getArrayVariants(arrayVariants) {
const ii = arrayVariants.length;
let total = 1;
@@ -641,7 +645,7 @@ class Translator {
}
}
- static getSearchableText(text, options) {
+ _getSearchableText(text, options) {
if (!options.scanning.alphanumeric) {
let newText = '';
for (const c of text) {