about | summary | refs | log | tree | commit | diff
path: root/ext/bg
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2020-10-04 12:54:55 -0400
committer: GitHub <noreply@github.com> 2020-10-04 12:54:55 -0400
commit: 2bd82353e46ecc8c16f2b55f81c8daae6f73e12e (patch)
tree: 933efe3292653edfb7c91c496d0e4ac8513219e3 /ext/bg
parent: 86c64ac4c27279fef6bd6f49aa807a10cd9d08bf (diff)
Translator options refactor (#879)
* Refactor internal options for findTerms to not use the settings object
* Move findTerms/findKanji options creation
* Deconstruct used options values to variables before any await calls
* Rename findTermsOptions to just options
* Add documentation comments
* Add type information about definitions
Diffstat (limited to 'ext/bg')
-rw-r--r-- ext/bg/js/backend.js 71
-rw-r--r-- ext/bg/js/translator.js 132
2 files changed, 149 insertions, 54 deletions
diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js
index a268396d..ebdd2cda 100644
--- a/ext/bg/js/backend.js
+++ b/ext/bg/js/backend.js
@@ -376,16 +376,19 @@ class Backend {
async _onApiKanjiFind({text, optionsContext}) {
const options = this.getOptions(optionsContext);
- const definitions = await this._translator.findKanji(text, options);
- definitions.splice(options.general.maxResults);
+ const {general: {maxResults}} = options;
+ const findKanjiOptions = this._getTranslatorFindKanjiOptions(options);
+ const definitions = await this._translator.findKanji(text, findKanjiOptions);
+ definitions.splice(maxResults);
return definitions;
}
async _onApiTermsFind({text, details, optionsContext}) {
const options = this.getOptions(optionsContext);
- const mode = options.general.resultOutputMode;
- const [definitions, length] = await this._translator.findTerms(mode, text, details, options);
- definitions.splice(options.general.maxResults);
+ const {general: {resultOutputMode: mode, maxResults}} = options;
+ const findTermsOptions = this._getTranslatorFindTermsOptions(details, options);
+ const [definitions, length] = await this._translator.findTerms(mode, text, findTermsOptions);
+ definitions.splice(maxResults);
return {length, definitions};
}
@@ -948,25 +951,26 @@ class Backend {
}
async _textParseScanning(text, options) {
+ const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
+ const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
const results = [];
while (text.length > 0) {
const term = [];
const [definitions, sourceLength] = await this._translator.findTerms(
'simple',
- text.substring(0, options.scanning.length),
- {},
- options
+ text.substring(0, scanningLength),
+ findTermsOptions
);
if (definitions.length > 0 && sourceLength > 0) {
const {expression, reading} = definitions[0];
const source = text.substring(0, sourceLength);
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
- const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
+ const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2});
}
text = text.substring(source.length);
} else {
- const reading = jp.convertReading(text[0], '', options.parsing.readingMode);
+ const reading = jp.convertReading(text[0], '', readingMode);
term.push({text: text[0], reading});
text = text.substring(1);
}
@@ -976,6 +980,7 @@ class Backend {
}
async _textParseMecab(text, options) {
+ const {parsing: {readingMode}} = options;
const results = [];
const rawResults = await this._mecab.parseText(text);
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
@@ -988,7 +993,7 @@ class Backend {
jp.convertKatakanaToHiragana(reading),
source
)) {
- const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
+ const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2});
}
result.push(term);
@@ -1660,4 +1665,48 @@ class Backend {
await this._optionsUtil.save(options);
this._applyOptions(source);
}
+
+ _getTranslatorFindTermsOptions(details, options) {
+ const {wildcard} = details;
+ const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
+ const {
+ general: {compactTags, mainDictionary},
+ scanning: {alphanumeric},
+ translation: {
+ convertHalfWidthCharacters,
+ convertNumericCharacters,
+ convertAlphabeticCharacters,
+ convertHiraganaToKatakana,
+ convertKatakanaToHiragana,
+ collapseEmphaticSequences
+ }
+ } = options;
+ return {
+ wildcard,
+ compactTags,
+ mainDictionary,
+ alphanumeric,
+ convertHalfWidthCharacters,
+ convertNumericCharacters,
+ convertAlphabeticCharacters,
+ convertHiraganaToKatakana,
+ convertKatakanaToHiragana,
+ collapseEmphaticSequences,
+ enabledDictionaryMap
+ };
+ }
+
+ _getTranslatorFindKanjiOptions(options) {
+ const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
+ return {enabledDictionaryMap};
+ }
+
+ _getTranslatorEnabledDictionaryMap(options) {
+ const enabledDictionaryMap = new Map();
+ for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
+ if (!enabled) { continue; }
+ enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
+ }
+ return enabledDictionaryMap;
+ }
}
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 34b6ca34..7af3f61f 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -21,7 +21,14 @@
* jp
*/
+/**
+ * Class which finds term and kanji definitions for text.
+ */
class Translator {
+ /**
+ * Creates a new Translator instance.
+ * @param database An instance of DictionaryDatabase.
+ */
constructor(database) {
this._database = database;
this._deinflector = null;
@@ -29,32 +36,82 @@ class Translator {
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
}
+ /**
+ * Initializes the instance for use. The public API should not be used until
+ * this function has been called and await'd.
+ */
async prepare() {
const reasons = await this._fetchJsonAsset('/bg/lang/deinflect.json');
this._deinflector = new Deinflector(reasons);
}
+ /**
+ * Clears the database tag cache. This should be executed if the database is changed.
+ */
clearDatabaseCaches() {
this._tagCache.clear();
}
- async findTerms(mode, text, details, options) {
+ /**
+ * Finds term definitions for the given text.
+ * @param mode The mode to use for finding terms, which determines the format of the resulting array.
+ * @param text The text to find terms for.
+ * @param options An object using the following structure:
+ * {
+ * wildcard: (null or string),
+ * compactTags: (boolean),
+ * mainDictionary: (string),
+ * alphanumeric: (boolean),
+ * convertHalfWidthCharacters: (boolean),
+ * convertNumericCharacters: (boolean),
+ * convertAlphabeticCharacters: (boolean),
+ * convertHiraganaToKatakana: (boolean),
+ * convertKatakanaToHiragana: (boolean),
+ * collapseEmphaticSequences: (string; compared against values such as 'true'),
+ * enabledDictionaryMap: (Map of [
+ * (string),
+ * {
+ * priority: (number),
+ * allowSecondarySearches: (boolean)
+ * }
+ * ])
+ * }
+ * @returns An array of [definitions, textLength]. The structure of each definition depends on the
+ * mode parameter, see the _create?TermDefinition?() functions for structure details.
+ */
+ async findTerms(mode, text, options) {
switch (mode) {
case 'group':
- return await this._findTermsGrouped(text, details, options);
+ return await this._findTermsGrouped(text, options);
case 'merge':
- return await this._findTermsMerged(text, details, options);
+ return await this._findTermsMerged(text, options);
case 'split':
- return await this._findTermsSplit(text, details, options);
+ return await this._findTermsSplit(text, options);
case 'simple':
- return await this._findTermsSimple(text, details, options);
+ return await this._findTermsSimple(text, options);
default:
return [[], 0];
}
}
+ /**
+ * Finds kanji definitions for the given text.
+ * @param text The text to find kanji definitions for. This string can be of any length,
+ * but is typically just one character, which is a single kanji. If the string is multiple
+ * characters long, each character will be searched in the database.
+ * @param options An object using the following structure:
+ * {
+ * enabledDictionaryMap: (Map of [
+ * (string),
+ * {
+ * priority: (number)
+ * }
+ * ])
+ * }
+ * @returns An array of definitions. See the _createKanjiDefinition() function for structure details.
+ */
async findKanji(text, options) {
- const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
+ const {enabledDictionaryMap} = options;
const kanjiUnique = new Set();
for (const c of text) {
kanjiUnique.add(c);
@@ -250,10 +307,9 @@ class Translator {
return result;
}
- async _findTermsGrouped(text, details, options) {
- const {general: {compactTags}} = options;
- const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+ async _findTermsGrouped(text, options) {
+ const {compactTags, enabledDictionaryMap} = options;
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
@@ -268,12 +324,11 @@ class Translator {
return [groupedDefinitions, length];
}
- async _findTermsMerged(text, details, options) {
- const {general: {compactTags, mainDictionary}} = options;
- const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
+ async _findTermsMerged(text, options) {
+ const {compactTags, mainDictionary, enabledDictionaryMap} = options;
const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
const definitionsMerged = [];
const usedDefinitions = new Set();
@@ -318,30 +373,31 @@ class Translator {
return [definitionsMerged, length];
}
- async _findTermsSplit(text, details, options) {
- const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+ async _findTermsSplit(text, options) {
+ const {enabledDictionaryMap} = options;
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
await this._buildTermMeta(definitions, enabledDictionaryMap);
this._sortDefinitions(definitions, true);
return [definitions, length];
}
- async _findTermsSimple(text, details, options) {
- const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
- const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+ async _findTermsSimple(text, options) {
+ const {enabledDictionaryMap} = options;
+ const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
this._sortDefinitions(definitions, false);
return [definitions, length];
}
- async _findTermsInternal(text, enabledDictionaryMap, details, options) {
- text = this._getSearchableText(text, options.scanning.alphanumeric);
+ async _findTermsInternal(text, enabledDictionaryMap, options) {
+ const {alphanumeric, wildcard} = options;
+ text = this._getSearchableText(text, alphanumeric);
if (text.length === 0) {
return [[], 0];
}
const deinflections = (
- details.wildcard ?
- await this._findTermWildcard(text, enabledDictionaryMap, details.wildcard) :
+ wildcard ?
+ await this._findTermWildcard(text, enabledDictionaryMap, wildcard) :
await this._findTermDeinflections(text, enabledDictionaryMap, options)
);
@@ -414,9 +470,8 @@ class Translator {
}
_getAllDeinflections(text, options) {
- const translationOptions = options.translation;
const collapseEmphaticOptions = [[false, false]];
- switch (translationOptions.collapseEmphaticSequences) {
+ switch (options.collapseEmphaticSequences) {
case 'true':
collapseEmphaticOptions.push([true, false]);
break;
@@ -425,11 +480,11 @@ class Translator {
break;
}
const textOptionVariantArray = [
- this._getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
- this._getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
- this._getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
- this._getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
- this._getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
+ this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
+ this._getTextOptionEntryVariants(options.convertNumericCharacters),
+ this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
+ this._getTextOptionEntryVariants(options.convertHiraganaToKatakana),
+ this._getTextOptionEntryVariants(options.convertKatakanaToHiragana),
collapseEmphaticOptions
];
@@ -707,15 +762,6 @@ class Translator {
return await response.json();
}
- _getEnabledDictionaryMap(options) {
- const enabledDictionaryMap = new Map();
- for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
- if (!enabled) { continue; }
- enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
- }
- return enabledDictionaryMap;
- }
-
_getSecondarySearchDictionaryMap(enabledDictionaryMap) {
const secondarySearchDictionaryMap = new Map();
for (const [title, dictionary] of enabledDictionaryMap.entries()) {
@@ -999,7 +1045,7 @@ class Translator {
// glossary
// definitionTags
termTags: this._cloneTags(termTags),
- definitions,
+ definitions, // type: 'term'
frequencies: [],
pitches: []
// only
@@ -1025,7 +1071,7 @@ class Translator {
// glossary
// definitionTags
// termTags
- definitions,
+ definitions, // type: 'termMergedByGlossary'
frequencies: [],
pitches: []
// only
@@ -1064,7 +1110,7 @@ class Translator {
glossary: [...glossary],
definitionTags,
// termTags
- definitions, // Contains duplicate data
+ definitions, // type: 'term'; contains duplicate data
frequencies: [],
pitches: [],
only