diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2019-12-22 13:09:35 -0500 |
---|---|---|
committer | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-01-24 20:15:25 -0500 |
commit | 9ae87f186a8ecbae3f727bcff8782ba3b0d6ba53 (patch) | |
tree | ea5c9a3f930906b28f993c972a8d85b6b781c4b0 /ext/bg/js/translator.js | |
parent | 04727a8cd7dc29a02595690389979ec1070bbded (diff) |
Update how deinflections are generated
Diffstat (limited to 'ext/bg/js/translator.js')
-rw-r--r-- | ext/bg/js/translator.js | 66 |
1 files changed, 40 insertions, 26 deletions
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index b6e9604d..bb78d46d 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -151,7 +151,7 @@ class Translator { async findTermsGrouped(text, details, options) { const dictionaries = dictEnabledSet(options); const titles = Object.keys(dictionaries); - const [definitions, length] = await this.findTermsInternal(text, dictionaries, options.scanning.alphanumeric, details); + const [definitions, length] = await this.findTermsInternal(text, dictionaries, options.scanning.alphanumeric, details, options); const definitionsGrouped = dictTermsGroup(definitions, dictionaries); await this.buildTermFrequencies(definitionsGrouped, titles); @@ -169,7 +169,7 @@ class Translator { const dictionaries = dictEnabledSet(options); const secondarySearchTitles = Object.keys(options.dictionaries).filter((dict) => options.dictionaries[dict].allowSecondarySearches); const titles = Object.keys(dictionaries); - const [definitions, length] = await this.findTermsInternal(text, dictionaries, options.scanning.alphanumeric, details); + const [definitions, length] = await this.findTermsInternal(text, dictionaries, options.scanning.alphanumeric, details, options); const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary); const definitionsMerged = []; const mergedByTermIndices = new Set(); @@ -206,14 +206,14 @@ class Translator { async findTermsSplit(text, details, options) { const dictionaries = dictEnabledSet(options); const titles = Object.keys(dictionaries); - const [definitions, length] = await this.findTermsInternal(text, dictionaries, options.scanning.alphanumeric, details); + const [definitions, length] = await this.findTermsInternal(text, dictionaries, options.scanning.alphanumeric, details, options); await this.buildTermFrequencies(definitions, titles); return [definitions, length]; } - async findTermsInternal(text, dictionaries, alphanumeric, details) { + async findTermsInternal(text, dictionaries, alphanumeric, details, options) { if (!alphanumeric && text.length > 0) { const c = text[0]; if (!jpIsKana(c) && !jpIsKanji(c)) { @@ -225,7 +225,7 @@ class Translator { const deinflections = ( details.wildcard ? await this.findTermWildcard(text, titles, details.wildcard) : - await this.findTermDeinflections(text, titles) + await this.findTermDeinflections(text, titles, options) ); let definitions = []; @@ -281,9 +281,8 @@ class Translator { }]; } - async findTermDeinflections(text, titles) { - const text2 = jpKatakanaToHiragana(text); - const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2)); + async findTermDeinflections(text, titles, options) { + const deinflections = this.getAllDeinflections(text, options); if (deinflections.length === 0) { return []; @@ -321,29 +320,24 @@ class Translator { return deinflections.filter((e) => e.definitions.length > 0); } - getDeinflections(text) { - const deinflections = []; - - for (let i = text.length; i > 0; --i) { - const textSubstring = text.substring(0, i); - deinflections.push(...this.deinflector.deinflect(textSubstring)); - } - - return deinflections; - } + getAllDeinflections(text, _options) { + const textOptionVariantArray = [ + [false, true] // convert katakana to hiragana + ]; - getDeinflections2(text1, text2) { const deinflections = []; - - for (let i = text1.length; i > 0; --i) { - const text1Substring = text1.substring(0, i); - const text2Substring = text2.substring(0, i); - deinflections.push(...this.deinflector.deinflect(text1Substring)); - if (text1Substring !== text2Substring) { + const used = new Set(); + for (const [hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { + let text2 = text; + if (hiragana) { text2 = jpKatakanaToHiragana(text2); } + + for (let i = text2.length; i > 0; --i) { + const text2Substring = text2.substring(0, i); + if (used.has(text2Substring)) { break; } + used.add(text2Substring); deinflections.push(...this.deinflector.deinflect(text2Substring)); } } - return deinflections; } @@ -527,4 +521,24 @@ class Translator { const pos = name.indexOf(':'); return (pos >= 0 ? name.substring(0, pos) : name); } + + static *getArrayVariants(arrayVariants) { + const ii = arrayVariants.length; + + let total = 1; + for (let i = 0; i < ii; ++i) { + total *= arrayVariants[i].length; + } + + for (let a = 0; a < total; ++a) { + const variant = []; + let index = a; + for (let i = 0; i < ii; ++i) { + const entryVariants = arrayVariants[i]; + variant.push(entryVariants[index % entryVariants.length]); + index = Math.floor(index / entryVariants.length); + } + yield variant; + } + } } |