From b57e1f52e34ec6f87163c8697c29330c61c245ae Mon Sep 17 00:00:00 2001
From: StefanVukovic99
Date: Mon, 17 Jun 2024 19:13:16 +0200
Subject: use only longest substring that deinflects to headword (#1075)

* show only deinflection for longest matched text

* cleanup

* cleanup
---
 ext/js/language/translator.js | 44 +++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index f84328d9..f4f3449b 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -243,25 +243,49 @@ export class Translator {
             for (const databaseEntry of databaseEntries) {
                 const {id} = databaseEntry;
                 if (ids.has(id)) {
-                    const existingEntry = dictionaryEntries.find((entry) => {
-                        return entry.definitions.some((definition) => definition.id === id);
-                    });
+                    const existingEntryInfo = this._findExistingEntry(dictionaryEntries, id);
+                    if (!existingEntryInfo) {
+                        continue;
+                    }
+                    const {existingEntry, existingIndex} = existingEntryInfo;
 
-                    if (existingEntry && transformedText.length >= existingEntry.headwords[0].sources[0].transformedText.length) {
+                    const existingTransformedLength = existingEntry.headwords[0].sources[0].transformedText.length;
+                    if (transformedText.length < existingTransformedLength) {
+                        continue;
+                    }
+                    if (transformedText.length > existingTransformedLength) {
+                        dictionaryEntries.splice(existingIndex, 1, this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, inflectionRuleChainCandidates, true, enabledDictionaryMap, tagAggregator));
+                    } else {
                         this._mergeInflectionRuleChains(existingEntry, inflectionRuleChainCandidates);
                     }
-
-                    continue;
+                } else {
+                    const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, inflectionRuleChainCandidates, true, enabledDictionaryMap, tagAggregator);
+                    dictionaryEntries.push(dictionaryEntry);
+                    ids.add(id);
                 }
-
-                const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, inflectionRuleChainCandidates, true, enabledDictionaryMap, tagAggregator);
-                dictionaryEntries.push(dictionaryEntry);
-                ids.add(id);
             }
         }
         return {dictionaryEntries, originalTextLength};
     }
 
+    /**
+     * @param {import('translation-internal').TermDictionaryEntry[]} dictionaryEntries
+     * @param {number} id
+     * @returns {{existingEntry: import('translation-internal').TermDictionaryEntry, existingIndex: number} | null}
+     */
+    _findExistingEntry(dictionaryEntries, id) {
+        let existingIndex = null;
+        let existingEntry = null;
+        for (const [index, entry] of dictionaryEntries.entries()) {
+            if (entry.definitions.some((definition) => definition.id === id)) {
+                existingIndex = index;
+                existingEntry = entry;
+                return {existingEntry, existingIndex};
+            }
+        }
+        return null;
+    }
+
     /**
      * @param {import('translation-internal').TermDictionaryEntry} existingEntry
      * @param {import('translation-internal').InflectionRuleChainCandidate[]} inflectionRuleChainCandidates
-- 
cgit v1.2.3