From 8f057c63fea6f06e921f2134d881192002dd23bc Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 28 Feb 2021 16:38:01 -0500 Subject: Improve text parser (#1469) * Merge ungrouped characters * Update iteration * Fix incorrect code point handling * Simplify text * Specify language * Update how parsed status is represented --- ext/js/background/backend.js | 30 ++++++++++++++++++++---------- ext/js/display/query-parser.js | 15 +++++---------- 2 files changed, 25 insertions(+), 20 deletions(-) (limited to 'ext/js') diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index 1238673b..715b916b 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -1038,31 +1038,41 @@ class Backend { const {scanning: {length: scanningLength}, parsing: {readingMode}} = options; const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options); const results = []; - while (text.length > 0) { - const term = []; + let previousUngroupedSegment = null; + let i = 0; + const ii = text.length; + while (i < ii) { const [definitions, sourceLength] = await this._translator.findTerms( 'simple', - text.substring(0, scanningLength), + text.substring(i, i + scanningLength), findTermsOptions ); + const codePoint = text.codePointAt(i); + const character = String.fromCodePoint(codePoint); if ( definitions.length > 0 && sourceLength > 0 && - (sourceLength !== 1 || this._japaneseUtil.isCodePointJapanese(text[0])) + (sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint)) ) { + previousUngroupedSegment = null; const {expression, reading} = definitions[0]; - const source = text.substring(0, sourceLength); + const source = text.substring(i, i + sourceLength); + const term = []; for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { const reading2 = jp.convertReading(text2, furigana, readingMode); term.push({text: text2, reading: reading2}); } - text = text.substring(source.length); + results.push(term); + i += sourceLength; } else { - const reading = jp.convertReading(text[0], '', readingMode); - term.push({text: text[0], reading}); - text = text.substring(1); + if (previousUngroupedSegment === null) { + previousUngroupedSegment = {text: character, reading: ''}; + results.push([previousUngroupedSegment]); + } else { + previousUngroupedSegment.text += character; + } + i += character.length; } - results.push(term); } return results; } diff --git a/ext/js/display/query-parser.js b/ext/js/display/query-parser.js index d6a3b4da..c21033de 100644 --- a/ext/js/display/query-parser.js +++ b/ext/js/display/query-parser.js @@ -132,7 +132,8 @@ class QueryParser extends EventDispatcher { _setPreview(text) { const terms = [[{text, reading: ''}]]; this._queryParser.textContent = ''; - this._queryParser.appendChild(this._createParseResult(terms, true)); + this._queryParser.dataset.parsed = 'false'; + this._queryParser.appendChild(this._createParseResult(terms)); } _renderParserSelect() { @@ -146,6 +147,7 @@ class QueryParser extends EventDispatcher { _renderParseResult() { const parseResult = this._getParseResult(); this._queryParser.textContent = ''; + this._queryParser.dataset.parsed = 'true'; if (!parseResult) { return; } this._queryParser.appendChild(this._createParseResult(parseResult.content, false)); } @@ -182,13 +184,11 @@ class QueryParser extends EventDispatcher { select.selectedIndex = selectedIndex; } - _createParseResult(terms, preview) { - const type = preview ? 'preview' : 'normal'; + _createParseResult(terms) { const fragment = document.createDocumentFragment(); for (const term of terms) { const termNode = document.createElement('span'); termNode.className = 'query-parser-term'; - termNode.dataset.type = type; for (const segment of term) { if (segment.reading.trim().length === 0) { this._addSegmentText(segment.text, termNode); @@ -221,11 +221,6 @@ class QueryParser extends EventDispatcher { } _addSegmentText(text, container) { - for (const character of text) { - const node = document.createElement('span'); - node.className = 'query-parser-char'; - node.textContent = character; - container.appendChild(node); - } + container.textContent = text; } } -- cgit v1.2.3