From 083da93142ec6302021ee1c29428121b54fc9e68 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 9 Jan 2021 23:10:55 -0500 Subject: Refactor sentence parsing (#1215) * Rename sentenceExtent with sentenceScanExtent * Update TextScanner.setOptions * Change function argument order * Rename quote map variables * Fix edge case quote handling * Update terminator maps to support character inclusion --- ext/fg/js/frontend.js | 4 +-- ext/mixed/js/display.js | 26 ++++++++-------- ext/mixed/js/document-util.js | 71 ++++++++++++++++++++++++++----------------- ext/mixed/js/text-scanner.js | 27 +++++++++++----- 4 files changed, 77 insertions(+), 51 deletions(-) (limited to 'ext') diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js index c14c2feb..a206e3fb 100644 --- a/ext/fg/js/frontend.js +++ b/ext/fg/js/frontend.js @@ -312,7 +312,7 @@ class Frontend { async _updateOptionsInternal() { const optionsContext = await this._getOptionsContext(); const options = await api.optionsGet(optionsContext); - const scanningOptions = options.scanning; + const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options; this._options = options; await this._updatePopup(); @@ -326,7 +326,7 @@ class Frontend { touchInputEnabled: scanningOptions.touchInputEnabled, pointerEventsEnabled: scanningOptions.pointerEventsEnabled, scanLength: scanningOptions.length, - sentenceExtent: options.sentenceParsing.scanExtent, + sentenceScanExtent: sentenceParsingOptions.scanExtent, layoutAwareScan: scanningOptions.layoutAwareScan, preventMiddleMouse }); diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 60842a3d..4c8d2f91 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -309,7 +309,7 @@ class Display extends EventDispatcher { async updateOptions() { const options = await api.optionsGet(this.getOptionsContext()); - const scanning = options.scanning; + const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options; this._options = options; this._updateDocumentOptions(options); @@ -320,16 +320,16 @@ class Display extends EventDispatcher { selectedParser: options.parsing.selectedParser, termSpacing: options.parsing.termSpacing, scanning: { - inputs: scanning.inputs, - deepContentScan: scanning.deepDomScan, - selectText: scanning.selectText, - delay: scanning.delay, - touchInputEnabled: scanning.touchInputEnabled, - pointerEventsEnabled: scanning.pointerEventsEnabled, - scanLength: scanning.length, - sentenceExtent: options.sentenceParsing.scanExtent, - layoutAwareScan: scanning.layoutAwareScan, - preventMiddleMouse: scanning.preventMiddleMouse.onSearchQuery + inputs: scanningOptions.inputs, + deepContentScan: scanningOptions.deepDomScan, + selectText: scanningOptions.selectText, + delay: scanningOptions.delay, + touchInputEnabled: scanningOptions.touchInputEnabled, + pointerEventsEnabled: scanningOptions.pointerEventsEnabled, + scanLength: scanningOptions.length, + sentenceScanExtent: sentenceParsingOptions.scanExtent, + layoutAwareScan: scanningOptions.layoutAwareScan, + preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery } }); @@ -1810,7 +1810,7 @@ class Display extends EventDispatcher { this._definitionTextScanner.on('searched', this._onDefinitionTextScannerSearched.bind(this)); } - const scanningOptions = options.scanning; + const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options; this._definitionTextScanner.setOptions({ inputs: [{ include: 'mouse0', @@ -1832,7 +1832,7 @@ class Display extends EventDispatcher { touchInputEnabled: false, pointerEventsEnabled: false, scanLength: scanningOptions.length, - sentenceExtent: options.sentenceParsing.scanExtent, + sentenceScanExtent: sentenceParsingOptions.scanExtent, layoutAwareScan: scanningOptions.layoutAwareScan, preventMiddleMouse: false }); diff --git a/ext/mixed/js/document-util.js b/ext/mixed/js/document-util.js index 46ed321e..647cbedc 100644 --- a/ext/mixed/js/document-util.js +++ b/ext/mixed/js/document-util.js @@ -31,12 +31,16 @@ class DocumentUtil { ['\'', '\''], ['"', '"'] ]; - this._terminatorSet = new Set(['…', '。', '.', '.', '?', '?', '!', '!']); - this._startQuoteMap = new Map(); - this._endQuoteMap = new Map(); + const terminatorString = '…。..??!!'; + this._terminatorMap = new Map(); + for (const char of terminatorString) { + this._terminatorMap.set(char, [false, true]); + } + this._forwardQuoteMap = new Map(); + this._backwardQuoteMap = new Map(); for (const [char1, char2] of quoteArray) { - this._startQuoteMap.set(char1, char2); - this._endQuoteMap.set(char2, char1); + this._forwardQuoteMap.set(char1, [char2, false]); + this._backwardQuoteMap.set(char2, [char1, false]); } } @@ -77,10 +81,10 @@ class DocumentUtil { } } - extractSentence(source, extent, layoutAwareScan) { - const terminatorSet = this._terminatorSet; - const startQuoteMap = this._startQuoteMap; - const endQuoteMap = this._endQuoteMap; + extractSentence(source, layoutAwareScan, extent) { + const terminatorMap = this._terminatorMap; + const forwardQuoteMap = this._forwardQuoteMap; + const backwardQuoteMap = this._backwardQuoteMap; // Scan text source = source.clone(); @@ -98,22 +102,28 @@ class DocumentUtil { const c = text[pos1 - 1]; if (c === '\n') { break; } - if (quoteStack.length === 0 && terminatorSet.has(c)) { - break; + if (quoteStack.length === 0) { + const terminatorInfo = terminatorMap.get(c); + if (typeof terminatorInfo !== 'undefined') { + if (terminatorInfo[0]) { --pos1; } + break; + } } - let otherQuote = startQuoteMap.get(c); - if (typeof otherQuote !== 'undefined') { + let quoteInfo = forwardQuoteMap.get(c); + if (typeof quoteInfo !== 'undefined') { if (quoteStack.length === 0) { + if (quoteInfo[1]) { --pos1; } break; } else if (quoteStack[0] === c) { quoteStack.pop(); + continue; } - } else { - otherQuote = endQuoteMap.get(c); - if (typeof otherQuote !== 'undefined') { - quoteStack.unshift(otherQuote); - } + } + + quoteInfo = backwardQuoteMap.get(c); + if (typeof quoteInfo !== 'undefined') { + quoteStack.unshift(quoteInfo[0]); } } @@ -123,23 +133,28 @@ class DocumentUtil { const c = text[pos2]; if (c === '\n') { break; } - if (quoteStack.length === 0 && terminatorSet.has(c)) { - ++pos2; - break; + if (quoteStack.length === 0) { + const terminatorInfo = terminatorMap.get(c); + if (typeof terminatorInfo !== 'undefined') { + if (terminatorInfo[1]) { ++pos2; } + break; + } } - let otherQuote = endQuoteMap.get(c); - if (typeof otherQuote !== 'undefined') { + let quoteInfo = backwardQuoteMap.get(c); + if (typeof quoteInfo !== 'undefined') { if (quoteStack.length === 0) { + if (quoteInfo[1]) { ++pos2; } break; } else if (quoteStack[0] === c) { quoteStack.pop(); + continue; } - } else { - otherQuote = startQuoteMap.get(c); - if (typeof otherQuote !== 'undefined') { - quoteStack.unshift(otherQuote); - } + } + + quoteInfo = forwardQuoteMap.get(c); + if (typeof quoteInfo !== 'undefined') { + quoteStack.unshift(quoteInfo[0]); } } diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js index 82bb898f..f26bcf0e 100644 --- a/ext/mixed/js/text-scanner.js +++ b/ext/mixed/js/text-scanner.js @@ -59,7 +59,7 @@ class TextScanner extends EventDispatcher { this._touchInputEnabled = false; this._pointerEventsEnabled = false; this._scanLength = 1; - this._sentenceExtent = 1; + this._sentenceScanExtent = 1; this._layoutAwareScan = false; this._preventMiddleMouse = false; this._inputs = []; @@ -134,7 +134,18 @@ class TextScanner extends EventDispatcher { } } - setOptions({inputs, deepContentScan, selectText, delay, touchInputEnabled, pointerEventsEnabled, scanLength, sentenceExtent, layoutAwareScan, preventMiddleMouse}) { + setOptions({ + inputs, + deepContentScan, + selectText, + delay, + touchInputEnabled, + pointerEventsEnabled, + scanLength, + sentenceScanExtent, + layoutAwareScan, + preventMiddleMouse + }) { if (Array.isArray(inputs)) { this._inputs = inputs.map(({ include, @@ -182,8 +193,8 @@ class TextScanner extends EventDispatcher { if (typeof scanLength === 'number') { this._scanLength = scanLength; } - if (typeof sentenceExtent === 'number') { - this._sentenceExtent = sentenceExtent; + if (typeof sentenceScanExtent === 'number') { + this._sentenceScanExtent = sentenceScanExtent; } if (typeof layoutAwareScan === 'boolean') { this._layoutAwareScan = layoutAwareScan; @@ -711,7 +722,7 @@ class TextScanner extends EventDispatcher { async _findTerms(textSource, optionsContext) { const scanLength = this._scanLength; - const sentenceExtent = this._sentenceExtent; + const sentenceScanExtent = this._sentenceScanExtent; const layoutAwareScan = this._layoutAwareScan; const searchText = this.getTextSourceContent(textSource, scanLength, layoutAwareScan); if (searchText.length === 0) { return null; } @@ -720,13 +731,13 @@ class TextScanner extends EventDispatcher { if (definitions.length === 0) { return null; } textSource.setEndOffset(length, layoutAwareScan); - const sentence = this._documentUtil.extractSentence(textSource, sentenceExtent, layoutAwareScan); + const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent); return {definitions, sentence, type: 'terms'}; } async _findKanji(textSource, optionsContext) { - const sentenceExtent = this._sentenceExtent; + const sentenceScanExtent = this._sentenceScanExtent; const layoutAwareScan = this._layoutAwareScan; const searchText = this.getTextSourceContent(textSource, 1, layoutAwareScan); if (searchText.length === 0) { return null; } @@ -735,7 +746,7 @@ class TextScanner extends EventDispatcher { if (definitions.length === 0) { return null; } textSource.setEndOffset(1, layoutAwareScan); - const sentence = this._documentUtil.extractSentence(textSource, sentenceExtent, layoutAwareScan); + const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent); return {definitions, sentence, type: 'kanji'}; } -- cgit v1.2.3