diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-01-09 23:10:55 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-01-09 23:10:55 -0500 | 
| commit | 083da93142ec6302021ee1c29428121b54fc9e68 (patch) | |
| tree | c07c7e3ae808682e127603303f4698056204abd7 | |
| parent | da1e1e5c5b4dc20cc6aa46b51c55a496d094ed5c (diff) | |
Refactor sentence parsing (#1215)
* Rename sentenceExtent to sentenceScanExtent
* Update TextScanner.setOptions
* Change function argument order
* Rename quote map variables
* Fix edge case quote handling
* Update terminator maps to support character inclusion
| -rw-r--r-- | ext/fg/js/frontend.js | 4 | ||||
| -rw-r--r-- | ext/mixed/js/display.js | 26 | ||||
| -rw-r--r-- | ext/mixed/js/document-util.js | 71 | ||||
| -rw-r--r-- | ext/mixed/js/text-scanner.js | 27 | ||||
| -rw-r--r-- | test/data/html/test-document1.html | 18 | ||||
| -rw-r--r-- | test/test-document-util.js | 6 | 
6 files changed, 89 insertions, 63 deletions
| diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js index c14c2feb..a206e3fb 100644 --- a/ext/fg/js/frontend.js +++ b/ext/fg/js/frontend.js @@ -312,7 +312,7 @@ class Frontend {      async _updateOptionsInternal() {          const optionsContext = await this._getOptionsContext();          const options = await api.optionsGet(optionsContext); -        const scanningOptions = options.scanning; +        const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;          this._options = options;          await this._updatePopup(); @@ -326,7 +326,7 @@ class Frontend {              touchInputEnabled: scanningOptions.touchInputEnabled,              pointerEventsEnabled: scanningOptions.pointerEventsEnabled,              scanLength: scanningOptions.length, -            sentenceExtent: options.sentenceParsing.scanExtent, +            sentenceScanExtent: sentenceParsingOptions.scanExtent,              layoutAwareScan: scanningOptions.layoutAwareScan,              preventMiddleMouse          }); diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 60842a3d..4c8d2f91 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -309,7 +309,7 @@ class Display extends EventDispatcher {      async updateOptions() {          const options = await api.optionsGet(this.getOptionsContext()); -        const scanning = options.scanning; +        const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;          this._options = options;          this._updateDocumentOptions(options); @@ -320,16 +320,16 @@ class Display extends EventDispatcher {              selectedParser: options.parsing.selectedParser,              termSpacing: options.parsing.termSpacing,              scanning: { -                inputs: scanning.inputs, -                deepContentScan: scanning.deepDomScan, -                selectText: scanning.selectText, -                delay: scanning.delay, -                touchInputEnabled: 
scanning.touchInputEnabled, -                pointerEventsEnabled: scanning.pointerEventsEnabled, -                scanLength: scanning.length, -                sentenceExtent: options.sentenceParsing.scanExtent, -                layoutAwareScan: scanning.layoutAwareScan, -                preventMiddleMouse: scanning.preventMiddleMouse.onSearchQuery +                inputs: scanningOptions.inputs, +                deepContentScan: scanningOptions.deepDomScan, +                selectText: scanningOptions.selectText, +                delay: scanningOptions.delay, +                touchInputEnabled: scanningOptions.touchInputEnabled, +                pointerEventsEnabled: scanningOptions.pointerEventsEnabled, +                scanLength: scanningOptions.length, +                sentenceScanExtent: sentenceParsingOptions.scanExtent, +                layoutAwareScan: scanningOptions.layoutAwareScan, +                preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery              }          }); @@ -1810,7 +1810,7 @@ class Display extends EventDispatcher {              this._definitionTextScanner.on('searched', this._onDefinitionTextScannerSearched.bind(this));          } -        const scanningOptions = options.scanning; +        const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;          this._definitionTextScanner.setOptions({              inputs: [{                  include: 'mouse0', @@ -1832,7 +1832,7 @@ class Display extends EventDispatcher {              touchInputEnabled: false,              pointerEventsEnabled: false,              scanLength: scanningOptions.length, -            sentenceExtent: options.sentenceParsing.scanExtent, +            sentenceScanExtent: sentenceParsingOptions.scanExtent,              layoutAwareScan: scanningOptions.layoutAwareScan,              preventMiddleMouse: false          }); diff --git a/ext/mixed/js/document-util.js b/ext/mixed/js/document-util.js index 46ed321e..647cbedc 
100644 --- a/ext/mixed/js/document-util.js +++ b/ext/mixed/js/document-util.js @@ -31,12 +31,16 @@ class DocumentUtil {              ['\'', '\''],              ['"', '"']          ]; -        this._terminatorSet = new Set(['…', '。', '.', '.', '?', '?', '!', '!']); -        this._startQuoteMap = new Map(); -        this._endQuoteMap = new Map(); +        const terminatorString = '…。..??!!'; +        this._terminatorMap = new Map(); +        for (const char of terminatorString) { +            this._terminatorMap.set(char, [false, true]); +        } +        this._forwardQuoteMap = new Map(); +        this._backwardQuoteMap = new Map();          for (const [char1, char2] of quoteArray) { -            this._startQuoteMap.set(char1, char2); -            this._endQuoteMap.set(char2, char1); +            this._forwardQuoteMap.set(char1, [char2, false]); +            this._backwardQuoteMap.set(char2, [char1, false]);          }      } @@ -77,10 +81,10 @@ class DocumentUtil {          }      } -    extractSentence(source, extent, layoutAwareScan) { -        const terminatorSet = this._terminatorSet; -        const startQuoteMap = this._startQuoteMap; -        const endQuoteMap = this._endQuoteMap; +    extractSentence(source, layoutAwareScan, extent) { +        const terminatorMap = this._terminatorMap; +        const forwardQuoteMap = this._forwardQuoteMap; +        const backwardQuoteMap = this._backwardQuoteMap;          // Scan text          source = source.clone(); @@ -98,22 +102,28 @@ class DocumentUtil {              const c = text[pos1 - 1];              if (c === '\n') { break; } -            if (quoteStack.length === 0 && terminatorSet.has(c)) { -                break; +            if (quoteStack.length === 0) { +                const terminatorInfo = terminatorMap.get(c); +                if (typeof terminatorInfo !== 'undefined') { +                    if (terminatorInfo[0]) { --pos1; } +                    break; +                }              } -            
let otherQuote = startQuoteMap.get(c); -            if (typeof otherQuote !== 'undefined') { +            let quoteInfo = forwardQuoteMap.get(c); +            if (typeof quoteInfo !== 'undefined') {                  if (quoteStack.length === 0) { +                    if (quoteInfo[1]) { --pos1; }                      break;                  } else if (quoteStack[0] === c) {                      quoteStack.pop(); +                    continue;                  } -            } else { -                otherQuote = endQuoteMap.get(c); -                if (typeof otherQuote !== 'undefined') { -                    quoteStack.unshift(otherQuote); -                } +            } + +            quoteInfo = backwardQuoteMap.get(c); +            if (typeof quoteInfo !== 'undefined') { +                quoteStack.unshift(quoteInfo[0]);              }          } @@ -123,23 +133,28 @@ class DocumentUtil {              const c = text[pos2];              if (c === '\n') { break; } -            if (quoteStack.length === 0 && terminatorSet.has(c)) { -                ++pos2; -                break; +            if (quoteStack.length === 0) { +                const terminatorInfo = terminatorMap.get(c); +                if (typeof terminatorInfo !== 'undefined') { +                    if (terminatorInfo[1]) { ++pos2; } +                    break; +                }              } -            let otherQuote = endQuoteMap.get(c); -            if (typeof otherQuote !== 'undefined') { +            let quoteInfo = backwardQuoteMap.get(c); +            if (typeof quoteInfo !== 'undefined') {                  if (quoteStack.length === 0) { +                    if (quoteInfo[1]) { ++pos2; }                      break;                  } else if (quoteStack[0] === c) {                      quoteStack.pop(); +                    continue;                  } -            } else { -                otherQuote = startQuoteMap.get(c); -                if (typeof otherQuote !== 'undefined') { -   
                 quoteStack.unshift(otherQuote); -                } +            } + +            quoteInfo = forwardQuoteMap.get(c); +            if (typeof quoteInfo !== 'undefined') { +                quoteStack.unshift(quoteInfo[0]);              }          } diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js index 82bb898f..f26bcf0e 100644 --- a/ext/mixed/js/text-scanner.js +++ b/ext/mixed/js/text-scanner.js @@ -59,7 +59,7 @@ class TextScanner extends EventDispatcher {          this._touchInputEnabled = false;          this._pointerEventsEnabled = false;          this._scanLength = 1; -        this._sentenceExtent = 1; +        this._sentenceScanExtent = 1;          this._layoutAwareScan = false;          this._preventMiddleMouse = false;          this._inputs = []; @@ -134,7 +134,18 @@ class TextScanner extends EventDispatcher {          }      } -    setOptions({inputs, deepContentScan, selectText, delay, touchInputEnabled, pointerEventsEnabled, scanLength, sentenceExtent, layoutAwareScan, preventMiddleMouse}) { +    setOptions({ +        inputs, +        deepContentScan, +        selectText, +        delay, +        touchInputEnabled, +        pointerEventsEnabled, +        scanLength, +        sentenceScanExtent, +        layoutAwareScan, +        preventMiddleMouse +    }) {          if (Array.isArray(inputs)) {              this._inputs = inputs.map(({                  include, @@ -182,8 +193,8 @@ class TextScanner extends EventDispatcher {          if (typeof scanLength === 'number') {              this._scanLength = scanLength;          } -        if (typeof sentenceExtent === 'number') { -            this._sentenceExtent = sentenceExtent; +        if (typeof sentenceScanExtent === 'number') { +            this._sentenceScanExtent = sentenceScanExtent;          }          if (typeof layoutAwareScan === 'boolean') {              this._layoutAwareScan = layoutAwareScan; @@ -711,7 +722,7 @@ class TextScanner extends EventDispatcher { 
     async _findTerms(textSource, optionsContext) {          const scanLength = this._scanLength; -        const sentenceExtent = this._sentenceExtent; +        const sentenceScanExtent = this._sentenceScanExtent;          const layoutAwareScan = this._layoutAwareScan;          const searchText = this.getTextSourceContent(textSource, scanLength, layoutAwareScan);          if (searchText.length === 0) { return null; } @@ -720,13 +731,13 @@ class TextScanner extends EventDispatcher {          if (definitions.length === 0) { return null; }          textSource.setEndOffset(length, layoutAwareScan); -        const sentence = this._documentUtil.extractSentence(textSource, sentenceExtent, layoutAwareScan); +        const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);          return {definitions, sentence, type: 'terms'};      }      async _findKanji(textSource, optionsContext) { -        const sentenceExtent = this._sentenceExtent; +        const sentenceScanExtent = this._sentenceScanExtent;          const layoutAwareScan = this._layoutAwareScan;          const searchText = this.getTextSourceContent(textSource, 1, layoutAwareScan);          if (searchText.length === 0) { return null; } @@ -735,7 +746,7 @@ class TextScanner extends EventDispatcher {          if (definitions.length === 0) { return null; }          textSource.setEndOffset(1, layoutAwareScan); -        const sentence = this._documentUtil.extractSentence(textSource, sentenceExtent, layoutAwareScan); +        const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);          return {definitions, sentence, type: 'kanji'};      } diff --git a/test/data/html/test-document1.html b/test/data/html/test-document1.html index 37dbb017..3b702a86 100644 --- a/test/data/html/test-document1.html +++ b/test/data/html/test-document1.html @@ -21,7 +21,7 @@          data-end-node-selector="span"          data-end-offset="0"          
data-result-type="TextSourceRange", -        data-sentence-extent="100" +        data-sentence-scan-extent="100"          data-sentence="真白「心配してくださって、ありがとございます」"      >          <span>真白「心配してくださって、ありがとございます」</span> @@ -37,7 +37,7 @@          data-end-node-selector="span"          data-end-offset="5"          data-result-type="TextSourceRange", -        data-sentence-extent="100" +        data-sentence-scan-extent="100"          data-sentence="心配してくださって、ありがとございます"      >          <span>真白「心配してくださって、ありがとございます」</span> @@ -53,7 +53,7 @@          data-end-node-selector="span"          data-end-offset="16"          data-result-type="TextSourceRange", -        data-sentence-extent="100" +        data-sentence-scan-extent="100"          data-sentence="心配して「くださって」、ありがと「ございます」"      >          <span>真白「心配して「くださって」、ありがと「ございます」」</span> @@ -69,7 +69,7 @@          data-end-node-selector="span"          data-end-offset="4"          data-result-type="TextSourceRange", -        data-sentence-extent="100" +        data-sentence-scan-extent="100"          data-sentence="ありがとございます。"      >          <span>ありがとございます。ありがとございます。</span> @@ -85,7 +85,7 @@          data-end-node-selector="span"          data-end-offset="14"          data-result-type="TextSourceRange", -        data-sentence-extent="100" +        data-sentence-scan-extent="100"          data-sentence="ありがとございます。"      >          <span>ありがとございます。ありがとございます。</span> @@ -101,7 +101,7 @@          data-end-node-selector="input"          data-end-offset="0"          data-result-type="TextSourceRange", -        data-sentence-extent="100" +        data-sentence-scan-extent="100"          data-sentence="真白「心配してくださって、ありがとございます」"          data-has-imposter="true"      > @@ -118,7 +118,7 @@          data-end-node-selector="textarea"          data-end-offset="0"          data-result-type="TextSourceRange", -        data-sentence-extent="100" +        data-sentence-scan-extent="100"          data-sentence="真白「心配してくださって、ありがとございます」"          
data-has-imposter="true"      > @@ -135,7 +135,7 @@          data-end-node-selector="button"          data-end-offset="0"          data-result-type="TextSourceElement", -        data-sentence-extent="100" +        data-sentence-scan-extent="100"          data-sentence="よみちゃん"      >          <button style="width: 100%; box-sizing: border-box; font-family: inherit; font-size: inherit; border: 1px solid #d8d8d8; background-color: #f0f0f0; padding: 0.2em;">よみちゃん</button> @@ -151,7 +151,7 @@          data-end-node-selector="img"          data-end-offset="0"          data-result-type="TextSourceElement" -        data-sentence-extent="100" +        data-sentence-scan-extent="100"          data-sentence="よみちゃん"      >          <img src="data:image/gif;base64,R0lGODdhBwAHAIABAAAAAP///ywAAAAABwAHAAACDIRvEaC32FpCbEkKCgA7" alt="よみちゃん" title="よみちゃん" style="width: 70px; height: 70px; image-rendering: crisp-edges; image-rendering: pixelated; display: block;" /> diff --git a/test/test-document-util.js b/test/test-document-util.js index 56368f0b..09f0c5e7 100644 --- a/test/test-document-util.js +++ b/test/test-document-util.js @@ -127,7 +127,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR              endNodeSelector,              endOffset,              resultType, -            sentenceExtent, +            sentenceScanExtent,              sentence,              hasImposter          } = testElement.dataset; @@ -139,7 +139,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR          startOffset = parseInt(startOffset, 10);          endOffset = parseInt(endOffset, 10); -        sentenceExtent = parseInt(sentenceExtent, 10); +        sentenceScanExtent = parseInt(sentenceScanExtent, 10);          assert.notStrictEqual(elementFromPointValue, null);          assert.notStrictEqual(caretRangeFromPointValue, null); @@ -182,7 +182,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR     
     if (source === null) { continue; }          // Test docSentenceExtract -        const sentenceActual = documentUtil.extractSentence(source, sentenceExtent, false).text; +        const sentenceActual = documentUtil.extractSentence(source, false, sentenceScanExtent).text;          assert.strictEqual(sentenceActual, sentence);          // Clean |