diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-06-21 16:07:51 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-06-21 16:07:51 -0400 | 
| commit | e23504613f8526b90a497512c086ed48e66cde95 (patch) | |
| tree | 98db1a607ba40659d727e0083f2e45032a53e3a9 /ext | |
| parent | 4ebee3e17c2d536da7de33d16c2e44c54c4c8e51 (diff) | |
Use DOMTextScanner (#536)
* Use DOMTextScanner instead of TextSourceRange.seek*
* Move getNodesInRange to dom.js
* Move anyNodeMatchesSelector to dom.js
* Remove unused functions
* Update tests
* Add layoutAwareScan option
* Use layoutAwareScan for source and sentence scanning
* Remove unused IGNORE_TEXT_PATTERN
Diffstat (limited to 'ext')
| -rw-r--r-- | ext/bg/data/options-schema.json | 7 | ||||
| -rw-r--r-- | ext/bg/js/options.js | 3 | ||||
| -rw-r--r-- | ext/bg/js/search-query-parser.js | 8 | ||||
| -rw-r--r-- | ext/bg/search.html | 1 | ||||
| -rw-r--r-- | ext/bg/settings-popup-preview.html | 1 | ||||
| -rw-r--r-- | ext/bg/settings.html | 4 | ||||
| -rw-r--r-- | ext/fg/float.html | 1 | ||||
| -rw-r--r-- | ext/fg/js/document.js | 11 | ||||
| -rw-r--r-- | ext/fg/js/frontend.js | 14 | ||||
| -rw-r--r-- | ext/fg/js/source.js | 224 | ||||
| -rw-r--r-- | ext/manifest.json | 1 | ||||
| -rw-r--r-- | ext/mixed/js/display.js | 11 | ||||
| -rw-r--r-- | ext/mixed/js/dom.js | 38 | ||||
| -rw-r--r-- | ext/mixed/js/text-scanner.js | 11 | 
14 files changed, 94 insertions, 241 deletions
| diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json index 0379fa75..5885e036 100644 --- a/ext/bg/data/options-schema.json +++ b/ext/bg/data/options-schema.json @@ -321,7 +321,8 @@                                      "enablePopupSearch",                                      "enableOnPopupExpressions",                                      "enableOnSearchPage", -                                    "enableSearchTags" +                                    "enableSearchTags", +                                    "layoutAwareScan"                                  ],                                  "properties": {                                      "middleMouse": { @@ -383,6 +384,10 @@                                      "enableSearchTags": {                                          "type": "boolean",                                          "default": false +                                    }, +                                    "layoutAwareScan": { +                                        "type": "boolean", +                                        "default": false                                      }                                  }                              }, diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index 97368a0b..170e4799 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -203,7 +203,8 @@ function profileOptionsCreateDefaults() {              enablePopupSearch: false,              enableOnPopupExpressions: false,              enableOnSearchPage: true, -            enableSearchTags: false +            enableSearchTags: false, +            layoutAwareScan: false          },          translation: { diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index addfc686..97e98b40 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -75,15 +75,17 @@ class QueryParser {      async _search(textSource, cause) {          if (textSource === null) { return null; } -        const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length); +        const {length: scanLength, layoutAwareScan} = this._options.scanning; +        const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan);          if (searchText.length === 0) { return null; }          const {definitions, length} = await api.termsFind(searchText, {}, this._getOptionsContext());          if (definitions.length === 0) { return null; } -        const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt); +        const sentenceExtent = this._options.anki.sentenceExt; +        const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan); -        textSource.setEndOffset(length); +        textSource.setEndOffset(length, layoutAwareScan);          this._setContent('terms', {definitions, context: {              focus: false, diff --git a/ext/bg/search.html b/ext/bg/search.html index de08cdae..4a28dd88 100644 --- a/ext/bg/search.html +++ b/ext/bg/search.html @@ -79,6 +79,7 @@          <script src="/bg/js/dictionary.js"></script>          <script src="/bg/js/handlebars.js"></script>          <script src="/fg/js/document.js"></script> +        <script src="/fg/js/dom-text-scanner.js"></script>          <script src="/fg/js/source.js"></script>          <script src="/mixed/js/audio-system.js"></script>          <script src="/mixed/js/display-context.js"></script> diff --git a/ext/bg/settings-popup-preview.html b/ext/bg/settings-popup-preview.html index fe92f24f..5eecd005 100644 --- a/ext/bg/settings-popup-preview.html +++ b/ext/bg/settings-popup-preview.html @@ -126,6 +126,7 @@          <script src="/mixed/js/text-scanner.js"></script>          <script src="/fg/js/document.js"></script> +        <script src="/fg/js/dom-text-scanner.js"></script>          <script src="/fg/js/popup.js"></script>          <script src="/fg/js/source.js"></script>          <script src="/fg/js/popup-factory.js"></script> diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 118a13b9..77b61aef 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -400,6 +400,10 @@                      <label><input type="checkbox" id="auto-hide-results" data-setting="scanning.autoHideResults"> Automatically hide results</label>                  </div> +                <div class="checkbox"> +                    <label><input type="checkbox" id="layout-aware-scan" data-setting="scanning.layoutAwareScan"> Layout-aware scan</label> +                </div> +                  <div class="checkbox options-advanced">                      <label><input type="checkbox" id="deep-dom-scan" data-setting="scanning.deepDomScan"> Deep DOM scan</label>                  </div> diff --git a/ext/fg/float.html b/ext/fg/float.html index 17dbcc6d..3e41cde5 100644 --- a/ext/fg/float.html +++ b/ext/fg/float.html @@ -46,6 +46,7 @@          <script src="/mixed/js/japanese.js"></script>          <script src="/fg/js/document.js"></script> +        <script src="/fg/js/dom-text-scanner.js"></script>          <script src="/fg/js/source.js"></script>          <script src="/mixed/js/audio-system.js"></script>          <script src="/mixed/js/display-context.js"></script> diff --git a/ext/fg/js/document.js b/ext/fg/js/document.js index d639bc86..c288502c 100644 --- a/ext/fg/js/document.js +++ b/ext/fg/js/document.js @@ -17,6 +17,7 @@  /* global   * DOM + * DOMTextScanner   * TextSourceElement   * TextSourceRange   */ @@ -152,14 +153,14 @@ function docRangeFromPoint(x, y, deepDomScan) {      }  } -function docSentenceExtract(source, extent) { +function docSentenceExtract(source, extent, layoutAwareScan) {      const quotesFwd = {'「': '」', '『': '』', "'": "'", '"': '"'};      const quotesBwd = {'」': '「', '』': '『', "'": "'", '"': '"'};      const terminators = '…。..??!!';      const sourceLocal = source.clone(); -    const position = sourceLocal.setStartOffset(extent); -    sourceLocal.setEndOffset(extent * 2 - position, true); +    const position = sourceLocal.setStartOffset(extent, layoutAwareScan); +    sourceLocal.setEndOffset(extent * 2 - position, layoutAwareScan, true);      const content = sourceLocal.text();      let quoteStack = []; @@ -232,7 +233,7 @@ function isPointInRange(x, y, range) {      const nodePre = range.endContainer;      const offsetPre = range.endOffset;      try { -        const {node, offset, content} = TextSourceRange.seekForward(range.endContainer, range.endOffset, 1); +        const {node, offset, content} = new DOMTextScanner(range.endContainer, range.endOffset, true, false).seek(1);          range.setEnd(node, offset);          if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) { @@ -243,7 +244,7 @@ function isPointInRange(x, y, range) {      }      // Scan backward -    const {node, offset, content} = TextSourceRange.seekBackward(range.startContainer, range.startOffset, 1); +    const {node, offset, content} = new DOMTextScanner(range.startContainer, range.startOffset, true, false).seek(-1);      range.setStart(node, offset);      if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) { diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js index 70bd8a48..ab455c09 100644 --- a/ext/fg/js/frontend.js +++ b/ext/fg/js/frontend.js @@ -258,32 +258,36 @@ class Frontend {      }      async _findTerms(textSource, optionsContext) { -        const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length); +        const {length: scanLength, layoutAwareScan} = this._options.scanning; +        const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan);          if (searchText.length === 0) { return null; }          const {definitions, length} = await api.termsFind(searchText, {}, optionsContext);          if (definitions.length === 0) { return null; } -        textSource.setEndOffset(length); +        textSource.setEndOffset(length, layoutAwareScan);          return {definitions, type: 'terms'};      }      async _findKanji(textSource, optionsContext) { -        const searchText = this._textScanner.getTextSourceContent(textSource, 1); +        const layoutAwareScan = this._options.scanning.layoutAwareScan; +        const searchText = this._textScanner.getTextSourceContent(textSource, 1, layoutAwareScan);          if (searchText.length === 0) { return null; }          const definitions = await api.kanjiFind(searchText, optionsContext);          if (definitions.length === 0) { return null; } -        textSource.setEndOffset(1); +        textSource.setEndOffset(1, layoutAwareScan);          return {definitions, type: 'kanji'};      }      _showContent(textSource, focus, definitions, type, optionsContext) {          const {url} = optionsContext; -        const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt); +        const sentenceExtent = this._options.anki.sentenceExt; +        const layoutAwareScan = this._options.scanning.layoutAwareScan; +        const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);          this._showPopupContent(              textSource,              optionsContext, diff --git a/ext/fg/js/source.js b/ext/fg/js/source.js index fa4706f2..38810f07 100644 --- a/ext/fg/js/source.js +++ b/ext/fg/js/source.js @@ -15,9 +15,9 @@   * along with this program.  If not, see <https://www.gnu.org/licenses/>.   */ -// \u200c (Zero-width non-joiner) appears on Google Docs from Chrome 76 onwards -const IGNORE_TEXT_PATTERN = /\u200c/; - +/* global + * DOMTextScanner + */  /*   * TextSourceRange @@ -46,19 +46,19 @@ class TextSourceRange {          return this.content;      } -    setEndOffset(length, fromEnd=false) { +    setEndOffset(length, layoutAwareScan, fromEnd=false) {          const state = (              fromEnd ? -            TextSourceRange.seekForward(this.range.endContainer, this.range.endOffset, length) : -            TextSourceRange.seekForward(this.range.startContainer, this.range.startOffset, length) +            new DOMTextScanner(this.range.endContainer, this.range.endOffset, !layoutAwareScan, layoutAwareScan).seek(length) : +            new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(length)          );          this.range.setEnd(state.node, state.offset);          this.content = (fromEnd ? this.content + state.content : state.content);          return length - state.remainder;      } -    setStartOffset(length) { -        const state = TextSourceRange.seekBackward(this.range.startContainer, this.range.startOffset, length); +    setStartOffset(length, layoutAwareScan) { +        const state = new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(-length);          this.range.setStart(state.node, state.offset);          this.rangeStartOffset = this.range.startOffset;          this.content = state.content + this.content; @@ -110,154 +110,6 @@ class TextSourceRange {          }      } -    static shouldEnter(node) { -        switch (node.nodeName.toUpperCase()) { -            case 'RT': -            case 'SCRIPT': -            case 'STYLE': -                return false; -        } - -        const style = window.getComputedStyle(node); -        return !( -            style.visibility === 'hidden' || -            style.display === 'none' || -            parseFloat(style.fontSize) === 0 -        ); -    } - -    static getRubyElement(node) { -        node = TextSourceRange.getParentElement(node); -        if (node !== null && node.nodeName.toUpperCase() === 'RT') { -            node = node.parentNode; -            return (node !== null && node.nodeName.toUpperCase() === 'RUBY') ? node : null; -        } -        return null; -    } - -    static seekForward(node, offset, length) { -        const state = {node, offset, remainder: length, content: ''}; -        if (length <= 0) { -            return state; -        } - -        const TEXT_NODE = Node.TEXT_NODE; -        const ELEMENT_NODE = Node.ELEMENT_NODE; -        let resetOffset = false; - -        const ruby = TextSourceRange.getRubyElement(node); -        if (ruby !== null) { -            node = ruby; -            resetOffset = true; -        } - -        while (node !== null) { -            let visitChildren = true; -            const nodeType = node.nodeType; - -            if (nodeType === TEXT_NODE) { -                state.node = node; -                if (TextSourceRange.seekForwardTextNode(state, resetOffset)) { -                    break; -                } -                resetOffset = true; -            } else if (nodeType === ELEMENT_NODE) { -                visitChildren = TextSourceRange.shouldEnter(node); -            } - -            node = TextSourceRange.getNextNode(node, visitChildren); -        } - -        return state; -    } - -    static seekForwardTextNode(state, resetOffset) { -        const nodeValue = state.node.nodeValue; -        const nodeValueLength = nodeValue.length; -        let content = state.content; -        let offset = resetOffset ? 0 : state.offset; -        let remainder = state.remainder; -        let result = false; - -        for (; offset < nodeValueLength; ++offset) { -            const c = nodeValue[offset]; -            if (!IGNORE_TEXT_PATTERN.test(c)) { -                content += c; -                if (--remainder <= 0) { -                    result = true; -                    ++offset; -                    break; -                } -            } -        } - -        state.offset = offset; -        state.content = content; -        state.remainder = remainder; -        return result; -    } - -    static seekBackward(node, offset, length) { -        const state = {node, offset, remainder: length, content: ''}; -        if (length <= 0) { -            return state; -        } - -        const TEXT_NODE = Node.TEXT_NODE; -        const ELEMENT_NODE = Node.ELEMENT_NODE; -        let resetOffset = false; - -        const ruby = TextSourceRange.getRubyElement(node); -        if (ruby !== null) { -            node = ruby; -            resetOffset = true; -        } - -        while (node !== null) { -            let visitChildren = true; -            const nodeType = node.nodeType; - -            if (nodeType === TEXT_NODE) { -                state.node = node; -                if (TextSourceRange.seekBackwardTextNode(state, resetOffset)) { -                    break; -                } -                resetOffset = true; -            } else if (nodeType === ELEMENT_NODE) { -                visitChildren = TextSourceRange.shouldEnter(node); -            } - -            node = TextSourceRange.getPreviousNode(node, visitChildren); -        } - -        return state; -    } - -    static seekBackwardTextNode(state, resetOffset) { -        const nodeValue = state.node.nodeValue; -        let content = state.content; -        let offset = resetOffset ? nodeValue.length : state.offset; -        let remainder = state.remainder; -        let result = false; - -        for (; offset > 0; --offset) { -            const c = nodeValue[offset - 1]; -            if (!IGNORE_TEXT_PATTERN.test(c)) { -                content = c + content; -                if (--remainder <= 0) { -                    result = true; -                    --offset; -                    break; -                } -            } -        } - -        state.offset = offset; -        state.content = content; -        state.remainder = remainder; -        return result; -    } -      static getParentElement(node) {          while (node !== null && node.nodeType !== Node.ELEMENT_NODE) {              node = node.parentNode; @@ -290,66 +142,6 @@ class TextSourceRange {                  return writingMode;          }      } - -    static getNodesInRange(range) { -        const end = range.endContainer; -        const nodes = []; -        for (let node = range.startContainer; node !== null; node = TextSourceRange.getNextNode(node, true)) { -            nodes.push(node); -            if (node === end) { break; } -        } -        return nodes; -    } - -    static getNextNode(node, visitChildren) { -        let next = visitChildren ? node.firstChild : null; -        if (next === null) { -            while (true) { -                next = node.nextSibling; -                if (next !== null) { break; } - -                next = node.parentNode; -                if (next === null) { break; } - -                node = next; -            } -        } -        return next; -    } - -    static getPreviousNode(node, visitChildren) { -        let next = visitChildren ? node.lastChild : null; -        if (next === null) { -            while (true) { -                next = node.previousSibling; -                if (next !== null) { break; } - -                next = node.parentNode; -                if (next === null) { break; } - -                node = next; -            } -        } -        return next; -    } - -    static anyNodeMatchesSelector(nodeList, selector) { -        for (const node of nodeList) { -            if (TextSourceRange.nodeMatchesSelector(node, selector)) { -                return true; -            } -        } -        return false; -    } - -    static nodeMatchesSelector(node, selector) { -        for (; node !== null; node = node.parentNode) { -            if (node.nodeType === Node.ELEMENT_NODE) { -                return node.matches(selector); -            } -        } -        return false; -    }  } diff --git a/ext/manifest.json b/ext/manifest.json index 75334675..4d4f0c06 100644 --- a/ext/manifest.json +++ b/ext/manifest.json @@ -42,6 +42,7 @@              "mixed/js/dynamic-loader.js",              "mixed/js/text-scanner.js",              "fg/js/document.js", +            "fg/js/dom-text-scanner.js",              "fg/js/popup.js",              "fg/js/source.js",              "fg/js/popup-factory.js", diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 90fd1037..1d699706 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -236,7 +236,9 @@ class Display {              const {textSource, definitions} = termLookupResults;              const scannedElement = e.target; -            const sentence = docSentenceExtract(textSource, this.options.anki.sentenceExt); +            const sentenceExtent = this.options.anki.sentenceExt; +            const layoutAwareScan = this.options.scanning.layoutAwareScan; +            const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);              this.context.update({                  index: this.entryIndexFind(scannedElement), @@ -273,21 +275,22 @@ class Display {          try {              e.preventDefault(); -            const textSource = docRangeFromPoint(e.clientX, e.clientY, this.options.scanning.deepDomScan); +            const {length: scanLength, deepDomScan: deepScan, layoutAwareScan} = this.options.scanning; +            const textSource = docRangeFromPoint(e.clientX, e.clientY, deepScan);              if (textSource === null) {                  return false;              }              let definitions, length;              try { -                textSource.setEndOffset(this.options.scanning.length); +                textSource.setEndOffset(scanLength, layoutAwareScan);                  ({definitions, length} = await api.termsFind(textSource.text(), {}, this.getOptionsContext()));                  if (definitions.length === 0) {                      return false;                  } -                textSource.setEndOffset(length); +                textSource.setEndOffset(length, layoutAwareScan);              } finally {                  textSource.cleanup();              } diff --git a/ext/mixed/js/dom.js b/ext/mixed/js/dom.js index 0e8f4462..05764443 100644 --- a/ext/mixed/js/dom.js +++ b/ext/mixed/js/dom.js @@ -86,4 +86,42 @@ class DOM {              null          );      } + +    static getNodesInRange(range) { +        const end = range.endContainer; +        const nodes = []; +        for (let node = range.startContainer; node !== null; node = DOM.getNextNode(node)) { +            nodes.push(node); +            if (node === end) { break; } +        } +        return nodes; +    } + +    static getNextNode(node) { +        let next = node.firstChild; +        if (next === null) { +            while (true) { +                next = node.nextSibling; +                if (next !== null) { break; } + +                next = node.parentNode; +                if (next === null) { break; } + +                node = next; +            } +        } +        return next; +    } + +    static anyNodeMatchesSelector(nodes, selector) { +        const ELEMENT_NODE = Node.ELEMENT_NODE; +        for (let node of nodes) { +            for (; node !== null; node = node.parentNode) { +                if (node.nodeType !== ELEMENT_NODE) { continue; } +                if (node.matches(selector)) { return true; } +                break; +            } +        } +        return false; +    }  } diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js index b8688b08..fb275452 100644 --- a/ext/mixed/js/text-scanner.js +++ b/ext/mixed/js/text-scanner.js @@ -17,7 +17,6 @@  /* global   * DOM - * TextSourceRange   * docRangeFromPoint   */ @@ -119,20 +118,20 @@ class TextScanner extends EventDispatcher {          }      } -    getTextSourceContent(textSource, length) { +    getTextSourceContent(textSource, length, layoutAwareScan) {          const clonedTextSource = textSource.clone(); -        clonedTextSource.setEndOffset(length); +        clonedTextSource.setEndOffset(length, layoutAwareScan);          if (this._ignoreNodes !== null && clonedTextSource.range) {              length = clonedTextSource.text().length;              while (clonedTextSource.range && length > 0) { -                const nodes = TextSourceRange.getNodesInRange(clonedTextSource.range); -                if (!TextSourceRange.anyNodeMatchesSelector(nodes, this._ignoreNodes)) { +                const nodes = DOM.getNodesInRange(clonedTextSource.range); +                if (!DOM.anyNodeMatchesSelector(nodes, this._ignoreNodes)) {                      break;                  }                  --length; -                clonedTextSource.setEndOffset(length); +                clonedTextSource.setEndOffset(length, layoutAwareScan);              }          } |