summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2020-06-21 16:07:51 -0400
committer: GitHub <noreply@github.com> 2020-06-21 16:07:51 -0400
commit: e23504613f8526b90a497512c086ed48e66cde95 (patch)
tree: 98db1a607ba40659d727e0083f2e45032a53e3a9 /ext
parent: 4ebee3e17c2d536da7de33d16c2e44c54c4c8e51 (diff)
Use DOMTextScanner (#536)
* Use DOMTextScanner instead of TextSourceRange.seek*
* Move getNodesInRange to dom.js
* Move anyNodeMatchesSelector to dom.js
* Remove unused functions
* Update tests
* Add layoutAwareScan option
* Use layoutAwareScan for source and sentence scanning
* Remove unused IGNORE_TEXT_PATTERN
Diffstat (limited to 'ext')
-rw-r--r-- ext/bg/data/options-schema.json 7
-rw-r--r-- ext/bg/js/options.js 3
-rw-r--r-- ext/bg/js/search-query-parser.js 8
-rw-r--r-- ext/bg/search.html 1
-rw-r--r-- ext/bg/settings-popup-preview.html 1
-rw-r--r-- ext/bg/settings.html 4
-rw-r--r-- ext/fg/float.html 1
-rw-r--r-- ext/fg/js/document.js 11
-rw-r--r-- ext/fg/js/frontend.js 14
-rw-r--r-- ext/fg/js/source.js 224
-rw-r--r-- ext/manifest.json 1
-rw-r--r-- ext/mixed/js/display.js 11
-rw-r--r-- ext/mixed/js/dom.js 38
-rw-r--r-- ext/mixed/js/text-scanner.js 11
14 files changed, 94 insertions, 241 deletions
diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json
index 0379fa75..5885e036 100644
--- a/ext/bg/data/options-schema.json
+++ b/ext/bg/data/options-schema.json
@@ -321,7 +321,8 @@
"enablePopupSearch",
"enableOnPopupExpressions",
"enableOnSearchPage",
- "enableSearchTags"
+ "enableSearchTags",
+ "layoutAwareScan"
],
"properties": {
"middleMouse": {
@@ -383,6 +384,10 @@
"enableSearchTags": {
"type": "boolean",
"default": false
+ },
+ "layoutAwareScan": {
+ "type": "boolean",
+ "default": false
}
}
},
diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js
index 97368a0b..170e4799 100644
--- a/ext/bg/js/options.js
+++ b/ext/bg/js/options.js
@@ -203,7 +203,8 @@ function profileOptionsCreateDefaults() {
enablePopupSearch: false,
enableOnPopupExpressions: false,
enableOnSearchPage: true,
- enableSearchTags: false
+ enableSearchTags: false,
+ layoutAwareScan: false
},
translation: {
diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js
index addfc686..97e98b40 100644
--- a/ext/bg/js/search-query-parser.js
+++ b/ext/bg/js/search-query-parser.js
@@ -75,15 +75,17 @@ class QueryParser {
async _search(textSource, cause) {
if (textSource === null) { return null; }
- const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length);
+ const {length: scanLength, layoutAwareScan} = this._options.scanning;
+ const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan);
if (searchText.length === 0) { return null; }
const {definitions, length} = await api.termsFind(searchText, {}, this._getOptionsContext());
if (definitions.length === 0) { return null; }
- const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt);
+ const sentenceExtent = this._options.anki.sentenceExt;
+ const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);
- textSource.setEndOffset(length);
+ textSource.setEndOffset(length, layoutAwareScan);
this._setContent('terms', {definitions, context: {
focus: false,
diff --git a/ext/bg/search.html b/ext/bg/search.html
index de08cdae..4a28dd88 100644
--- a/ext/bg/search.html
+++ b/ext/bg/search.html
@@ -79,6 +79,7 @@
<script src="/bg/js/dictionary.js"></script>
<script src="/bg/js/handlebars.js"></script>
<script src="/fg/js/document.js"></script>
+ <script src="/fg/js/dom-text-scanner.js"></script>
<script src="/fg/js/source.js"></script>
<script src="/mixed/js/audio-system.js"></script>
<script src="/mixed/js/display-context.js"></script>
diff --git a/ext/bg/settings-popup-preview.html b/ext/bg/settings-popup-preview.html
index fe92f24f..5eecd005 100644
--- a/ext/bg/settings-popup-preview.html
+++ b/ext/bg/settings-popup-preview.html
@@ -126,6 +126,7 @@
<script src="/mixed/js/text-scanner.js"></script>
<script src="/fg/js/document.js"></script>
+ <script src="/fg/js/dom-text-scanner.js"></script>
<script src="/fg/js/popup.js"></script>
<script src="/fg/js/source.js"></script>
<script src="/fg/js/popup-factory.js"></script>
diff --git a/ext/bg/settings.html b/ext/bg/settings.html
index 118a13b9..77b61aef 100644
--- a/ext/bg/settings.html
+++ b/ext/bg/settings.html
@@ -400,6 +400,10 @@
<label><input type="checkbox" id="auto-hide-results" data-setting="scanning.autoHideResults"> Automatically hide results</label>
</div>
+ <div class="checkbox">
+ <label><input type="checkbox" id="layout-aware-scan" data-setting="scanning.layoutAwareScan"> Layout-aware scan</label>
+ </div>
+
<div class="checkbox options-advanced">
<label><input type="checkbox" id="deep-dom-scan" data-setting="scanning.deepDomScan"> Deep DOM scan</label>
</div>
diff --git a/ext/fg/float.html b/ext/fg/float.html
index 17dbcc6d..3e41cde5 100644
--- a/ext/fg/float.html
+++ b/ext/fg/float.html
@@ -46,6 +46,7 @@
<script src="/mixed/js/japanese.js"></script>
<script src="/fg/js/document.js"></script>
+ <script src="/fg/js/dom-text-scanner.js"></script>
<script src="/fg/js/source.js"></script>
<script src="/mixed/js/audio-system.js"></script>
<script src="/mixed/js/display-context.js"></script>
diff --git a/ext/fg/js/document.js b/ext/fg/js/document.js
index d639bc86..c288502c 100644
--- a/ext/fg/js/document.js
+++ b/ext/fg/js/document.js
@@ -17,6 +17,7 @@
/* global
* DOM
+ * DOMTextScanner
* TextSourceElement
* TextSourceRange
*/
@@ -152,14 +153,14 @@ function docRangeFromPoint(x, y, deepDomScan) {
}
}
-function docSentenceExtract(source, extent) {
+function docSentenceExtract(source, extent, layoutAwareScan) {
const quotesFwd = {'「': '」', '『': '』', "'": "'", '"': '"'};
const quotesBwd = {'」': '「', '』': '『', "'": "'", '"': '"'};
const terminators = '…。..??!!';
const sourceLocal = source.clone();
- const position = sourceLocal.setStartOffset(extent);
- sourceLocal.setEndOffset(extent * 2 - position, true);
+ const position = sourceLocal.setStartOffset(extent, layoutAwareScan);
+ sourceLocal.setEndOffset(extent * 2 - position, layoutAwareScan, true);
const content = sourceLocal.text();
let quoteStack = [];
@@ -232,7 +233,7 @@ function isPointInRange(x, y, range) {
const nodePre = range.endContainer;
const offsetPre = range.endOffset;
try {
- const {node, offset, content} = TextSourceRange.seekForward(range.endContainer, range.endOffset, 1);
+ const {node, offset, content} = new DOMTextScanner(range.endContainer, range.endOffset, true, false).seek(1);
range.setEnd(node, offset);
if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) {
@@ -243,7 +244,7 @@ function isPointInRange(x, y, range) {
}
// Scan backward
- const {node, offset, content} = TextSourceRange.seekBackward(range.startContainer, range.startOffset, 1);
+ const {node, offset, content} = new DOMTextScanner(range.startContainer, range.startOffset, true, false).seek(-1);
range.setStart(node, offset);
if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) {
diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js
index 70bd8a48..ab455c09 100644
--- a/ext/fg/js/frontend.js
+++ b/ext/fg/js/frontend.js
@@ -258,32 +258,36 @@ class Frontend {
}
async _findTerms(textSource, optionsContext) {
- const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length);
+ const {length: scanLength, layoutAwareScan} = this._options.scanning;
+ const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan);
if (searchText.length === 0) { return null; }
const {definitions, length} = await api.termsFind(searchText, {}, optionsContext);
if (definitions.length === 0) { return null; }
- textSource.setEndOffset(length);
+ textSource.setEndOffset(length, layoutAwareScan);
return {definitions, type: 'terms'};
}
async _findKanji(textSource, optionsContext) {
- const searchText = this._textScanner.getTextSourceContent(textSource, 1);
+ const layoutAwareScan = this._options.scanning.layoutAwareScan;
+ const searchText = this._textScanner.getTextSourceContent(textSource, 1, layoutAwareScan);
if (searchText.length === 0) { return null; }
const definitions = await api.kanjiFind(searchText, optionsContext);
if (definitions.length === 0) { return null; }
- textSource.setEndOffset(1);
+ textSource.setEndOffset(1, layoutAwareScan);
return {definitions, type: 'kanji'};
}
_showContent(textSource, focus, definitions, type, optionsContext) {
const {url} = optionsContext;
- const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt);
+ const sentenceExtent = this._options.anki.sentenceExt;
+ const layoutAwareScan = this._options.scanning.layoutAwareScan;
+ const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);
this._showPopupContent(
textSource,
optionsContext,
diff --git a/ext/fg/js/source.js b/ext/fg/js/source.js
index fa4706f2..38810f07 100644
--- a/ext/fg/js/source.js
+++ b/ext/fg/js/source.js
@@ -15,9 +15,9 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-// \u200c (Zero-width non-joiner) appears on Google Docs from Chrome 76 onwards
-const IGNORE_TEXT_PATTERN = /\u200c/;
-
+/* global
+ * DOMTextScanner
+ */
/*
* TextSourceRange
@@ -46,19 +46,19 @@ class TextSourceRange {
return this.content;
}
- setEndOffset(length, fromEnd=false) {
+ setEndOffset(length, layoutAwareScan, fromEnd=false) {
const state = (
fromEnd ?
- TextSourceRange.seekForward(this.range.endContainer, this.range.endOffset, length) :
- TextSourceRange.seekForward(this.range.startContainer, this.range.startOffset, length)
+ new DOMTextScanner(this.range.endContainer, this.range.endOffset, !layoutAwareScan, layoutAwareScan).seek(length) :
+ new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(length)
);
this.range.setEnd(state.node, state.offset);
this.content = (fromEnd ? this.content + state.content : state.content);
return length - state.remainder;
}
- setStartOffset(length) {
- const state = TextSourceRange.seekBackward(this.range.startContainer, this.range.startOffset, length);
+ setStartOffset(length, layoutAwareScan) {
+ const state = new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(-length);
this.range.setStart(state.node, state.offset);
this.rangeStartOffset = this.range.startOffset;
this.content = state.content + this.content;
@@ -110,154 +110,6 @@ class TextSourceRange {
}
}
- static shouldEnter(node) {
- switch (node.nodeName.toUpperCase()) {
- case 'RT':
- case 'SCRIPT':
- case 'STYLE':
- return false;
- }
-
- const style = window.getComputedStyle(node);
- return !(
- style.visibility === 'hidden' ||
- style.display === 'none' ||
- parseFloat(style.fontSize) === 0
- );
- }
-
- static getRubyElement(node) {
- node = TextSourceRange.getParentElement(node);
- if (node !== null && node.nodeName.toUpperCase() === 'RT') {
- node = node.parentNode;
- return (node !== null && node.nodeName.toUpperCase() === 'RUBY') ? node : null;
- }
- return null;
- }
-
- static seekForward(node, offset, length) {
- const state = {node, offset, remainder: length, content: ''};
- if (length <= 0) {
- return state;
- }
-
- const TEXT_NODE = Node.TEXT_NODE;
- const ELEMENT_NODE = Node.ELEMENT_NODE;
- let resetOffset = false;
-
- const ruby = TextSourceRange.getRubyElement(node);
- if (ruby !== null) {
- node = ruby;
- resetOffset = true;
- }
-
- while (node !== null) {
- let visitChildren = true;
- const nodeType = node.nodeType;
-
- if (nodeType === TEXT_NODE) {
- state.node = node;
- if (TextSourceRange.seekForwardTextNode(state, resetOffset)) {
- break;
- }
- resetOffset = true;
- } else if (nodeType === ELEMENT_NODE) {
- visitChildren = TextSourceRange.shouldEnter(node);
- }
-
- node = TextSourceRange.getNextNode(node, visitChildren);
- }
-
- return state;
- }
-
- static seekForwardTextNode(state, resetOffset) {
- const nodeValue = state.node.nodeValue;
- const nodeValueLength = nodeValue.length;
- let content = state.content;
- let offset = resetOffset ? 0 : state.offset;
- let remainder = state.remainder;
- let result = false;
-
- for (; offset < nodeValueLength; ++offset) {
- const c = nodeValue[offset];
- if (!IGNORE_TEXT_PATTERN.test(c)) {
- content += c;
- if (--remainder <= 0) {
- result = true;
- ++offset;
- break;
- }
- }
- }
-
- state.offset = offset;
- state.content = content;
- state.remainder = remainder;
- return result;
- }
-
- static seekBackward(node, offset, length) {
- const state = {node, offset, remainder: length, content: ''};
- if (length <= 0) {
- return state;
- }
-
- const TEXT_NODE = Node.TEXT_NODE;
- const ELEMENT_NODE = Node.ELEMENT_NODE;
- let resetOffset = false;
-
- const ruby = TextSourceRange.getRubyElement(node);
- if (ruby !== null) {
- node = ruby;
- resetOffset = true;
- }
-
- while (node !== null) {
- let visitChildren = true;
- const nodeType = node.nodeType;
-
- if (nodeType === TEXT_NODE) {
- state.node = node;
- if (TextSourceRange.seekBackwardTextNode(state, resetOffset)) {
- break;
- }
- resetOffset = true;
- } else if (nodeType === ELEMENT_NODE) {
- visitChildren = TextSourceRange.shouldEnter(node);
- }
-
- node = TextSourceRange.getPreviousNode(node, visitChildren);
- }
-
- return state;
- }
-
- static seekBackwardTextNode(state, resetOffset) {
- const nodeValue = state.node.nodeValue;
- let content = state.content;
- let offset = resetOffset ? nodeValue.length : state.offset;
- let remainder = state.remainder;
- let result = false;
-
- for (; offset > 0; --offset) {
- const c = nodeValue[offset - 1];
- if (!IGNORE_TEXT_PATTERN.test(c)) {
- content = c + content;
- if (--remainder <= 0) {
- result = true;
- --offset;
- break;
- }
- }
- }
-
- state.offset = offset;
- state.content = content;
- state.remainder = remainder;
- return result;
- }
-
static getParentElement(node) {
while (node !== null && node.nodeType !== Node.ELEMENT_NODE) {
node = node.parentNode;
@@ -290,66 +142,6 @@ class TextSourceRange {
return writingMode;
}
}
-
- static getNodesInRange(range) {
- const end = range.endContainer;
- const nodes = [];
- for (let node = range.startContainer; node !== null; node = TextSourceRange.getNextNode(node, true)) {
- nodes.push(node);
- if (node === end) { break; }
- }
- return nodes;
- }
-
- static getNextNode(node, visitChildren) {
- let next = visitChildren ? node.firstChild : null;
- if (next === null) {
- while (true) {
- next = node.nextSibling;
- if (next !== null) { break; }
-
- next = node.parentNode;
- if (next === null) { break; }
-
- node = next;
- }
- }
- return next;
- }
-
- static getPreviousNode(node, visitChildren) {
- let next = visitChildren ? node.lastChild : null;
- if (next === null) {
- while (true) {
- next = node.previousSibling;
- if (next !== null) { break; }
-
- next = node.parentNode;
- if (next === null) { break; }
-
- node = next;
- }
- }
- return next;
- }
-
- static anyNodeMatchesSelector(nodeList, selector) {
- for (const node of nodeList) {
- if (TextSourceRange.nodeMatchesSelector(node, selector)) {
- return true;
- }
- }
- return false;
- }
-
- static nodeMatchesSelector(node, selector) {
- for (; node !== null; node = node.parentNode) {
- if (node.nodeType === Node.ELEMENT_NODE) {
- return node.matches(selector);
- }
- }
- return false;
- }
}
diff --git a/ext/manifest.json b/ext/manifest.json
index 75334675..4d4f0c06 100644
--- a/ext/manifest.json
+++ b/ext/manifest.json
@@ -42,6 +42,7 @@
"mixed/js/dynamic-loader.js",
"mixed/js/text-scanner.js",
"fg/js/document.js",
+ "fg/js/dom-text-scanner.js",
"fg/js/popup.js",
"fg/js/source.js",
"fg/js/popup-factory.js",
diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js
index 90fd1037..1d699706 100644
--- a/ext/mixed/js/display.js
+++ b/ext/mixed/js/display.js
@@ -236,7 +236,9 @@ class Display {
const {textSource, definitions} = termLookupResults;
const scannedElement = e.target;
- const sentence = docSentenceExtract(textSource, this.options.anki.sentenceExt);
+ const sentenceExtent = this.options.anki.sentenceExt;
+ const layoutAwareScan = this.options.scanning.layoutAwareScan;
+ const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);
this.context.update({
index: this.entryIndexFind(scannedElement),
@@ -273,21 +275,22 @@ class Display {
try {
e.preventDefault();
- const textSource = docRangeFromPoint(e.clientX, e.clientY, this.options.scanning.deepDomScan);
+ const {length: scanLength, deepDomScan: deepScan, layoutAwareScan} = this.options.scanning;
+ const textSource = docRangeFromPoint(e.clientX, e.clientY, deepScan);
if (textSource === null) {
return false;
}
let definitions, length;
try {
- textSource.setEndOffset(this.options.scanning.length);
+ textSource.setEndOffset(scanLength, layoutAwareScan);
({definitions, length} = await api.termsFind(textSource.text(), {}, this.getOptionsContext()));
if (definitions.length === 0) {
return false;
}
- textSource.setEndOffset(length);
+ textSource.setEndOffset(length, layoutAwareScan);
} finally {
textSource.cleanup();
}
diff --git a/ext/mixed/js/dom.js b/ext/mixed/js/dom.js
index 0e8f4462..05764443 100644
--- a/ext/mixed/js/dom.js
+++ b/ext/mixed/js/dom.js
@@ -86,4 +86,42 @@ class DOM {
null
);
}
+
+ static getNodesInRange(range) {
+ const end = range.endContainer;
+ const nodes = [];
+ for (let node = range.startContainer; node !== null; node = DOM.getNextNode(node)) {
+ nodes.push(node);
+ if (node === end) { break; }
+ }
+ return nodes;
+ }
+
+ static getNextNode(node) {
+ let next = node.firstChild;
+ if (next === null) {
+ while (true) {
+ next = node.nextSibling;
+ if (next !== null) { break; }
+
+ next = node.parentNode;
+ if (next === null) { break; }
+
+ node = next;
+ }
+ }
+ return next;
+ }
+
+ static anyNodeMatchesSelector(nodes, selector) {
+ const ELEMENT_NODE = Node.ELEMENT_NODE;
+ for (let node of nodes) {
+ for (; node !== null; node = node.parentNode) {
+ if (node.nodeType !== ELEMENT_NODE) { continue; }
+ if (node.matches(selector)) { return true; }
+ break;
+ }
+ }
+ return false;
+ }
}
diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js
index b8688b08..fb275452 100644
--- a/ext/mixed/js/text-scanner.js
+++ b/ext/mixed/js/text-scanner.js
@@ -17,7 +17,6 @@
/* global
* DOM
- * TextSourceRange
* docRangeFromPoint
*/
@@ -119,20 +118,20 @@ class TextScanner extends EventDispatcher {
}
}
- getTextSourceContent(textSource, length) {
+ getTextSourceContent(textSource, length, layoutAwareScan) {
const clonedTextSource = textSource.clone();
- clonedTextSource.setEndOffset(length);
+ clonedTextSource.setEndOffset(length, layoutAwareScan);
if (this._ignoreNodes !== null && clonedTextSource.range) {
length = clonedTextSource.text().length;
while (clonedTextSource.range && length > 0) {
- const nodes = TextSourceRange.getNodesInRange(clonedTextSource.range);
- if (!TextSourceRange.anyNodeMatchesSelector(nodes, this._ignoreNodes)) {
+ const nodes = DOM.getNodesInRange(clonedTextSource.range);
+ if (!DOM.anyNodeMatchesSelector(nodes, this._ignoreNodes)) {
break;
}
--length;
- clonedTextSource.setEndOffset(length);
+ clonedTextSource.setEndOffset(length, layoutAwareScan);
}
}