aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/fg/js/dom-text-scanner.js538
-rw-r--r--test/data/html/test-dom-text-scanner.html393
-rw-r--r--test/data/html/test-stylesheet.css12
-rw-r--r--test/test-dom-text-scanner.js181
4 files changed, 1121 insertions, 3 deletions
diff --git a/ext/fg/js/dom-text-scanner.js b/ext/fg/js/dom-text-scanner.js
new file mode 100644
index 00000000..2de65041
--- /dev/null
+++ b/ext/fg/js/dom-text-scanner.js
@@ -0,0 +1,538 @@
+/*
+ * Copyright (C) 2020 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/**
+ * A class used to scan text in a document.
+ */
+class DOMTextScanner {
+ /**
+ * Creates a new instance of a DOMTextScanner.
+ * @param node The DOM Node to start at.
+ * @param offset The character offset in to start at when node is a text node.
+ * Use 0 for non-text nodes.
+ */
+ constructor(node, offset, forcePreserveWhitespace=false, generateLayoutContent=true) {
+ const ruby = DOMTextScanner.getParentRubyElement(node);
+ const resetOffset = (ruby !== null);
+ if (resetOffset) { node = ruby; }
+
+ this._node = node;
+ this._offset = offset;
+ this._content = '';
+ this._remainder = 0;
+ this._resetOffset = resetOffset;
+ this._newlines = 0;
+ this._lineHasWhitespace = false;
+ this._lineHasContent = false;
+ this._forcePreserveWhitespace = forcePreserveWhitespace;
+ this._generateLayoutContent = generateLayoutContent;
+ }
+
+ /**
+ * Gets the current node being scanned.
+ * Before seek() is called this is the start node chosen by the constructor.
+ * After seek() it is the node at which scanning stopped, which is null when
+ * the scan ran out of nodes before the requested length was reached.
+ * @returns A DOM Node, or null.
+ */
+ get node() {
+ return this._node;
+ }
+
+ /**
+ * Gets the current offset corresponding to the node being scanned.
+ * This value is only applicable for text nodes.
+ * Before seek() is called this is the offset passed to the constructor; every text node
+ * processed by seek() overwrites it with the position at which scanning of that node stopped.
+ * @returns An integer.
+ */
+ get offset() {
+ return this._offset;
+ }
+
+ /**
+ * Gets the accumulated content string resulting from calls to seek().
+ * Forward seeks append to this string and backward seeks prepend to it.
+ * @returns A string.
+ */
+ get content() {
+ return this._content;
+ }
+
+ /**
+ * Seeks a given length in the document and accumulates the text content.
+ * Nodes are visited starting from the current node, moving forward for non-negative
+ * lengths and backward for negative lengths. Text nodes contribute characters to the
+ * content; element nodes determine whether their children are entered and how many
+ * layout newlines are pending. The pending newline count is raised to the largest value
+ * reported by any element that is visited or exited, but only when layout content
+ * generation is enabled.
+ * When the scan runs out of nodes before the requested length is reached, the current
+ * node becomes null.
+ * @param length A positive or negative integer corresponding to how many characters
+ * should be added to content. Content is only added to the accumulation string,
+ * never removed, so mixing seek calls with differently signed length values
+ * may give unexpected results. A length of 0 returns immediately.
+ * @returns this
+ */
+ seek(length) {
+ const forward = (length >= 0);
+ this._remainder = (forward ? length : -length);
+ if (length === 0) { return this; }
+
+ const TEXT_NODE = Node.TEXT_NODE;
+ const ELEMENT_NODE = Node.ELEMENT_NODE;
+
+ const generateLayoutContent = this._generateLayoutContent;
+ let node = this._node;
+ let resetOffset = this._resetOffset;
+ let newlines = 0;
+ while (node !== null) {
+ let enterable = false; // Only element nodes can be entered
+ const nodeType = node.nodeType;
+
+ if (nodeType === TEXT_NODE) {
+ if (!(
+ forward ?
+ this._seekTextNodeForward(node, resetOffset) :
+ this._seekTextNodeBackward(node, resetOffset)
+ )) {
+ // Length reached; stop with node still referring to this text node
+ break;
+ }
+ } else if (nodeType === ELEMENT_NODE) {
+ [enterable, newlines] = DOMTextScanner.getElementSeekInfo(node);
+ if (newlines > this._newlines && generateLayoutContent) {
+ this._newlines = newlines;
+ }
+ }
+
+ const exitedNodes = [];
+ node = DOMTextScanner.getNextNode(node, forward, enterable, exitedNodes);
+
+ // Elements that were exited can also imply layout newlines
+ for (const exitedNode of exitedNodes) {
+ if (exitedNode.nodeType !== ELEMENT_NODE) { continue; }
+ newlines = DOMTextScanner.getElementSeekInfo(exitedNode)[1];
+ if (newlines > this._newlines && generateLayoutContent) {
+ this._newlines = newlines;
+ }
+ }
+
+ resetOffset = true; // Every subsequent text node is scanned from its start (or its end when seeking backward)
+ }
+
+ this._node = node;
+ this._resetOffset = resetOffset;
+
+ return this;
+ }
+
+ // Private
+
+ /**
+ * Seeks forward in a text node.
+ * Characters are classified with getCharacterAttributes(): ignored characters are skipped,
+ * collapsible whitespace is remembered and emitted as a single space only when content
+ * follows it, and all other characters are appended to the content.
+ * Pending layout newlines are emitted before the next appended character, but only if
+ * some content has already been accumulated; otherwise they are discarded.
+ * Each emitted newline, space, or character reduces the remaining length by one.
+ * The scanner's content, offset, remaining length, pending newline count and line state
+ * are written back before returning.
+ * @param textNode The text node to use.
+ * @param resetOffset Whether or not the text offset should be reset.
+ * When true, scanning starts at offset 0 instead of the stored offset.
+ * @returns true if scanning should continue, or false if the scan length has been reached.
+ */
+ _seekTextNodeForward(textNode, resetOffset) {
+ const nodeValue = textNode.nodeValue;
+ const nodeValueLength = nodeValue.length;
+ const [preserveNewlines, preserveWhitespace] = (
+ this._forcePreserveWhitespace ?
+ [true, true] :
+ DOMTextScanner.getWhitespaceSettings(textNode)
+ );
+
+ let lineHasWhitespace = this._lineHasWhitespace;
+ let lineHasContent = this._lineHasContent;
+ let content = this._content;
+ let offset = resetOffset ? 0 : this._offset;
+ let remainder = this._remainder;
+ let newlines = this._newlines;
+
+ while (offset < nodeValueLength) {
+ const char = nodeValue[offset];
+ const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
+ ++offset;
+
+ if (charAttributes === 0) {
+ // Character should be ignored
+ continue;
+ } else if (charAttributes === 1) {
+ // Character is collapsible whitespace
+ lineHasWhitespace = true;
+ } else {
+ // Character should be added to the content
+ if (newlines > 0) {
+ if (content.length > 0) {
+ const useNewlineCount = Math.min(remainder, newlines); // Never emit more than the remaining length
+ content += '\n'.repeat(useNewlineCount);
+ remainder -= useNewlineCount;
+ newlines -= useNewlineCount;
+ } else {
+ newlines = 0; // No content yet, so leading layout newlines are dropped
+ }
+ lineHasContent = false;
+ lineHasWhitespace = false;
+ if (remainder <= 0) {
+ --offset; // Revert character offset
+ break;
+ }
+ }
+
+ lineHasContent = (charAttributes === 2); // 3 = character is a newline
+
+ if (lineHasWhitespace) {
+ if (lineHasContent) {
+ content += ' ';
+ lineHasWhitespace = false;
+ if (--remainder <= 0) {
+ --offset; // Revert character offset
+ break;
+ }
+ } else {
+ lineHasWhitespace = false;
+ }
+ }
+
+ content += char;
+
+ if (--remainder <= 0) { break; }
+ }
+ }
+
+ this._lineHasWhitespace = lineHasWhitespace;
+ this._lineHasContent = lineHasContent;
+ this._content = content;
+ this._offset = offset;
+ this._remainder = remainder;
+ this._newlines = newlines;
+
+ return (remainder > 0);
+ }
+
+ /**
+ * Seeks backward in a text node.
+ * This function is nearly the same as _seekTextNodeForward, with the following differences:
+ * - Iteration condition is reversed to check if offset is greater than 0.
+ * - offset is reset to nodeValueLength instead of 0.
+ * - offset is decremented instead of incremented.
+ * - offset is decremented before getting the character.
+ * - offset is reverted by incrementing instead of decrementing.
+ * - content string is prepended instead of appended.
+ * As in the forward case, the scanner's content, offset, remaining length, pending newline
+ * count and line state are written back before returning.
+ * @param textNode The text node to use.
+ * @param resetOffset Whether or not the text offset should be reset.
+ * When true, scanning starts at the end of the node instead of the stored offset.
+ * @returns true if scanning should continue, or false if the scan length has been reached.
+ */
+ _seekTextNodeBackward(textNode, resetOffset) {
+ const nodeValue = textNode.nodeValue;
+ const nodeValueLength = nodeValue.length;
+ const [preserveNewlines, preserveWhitespace] = (
+ this._forcePreserveWhitespace ?
+ [true, true] :
+ DOMTextScanner.getWhitespaceSettings(textNode)
+ );
+
+ let lineHasWhitespace = this._lineHasWhitespace;
+ let lineHasContent = this._lineHasContent;
+ let content = this._content;
+ let offset = resetOffset ? nodeValueLength : this._offset;
+ let remainder = this._remainder;
+ let newlines = this._newlines;
+
+ while (offset > 0) {
+ --offset;
+ const char = nodeValue[offset];
+ const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
+
+ if (charAttributes === 0) {
+ // Character should be ignored
+ continue;
+ } else if (charAttributes === 1) {
+ // Character is collapsible whitespace
+ lineHasWhitespace = true;
+ } else {
+ // Character should be added to the content
+ if (newlines > 0) {
+ if (content.length > 0) {
+ const useNewlineCount = Math.min(remainder, newlines); // Never emit more than the remaining length
+ content = '\n'.repeat(useNewlineCount) + content;
+ remainder -= useNewlineCount;
+ newlines -= useNewlineCount;
+ } else {
+ newlines = 0; // No content yet, so layout newlines are dropped
+ }
+ lineHasContent = false;
+ lineHasWhitespace = false;
+ if (remainder <= 0) {
+ ++offset; // Revert character offset
+ break;
+ }
+ }
+
+ lineHasContent = (charAttributes === 2); // 3 = character is a newline
+
+ if (lineHasWhitespace) {
+ if (lineHasContent) {
+ content = ' ' + content;
+ lineHasWhitespace = false;
+ if (--remainder <= 0) {
+ ++offset; // Revert character offset
+ break;
+ }
+ } else {
+ lineHasWhitespace = false;
+ }
+ }
+
+ content = char + content;
+
+ if (--remainder <= 0) { break; }
+ }
+ }
+
+ this._lineHasWhitespace = lineHasWhitespace;
+ this._lineHasContent = lineHasContent;
+ this._content = content;
+ this._offset = offset;
+ this._remainder = remainder;
+ this._newlines = newlines;
+
+ return (remainder > 0);
+ }
+
+ // Static helpers
+
+ /**
+ * Gets the next node in the document for a specified scanning direction.
+ * @param node The current DOM Node.
+ * @param forward Whether to scan forward in the document or backward.
+ * @param visitChildren Whether the children of the current node should be visited.
+ * @param exitedNodes An array which stores nodes which were exited.
+ * @returns The next node in the document, or null if there is no next node.
+ */
+ static getNextNode(node, forward, visitChildren, exitedNodes) {
+ let next = visitChildren ? (forward ? node.firstChild : node.lastChild) : null;
+ if (next === null) {
+ while (true) {
+ exitedNodes.push(node);
+
+ next = (forward ? node.nextSibling : node.previousSibling);
+ if (next !== null) { break; }
+
+ next = node.parentNode;
+ if (next === null) { break; }
+
+ node = next;
+ }
+ }
+ return next;
+ }
+
+ /**
+ * Gets the parent element of a given Node.
+ * @param node The node to check.
+ * @returns The parent element if one exists, otherwise null.
+ */
+ static getParentElement(node) {
+ while (node !== null && node.nodeType !== Node.ELEMENT_NODE) {
+ node = node.parentNode;
+ }
+ return node;
+ }
+
+ /**
+ * Gets the parent <ruby> element of a given node, if one exists. For efficiency purposes,
+ * this only checks the immediate parent elements and does not check all ancestors, so
+ * there are cases where the node may be in a ruby element but it is not returned.
+ * @param node The node to check.
+ * @returns A <ruby> node if the input node is contained in one, otherwise null.
+ */
+ static getParentRubyElement(node) {
+ node = DOMTextScanner.getParentElement(node);
+ if (node !== null && node.nodeName.toUpperCase() === 'RT') {
+ node = node.parentNode;
+ if (node !== null && node.nodeName.toUpperCase() === 'RUBY') {
+ return node;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * @returns [enterable: boolean, newlines: integer]
+ * The enterable value indicates whether the content of this node should be entered.
+ * The newlines value corresponds to the number of newline characters that should be added.
+ * 1 newline corresponds to a simple new line in the layout.
+ * 2 newlines corresponds to a significant visual distinction since the previous content.
+ */
+ static getElementSeekInfo(element) {
+ let enterable = true;
+ switch (element.nodeName.toUpperCase()) {
+ case 'HEAD':
+ case 'RT':
+ case 'SCRIPT':
+ case 'STYLE':
+ return [false, 0];
+ case 'BR':
+ return [false, 1];
+ case 'TEXTAREA':
+ case 'INPUT':
+ case 'BUTTON':
+ enterable = false;
+ break;
+ }
+
+ const style = window.getComputedStyle(element);
+ const display = style.display;
+
+ const visible = (display !== 'none' && DOMTextScanner.isStyleVisible(style));
+ let newlines = 0;
+
+ if (!visible) {
+ enterable = false;
+ } else {
+ switch (style.position) {
+ case 'absolute':
+ case 'fixed':
+ case 'sticky':
+ newlines = 2;
+ break;
+ }
+ if (newlines === 0 && DOMTextScanner.doesCSSDisplayChangeLayout(display)) {
+ newlines = 1;
+ }
+ }
+
+ return [enterable, newlines];
+ }
+
+ /**
+ * Gets information about how whitespace characters are treated.
+ * @param textNode The Text node to check.
+ * @returns [preserveNewlines: boolean, preserveWhitespace: boolean]
+ * The value of preserveNewlines indicates whether or not newline characters are treated as line breaks.
+ * The value of preserveWhitespace indicates whether or not sequences of whitespace characters are collapsed.
+ */
+ static getWhitespaceSettings(textNode) {
+ const element = DOMTextScanner.getParentElement(textNode);
+ if (element !== null) {
+ const style = window.getComputedStyle(element);
+ switch (style.whiteSpace) {
+ case 'pre':
+ case 'pre-wrap':
+ case 'break-spaces':
+ return [true, true];
+ case 'pre-line':
+ return [true, false];
+ }
+ }
+ return [false, false];
+ }
+
+ /**
+ * Gets attributes for the specified character.
+ * @param character A string containing a single character.
+ * @returns An integer representing the attributes of the character.
+ * 0: Character should be ignored.
+ * 1: Character is collapsable whitespace.
+ * 2: Character should be added to the content.
+ * 3: Character should be added to the content and is a newline.
+ */
+ static getCharacterAttributes(character, preserveNewlines, preserveWhitespace) {
+ switch (character.charCodeAt(0)) {
+ case 0x09: // Tab ('\t')
+ case 0x0c: // Form feed ('\f')
+ case 0x0d: // Carriage return ('\r')
+ case 0x20: // Space (' ')
+ return preserveWhitespace ? 2 : 1;
+ case 0x0a: // Line feed ('\n')
+ return preserveNewlines ? 3 : 1;
+ case 0x200c: // Zero-width non-joiner ('\u200c')
+ return 0;
+ default: // Other
+ return 2;
+ }
+ }
+
+ /**
+ * Checks whether a given style is visible or not.
+ * This function does not check style.display === 'none'.
+ * @param style An object implementing the CSSStyleDeclaration interface.
+ * @returns true if the style should result in an element being visible, otherwise false.
+ */
+ static isStyleVisible(style) {
+ return !(
+ style.visibility === 'hidden' ||
+ parseFloat(style.opacity) <= 0 ||
+ parseFloat(style.fontSize) <= 0 ||
+ (
+ !DOMTextScanner.isStyleSelectable(style) &&
+ (
+ DOMTextScanner.isCSSColorTransparent(style.color) ||
+ DOMTextScanner.isCSSColorTransparent(style.webkitTextFillColor)
+ )
+ )
+ );
+ }
+
+ /**
+ * Checks whether a given style is selectable or not.
+ * @param style An object implementing the CSSStyleDeclaration interface.
+ * @returns true if the style is selectable, otherwise false.
+ */
+ static isStyleSelectable(style) {
+ return !(
+ style.userSelect === 'none' ||
+ style.webkitUserSelect === 'none' ||
+ style.MozUserSelect === 'none' ||
+ style.msUserSelect === 'none'
+ );
+ }
+
+ /**
+ * Checks whether a CSS color is transparent or not.
+ * @param cssColor A CSS color string, expected to be encoded in rgb(a) form.
+ * @returns true if the color is transparent, otherwise false.
+ */
+ static isCSSColorTransparent(cssColor) {
+ return (
+ typeof cssColor === 'string' &&
+ cssColor.startsWith('rgba(') &&
+ /,\s*0.?0*\)$/.test(cssColor)
+ );
+ }
+
+ /**
+ * Checks whether a CSS display value will cause a layout change for text.
+ * @param cssDisplay A CSS string corresponding to the value of the display property.
+ * @returns true if the layout is changed by this value, otherwise false.
+ */
+ static doesCSSDisplayChangeLayout(cssDisplay) {
+ let pos = cssDisplay.indexOf(' ');
+ if (pos >= 0) {
+ // Truncate to <display-outside> part
+ cssDisplay = cssDisplay.substring(0, pos);
+ }
+
+ pos = cssDisplay.indexOf('-');
+ if (pos >= 0) {
+ // Truncate to first part of kebab-case value
+ cssDisplay = cssDisplay.substring(0, pos);
+ }
+
+ switch (cssDisplay) {
+ case 'block':
+ case 'flex':
+ case 'grid':
+ case 'list': // list-item
+ case 'table': // table, table-*
+ return true;
+ case 'ruby': // rubt-*
+ return (pos >= 0);
+ default:
+ return false;
+ }
+ }
+}
diff --git a/test/data/html/test-dom-text-scanner.html b/test/data/html/test-dom-text-scanner.html
new file mode 100644
index 00000000..6b78570a
--- /dev/null
+++ b/test/data/html/test-dom-text-scanner.html
@@ -0,0 +1,393 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width,initial-scale=1" />
+ <title>Yomichan DOMTextScanner Tests</title>
+ <link rel="icon" type="image/gif" href="" />
+ <link rel="stylesheet" href="test-stylesheet.css" />
+ </head>
+<body>
+
+ <h1>Yomichan DOMTextScanner Tests</h1>
+
+ <y-test
+ data-test-data='{
+ "node": "div:nth-of-type(1)",
+ "offset": 0,
+ "length": 15,
+ "expected": {
+ "node": "div:nth-of-type(2)>div::text",
+ "offset": 3,
+ "content": "小ぢん\nまり1\n小ぢん\nまり2"
+ }
+ }'
+ >
+ <y-description>Layout newlines expected due to entering and exiting display:block nodes.</y-description>
+<div><div>小ぢん</div>まり1</div>
+<div>小ぢん<div>まり2</div></div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 13,
+ "expected": {
+ "node": "div:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1\n小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>Layout newline expected due to sequential display:block elements.</y-description>
+<div>小ぢんまり1</div><div>小ぢんまり2</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 13,
+ "expected": {
+ "node": "div:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1\n小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>Layout newline expected due to sequential display:block elements separated by a newline.</y-description>
+<div>小ぢんまり1</div>
+<div>小ぢんまり2</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "span:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 12,
+ "expected": {
+ "node": "span:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>No newlines expected due to display:inline.</y-description>
+<span>小ぢんまり1</span><span>小ぢんまり2</span>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "span:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 13,
+ "expected": {
+ "node": "span:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1 小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>No newlines expected due to white-space:normal.</y-description>
+<span>小ぢんまり1</span>
+<span>小ぢんまり2</span>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "span:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 13,
+ "expected": {
+ "node": "span:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1\n小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>Newline expected due to white-space:pre.</y-description>
+<pre>
+<span>小ぢんまり1</span>
+<span>小ぢんまり2</span>
+</pre>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "span:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 12,
+ "expected": {
+ "node": "span:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>No newlines expected due to display:inline-block. Actual layout flow cannot be determined by DOM/CSS alone.</y-description>
+<span style="display: inline-block;">小ぢんまり1</span><span style="display: inline-block;">小ぢんまり2</span>
+ </y-test>
+
+ <y-test
+ style="position: relative;"
+ data-test-data='{
+ "node": "div:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 13,
+ "expected": {
+ "node": "div:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1\n小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>Single newline expected due to display:block layout.</y-description>
+<div>小ぢんまり1</div><div style="position: relative;">小ぢんまり2</div>
+ </y-test>
+
+ <y-test
+ style="position: relative; overflow: hidden;"
+ data-test-data='{
+ "node": "div:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 14,
+ "expected": {
+ "node": "div:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1\n\n小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>Two newlines expected due to position:absolute causing a significant layout change.</y-description>
+<div>小ぢんまり1</div><div style="position: absolute;">小ぢんまり2</div>
+ </y-test>
+
+ <y-test
+ style="position: relative; overflow: hidden;"
+ data-test-data='{
+ "node": "div:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 14,
+ "expected": {
+ "node": "div:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1\n\n小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>Two newlines expected due to position:fixed causing a significant layout change.</y-description>
+<div>小ぢんまり1</div><div style="position: fixed;">小ぢんまり2</div>
+ </y-test>
+
+ <y-test
+ style="position: relative;"
+ data-test-data='{
+ "node": "div:nth-of-type(1)::text",
+ "offset": 0,
+ "length": 14,
+ "expected": {
+ "node": "div:nth-of-type(2)::text",
+ "offset": 6,
+ "content": "小ぢんまり1\n\n小ぢんまり2"
+ }
+ }'
+ >
+ <y-description>Two newlines expected due to position:sticky being able to cause a significant layout change.</y-description>
+<div>小ぢんまり1</div><div style="position: sticky;">小ぢんまり2</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "rt",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::text",
+ "offset": 5,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Scanning text starting in an &lt;rt&gt; element. Should start scanning at the start of the &lt;ruby&gt; tag instead.</y-description>
+<div><ruby>小<rp>(</rp><rt>こ</rt><rp>)</rp></ruby>ぢんまり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip &lt;script&gt; content.</y-description>
+<div>小ぢん<script>/*comment*/</script>まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip &lt;style&gt; content.</y-description>
+<div>小ぢん<style>/*comment*/</style>まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip &lt;textarea&gt; content.</y-description>
+<div>小ぢん<textarea>textarea content</textarea>まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip &lt;input&gt; content.</y-description>
+<div>小ぢん<input value="content" />まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip &lt;button&gt; content.</y-description>
+<div>小ぢん<button>content</button>まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip content with font-size:0.</y-description>
+<div>小ぢん<span style="font-size: 0;">content</span>まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip content with opacity:0.</y-description>
+<div>小ぢん<span style="opacity: 0;">content</span>まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip content with visibility:hidden.</y-description>
+<div>小ぢん<span style="visibility: hidden;">content</span>まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip content with display:none.</y-description>
+<div>小ぢん<span style="display: none;">content</span>まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Don't skip content with user-select:none.</y-description>
+<div>小ぢ<span style="user-select: none;">ん</span>まり1</div>
+ </y-test>
+
+ <y-test
+ data-test-data='{
+ "node": "div",
+ "offset": 0,
+ "length": 6,
+ "expected": {
+ "node": "div::nth-text(2)",
+ "offset": 3,
+ "content": "小ぢんまり1"
+ }
+ }'
+ >
+ <y-description>Skip content with user-select:none <em>and</em> a transparent color.</y-description>
+<div>小ぢん<span style="user-select: none; color: rgba(0, 0, 0, 0);">content</span>まり1</div>
+ </y-test>
+
+</body>
+</html> \ No newline at end of file
diff --git a/test/data/html/test-stylesheet.css b/test/data/html/test-stylesheet.css
index f63d2481..2e9a2f52 100644
--- a/test/data/html/test-stylesheet.css
+++ b/test/data/html/test-stylesheet.css
@@ -28,7 +28,9 @@ a, a:visited {
text-decoration: underline;
}
-.test {
+.test,
+y-test {
+ display: block;
background-color: #ffffff;
margin: 1em 0;
padding: 0.5em;
@@ -36,7 +38,8 @@ a, a:visited {
border-radius: 4px;
}
-.test:before {
+.test:before,
+y-test:before {
content: "Test " counter(test-id);
display: block;
counter-increment: test-id;
@@ -45,7 +48,10 @@ a, a:visited {
font-weight: bold;
}
-.description {
+.description,
+y-description {
color: #444444;
font-style: italic;
+ display: block;
+ padding-bottom: 0.5em;
}
diff --git a/test/test-dom-text-scanner.js b/test/test-dom-text-scanner.js
new file mode 100644
index 00000000..41d6e307
--- /dev/null
+++ b/test/test-dom-text-scanner.js
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2020 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+const fs = require('fs');
+const path = require('path');
+const assert = require('assert');
+const {JSDOM} = require('jsdom');
+const {VM} = require('./yomichan-vm');
+
+
+function createJSDOM(fileName) {
+ const domSource = fs.readFileSync(fileName, {encoding: 'utf8'});
+ return new JSDOM(domSource);
+}
+
+function querySelectorTextNode(element, selector) {
+ let textIndex = -1;
+ const match = /::text$|::nth-text\((\d+)\)$/.exec(selector);
+ if (match !== null) {
+ textIndex = (match[1] ? parseInt(match[1], 10) - 1 : 0);
+ selector = selector.substring(0, selector.length - match[0].length);
+ }
+ const result = element.querySelector(selector);
+ if (textIndex < 0) {
+ return result;
+ }
+ for (let n = result.firstChild; n !== null; n = n.nextSibling) {
+ if (n.nodeType === n.constructor.TEXT_NODE) {
+ if (textIndex === 0) {
+ return n;
+ }
+ --textIndex;
+ }
+ }
+ return null;
+}
+
+
+function getComputedFontSizeInPixels(window, getComputedStyle, element) {
+ for (; element !== null; element = element.parentNode) {
+ if (element.nodeType === window.Node.ELEMENT_NODE) {
+ const fontSize = getComputedStyle(element).fontSize;
+ if (fontSize.endsWith('px')) {
+ const value = parseFloat(fontSize.substring(0, fontSize.length - 2));
+ return value;
+ }
+ }
+ }
+ const defaultFontSize = 14;
+ return defaultFontSize;
+}
+
+function createAbsoluteGetComputedStyle(window) {
+ // Wrapper to convert em units to px units
+ const getComputedStyleOld = window.getComputedStyle.bind(window);
+ return (element, ...args) => {
+ const style = getComputedStyleOld(element, ...args);
+ return new Proxy(style, {
+ get: (target, property) => {
+ let result = target[property];
+ if (typeof result === 'string') {
+ result = result.replace(/([-+]?\d(?:\.\d)?(?:[eE][-+]?\d+)?)em/g, (g0, g1) => {
+ const fontSize = getComputedFontSizeInPixels(window, getComputedStyleOld, element);
+ return `${parseFloat(g1) * fontSize}px`;
+ });
+ }
+ return result;
+ }
+ });
+ };
+}
+
+
+async function testDomTextScanner(dom, {DOMTextScanner}) {
+ const document = dom.window.document;
+ for (const testElement of document.querySelectorAll('y-test')) {
+ let testData = JSON.parse(testElement.dataset.testData);
+ if (!Array.isArray(testData)) {
+ testData = [testData];
+ }
+ for (const testDataItem of testData) {
+ let {
+ node,
+ offset,
+ length,
+ forcePreserveWhitespace,
+ generateLayoutContent,
+ reversible,
+ expected: {
+ node: expectedNode,
+ offset: expectedOffset,
+ content: expectedContent
+ }
+ } = testDataItem;
+
+ node = querySelectorTextNode(testElement, node);
+ expectedNode = querySelectorTextNode(testElement, expectedNode);
+
+ // Standard test
+ {
+ const scanner = new DOMTextScanner(node, offset, forcePreserveWhitespace, generateLayoutContent);
+ scanner.seek(length);
+
+ const {node: actualNode1, offset: actualOffset1, content: actualContent1} = scanner;
+ assert.strictEqual(actualContent1, expectedContent);
+ assert.strictEqual(actualOffset1, expectedOffset);
+ assert.strictEqual(actualNode1, expectedNode);
+ }
+
+ // Substring tests
+ for (let i = 1; i <= length; ++i) {
+ const scanner = new DOMTextScanner(node, offset, forcePreserveWhitespace, generateLayoutContent);
+ scanner.seek(length - i);
+
+ const {content: actualContent} = scanner;
+ assert.strictEqual(actualContent, expectedContent.substring(0, expectedContent.length - i));
+ }
+
+ if (reversible === false) { continue; }
+
+ // Reversed test
+ {
+ const scanner = new DOMTextScanner(expectedNode, expectedOffset, forcePreserveWhitespace, generateLayoutContent);
+ scanner.seek(-length);
+
+ const {content: actualContent} = scanner;
+ assert.strictEqual(actualContent, expectedContent);
+ }
+
+ // Reversed substring tests
+ for (let i = 1; i <= length; ++i) {
+ const scanner = new DOMTextScanner(expectedNode, expectedOffset, forcePreserveWhitespace, generateLayoutContent);
+ scanner.seek(-(length - i));
+
+ const {content: actualContent} = scanner;
+ assert.strictEqual(actualContent, expectedContent.substring(i));
+ }
+ }
+ }
+}
+
+
+async function testDocument1() {
+ const dom = createJSDOM(path.join(__dirname, 'data', 'html', 'test-dom-text-scanner.html'));
+ const window = dom.window;
+ try {
+ const {document, Node, Range} = window;
+
+ window.getComputedStyle = createAbsoluteGetComputedStyle(window);
+
+ const vm = new VM({document, window, Range, Node});
+ vm.execute('fg/js/dom-text-scanner.js');
+ const DOMTextScanner = vm.get('DOMTextScanner');
+
+ await testDomTextScanner(dom, {DOMTextScanner});
+ } finally {
+ window.close();
+ }
+}
+
+
+async function main() {
+ await testDocument1();
+}
+
+
+if (require.main === module) { main(); }