diff options
-rw-r--r-- | ext/fg/js/dom-text-scanner.js | 538 | ||||
-rw-r--r-- | test/data/html/test-dom-text-scanner.html | 393 | ||||
-rw-r--r-- | test/data/html/test-stylesheet.css | 12 | ||||
-rw-r--r-- | test/test-dom-text-scanner.js | 181 |
4 files changed, 1121 insertions, 3 deletions
diff --git a/ext/fg/js/dom-text-scanner.js b/ext/fg/js/dom-text-scanner.js new file mode 100644 index 00000000..2de65041 --- /dev/null +++ b/ext/fg/js/dom-text-scanner.js @@ -0,0 +1,538 @@ +/* + * Copyright (C) 2020 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** + * A class used to scan text in a document. + */ +class DOMTextScanner { + /** + * Creates a new instance of a DOMTextScanner. + * @param node The DOM Node to start at. + * @param offset The character offset in to start at when node is a text node. + * Use 0 for non-text nodes. + */ + constructor(node, offset, forcePreserveWhitespace=false, generateLayoutContent=true) { + const ruby = DOMTextScanner.getParentRubyElement(node); + const resetOffset = (ruby !== null); + if (resetOffset) { node = ruby; } + + this._node = node; + this._offset = offset; + this._content = ''; + this._remainder = 0; + this._resetOffset = resetOffset; + this._newlines = 0; + this._lineHasWhitespace = false; + this._lineHasContent = false; + this._forcePreserveWhitespace = forcePreserveWhitespace; + this._generateLayoutContent = generateLayoutContent; + } + + /** + * Gets the current node being scanned. + * @returns A DOM Node. + */ + get node() { + return this._node; + } + + /** + * Gets the current offset corresponding to the node being scanned. + * This value is only applicable for text nodes. + * @returns An integer. + */ + get offset() { + return this._offset; + } + + /** + * Gets the accumulated content string resulting from calls to seek(). + * @returns A string. + */ + get content() { + return this._content; + } + + /** + * Seeks a given length in the document and accumulates the text content. + * @param length A positive or negative integer corresponding to how many characters + * should be added to content. Content is only added to the accumulation string, + * never removed, so mixing seek calls with differently signed length values + * may give unexpected results. + * @returns this + */ + seek(length) { + const forward = (length >= 0); + this._remainder = (forward ? length : -length); + if (length === 0) { return this; } + + const TEXT_NODE = Node.TEXT_NODE; + const ELEMENT_NODE = Node.ELEMENT_NODE; + + const generateLayoutContent = this._generateLayoutContent; + let node = this._node; + let resetOffset = this._resetOffset; + let newlines = 0; + while (node !== null) { + let enterable = false; + const nodeType = node.nodeType; + + if (nodeType === TEXT_NODE) { + if (!( + forward ? + this._seekTextNodeForward(node, resetOffset) : + this._seekTextNodeBackward(node, resetOffset) + )) { + // Length reached + break; + } + } else if (nodeType === ELEMENT_NODE) { + [enterable, newlines] = DOMTextScanner.getElementSeekInfo(node); + if (newlines > this._newlines && generateLayoutContent) { + this._newlines = newlines; + } + } + + const exitedNodes = []; + node = DOMTextScanner.getNextNode(node, forward, enterable, exitedNodes); + + for (const exitedNode of exitedNodes) { + if (exitedNode.nodeType !== ELEMENT_NODE) { continue; } + newlines = DOMTextScanner.getElementSeekInfo(exitedNode)[1]; + if (newlines > this._newlines && generateLayoutContent) { + this._newlines = newlines; + } + } + + resetOffset = true; + } + + this._node = node; + this._resetOffset = resetOffset; + + return this; + } + + // Private + + /** + * Seeks forward in a text node. + * @param textNode The text node to use. + * @param resetOffset Whether or not the text offset should be reset. + * @returns true if scanning should continue, or false if the scan length has been reached. + */ + _seekTextNodeForward(textNode, resetOffset) { + const nodeValue = textNode.nodeValue; + const nodeValueLength = nodeValue.length; + const [preserveNewlines, preserveWhitespace] = ( + this._forcePreserveWhitespace ? + [true, true] : + DOMTextScanner.getWhitespaceSettings(textNode) + ); + + let lineHasWhitespace = this._lineHasWhitespace; + let lineHasContent = this._lineHasContent; + let content = this._content; + let offset = resetOffset ? 0 : this._offset; + let remainder = this._remainder; + let newlines = this._newlines; + + while (offset < nodeValueLength) { + const char = nodeValue[offset]; + const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace); + ++offset; + + if (charAttributes === 0) { + // Character should be ignored + continue; + } else if (charAttributes === 1) { + // Character is collapsable whitespace + lineHasWhitespace = true; + } else { + // Character should be added to the content + if (newlines > 0) { + if (content.length > 0) { + const useNewlineCount = Math.min(remainder, newlines); + content += '\n'.repeat(useNewlineCount); + remainder -= useNewlineCount; + newlines -= useNewlineCount; + } else { + newlines = 0; + } + lineHasContent = false; + lineHasWhitespace = false; + if (remainder <= 0) { + --offset; // Revert character offset + break; + } + } + + lineHasContent = (charAttributes === 2); // 3 = character is a newline + + if (lineHasWhitespace) { + if (lineHasContent) { + content += ' '; + lineHasWhitespace = false; + if (--remainder <= 0) { + --offset; // Revert character offset + break; + } + } else { + lineHasWhitespace = false; + } + } + + content += char; + + if (--remainder <= 0) { break; } + } + } + + this._lineHasWhitespace = lineHasWhitespace; + this._lineHasContent = lineHasContent; + this._content = content; + this._offset = offset; + this._remainder = remainder; + this._newlines = newlines; + + return (remainder > 0); + } + + /** + * Seeks backward in a text node. + * This function is nearly the same as _seekTextNodeForward, with the following differences: + * - Iteration condition is reversed to check if offset is greater than 0. + * - offset is reset to nodeValueLength instead of 0. + * - offset is decremented instead of incremented. + * - offset is decremented before getting the character. + * - offset is reverted by incrementing instead of decrementing. + * - content string is prepended instead of appended. + * @param textNode The text node to use. + * @param resetOffset Whether or not the text offset should be reset. + * @returns true if scanning should continue, or false if the scan length has been reached. + */ + _seekTextNodeBackward(textNode, resetOffset) { + const nodeValue = textNode.nodeValue; + const nodeValueLength = nodeValue.length; + const [preserveNewlines, preserveWhitespace] = ( + this._forcePreserveWhitespace ? + [true, true] : + DOMTextScanner.getWhitespaceSettings(textNode) + ); + + let lineHasWhitespace = this._lineHasWhitespace; + let lineHasContent = this._lineHasContent; + let content = this._content; + let offset = resetOffset ? nodeValueLength : this._offset; + let remainder = this._remainder; + let newlines = this._newlines; + + while (offset > 0) { + --offset; + const char = nodeValue[offset]; + const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace); + + if (charAttributes === 0) { + // Character should be ignored + continue; + } else if (charAttributes === 1) { + // Character is collapsable whitespace + lineHasWhitespace = true; + } else { + // Character should be added to the content + if (newlines > 0) { + if (content.length > 0) { + const useNewlineCount = Math.min(remainder, newlines); + content = '\n'.repeat(useNewlineCount) + content; + remainder -= useNewlineCount; + newlines -= useNewlineCount; + } else { + newlines = 0; + } + lineHasContent = false; + lineHasWhitespace = false; + if (remainder <= 0) { + ++offset; // Revert character offset + break; + } + } + + lineHasContent = (charAttributes === 2); // 3 = character is a newline + + if (lineHasWhitespace) { + if (lineHasContent) { + content = ' ' + content; + lineHasWhitespace = false; + if (--remainder <= 0) { + ++offset; // Revert character offset + break; + } + } else { + lineHasWhitespace = false; + } + } + + content = char + content; + + if (--remainder <= 0) { break; } + } + } + + this._lineHasWhitespace = lineHasWhitespace; + this._lineHasContent = lineHasContent; + this._content = content; + this._offset = offset; + this._remainder = remainder; + this._newlines = newlines; + + return (remainder > 0); + } + + // Static helpers + + /** + * Gets the next node in the document for a specified scanning direction. + * @param node The current DOM Node. + * @param forward Whether to scan forward in the document or backward. + * @param visitChildren Whether the children of the current node should be visited. + * @param exitedNodes An array which stores nodes which were exited. + * @returns The next node in the document, or null if there is no next node. + */ + static getNextNode(node, forward, visitChildren, exitedNodes) { + let next = visitChildren ? (forward ? node.firstChild : node.lastChild) : null; + if (next === null) { + while (true) { + exitedNodes.push(node); + + next = (forward ? node.nextSibling : node.previousSibling); + if (next !== null) { break; } + + next = node.parentNode; + if (next === null) { break; } + + node = next; + } + } + return next; + } + + /** + * Gets the parent element of a given Node. + * @param node The node to check. + * @returns The parent element if one exists, otherwise null. + */ + static getParentElement(node) { + while (node !== null && node.nodeType !== Node.ELEMENT_NODE) { + node = node.parentNode; + } + return node; + } + + /** + * Gets the parent <ruby> element of a given node, if one exists. For efficiency purposes, + * this only checks the immediate parent elements and does not check all ancestors, so + * there are cases where the node may be in a ruby element but it is not returned. + * @param node The node to check. + * @returns A <ruby> node if the input node is contained in one, otherwise null. + */ + static getParentRubyElement(node) { + node = DOMTextScanner.getParentElement(node); + if (node !== null && node.nodeName.toUpperCase() === 'RT') { + node = node.parentNode; + if (node !== null && node.nodeName.toUpperCase() === 'RUBY') { + return node; + } + } + return null; + } + + /** + * @returns [enterable: boolean, newlines: integer] + * The enterable value indicates whether the content of this node should be entered. + * The newlines value corresponds to the number of newline characters that should be added. + * 1 newline corresponds to a simple new line in the layout. + * 2 newlines corresponds to a significant visual distinction since the previous content. + */ + static getElementSeekInfo(element) { + let enterable = true; + switch (element.nodeName.toUpperCase()) { + case 'HEAD': + case 'RT': + case 'SCRIPT': + case 'STYLE': + return [false, 0]; + case 'BR': + return [false, 1]; + case 'TEXTAREA': + case 'INPUT': + case 'BUTTON': + enterable = false; + break; + } + + const style = window.getComputedStyle(element); + const display = style.display; + + const visible = (display !== 'none' && DOMTextScanner.isStyleVisible(style)); + let newlines = 0; + + if (!visible) { + enterable = false; + } else { + switch (style.position) { + case 'absolute': + case 'fixed': + case 'sticky': + newlines = 2; + break; + } + if (newlines === 0 && DOMTextScanner.doesCSSDisplayChangeLayout(display)) { + newlines = 1; + } + } + + return [enterable, newlines]; + } + + /** + * Gets information about how whitespace characters are treated. + * @param textNode The Text node to check. + * @returns [preserveNewlines: boolean, preserveWhitespace: boolean] + * The value of preserveNewlines indicates whether or not newline characters are treated as line breaks. + * The value of preserveWhitespace indicates whether or not sequences of whitespace characters are collapsed. + */ + static getWhitespaceSettings(textNode) { + const element = DOMTextScanner.getParentElement(textNode); + if (element !== null) { + const style = window.getComputedStyle(element); + switch (style.whiteSpace) { + case 'pre': + case 'pre-wrap': + case 'break-spaces': + return [true, true]; + case 'pre-line': + return [true, false]; + } + } + return [false, false]; + } + + /** + * Gets attributes for the specified character. + * @param character A string containing a single character. + * @returns An integer representing the attributes of the character. + * 0: Character should be ignored. + * 1: Character is collapsable whitespace. + * 2: Character should be added to the content. + * 3: Character should be added to the content and is a newline. + */ + static getCharacterAttributes(character, preserveNewlines, preserveWhitespace) { + switch (character.charCodeAt(0)) { + case 0x09: // Tab ('\t') + case 0x0c: // Form feed ('\f') + case 0x0d: // Carriage return ('\r') + case 0x20: // Space (' ') + return preserveWhitespace ? 2 : 1; + case 0x0a: // Line feed ('\n') + return preserveNewlines ? 3 : 1; + case 0x200c: // Zero-width non-joiner ('\u200c') + return 0; + default: // Other + return 2; + } + } + + /** + * Checks whether a given style is visible or not. + * This function does not check style.display === 'none'. + * @param style An object implementing the CSSStyleDeclaration interface. + * @returns true if the style should result in an element being visible, otherwise false. + */ + static isStyleVisible(style) { + return !( + style.visibility === 'hidden' || + parseFloat(style.opacity) <= 0 || + parseFloat(style.fontSize) <= 0 || + ( + !DOMTextScanner.isStyleSelectable(style) && + ( + DOMTextScanner.isCSSColorTransparent(style.color) || + DOMTextScanner.isCSSColorTransparent(style.webkitTextFillColor) + ) + ) + ); + } + + /** + * Checks whether a given style is selectable or not. + * @param style An object implementing the CSSStyleDeclaration interface. + * @returns true if the style is selectable, otherwise false. + */ + static isStyleSelectable(style) { + return !( + style.userSelect === 'none' || + style.webkitUserSelect === 'none' || + style.MozUserSelect === 'none' || + style.msUserSelect === 'none' + ); + } + + /** + * Checks whether a CSS color is transparent or not. + * @param cssColor A CSS color string, expected to be encoded in rgb(a) form. + * @returns true if the color is transparent, otherwise false. + */ + static isCSSColorTransparent(cssColor) { + return ( + typeof cssColor === 'string' && + cssColor.startsWith('rgba(') && + /,\s*0.?0*\)$/.test(cssColor) + ); + } + + /** + * Checks whether a CSS display value will cause a layout change for text. + * @param cssDisplay A CSS string corresponding to the value of the display property. + * @returns true if the layout is changed by this value, otherwise false. + */ + static doesCSSDisplayChangeLayout(cssDisplay) { + let pos = cssDisplay.indexOf(' '); + if (pos >= 0) { + // Truncate to <display-outside> part + cssDisplay = cssDisplay.substring(0, pos); + } + + pos = cssDisplay.indexOf('-'); + if (pos >= 0) { + // Truncate to first part of kebab-case value + cssDisplay = cssDisplay.substring(0, pos); + } + + switch (cssDisplay) { + case 'block': + case 'flex': + case 'grid': + case 'list': // list-item + case 'table': // table, table-* + return true; + case 'ruby': // rubt-* + return (pos >= 0); + default: + return false; + } + } +} diff --git a/test/data/html/test-dom-text-scanner.html b/test/data/html/test-dom-text-scanner.html new file mode 100644 index 00000000..6b78570a --- /dev/null +++ b/test/data/html/test-dom-text-scanner.html @@ -0,0 +1,393 @@ +<!DOCTYPE html> +<html> + <head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width,initial-scale=1" /> + <title>Yomichan DOMTextScanner Tests</title> + <link rel="icon" type="image/gif" href="data:image/gif;base64,R0lGODlhEAAQAKEBAAAAAP///////////yH5BAEKAAIALAAAAAAQABAAAAImFI6Zpt0B4YkS0TCpq07xbmEgcGVRUpLaI46ZG7ppalY0jDCwUAAAOw==" /> + <link rel="stylesheet" href="test-stylesheet.css" /> + </head> +<body> + + <h1>Yomichan DOMTextScanner Tests</h1> + + <y-test + data-test-data='{ + "node": "div:nth-of-type(1)", + "offset": 0, + "length": 15, + "expected": { + "node": "div:nth-of-type(2)>div::text", + "offset": 3, + "content": "小ぢん\nまり1\n小ぢん\nまり2" + } + }' + > + <y-description>Layout newlines expected due to entering and exiting display:block nodes.</y-description> +<div><div>小ぢん</div>まり1</div> +<div>小ぢん<div>まり2</div></div> + </y-test> + + <y-test + data-test-data='{ + "node": "div:nth-of-type(1)::text", + "offset": 0, + "length": 13, + "expected": { + "node": "div:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1\n小ぢんまり2" + } + }' + > + <y-description>Layout newline expected due to sequential display:block elements.</y-description> +<div>小ぢんまり1</div><div>小ぢんまり2</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div:nth-of-type(1)::text", + "offset": 0, + "length": 13, + "expected": { + "node": "div:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1\n小ぢんまり2" + } + }' + > + <y-description>Layout newline expected due to sequential display:block elements separated by a newline.</y-description> +<div>小ぢんまり1</div> +<div>小ぢんまり2</div> + </y-test> + + <y-test + data-test-data='{ + "node": "span:nth-of-type(1)::text", + "offset": 0, + "length": 12, + "expected": { + "node": "span:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1小ぢんまり2" + } + }' + > + <y-description>No newlines expected due to display:inline.</y-description> +<span>小ぢんまり1</span><span>小ぢんまり2</span> + </y-test> + + <y-test + data-test-data='{ + "node": "span:nth-of-type(1)::text", + "offset": 0, + "length": 13, + "expected": { + "node": "span:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1\n小ぢんまり2" + } + }' + > + <y-description>No newlines expected due to white-space:normal.</y-description> +<span>小ぢんまり1</span> +<span>小ぢんまり2</span> + </y-test> + + <y-test + data-test-data='{ + "node": "span:nth-of-type(1)::text", + "offset": 0, + "length": 13, + "expected": { + "node": "span:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1\n小ぢんまり2" + } + }' + > + <y-description>Newline expected due to white-space:pre.</y-description> +<pre> +<span>小ぢんまり1</span> +<span>小ぢんまり2</span> +</pre> + </y-test> + + <y-test + data-test-data='{ + "node": "span:nth-of-type(1)::text", + "offset": 0, + "length": 12, + "expected": { + "node": "span:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1小ぢんまり2" + } + }' + > + <y-description>No newlines expected due to display:inline-block. Actual layout flow cannot be determined by DOM/CSS alone.</y-description> +<span style="display: inline-block;">小ぢんまり1</span><span style="display: inline-block;">小ぢんまり2</span> + </y-test> + + <y-test + style="position: relative;" + data-test-data='{ + "node": "div:nth-of-type(1)::text", + "offset": 0, + "length": 13, + "expected": { + "node": "div:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1\n小ぢんまり2" + } + }' + > + <y-description>Single newline expected due to display:block layout.</y-description> +<div>小ぢんまり1</div><div style="position: relative;">小ぢんまり2</div> + </y-test> + + <y-test + style="position: relative; overflow: hidden;" + data-test-data='{ + "node": "div:nth-of-type(1)::text", + "offset": 0, + "length": 14, + "expected": { + "node": "div:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1\n\n小ぢんまり2" + } + }' + > + <y-description>Two newlines expected due to position:absolute causing a significant layout change.</y-description> +<div>小ぢんまり1</div><div style="position: absolute;">小ぢんまり2</div> + </y-test> + + <y-test + style="position: relative; overflow: hidden;" + data-test-data='{ + "node": "div:nth-of-type(1)::text", + "offset": 0, + "length": 14, + "expected": { + "node": "div:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1\n\n小ぢんまり2" + } + }' + > + <y-description>Two newlines expected due to position:fixed causing a significant layout change.</y-description> +<div>小ぢんまり1</div><div style="position: fixed;">小ぢんまり2</div> + </y-test> + + <y-test + style="position: relative;" + data-test-data='{ + "node": "div:nth-of-type(1)::text", + "offset": 0, + "length": 14, + "expected": { + "node": "div:nth-of-type(2)::text", + "offset": 6, + "content": "小ぢんまり1\n\n小ぢんまり2" + } + }' + > + <y-description>Two newlines expected due to position:sticky being able to cause a significant layout change.</y-description> +<div>小ぢんまり1</div><div style="position: sticky;">小ぢんまり2</div> + </y-test> + + <y-test + data-test-data='{ + "node": "rt", + "offset": 0, + "length": 6, + "expected": { + "node": "div::text", + "offset": 5, + "content": "小ぢんまり1" + } + }' + > + <y-description>Scanning text starting in an <rt> element. Should start scanning at the start of the <ruby> tag instead.</y-description> +<div><ruby>小<rp>(</rp><rt>こ</rt><rp>)</rp></ruby>ぢんまり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip <script> content.</y-description> +<div>小ぢん<script>/*comment*/</script>まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip <style> content.</y-description> +<div>小ぢん<style>/*comment*/</style>まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip <textarea> content.</y-description> +<div>小ぢん<textarea>textarea content</textarea>まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip <input> content.</y-description> +<div>小ぢん<input value="content" />まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip <button> content.</y-description> +<div>小ぢん<button>content</button>まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip content with font-size:0.</y-description> +<div>小ぢん<span style="font-size: 0;">content</span>まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip content with opacity:0.</y-description> +<div>小ぢん<span style="opacity: 0;">content</span>まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip content with visibility:hidden.</y-description> +<div>小ぢん<span style="visibility: hidden;">content</span>まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip content with display:none.</y-description> +<div>小ぢん<span style="display: none;">content</span>まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Don't skip content with user-select:none.</y-description> +<div>小ぢ<span style="user-select: none;">ん</span>まり1</div> + </y-test> + + <y-test + data-test-data='{ + "node": "div", + "offset": 0, + "length": 6, + "expected": { + "node": "div::nth-text(2)", + "offset": 3, + "content": "小ぢんまり1" + } + }' + > + <y-description>Skip content with user-select:none <em>and</em> a transparent color.</y-description> +<div>小ぢん<span style="user-select: none; color: rgba(0, 0, 0, 0);">content</span>まり1</div> + </y-test> + +</body> +</html>
\ No newline at end of file diff --git a/test/data/html/test-stylesheet.css b/test/data/html/test-stylesheet.css index f63d2481..2e9a2f52 100644 --- a/test/data/html/test-stylesheet.css +++ b/test/data/html/test-stylesheet.css @@ -28,7 +28,9 @@ a, a:visited { text-decoration: underline; } -.test { +.test, +y-test { + display: block; background-color: #ffffff; margin: 1em 0; padding: 0.5em; @@ -36,7 +38,8 @@ a, a:visited { border-radius: 4px; } -.test:before { +.test:before, +y-test:before { content: "Test " counter(test-id); display: block; counter-increment: test-id; @@ -45,7 +48,10 @@ a, a:visited { font-weight: bold; } -.description { +.description, +y-description { color: #444444; font-style: italic; + display: block; + padding-bottom: 0.5em; } diff --git a/test/test-dom-text-scanner.js b/test/test-dom-text-scanner.js new file mode 100644 index 00000000..41d6e307 --- /dev/null +++ b/test/test-dom-text-scanner.js @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2020 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +const fs = require('fs'); +const path = require('path'); +const assert = require('assert'); +const {JSDOM} = require('jsdom'); +const {VM} = require('./yomichan-vm'); + + +function createJSDOM(fileName) { + const domSource = fs.readFileSync(fileName, {encoding: 'utf8'}); + return new JSDOM(domSource); +} + +function querySelectorTextNode(element, selector) { + let textIndex = -1; + const match = /::text$|::nth-text\((\d+)\)$/.exec(selector); + if (match !== null) { + textIndex = (match[1] ? parseInt(match[1], 10) - 1 : 0); + selector = selector.substring(0, selector.length - match[0].length); + } + const result = element.querySelector(selector); + if (textIndex < 0) { + return result; + } + for (let n = result.firstChild; n !== null; n = n.nextSibling) { + if (n.nodeType === n.constructor.TEXT_NODE) { + if (textIndex === 0) { + return n; + } + --textIndex; + } + } + return null; +} + + +function getComputedFontSizeInPixels(window, getComputedStyle, element) { + for (; element !== null; element = element.parentNode) { + if (element.nodeType === window.Node.ELEMENT_NODE) { + const fontSize = getComputedStyle(element).fontSize; + if (fontSize.endsWith('px')) { + const value = parseFloat(fontSize.substring(0, fontSize.length - 2)); + return value; + } + } + } + const defaultFontSize = 14; + return defaultFontSize; +} + +function createAbsoluteGetComputedStyle(window) { + // Wrapper to convert em units to px units + const getComputedStyleOld = window.getComputedStyle.bind(window); + return (element, ...args) => { + const style = getComputedStyleOld(element, ...args); + return new Proxy(style, { + get: (target, property) => { + let result = target[property]; + if (typeof result === 'string') { + result = result.replace(/([-+]?\d(?:\.\d)?(?:[eE][-+]?\d+)?)em/g, (g0, g1) => { + const fontSize = getComputedFontSizeInPixels(window, getComputedStyleOld, element); + return `${parseFloat(g1) * fontSize}px`; + }); + } + return result; + } + }); + }; +} + + +async function testDomTextScanner(dom, {DOMTextScanner}) { + const document = dom.window.document; + for (const testElement of document.querySelectorAll('y-test')) { + let testData = JSON.parse(testElement.dataset.testData); + if (!Array.isArray(testData)) { + testData = [testData]; + } + for (const testDataItem of testData) { + let { + node, + offset, + length, + forcePreserveWhitespace, + generateLayoutContent, + reversible, + expected: { + node: expectedNode, + offset: expectedOffset, + content: expectedContent + } + } = testDataItem; + + node = querySelectorTextNode(testElement, node); + expectedNode = querySelectorTextNode(testElement, expectedNode); + + // Standard test + { + const scanner = new DOMTextScanner(node, offset, forcePreserveWhitespace, generateLayoutContent); + scanner.seek(length); + + const {node: actualNode1, offset: actualOffset1, content: actualContent1} = scanner; + assert.strictEqual(actualContent1, expectedContent); + assert.strictEqual(actualOffset1, expectedOffset); + assert.strictEqual(actualNode1, expectedNode); + } + + // Substring tests + for (let i = 1; i <= length; ++i) { + const scanner = new DOMTextScanner(node, offset, forcePreserveWhitespace, generateLayoutContent); + scanner.seek(length - i); + + const {content: actualContent} = scanner; + assert.strictEqual(actualContent, expectedContent.substring(0, expectedContent.length - i)); + } + + if (reversible === false) { continue; } + + // Reversed test + { + const scanner = new DOMTextScanner(expectedNode, expectedOffset, forcePreserveWhitespace, generateLayoutContent); + scanner.seek(-length); + + const {content: actualContent} = scanner; + assert.strictEqual(actualContent, expectedContent); + } + + // Reversed substring tests + for (let i = 1; i <= length; ++i) { + const scanner = new DOMTextScanner(expectedNode, expectedOffset, forcePreserveWhitespace, generateLayoutContent); + scanner.seek(-(length - i)); + + const {content: actualContent} = scanner; + assert.strictEqual(actualContent, expectedContent.substring(i)); + } + } + } +} + + +async function testDocument1() { + const dom = createJSDOM(path.join(__dirname, 'data', 'html', 'test-dom-text-scanner.html')); + const window = dom.window; + try { + const {document, Node, Range} = window; + + window.getComputedStyle = createAbsoluteGetComputedStyle(window); + + const vm = new VM({document, window, Range, Node}); + vm.execute('fg/js/dom-text-scanner.js'); + const DOMTextScanner = vm.get('DOMTextScanner'); + + await testDomTextScanner(dom, {DOMTextScanner}); + } finally { + window.close(); + } +} + + +async function main() { + await testDocument1(); +} + + +if (require.main === module) { main(); } |