diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-05-02 13:05:43 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-05-02 13:05:43 -0400 | 
| commit | d4ae9aa501ece99ea6c5e6b8fb01c3005f5b7f03 (patch) | |
| tree | f96211038ffac0be88da912cb40bd3980c212c18 | |
| parent | d581bffa15419b3b55773f1ed08a2e787e574f1f (diff) | |
DOMTextScanner (#458)
* Create new class for scanning text in a document
* Update test styles
* Add tests
| -rw-r--r-- | ext/fg/js/dom-text-scanner.js | 538 | ||||
| -rw-r--r-- | test/data/html/test-dom-text-scanner.html | 393 | ||||
| -rw-r--r-- | test/data/html/test-stylesheet.css | 12 | ||||
| -rw-r--r-- | test/test-dom-text-scanner.js | 181 | 
4 files changed, 1121 insertions, 3 deletions
/*
 * Copyright (C) 2020  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/**
 * A class used to scan text in a document.
 *
 * The scanner walks the DOM tree starting at a given node, accumulating the
 * characters it encounters into a content string. Collapsible whitespace is
 * collapsed, invisible/non-enterable elements are skipped, and (optionally)
 * newline characters are synthesized where the CSS layout implies a line break.
 */
class DOMTextScanner {
    /**
     * Creates a new instance of a DOMTextScanner.
     * If the node is located inside an <rt> element whose parent is a <ruby> element,
     * scanning starts at the <ruby> element instead and the offset is reset.
     * @param node The DOM Node to start at.
     * @param offset The character offset to start at when node is a text node.
     *   Use 0 for non-text nodes.
     * @param forcePreserveWhitespace When true, whitespace and newline characters are always
     *   added to the content as-is, without consulting the computed white-space style.
     * @param generateLayoutContent When true, newline characters are generated for elements
     *   whose style implies a layout change (see getElementSeekInfo).
     */
    constructor(node, offset, forcePreserveWhitespace=false, generateLayoutContent=true) {
        const ruby = DOMTextScanner.getParentRubyElement(node);
        const resetOffset = (ruby !== null);
        if (resetOffset) { node = ruby; }

        this._node = node;
        this._offset = offset;
        this._content = '';
        // Number of characters which still have to be added by the current seek() call.
        this._remainder = 0;
        // Whether the offset should be reset when the next text node is scanned.
        this._resetOffset = resetOffset;
        // Number of pending layout newlines to emit before the next content character.
        this._newlines = 0;
        this._lineHasWhitespace = false;
        this._lineHasContent = false;
        this._forcePreserveWhitespace = forcePreserveWhitespace;
        this._generateLayoutContent = generateLayoutContent;
    }

    /**
     * Gets the current node being scanned.
     * @returns A DOM Node, or null if a previous seek() ran out of nodes.
     */
    get node() {
        return this._node;
    }

    /**
     * Gets the current offset corresponding to the node being scanned.
     * This value is only applicable for text nodes.
     * @returns An integer.
     */
    get offset() {
        return this._offset;
    }

    /**
     * Gets the accumulated content string resulting from calls to seek().
     * @returns A string.
     */
    get content() {
        return this._content;
    }

    /**
     * Seeks a given length in the document and accumulates the text content.
     * After the call, node and offset refer to the position where scanning stopped;
     * node is null if the end of the document was reached before length was satisfied.
     * @param length A positive or negative integer corresponding to how many characters
     *   should be added to content. Content is only added to the accumulation string,
     *   never removed, so mixing seek calls with differently signed length values
     *   may give unexpected results.
     * @returns this
     */
    seek(length) {
        const forward = (length >= 0);
        this._remainder = (forward ? length : -length);
        if (length === 0) { return this; }

        const TEXT_NODE = Node.TEXT_NODE;
        const ELEMENT_NODE = Node.ELEMENT_NODE;

        const generateLayoutContent = this._generateLayoutContent;
        let node = this._node;
        let resetOffset = this._resetOffset;
        let newlines = 0;
        while (node !== null) {
            let enterable = false;
            const nodeType = node.nodeType;

            if (nodeType === TEXT_NODE) {
                if (!(
                    forward ?
                    this._seekTextNodeForward(node, resetOffset) :
                    this._seekTextNodeBackward(node, resetOffset)
                )) {
                    // Length reached
                    break;
                }
            } else if (nodeType === ELEMENT_NODE) {
                [enterable, newlines] = DOMTextScanner.getElementSeekInfo(node);
                // Only the largest pending newline count is kept; counts do not accumulate.
                if (newlines > this._newlines && generateLayoutContent) {
                    this._newlines = newlines;
                }
            }

            const exitedNodes = [];
            node = DOMTextScanner.getNextNode(node, forward, enterable, exitedNodes);

            // Leaving an element can imply a layout break just like entering one.
            for (const exitedNode of exitedNodes) {
                if (exitedNode.nodeType !== ELEMENT_NODE) { continue; }
                newlines = DOMTextScanner.getElementSeekInfo(exitedNode)[1];
                if (newlines > this._newlines && generateLayoutContent) {
                    this._newlines = newlines;
                }
            }

            // Every node after the first one is scanned from its start (or end when going backward).
            resetOffset = true;
        }

        this._node = node;
        this._resetOffset = resetOffset;

        return this;
    }

    // Private

    /**
     * Seeks forward in a text node.
     * @param textNode The text node to use.
     * @param resetOffset Whether or not the text offset should be reset.
     * @returns true if scanning should continue, or false if the scan length has been reached.
     */
    _seekTextNodeForward(textNode, resetOffset) {
        const nodeValue = textNode.nodeValue;
        const nodeValueLength = nodeValue.length;
        const [preserveNewlines, preserveWhitespace] = (
            this._forcePreserveWhitespace ?
            [true, true] :
            DOMTextScanner.getWhitespaceSettings(textNode)
        );

        let lineHasWhitespace = this._lineHasWhitespace;
        let lineHasContent = this._lineHasContent;
        let content = this._content;
        let offset = resetOffset ? 0 : this._offset;
        let remainder = this._remainder;
        let newlines = this._newlines;

        while (offset < nodeValueLength) {
            const char = nodeValue[offset];
            const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
            ++offset;

            if (charAttributes === 0) {
                // Character should be ignored
                continue;
            } else if (charAttributes === 1) {
                // Character is collapsable whitespace
                lineHasWhitespace = true;
            } else {
                // Character should be added to the content
                if (newlines > 0) {
                    if (content.length > 0) {
                        // Pending layout newlines count towards the requested length.
                        const useNewlineCount = Math.min(remainder, newlines);
                        content += '\n'.repeat(useNewlineCount);
                        remainder -= useNewlineCount;
                        newlines -= useNewlineCount;
                    } else {
                        // Never start the content with layout newlines.
                        newlines = 0;
                    }
                    lineHasContent = false;
                    lineHasWhitespace = false;
                    if (remainder <= 0) {
                        --offset; // Revert character offset
                        break;
                    }
                }

                lineHasContent = (charAttributes === 2); // 3 = character is a newline

                if (lineHasWhitespace) {
                    if (lineHasContent) {
                        // A run of collapsed whitespace becomes a single space.
                        content += ' ';
                        lineHasWhitespace = false;
                        if (--remainder <= 0) {
                            --offset; // Revert character offset
                            break;
                        }
                    } else {
                        lineHasWhitespace = false;
                    }
                }

                content += char;

                if (--remainder <= 0) { break; }
            }
        }

        this._lineHasWhitespace = lineHasWhitespace;
        this._lineHasContent = lineHasContent;
        this._content = content;
        this._offset = offset;
        this._remainder = remainder;
        this._newlines = newlines;

        return (remainder > 0);
    }

    /**
     * Seeks backward in a text node.
     * This function is nearly the same as _seekTextNodeForward, with the following differences:
     * - Iteration condition is reversed to check if offset is greater than 0.
     * - offset is reset to nodeValueLength instead of 0.
     * - offset is decremented instead of incremented.
     * - offset is decremented before getting the character.
     * - offset is reverted by incrementing instead of decrementing.
     * - content string is prepended instead of appended.
     * @param textNode The text node to use.
     * @param resetOffset Whether or not the text offset should be reset.
     * @returns true if scanning should continue, or false if the scan length has been reached.
     */
    _seekTextNodeBackward(textNode, resetOffset) {
        const nodeValue = textNode.nodeValue;
        const nodeValueLength = nodeValue.length;
        const [preserveNewlines, preserveWhitespace] = (
            this._forcePreserveWhitespace ?
            [true, true] :
            DOMTextScanner.getWhitespaceSettings(textNode)
        );

        let lineHasWhitespace = this._lineHasWhitespace;
        let lineHasContent = this._lineHasContent;
        let content = this._content;
        let offset = resetOffset ? nodeValueLength : this._offset;
        let remainder = this._remainder;
        let newlines = this._newlines;

        while (offset > 0) {
            --offset;
            const char = nodeValue[offset];
            const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);

            if (charAttributes === 0) {
                // Character should be ignored
                continue;
            } else if (charAttributes === 1) {
                // Character is collapsable whitespace
                lineHasWhitespace = true;
            } else {
                // Character should be added to the content
                if (newlines > 0) {
                    if (content.length > 0) {
                        // Pending layout newlines count towards the requested length.
                        const useNewlineCount = Math.min(remainder, newlines);
                        content = '\n'.repeat(useNewlineCount) + content;
                        remainder -= useNewlineCount;
                        newlines -= useNewlineCount;
                    } else {
                        // Never end the content with layout newlines.
                        newlines = 0;
                    }
                    lineHasContent = false;
                    lineHasWhitespace = false;
                    if (remainder <= 0) {
                        ++offset; // Revert character offset
                        break;
                    }
                }

                lineHasContent = (charAttributes === 2); // 3 = character is a newline

                if (lineHasWhitespace) {
                    if (lineHasContent) {
                        // A run of collapsed whitespace becomes a single space.
                        content = ' ' + content;
                        lineHasWhitespace = false;
                        if (--remainder <= 0) {
                            ++offset; // Revert character offset
                            break;
                        }
                    } else {
                        lineHasWhitespace = false;
                    }
                }

                content = char + content;

                if (--remainder <= 0) { break; }
            }
        }

        this._lineHasWhitespace = lineHasWhitespace;
        this._lineHasContent = lineHasContent;
        this._content = content;
        this._offset = offset;
        this._remainder = remainder;
        this._newlines = newlines;

        return (remainder > 0);
    }

    // Static helpers

    /**
     * Gets the next node in the document for a specified scanning direction.
     * @param node The current DOM Node.
     * @param forward Whether to scan forward in the document or backward.
     * @param visitChildren Whether the children of the current node should be visited.
     * @param exitedNodes An array which stores nodes which were exited.
     *   Nodes are appended in the order they are left, starting with the current node.
     * @returns The next node in the document, or null if there is no next node.
     */
    static getNextNode(node, forward, visitChildren, exitedNodes) {
        let next = visitChildren ? (forward ? node.firstChild : node.lastChild) : null;
        if (next === null) {
            // No child to enter: walk up the tree until a sibling in the scan direction exists.
            while (true) {
                exitedNodes.push(node);

                next = (forward ? node.nextSibling : node.previousSibling);
                if (next !== null) { break; }

                next = node.parentNode;
                if (next === null) { break; }

                node = next;
            }
        }
        return next;
    }

    /**
     * Gets the closest element for a given Node, starting with the node itself.
     * @param node The node to check.
     * @returns The node itself if it is an element, otherwise its nearest ancestor element,
     *   or null if no such element exists.
     */
    static getParentElement(node) {
        while (node !== null && node.nodeType !== Node.ELEMENT_NODE) {
            node = node.parentNode;
        }
        return node;
    }

    /**
     * Gets the parent <ruby> element of a given node, if one exists. For efficiency purposes,
     * this only checks the immediate parent elements and does not check all ancestors, so
     * there are cases where the node may be in a ruby element but it is not returned.
     * Only the case of a node inside an <rt> element directly inside a <ruby> element is detected.
     * @param node The node to check.
     * @returns A <ruby> node if the input node is contained in one, otherwise null.
     */
    static getParentRubyElement(node) {
        node = DOMTextScanner.getParentElement(node);
        if (node !== null && node.nodeName.toUpperCase() === 'RT') {
            node = node.parentNode;
            if (node !== null && node.nodeName.toUpperCase() === 'RUBY') {
                return node;
            }
        }
        return null;
    }

    /**
     * Gets seek information about an element, based on its tag name and computed style.
     * @param element The element to check.
     * @returns [enterable: boolean, newlines: integer]
     *   The enterable value indicates whether the content of this node should be entered.
     *   The newlines value corresponds to the number of newline characters that should be added.
     *     1 newline corresponds to a simple new line in the layout.
     *     2 newlines corresponds to a significant visual distinction since the previous content.
     */
    static getElementSeekInfo(element) {
        let enterable = true;
        switch (element.nodeName.toUpperCase()) {
            case 'HEAD':
            case 'RT':
            case 'SCRIPT':
            case 'STYLE':
                return [false, 0];
            case 'BR':
                return [false, 1];
            case 'TEXTAREA':
            case 'INPUT':
            case 'BUTTON':
                // Content is not entered, but the element's style can still produce newlines.
                enterable = false;
                break;
        }

        const style = window.getComputedStyle(element);
        const display = style.display;

        const visible = (display !== 'none' && DOMTextScanner.isStyleVisible(style));
        let newlines = 0;

        if (!visible) {
            enterable = false;
        } else {
            switch (style.position) {
                case 'absolute':
                case 'fixed':
                case 'sticky':
                    newlines = 2;
                    break;
            }
            if (newlines === 0 && DOMTextScanner.doesCSSDisplayChangeLayout(display)) {
                newlines = 1;
            }
        }

        return [enterable, newlines];
    }

    /**
     * Gets information about how whitespace characters are treated.
     * @param textNode The Text node to check.
     * @returns [preserveNewlines: boolean, preserveWhitespace: boolean]
     *   The value of preserveNewlines indicates whether or not newline characters are treated as line breaks.
     *   The value of preserveWhitespace indicates whether or not sequences of whitespace characters are preserved
     *   instead of being collapsed.
     */
    static getWhitespaceSettings(textNode) {
        const element = DOMTextScanner.getParentElement(textNode);
        if (element !== null) {
            const style = window.getComputedStyle(element);
            switch (style.whiteSpace) {
                case 'pre':
                case 'pre-wrap':
                case 'break-spaces':
                    return [true, true];
                case 'pre-line':
                    return [true, false];
            }
        }
        return [false, false];
    }

    /**
     * Gets attributes for the specified character.
     * @param character A string containing a single character.
     * @param preserveNewlines Whether line feed characters are treated as line breaks.
     * @param preserveWhitespace Whether other whitespace characters are kept as content.
     * @returns An integer representing the attributes of the character.
     *   0: Character should be ignored.
     *   1: Character is collapsable whitespace.
     *   2: Character should be added to the content.
     *   3: Character should be added to the content and is a newline.
     */
    static getCharacterAttributes(character, preserveNewlines, preserveWhitespace) {
        switch (character.charCodeAt(0)) {
            case 0x09: // Tab ('\t')
            case 0x0c: // Form feed ('\f')
            case 0x0d: // Carriage return ('\r')
            case 0x20: // Space (' ')
                return preserveWhitespace ? 2 : 1;
            case 0x0a: // Line feed ('\n')
                return preserveNewlines ? 3 : 1;
            case 0x200c: // Zero-width non-joiner ('\u200c')
                return 0;
            default: // Other
                return 2;
        }
    }

    /**
     * Checks whether a given style is visible or not.
     * This function does not check style.display === 'none'.
     * @param style An object implementing the CSSStyleDeclaration interface.
     * @returns true if the style should result in an element being visible, otherwise false.
     */
    static isStyleVisible(style) {
        return !(
            style.visibility === 'hidden' ||
            parseFloat(style.opacity) <= 0 ||
            parseFloat(style.fontSize) <= 0 ||
            (
                // Transparent text only counts as hidden when it also cannot be selected.
                !DOMTextScanner.isStyleSelectable(style) &&
                (
                    DOMTextScanner.isCSSColorTransparent(style.color) ||
                    DOMTextScanner.isCSSColorTransparent(style.webkitTextFillColor)
                )
            )
        );
    }

    /**
     * Checks whether a given style is selectable or not.
     * @param style An object implementing the CSSStyleDeclaration interface.
     * @returns true if the style is selectable, otherwise false.
     */
    static isStyleSelectable(style) {
        return !(
            style.userSelect === 'none' ||
            style.webkitUserSelect === 'none' ||
            style.MozUserSelect === 'none' ||
            style.msUserSelect === 'none'
        );
    }

    /**
     * Checks whether a CSS color is transparent or not.
     * @param cssColor A CSS color string, expected to be encoded in rgb(a) form.
     *   Non-string values are treated as not transparent.
     * @returns true if the color is transparent, otherwise false.
     */
    static isCSSColorTransparent(cssColor) {
        return (
            typeof cssColor === 'string' &&
            cssColor.startsWith('rgba(') &&
            // The alpha component must be 0, optionally followed by a decimal point and zeros.
            // The decimal point is escaped so that values such as "01" are not treated as 0.
            /,\s*0\.?0*\)$/.test(cssColor)
        );
    }

    /**
     * Checks whether a CSS display value will cause a layout change for text.
     * @param cssDisplay A CSS string corresponding to the value of the display property.
     * @returns true if the layout is changed by this value, otherwise false.
     */
    static doesCSSDisplayChangeLayout(cssDisplay) {
        let pos = cssDisplay.indexOf(' ');
        if (pos >= 0) {
            // Truncate to <display-outside> part
            cssDisplay = cssDisplay.substring(0, pos);
        }

        pos = cssDisplay.indexOf('-');
        if (pos >= 0) {
            // Truncate to first part of kebab-case value
            cssDisplay = cssDisplay.substring(0, pos);
        }

        switch (cssDisplay) {
            case 'block':
            case 'flex':
            case 'grid':
            case 'list': // list-item
            case 'table': // table, table-*
                return true;
            case 'ruby': // ruby-*
                // Plain "ruby" does not change the layout; only the hyphenated ruby-* values do.
                return (pos >= 0);
            default:
                return false;
        }
    }
}
   "node": "div:nth-of-type(1)::text", +            "offset": 0, +            "length": 13, +            "expected": { +                "node": "div:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1\n小ぢんまり2" +            } +        }' +    > +        <y-description>Layout newline expected due to sequential display:block elements.</y-description> +<div>小ぢんまり1</div><div>小ぢんまり2</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div:nth-of-type(1)::text", +            "offset": 0, +            "length": 13, +            "expected": { +                "node": "div:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1\n小ぢんまり2" +            } +        }' +    > +        <y-description>Layout newline expected due to sequential display:block elements separated by a newline.</y-description> +<div>小ぢんまり1</div> +<div>小ぢんまり2</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "span:nth-of-type(1)::text", +            "offset": 0, +            "length": 12, +            "expected": { +                "node": "span:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1小ぢんまり2" +            } +        }' +    > +        <y-description>No newlines expected due to display:inline.</y-description> +<span>小ぢんまり1</span><span>小ぢんまり2</span> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "span:nth-of-type(1)::text", +            "offset": 0, +            "length": 13, +            "expected": { +                "node": "span:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1\n小ぢんまり2" +            } +        }' +    > +        <y-description>No newlines expected due to white-space:normal.</y-description> +<span>小ぢんまり1</span> +<span>小ぢんまり2</span> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "span:nth-of-type(1)::text", +  
          "offset": 0, +            "length": 13, +            "expected": { +                "node": "span:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1\n小ぢんまり2" +            } +        }' +    > +        <y-description>Newline expected due to white-space:pre.</y-description> +<pre> +<span>小ぢんまり1</span> +<span>小ぢんまり2</span> +</pre> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "span:nth-of-type(1)::text", +            "offset": 0, +            "length": 12, +            "expected": { +                "node": "span:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1小ぢんまり2" +            } +        }' +    > +        <y-description>No newlines expected due to display:inline-block. Actual layout flow cannot be determined by DOM/CSS alone.</y-description> +<span style="display: inline-block;">小ぢんまり1</span><span style="display: inline-block;">小ぢんまり2</span> +    </y-test> + +    <y-test +        style="position: relative;" +        data-test-data='{ +            "node": "div:nth-of-type(1)::text", +            "offset": 0, +            "length": 13, +            "expected": { +                "node": "div:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1\n小ぢんまり2" +            } +        }' +    > +        <y-description>Single newline expected due to display:block layout.</y-description> +<div>小ぢんまり1</div><div style="position: relative;">小ぢんまり2</div> +    </y-test> + +    <y-test +        style="position: relative; overflow: hidden;" +        data-test-data='{ +            "node": "div:nth-of-type(1)::text", +            "offset": 0, +            "length": 14, +            "expected": { +                "node": "div:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1\n\n小ぢんまり2" +            } +        }' +    > +        <y-description>Two newlines expected due to position:absolute 
causing a significant layout change.</y-description> +<div>小ぢんまり1</div><div style="position: absolute;">小ぢんまり2</div> +    </y-test> + +    <y-test +        style="position: relative; overflow: hidden;" +        data-test-data='{ +            "node": "div:nth-of-type(1)::text", +            "offset": 0, +            "length": 14, +            "expected": { +                "node": "div:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1\n\n小ぢんまり2" +            } +        }' +    > +        <y-description>Two newlines expected due to position:fixed causing a significant layout change.</y-description> +<div>小ぢんまり1</div><div style="position: fixed;">小ぢんまり2</div> +    </y-test> + +    <y-test +        style="position: relative;" +        data-test-data='{ +            "node": "div:nth-of-type(1)::text", +            "offset": 0, +            "length": 14, +            "expected": { +                "node": "div:nth-of-type(2)::text", +                "offset": 6, +                "content": "小ぢんまり1\n\n小ぢんまり2" +            } +        }' +    > +        <y-description>Two newlines expected due to position:sticky being able to cause a significant layout change.</y-description> +<div>小ぢんまり1</div><div style="position: sticky;">小ぢんまり2</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "rt", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::text", +                "offset": 5, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Scanning text starting in an <rt> element. 
Should start scanning at the start of the <ruby> tag instead.</y-description> +<div><ruby>小<rp>(</rp><rt>こ</rt><rp>)</rp></ruby>ぢんまり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip <script> content.</y-description> +<div>小ぢん<script>/*comment*/</script>まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip <style> content.</y-description> +<div>小ぢん<style>/*comment*/</style>まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip <textarea> content.</y-description> +<div>小ぢん<textarea>textarea content</textarea>まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip <input> content.</y-description> +<div>小ぢん<input value="content" />まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +           
     "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip <button> content.</y-description> +<div>小ぢん<button>content</button>まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip content with font-size:0.</y-description> +<div>小ぢん<span style="font-size: 0;">content</span>まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip content with opacity:0.</y-description> +<div>小ぢん<span style="opacity: 0;">content</span>まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip content with visibility:hidden.</y-description> +<div>小ぢん<span style="visibility: hidden;">content</span>まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip content with display:none.</y-description> +<div>小ぢん<span style="display: 
none;">content</span>まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Don't skip content with user-select:none.</y-description> +<div>小ぢ<span style="user-select: none;">ん</span>まり1</div> +    </y-test> + +    <y-test +        data-test-data='{ +            "node": "div", +            "offset": 0, +            "length": 6, +            "expected": { +                "node": "div::nth-text(2)", +                "offset": 3, +                "content": "小ぢんまり1" +            } +        }' +    > +        <y-description>Skip content with user-select:none <em>and</em> a transparent color.</y-description> +<div>小ぢん<span style="user-select: none; color: rgba(0, 0, 0, 0);">content</span>まり1</div> +    </y-test> + +</body> +</html>
\ No newline at end of file diff --git a/test/data/html/test-stylesheet.css b/test/data/html/test-stylesheet.css index f63d2481..2e9a2f52 100644 --- a/test/data/html/test-stylesheet.css +++ b/test/data/html/test-stylesheet.css @@ -28,7 +28,9 @@ a, a:visited {      text-decoration: underline;  } -.test { +.test, +y-test { +    display: block;      background-color: #ffffff;      margin: 1em 0;      padding: 0.5em; @@ -36,7 +38,8 @@ a, a:visited {      border-radius: 4px;  } -.test:before { +.test:before, +y-test:before {      content: "Test " counter(test-id);      display: block;      counter-increment: test-id; @@ -45,7 +48,10 @@ a, a:visited {      font-weight: bold;  } -.description { +.description, +y-description {      color: #444444;      font-style: italic; +    display: block; +    padding-bottom: 0.5em;  } diff --git a/test/test-dom-text-scanner.js b/test/test-dom-text-scanner.js new file mode 100644 index 00000000..41d6e307 --- /dev/null +++ b/test/test-dom-text-scanner.js @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2020  Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. 
/* Test harness for DOMTextScanner (test/test-dom-text-scanner.js). */

const fs = require('fs');
const path = require('path');
const assert = require('assert');
const {JSDOM} = require('jsdom');
const {VM} = require('./yomichan-vm');


/**
 * Reads an HTML file from disk and parses it into a JSDOM instance.
 * @param fileName The path of the HTML file to load (read as UTF-8).
 * @returns A new JSDOM instance created from the file's contents.
 */
function createJSDOM(fileName) {
    const domSource = fs.readFileSync(fileName, {encoding: 'utf8'});
    return new JSDOM(domSource);
}

/**
 * Resolves a selector which may end with a `::text` or `::nth-text(N)` suffix.
 * Without a suffix, this behaves like element.querySelector(selector).
 * With a suffix, the suffix is stripped, the remaining selector is resolved, and
 * the N-th (1-based) direct child text node of the matched element is returned;
 * `::text` selects the first one.
 * @param element The element to search within.
 * @param selector The selector string, optionally carrying a text-node suffix.
 * @returns The matched element or text node, or null if nothing matches.
 */
function querySelectorTextNode(element, selector) {
    let textIndex = -1;
    let elementSelector = selector;
    const match = /::text$|::nth-text\((\d+)\)$/.exec(selector);
    if (match !== null) {
        textIndex = (match[1] ? parseInt(match[1], 10) - 1 : 0);
        elementSelector = selector.substring(0, selector.length - match[0].length);
    }

    const result = element.querySelector(elementSelector);
    // A missing element is reported as null, the same way a missing text node is,
    // rather than failing with a TypeError on result.firstChild.
    if (result === null || textIndex < 0) {
        return result;
    }

    for (let n = result.firstChild; n !== null; n = n.nextSibling) {
        if (n.nodeType === n.constructor.TEXT_NODE) {
            if (textIndex === 0) {
                return n;
            }
            --textIndex;
        }
    }
    return null;
}


/**
 * Finds a font size in pixels for a node by walking up its ancestor chain.
 * The first element (starting with the node itself) whose computed fontSize
 * string ends with 'px' provides the value.
 * @param window The window object; used for the Node.ELEMENT_NODE constant.
 * @param getComputedStyle The function used to compute an element's style.
 * @param element The node to start searching from.
 * @returns The font size in pixels, or 14 if no element in the chain has a px font size.
 */
function getComputedFontSizeInPixels(window, getComputedStyle, element) {
    for (; element !== null; element = element.parentNode) {
        if (element.nodeType === window.Node.ELEMENT_NODE) {
            const fontSize = getComputedStyle(element).fontSize;
            if (fontSize.endsWith('px')) {
                const value = parseFloat(fontSize.substring(0, fontSize.length - 2));
                return value;
            }
        }
    }
    const defaultFontSize = 14;
    return defaultFontSize;
}

/**
 * Creates a replacement for window.getComputedStyle which converts em units to px units.
 * The returned function yields a Proxy around the real computed style; every string
 * property read through it has `<number>em` occurrences rewritten to `<number * fontSize>px`,
 * where fontSize comes from getComputedFontSizeInPixels using the unwrapped getComputedStyle.
 * @param window The window whose getComputedStyle should be wrapped.
 * @returns A function with the same call signature as window.getComputedStyle.
 */
function createAbsoluteGetComputedStyle(window) {
    // Accepts full CSS numbers ("12", "1.25", ".5", "1e3"), optionally signed.
    // The number must be matched in full: matching only its last digit would leave
    // the leading digits in place and convert the remainder (e.g. "1.25em" => "1.2" + "70px").
    const emLengthPattern = /([-+]?(?:\d+(?:\.\d+)?|\.\d+)(?:[eE][-+]?\d+)?)em/g;
    const getComputedStyleOld = window.getComputedStyle.bind(window);
    return (element, ...args) => {
        const style = getComputedStyleOld(element, ...args);
        return new Proxy(style, {
            get: (target, property) => {
                let result = target[property];
                if (typeof result === 'string') {
                    result = result.replace(emLengthPattern, (g0, g1) => {
                        const fontSize = getComputedFontSizeInPixels(window, getComputedStyleOld, element);
                        return `${parseFloat(g1) * fontSize}px`;
                    });
                }
                return result;
            }
        });
    };
}


/**
 * Runs every <y-test> case found in the document against DOMTextScanner.
 * Each element's data-test-data attribute holds a JSON object (or an array of objects)
 * describing the start position, the seek length and the expected result.
 * For each case this checks:
 *   - a forward seek of the full length (content, offset and node);
 *   - forward seeks of every shorter length (content prefix);
 *   - unless `reversible` is false, a backward seek from the expected position
 *     of the full length and of every shorter length (content suffix).
 * @param dom The JSDOM instance containing the test document.
 * @param DOMTextScanner The DOMTextScanner class under test.
 */
async function testDomTextScanner(dom, {DOMTextScanner}) {
    const document = dom.window.document;
    for (const testElement of document.querySelectorAll('y-test')) {
        let testData = JSON.parse(testElement.dataset.testData);
        if (!Array.isArray(testData)) {
            testData = [testData];
        }
        for (const testDataItem of testData) {
            const {
                node: nodeSelector,
                offset,
                length,
                forcePreserveWhitespace,
                generateLayoutContent,
                reversible,
                expected: {
                    node: expectedNodeSelector,
                    offset: expectedOffset,
                    content: expectedContent
                }
            } = testDataItem;

            const node = querySelectorTextNode(testElement, nodeSelector);
            const expectedNode = querySelectorTextNode(testElement, expectedNodeSelector);

            // Fail with the offending selector instead of an error from inside the scanner.
            assert.notStrictEqual(node, null, `Start node not found for selector: ${nodeSelector}`);
            assert.notStrictEqual(expectedNode, null, `Expected node not found for selector: ${expectedNodeSelector}`);

            // Standard test
            {
                const scanner = new DOMTextScanner(node, offset, forcePreserveWhitespace, generateLayoutContent);
                scanner.seek(length);

                const {node: actualNode1, offset: actualOffset1, content: actualContent1} = scanner;
                assert.strictEqual(actualContent1, expectedContent);
                assert.strictEqual(actualOffset1, expectedOffset);
                assert.strictEqual(actualNode1, expectedNode);
            }

            // Substring tests
            for (let i = 1; i <= length; ++i) {
                const scanner = new DOMTextScanner(node, offset, forcePreserveWhitespace, generateLayoutContent);
                scanner.seek(length - i);

                const {content: actualContent} = scanner;
                assert.strictEqual(actualContent, expectedContent.substring(0, expectedContent.length - i));
            }

            if (reversible === false) { continue; }

            // Reversed test
            {
                const scanner = new DOMTextScanner(expectedNode, expectedOffset, forcePreserveWhitespace, generateLayoutContent);
                scanner.seek(-length);

                const {content: actualContent} = scanner;
                assert.strictEqual(actualContent, expectedContent);
            }

            // Reversed substring tests
            for (let i = 1; i <= length; ++i) {
                const scanner = new DOMTextScanner(expectedNode, expectedOffset, forcePreserveWhitespace, generateLayoutContent);
                scanner.seek(-(length - i));

                const {content: actualContent} = scanner;
                assert.strictEqual(actualContent, expectedContent.substring(i));
            }
        }
    }
}


/**
 * Loads test-dom-text-scanner.html, evaluates fg/js/dom-text-scanner.js inside a VM
 * which exposes the JSDOM globals, and runs the DOMTextScanner tests against it.
 * The JSDOM window is always closed afterwards, even if a test fails.
 */
async function testDocument1() {
    const dom = createJSDOM(path.join(__dirname, 'data', 'html', 'test-dom-text-scanner.html'));
    const window = dom.window;
    try {
        const {document, Node, Range} = window;

        window.getComputedStyle = createAbsoluteGetComputedStyle(window);

        const vm = new VM({document, window, Range, Node});
        vm.execute('fg/js/dom-text-scanner.js');
        const DOMTextScanner = vm.get('DOMTextScanner');

        await testDomTextScanner(dom, {DOMTextScanner});
    } finally {
        window.close();
    }
}


/**
 * Runs all test documents.
 */
async function main() {
    await testDocument1();
}


if (require.main === module) {
    // Report failures through the exit status: on Node versions where an unhandled
    // rejection only produces a warning, a failed assertion would otherwise exit with 0.
    main().catch((error) => {
        console.error(error);
        process.exitCode = 1;
    });
}