diff options
Diffstat (limited to 'ext/js/dom')
-rw-r--r-- | ext/js/dom/dom-text-scanner.js | 551 | ||||
-rw-r--r-- | ext/js/dom/text-source-element.js | 139 | ||||
-rw-r--r-- | ext/js/dom/text-source-range.js | 170 |
3 files changed, 860 insertions, 0 deletions
diff --git a/ext/js/dom/dom-text-scanner.js b/ext/js/dom/dom-text-scanner.js new file mode 100644 index 00000000..71e74fc3 --- /dev/null +++ b/ext/js/dom/dom-text-scanner.js @@ -0,0 +1,551 @@ +/* + * Copyright (C) 2020-2021 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** + * A class used to scan text in a document. + */ +class DOMTextScanner { + /** + * Creates a new instance of a DOMTextScanner. + * @param node The DOM Node to start at. + * @param offset The character offset in to start at when node is a text node. + * Use 0 for non-text nodes. + */ + constructor(node, offset, forcePreserveWhitespace=false, generateLayoutContent=true) { + const ruby = DOMTextScanner.getParentRubyElement(node); + const resetOffset = (ruby !== null); + if (resetOffset) { node = ruby; } + + this._node = node; + this._offset = offset; + this._content = ''; + this._remainder = 0; + this._resetOffset = resetOffset; + this._newlines = 0; + this._lineHasWhitespace = false; + this._lineHasContent = false; + this._forcePreserveWhitespace = forcePreserveWhitespace; + this._generateLayoutContent = generateLayoutContent; + } + + /** + * Gets the current node being scanned. + * @returns A DOM Node. + */ + get node() { + return this._node; + } + + /** + * Gets the current offset corresponding to the node being scanned. + * This value is only applicable for text nodes. + * @returns An integer. + */ + get offset() { + return this._offset; + } + + /** + * Gets the remaining number of characters that weren't scanned in the last seek() call. + * This value is usually 0 unless the end of the document was reached. + * @returns An integer. + */ + get remainder() { + return this._remainder; + } + + /** + * Gets the accumulated content string resulting from calls to seek(). + * @returns A string. + */ + get content() { + return this._content; + } + + /** + * Seeks a given length in the document and accumulates the text content. + * @param length A positive or negative integer corresponding to how many characters + * should be added to content. Content is only added to the accumulation string, + * never removed, so mixing seek calls with differently signed length values + * may give unexpected results. + * @returns this + */ + seek(length) { + const forward = (length >= 0); + this._remainder = (forward ? length : -length); + if (length === 0) { return this; } + + const TEXT_NODE = Node.TEXT_NODE; + const ELEMENT_NODE = Node.ELEMENT_NODE; + + const generateLayoutContent = this._generateLayoutContent; + let node = this._node; + let lastNode = node; + let resetOffset = this._resetOffset; + let newlines = 0; + while (node !== null) { + let enterable = false; + const nodeType = node.nodeType; + + if (nodeType === TEXT_NODE) { + lastNode = node; + if (!( + forward ? + this._seekTextNodeForward(node, resetOffset) : + this._seekTextNodeBackward(node, resetOffset) + )) { + // Length reached + break; + } + } else if (nodeType === ELEMENT_NODE) { + lastNode = node; + this._offset = 0; + [enterable, newlines] = DOMTextScanner.getElementSeekInfo(node); + if (newlines > this._newlines && generateLayoutContent) { + this._newlines = newlines; + } + } + + const exitedNodes = []; + node = DOMTextScanner.getNextNode(node, forward, enterable, exitedNodes); + + for (const exitedNode of exitedNodes) { + if (exitedNode.nodeType !== ELEMENT_NODE) { continue; } + newlines = DOMTextScanner.getElementSeekInfo(exitedNode)[1]; + if (newlines > this._newlines && generateLayoutContent) { + this._newlines = newlines; + } + } + + resetOffset = true; + } + + this._node = lastNode; + this._resetOffset = resetOffset; + + return this; + } + + // Private + + /** + * Seeks forward in a text node. + * @param textNode The text node to use. + * @param resetOffset Whether or not the text offset should be reset. + * @returns true if scanning should continue, or false if the scan length has been reached. + */ + _seekTextNodeForward(textNode, resetOffset) { + const nodeValue = textNode.nodeValue; + const nodeValueLength = nodeValue.length; + const [preserveNewlines, preserveWhitespace] = ( + this._forcePreserveWhitespace ? + [true, true] : + DOMTextScanner.getWhitespaceSettings(textNode) + ); + + let lineHasWhitespace = this._lineHasWhitespace; + let lineHasContent = this._lineHasContent; + let content = this._content; + let offset = resetOffset ? 0 : this._offset; + let remainder = this._remainder; + let newlines = this._newlines; + + while (offset < nodeValueLength) { + const char = nodeValue[offset]; + const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace); + ++offset; + + if (charAttributes === 0) { + // Character should be ignored + continue; + } else if (charAttributes === 1) { + // Character is collapsable whitespace + lineHasWhitespace = true; + } else { + // Character should be added to the content + if (newlines > 0) { + if (content.length > 0) { + const useNewlineCount = Math.min(remainder, newlines); + content += '\n'.repeat(useNewlineCount); + remainder -= useNewlineCount; + newlines -= useNewlineCount; + } else { + newlines = 0; + } + lineHasContent = false; + lineHasWhitespace = false; + if (remainder <= 0) { + --offset; // Revert character offset + break; + } + } + + lineHasContent = (charAttributes === 2); // 3 = character is a newline + + if (lineHasWhitespace) { + if (lineHasContent) { + content += ' '; + lineHasWhitespace = false; + if (--remainder <= 0) { + --offset; // Revert character offset + break; + } + } else { + lineHasWhitespace = false; + } + } + + content += char; + + if (--remainder <= 0) { break; } + } + } + + this._lineHasWhitespace = lineHasWhitespace; + this._lineHasContent = lineHasContent; + this._content = content; + this._offset = offset; + this._remainder = remainder; + this._newlines = newlines; + + return (remainder > 0); + } + + /** + * Seeks backward in a text node. + * This function is nearly the same as _seekTextNodeForward, with the following differences: + * - Iteration condition is reversed to check if offset is greater than 0. + * - offset is reset to nodeValueLength instead of 0. + * - offset is decremented instead of incremented. + * - offset is decremented before getting the character. + * - offset is reverted by incrementing instead of decrementing. + * - content string is prepended instead of appended. + * @param textNode The text node to use. + * @param resetOffset Whether or not the text offset should be reset. + * @returns true if scanning should continue, or false if the scan length has been reached. + */ + _seekTextNodeBackward(textNode, resetOffset) { + const nodeValue = textNode.nodeValue; + const nodeValueLength = nodeValue.length; + const [preserveNewlines, preserveWhitespace] = ( + this._forcePreserveWhitespace ? + [true, true] : + DOMTextScanner.getWhitespaceSettings(textNode) + ); + + let lineHasWhitespace = this._lineHasWhitespace; + let lineHasContent = this._lineHasContent; + let content = this._content; + let offset = resetOffset ? nodeValueLength : this._offset; + let remainder = this._remainder; + let newlines = this._newlines; + + while (offset > 0) { + --offset; + const char = nodeValue[offset]; + const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace); + + if (charAttributes === 0) { + // Character should be ignored + continue; + } else if (charAttributes === 1) { + // Character is collapsable whitespace + lineHasWhitespace = true; + } else { + // Character should be added to the content + if (newlines > 0) { + if (content.length > 0) { + const useNewlineCount = Math.min(remainder, newlines); + content = '\n'.repeat(useNewlineCount) + content; + remainder -= useNewlineCount; + newlines -= useNewlineCount; + } else { + newlines = 0; + } + lineHasContent = false; + lineHasWhitespace = false; + if (remainder <= 0) { + ++offset; // Revert character offset + break; + } + } + + lineHasContent = (charAttributes === 2); // 3 = character is a newline + + if (lineHasWhitespace) { + if (lineHasContent) { + content = ' ' + content; + lineHasWhitespace = false; + if (--remainder <= 0) { + ++offset; // Revert character offset + break; + } + } else { + lineHasWhitespace = false; + } + } + + content = char + content; + + if (--remainder <= 0) { break; } + } + } + + this._lineHasWhitespace = lineHasWhitespace; + this._lineHasContent = lineHasContent; + this._content = content; + this._offset = offset; + this._remainder = remainder; + this._newlines = newlines; + + return (remainder > 0); + } + + // Static helpers + + /** + * Gets the next node in the document for a specified scanning direction. + * @param node The current DOM Node. + * @param forward Whether to scan forward in the document or backward. + * @param visitChildren Whether the children of the current node should be visited. + * @param exitedNodes An array which stores nodes which were exited. + * @returns The next node in the document, or null if there is no next node. + */ + static getNextNode(node, forward, visitChildren, exitedNodes) { + let next = visitChildren ? (forward ? node.firstChild : node.lastChild) : null; + if (next === null) { + while (true) { + exitedNodes.push(node); + + next = (forward ? node.nextSibling : node.previousSibling); + if (next !== null) { break; } + + next = node.parentNode; + if (next === null) { break; } + + node = next; + } + } + return next; + } + + /** + * Gets the parent element of a given Node. + * @param node The node to check. + * @returns The parent element if one exists, otherwise null. + */ + static getParentElement(node) { + while (node !== null && node.nodeType !== Node.ELEMENT_NODE) { + node = node.parentNode; + } + return node; + } + + /** + * Gets the parent <ruby> element of a given node, if one exists. For efficiency purposes, + * this only checks the immediate parent elements and does not check all ancestors, so + * there are cases where the node may be in a ruby element but it is not returned. + * @param node The node to check. + * @returns A <ruby> node if the input node is contained in one, otherwise null. + */ + static getParentRubyElement(node) { + node = DOMTextScanner.getParentElement(node); + if (node !== null && node.nodeName.toUpperCase() === 'RT') { + node = node.parentNode; + if (node !== null && node.nodeName.toUpperCase() === 'RUBY') { + return node; + } + } + return null; + } + + /** + * @returns [enterable: boolean, newlines: integer] + * The enterable value indicates whether the content of this node should be entered. + * The newlines value corresponds to the number of newline characters that should be added. + * 1 newline corresponds to a simple new line in the layout. + * 2 newlines corresponds to a significant visual distinction since the previous content. + */ + static getElementSeekInfo(element) { + let enterable = true; + switch (element.nodeName.toUpperCase()) { + case 'HEAD': + case 'RT': + case 'SCRIPT': + case 'STYLE': + return [false, 0]; + case 'BR': + return [false, 1]; + case 'TEXTAREA': + case 'INPUT': + case 'BUTTON': + enterable = false; + break; + } + + const style = window.getComputedStyle(element); + const display = style.display; + + const visible = (display !== 'none' && DOMTextScanner.isStyleVisible(style)); + let newlines = 0; + + if (!visible) { + enterable = false; + } else { + switch (style.position) { + case 'absolute': + case 'fixed': + case 'sticky': + newlines = 2; + break; + } + if (newlines === 0 && DOMTextScanner.doesCSSDisplayChangeLayout(display)) { + newlines = 1; + } + } + + return [enterable, newlines]; + } + + /** + * Gets information about how whitespace characters are treated. + * @param textNode The Text node to check. + * @returns [preserveNewlines: boolean, preserveWhitespace: boolean] + * The value of preserveNewlines indicates whether or not newline characters are treated as line breaks. + * The value of preserveWhitespace indicates whether or not sequences of whitespace characters are collapsed. + */ + static getWhitespaceSettings(textNode) { + const element = DOMTextScanner.getParentElement(textNode); + if (element !== null) { + const style = window.getComputedStyle(element); + switch (style.whiteSpace) { + case 'pre': + case 'pre-wrap': + case 'break-spaces': + return [true, true]; + case 'pre-line': + return [true, false]; + } + } + return [false, false]; + } + + /** + * Gets attributes for the specified character. + * @param character A string containing a single character. + * @returns An integer representing the attributes of the character. + * 0: Character should be ignored. + * 1: Character is collapsable whitespace. + * 2: Character should be added to the content. + * 3: Character should be added to the content and is a newline. + */ + static getCharacterAttributes(character, preserveNewlines, preserveWhitespace) { + switch (character.charCodeAt(0)) { + case 0x09: // Tab ('\t') + case 0x0c: // Form feed ('\f') + case 0x0d: // Carriage return ('\r') + case 0x20: // Space (' ') + return preserveWhitespace ? 2 : 1; + case 0x0a: // Line feed ('\n') + return preserveNewlines ? 3 : 1; + case 0x200c: // Zero-width non-joiner ('\u200c') + return 0; + default: // Other + return 2; + } + } + + /** + * Checks whether a given style is visible or not. + * This function does not check style.display === 'none'. + * @param style An object implementing the CSSStyleDeclaration interface. + * @returns true if the style should result in an element being visible, otherwise false. + */ + static isStyleVisible(style) { + return !( + style.visibility === 'hidden' || + parseFloat(style.opacity) <= 0 || + parseFloat(style.fontSize) <= 0 || + ( + !DOMTextScanner.isStyleSelectable(style) && + ( + DOMTextScanner.isCSSColorTransparent(style.color) || + DOMTextScanner.isCSSColorTransparent(style.webkitTextFillColor) + ) + ) + ); + } + + /** + * Checks whether a given style is selectable or not. + * @param style An object implementing the CSSStyleDeclaration interface. + * @returns true if the style is selectable, otherwise false. + */ + static isStyleSelectable(style) { + return !( + style.userSelect === 'none' || + style.webkitUserSelect === 'none' || + style.MozUserSelect === 'none' || + style.msUserSelect === 'none' + ); + } + + /** + * Checks whether a CSS color is transparent or not. + * @param cssColor A CSS color string, expected to be encoded in rgb(a) form. + * @returns true if the color is transparent, otherwise false. + */ + static isCSSColorTransparent(cssColor) { + return ( + typeof cssColor === 'string' && + cssColor.startsWith('rgba(') && + /,\s*0.?0*\)$/.test(cssColor) + ); + } + + /** + * Checks whether a CSS display value will cause a layout change for text. + * @param cssDisplay A CSS string corresponding to the value of the display property. + * @returns true if the layout is changed by this value, otherwise false. + */ + static doesCSSDisplayChangeLayout(cssDisplay) { + let pos = cssDisplay.indexOf(' '); + if (pos >= 0) { + // Truncate to <display-outside> part + cssDisplay = cssDisplay.substring(0, pos); + } + + pos = cssDisplay.indexOf('-'); + if (pos >= 0) { + // Truncate to first part of kebab-case value + cssDisplay = cssDisplay.substring(0, pos); + } + + switch (cssDisplay) { + case 'block': + case 'flex': + case 'grid': + case 'list': // list-item + case 'table': // table, table-* + return true; + case 'ruby': // rubt-* + return (pos >= 0); + default: + return false; + } + } +} diff --git a/ext/js/dom/text-source-element.js b/ext/js/dom/text-source-element.js new file mode 100644 index 00000000..45186636 --- /dev/null +++ b/ext/js/dom/text-source-element.js @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2016-2021 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +class TextSourceElement { + constructor(element, fullContent=null, startOffset=0, endOffset=0) { + this._element = element; + this._fullContent = (typeof fullContent === 'string' ? fullContent : TextSourceElement.getElementContent(element)); + this._startOffset = startOffset; + this._endOffset = endOffset; + this._content = this._fullContent.substring(this._startOffset, this._endOffset); + } + + get element() { + return this._element; + } + + get fullContent() { + return this._fullContent; + } + + get startOffset() { + return this._startOffset; + } + + get endOffset() { + return this._endOffset; + } + + get isConnected() { + return this._element.isConnected; + } + + clone() { + return new TextSourceElement(this._element, this._fullContent, this._startOffset, this._endOffset); + } + + cleanup() { + // NOP + } + + text() { + return this._content; + } + + setEndOffset(length, fromEnd=false) { + if (fromEnd) { + const delta = Math.min(this._fullContent.length - this._endOffset, length); + this._endOffset += delta; + this._content = this._fullContent.substring(this._startOffset, this._endOffset); + return delta; + } else { + const delta = Math.min(this._fullContent.length - this._startOffset, length); + this._endOffset = this._startOffset + delta; + this._content = this._fullContent.substring(this._startOffset, this._endOffset); + return delta; + } + } + + setStartOffset(length) { + const delta = Math.min(this._startOffset, length); + this._startOffset -= delta; + this._content = this._fullContent.substring(this._startOffset, this._endOffset); + return delta; + } + + collapse(toStart) { + if (toStart) { + this._endOffset = this._startOffset; + } else { + this._startOffset = this._endOffset; + } + this._content = ''; + } + + getRect() { + return this._element.getBoundingClientRect(); + } + + getWritingMode() { + return 'horizontal-tb'; + } + + select() { + // NOP + } + + deselect() { + // NOP + } + + hasSameStart(other) { + return ( + typeof other === 'object' && + other !== null && + other instanceof TextSourceElement && + this._element === other.element && + this._fullContent === other.fullContent && + this._startOffset === other.startOffset + ); + } + + getNodesInRange() { + return [this._element]; + } + + static getElementContent(element) { + let content; + switch (element.nodeName.toUpperCase()) { + case 'BUTTON': + content = element.textContent; + break; + case 'IMG': + content = element.getAttribute('alt') || ''; + break; + default: + content = `${element.value}`; + break; + } + + // Remove zero-width non-joiner + content = content.replace(/\u200c/g, ''); + + return content; + } +} diff --git a/ext/js/dom/text-source-range.js b/ext/js/dom/text-source-range.js new file mode 100644 index 00000000..377016da --- /dev/null +++ b/ext/js/dom/text-source-range.js @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2016-2021 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/* global + * DOMTextScanner + * DocumentUtil + */ + +class TextSourceRange { + constructor(range, content, imposterContainer, imposterSourceElement) { + this._range = range; + this._rangeStartOffset = range.startOffset; + this._content = content; + this._imposterContainer = imposterContainer; + this._imposterSourceElement = imposterSourceElement; + } + + get range() { + return this._range; + } + + get rangeStartOffset() { + return this._rangeStartOffset; + } + + get imposterSourceElement() { + return this._imposterSourceElement; + } + + get isConnected() { + return ( + this._range.startContainer.isConnected && + this._range.endContainer.isConnected + ); + } + + clone() { + return new TextSourceRange(this._range.cloneRange(), this._content, this._imposterContainer, this._imposterSourceElement); + } + + cleanup() { + if (this._imposterContainer !== null && this._imposterContainer.parentNode !== null) { + this._imposterContainer.parentNode.removeChild(this._imposterContainer); + } + } + + text() { + return this._content; + } + + setEndOffset(length, layoutAwareScan, fromEnd=false) { + const state = ( + fromEnd ? + new DOMTextScanner(this._range.endContainer, this._range.endOffset, !layoutAwareScan, layoutAwareScan).seek(length) : + new DOMTextScanner(this._range.startContainer, this._range.startOffset, !layoutAwareScan, layoutAwareScan).seek(length) + ); + this._range.setEnd(state.node, state.offset); + this._content = (fromEnd ? this._content + state.content : state.content); + return length - state.remainder; + } + + setStartOffset(length, layoutAwareScan) { + const state = new DOMTextScanner(this._range.startContainer, this._range.startOffset, !layoutAwareScan, layoutAwareScan).seek(-length); + this._range.setStart(state.node, state.offset); + this._rangeStartOffset = this._range.startOffset; + this._content = state.content + this._content; + return length - state.remainder; + } + + collapse(toStart) { + this._range.collapse(toStart); + this._content = ''; + } + + getRect() { + return this._range.getBoundingClientRect(); + } + + getWritingMode() { + return TextSourceRange.getElementWritingMode(TextSourceRange.getParentElement(this._range.startContainer)); + } + + select() { + const selection = window.getSelection(); + selection.removeAllRanges(); + selection.addRange(this._range); + } + + deselect() { + const selection = window.getSelection(); + selection.removeAllRanges(); + } + + hasSameStart(other) { + if (!( + typeof other === 'object' && + other !== null && + other instanceof TextSourceRange + )) { + return false; + } + if (this._imposterSourceElement !== null) { + return ( + this._imposterSourceElement === other.imposterSourceElement && + this._rangeStartOffset === other.rangeStartOffset + ); + } else { + try { + return this._range.compareBoundaryPoints(Range.START_TO_START, other.range) === 0; + } catch (e) { + if (e.name === 'WrongDocumentError') { + // This can happen with shadow DOMs if the ranges are in different documents. + return false; + } + throw e; + } + } + } + + getNodesInRange() { + return DocumentUtil.getNodesInRange(this._range); + } + + static getParentElement(node) { + while (node !== null && node.nodeType !== Node.ELEMENT_NODE) { + node = node.parentNode; + } + return node; + } + + static getElementWritingMode(element) { + if (element !== null) { + const style = window.getComputedStyle(element); + const writingMode = style.writingMode; + if (typeof writingMode === 'string') { + return TextSourceRange.normalizeWritingMode(writingMode); + } + } + return 'horizontal-tb'; + } + + static normalizeWritingMode(writingMode) { + switch (writingMode) { + case 'lr': + case 'lr-tb': + case 'rl': + return 'horizontal-tb'; + case 'tb': + return 'vertical-lr'; + case 'tb-rl': + return 'vertical-rl'; + default: + return writingMode; + } + } +} |