summaryrefslogtreecommitdiff
path: root/ext/fg/js/dom-text-scanner.js
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fg/js/dom-text-scanner.js')
-rw-r--r--ext/fg/js/dom-text-scanner.js551
1 files changed, 0 insertions, 551 deletions
diff --git a/ext/fg/js/dom-text-scanner.js b/ext/fg/js/dom-text-scanner.js
deleted file mode 100644
index 71e74fc3..00000000
--- a/ext/fg/js/dom-text-scanner.js
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (C) 2020-2021 Yomichan Authors
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-/**
- * A class used to scan text in a document.
- */
-class DOMTextScanner {
- /**
- * Creates a new instance of a DOMTextScanner.
- * @param node The DOM Node to start at.
- * @param offset The character offset in to start at when node is a text node.
- * Use 0 for non-text nodes.
- */
- constructor(node, offset, forcePreserveWhitespace=false, generateLayoutContent=true) {
- const ruby = DOMTextScanner.getParentRubyElement(node);
- const resetOffset = (ruby !== null);
- if (resetOffset) { node = ruby; }
-
- this._node = node;
- this._offset = offset;
- this._content = '';
- this._remainder = 0;
- this._resetOffset = resetOffset;
- this._newlines = 0;
- this._lineHasWhitespace = false;
- this._lineHasContent = false;
- this._forcePreserveWhitespace = forcePreserveWhitespace;
- this._generateLayoutContent = generateLayoutContent;
- }
-
- /**
- * Gets the current node being scanned.
- * @returns A DOM Node.
- */
- get node() {
- return this._node;
- }
-
- /**
- * Gets the current offset corresponding to the node being scanned.
- * This value is only applicable for text nodes.
- * @returns An integer.
- */
- get offset() {
- return this._offset;
- }
-
- /**
- * Gets the remaining number of characters that weren't scanned in the last seek() call.
- * This value is usually 0 unless the end of the document was reached.
- * @returns An integer.
- */
- get remainder() {
- return this._remainder;
- }
-
- /**
- * Gets the accumulated content string resulting from calls to seek().
- * @returns A string.
- */
- get content() {
- return this._content;
- }
-
- /**
- * Seeks a given length in the document and accumulates the text content.
- * @param length A positive or negative integer corresponding to how many characters
- * should be added to content. Content is only added to the accumulation string,
- * never removed, so mixing seek calls with differently signed length values
- * may give unexpected results.
- * @returns this
- */
- seek(length) {
- const forward = (length >= 0);
- this._remainder = (forward ? length : -length);
- if (length === 0) { return this; }
-
- const TEXT_NODE = Node.TEXT_NODE;
- const ELEMENT_NODE = Node.ELEMENT_NODE;
-
- const generateLayoutContent = this._generateLayoutContent;
- let node = this._node;
- let lastNode = node;
- let resetOffset = this._resetOffset;
- let newlines = 0;
- while (node !== null) {
- let enterable = false;
- const nodeType = node.nodeType;
-
- if (nodeType === TEXT_NODE) {
- lastNode = node;
- if (!(
- forward ?
- this._seekTextNodeForward(node, resetOffset) :
- this._seekTextNodeBackward(node, resetOffset)
- )) {
- // Length reached
- break;
- }
- } else if (nodeType === ELEMENT_NODE) {
- lastNode = node;
- this._offset = 0;
- [enterable, newlines] = DOMTextScanner.getElementSeekInfo(node);
- if (newlines > this._newlines && generateLayoutContent) {
- this._newlines = newlines;
- }
- }
-
- const exitedNodes = [];
- node = DOMTextScanner.getNextNode(node, forward, enterable, exitedNodes);
-
- for (const exitedNode of exitedNodes) {
- if (exitedNode.nodeType !== ELEMENT_NODE) { continue; }
- newlines = DOMTextScanner.getElementSeekInfo(exitedNode)[1];
- if (newlines > this._newlines && generateLayoutContent) {
- this._newlines = newlines;
- }
- }
-
- resetOffset = true;
- }
-
- this._node = lastNode;
- this._resetOffset = resetOffset;
-
- return this;
- }
-
- // Private
-
- /**
- * Seeks forward in a text node.
- * @param textNode The text node to use.
- * @param resetOffset Whether or not the text offset should be reset.
- * @returns true if scanning should continue, or false if the scan length has been reached.
- */
- _seekTextNodeForward(textNode, resetOffset) {
- const nodeValue = textNode.nodeValue;
- const nodeValueLength = nodeValue.length;
- const [preserveNewlines, preserveWhitespace] = (
- this._forcePreserveWhitespace ?
- [true, true] :
- DOMTextScanner.getWhitespaceSettings(textNode)
- );
-
- let lineHasWhitespace = this._lineHasWhitespace;
- let lineHasContent = this._lineHasContent;
- let content = this._content;
- let offset = resetOffset ? 0 : this._offset;
- let remainder = this._remainder;
- let newlines = this._newlines;
-
- while (offset < nodeValueLength) {
- const char = nodeValue[offset];
- const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
- ++offset;
-
- if (charAttributes === 0) {
- // Character should be ignored
- continue;
- } else if (charAttributes === 1) {
- // Character is collapsable whitespace
- lineHasWhitespace = true;
- } else {
- // Character should be added to the content
- if (newlines > 0) {
- if (content.length > 0) {
- const useNewlineCount = Math.min(remainder, newlines);
- content += '\n'.repeat(useNewlineCount);
- remainder -= useNewlineCount;
- newlines -= useNewlineCount;
- } else {
- newlines = 0;
- }
- lineHasContent = false;
- lineHasWhitespace = false;
- if (remainder <= 0) {
- --offset; // Revert character offset
- break;
- }
- }
-
- lineHasContent = (charAttributes === 2); // 3 = character is a newline
-
- if (lineHasWhitespace) {
- if (lineHasContent) {
- content += ' ';
- lineHasWhitespace = false;
- if (--remainder <= 0) {
- --offset; // Revert character offset
- break;
- }
- } else {
- lineHasWhitespace = false;
- }
- }
-
- content += char;
-
- if (--remainder <= 0) { break; }
- }
- }
-
- this._lineHasWhitespace = lineHasWhitespace;
- this._lineHasContent = lineHasContent;
- this._content = content;
- this._offset = offset;
- this._remainder = remainder;
- this._newlines = newlines;
-
- return (remainder > 0);
- }
-
- /**
- * Seeks backward in a text node.
- * This function is nearly the same as _seekTextNodeForward, with the following differences:
- * - Iteration condition is reversed to check if offset is greater than 0.
- * - offset is reset to nodeValueLength instead of 0.
- * - offset is decremented instead of incremented.
- * - offset is decremented before getting the character.
- * - offset is reverted by incrementing instead of decrementing.
- * - content string is prepended instead of appended.
- * @param textNode The text node to use.
- * @param resetOffset Whether or not the text offset should be reset.
- * @returns true if scanning should continue, or false if the scan length has been reached.
- */
- _seekTextNodeBackward(textNode, resetOffset) {
- const nodeValue = textNode.nodeValue;
- const nodeValueLength = nodeValue.length;
- const [preserveNewlines, preserveWhitespace] = (
- this._forcePreserveWhitespace ?
- [true, true] :
- DOMTextScanner.getWhitespaceSettings(textNode)
- );
-
- let lineHasWhitespace = this._lineHasWhitespace;
- let lineHasContent = this._lineHasContent;
- let content = this._content;
- let offset = resetOffset ? nodeValueLength : this._offset;
- let remainder = this._remainder;
- let newlines = this._newlines;
-
- while (offset > 0) {
- --offset;
- const char = nodeValue[offset];
- const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
-
- if (charAttributes === 0) {
- // Character should be ignored
- continue;
- } else if (charAttributes === 1) {
- // Character is collapsable whitespace
- lineHasWhitespace = true;
- } else {
- // Character should be added to the content
- if (newlines > 0) {
- if (content.length > 0) {
- const useNewlineCount = Math.min(remainder, newlines);
- content = '\n'.repeat(useNewlineCount) + content;
- remainder -= useNewlineCount;
- newlines -= useNewlineCount;
- } else {
- newlines = 0;
- }
- lineHasContent = false;
- lineHasWhitespace = false;
- if (remainder <= 0) {
- ++offset; // Revert character offset
- break;
- }
- }
-
- lineHasContent = (charAttributes === 2); // 3 = character is a newline
-
- if (lineHasWhitespace) {
- if (lineHasContent) {
- content = ' ' + content;
- lineHasWhitespace = false;
- if (--remainder <= 0) {
- ++offset; // Revert character offset
- break;
- }
- } else {
- lineHasWhitespace = false;
- }
- }
-
- content = char + content;
-
- if (--remainder <= 0) { break; }
- }
- }
-
- this._lineHasWhitespace = lineHasWhitespace;
- this._lineHasContent = lineHasContent;
- this._content = content;
- this._offset = offset;
- this._remainder = remainder;
- this._newlines = newlines;
-
- return (remainder > 0);
- }
-
- // Static helpers
-
- /**
- * Gets the next node in the document for a specified scanning direction.
- * @param node The current DOM Node.
- * @param forward Whether to scan forward in the document or backward.
- * @param visitChildren Whether the children of the current node should be visited.
- * @param exitedNodes An array which stores nodes which were exited.
- * @returns The next node in the document, or null if there is no next node.
- */
- static getNextNode(node, forward, visitChildren, exitedNodes) {
- let next = visitChildren ? (forward ? node.firstChild : node.lastChild) : null;
- if (next === null) {
- while (true) {
- exitedNodes.push(node);
-
- next = (forward ? node.nextSibling : node.previousSibling);
- if (next !== null) { break; }
-
- next = node.parentNode;
- if (next === null) { break; }
-
- node = next;
- }
- }
- return next;
- }
-
- /**
- * Gets the parent element of a given Node.
- * @param node The node to check.
- * @returns The parent element if one exists, otherwise null.
- */
- static getParentElement(node) {
- while (node !== null && node.nodeType !== Node.ELEMENT_NODE) {
- node = node.parentNode;
- }
- return node;
- }
-
- /**
- * Gets the parent <ruby> element of a given node, if one exists. For efficiency purposes,
- * this only checks the immediate parent elements and does not check all ancestors, so
- * there are cases where the node may be in a ruby element but it is not returned.
- * @param node The node to check.
- * @returns A <ruby> node if the input node is contained in one, otherwise null.
- */
- static getParentRubyElement(node) {
- node = DOMTextScanner.getParentElement(node);
- if (node !== null && node.nodeName.toUpperCase() === 'RT') {
- node = node.parentNode;
- if (node !== null && node.nodeName.toUpperCase() === 'RUBY') {
- return node;
- }
- }
- return null;
- }
-
- /**
- * @returns [enterable: boolean, newlines: integer]
- * The enterable value indicates whether the content of this node should be entered.
- * The newlines value corresponds to the number of newline characters that should be added.
- * 1 newline corresponds to a simple new line in the layout.
- * 2 newlines corresponds to a significant visual distinction since the previous content.
- */
- static getElementSeekInfo(element) {
- let enterable = true;
- switch (element.nodeName.toUpperCase()) {
- case 'HEAD':
- case 'RT':
- case 'SCRIPT':
- case 'STYLE':
- return [false, 0];
- case 'BR':
- return [false, 1];
- case 'TEXTAREA':
- case 'INPUT':
- case 'BUTTON':
- enterable = false;
- break;
- }
-
- const style = window.getComputedStyle(element);
- const display = style.display;
-
- const visible = (display !== 'none' && DOMTextScanner.isStyleVisible(style));
- let newlines = 0;
-
- if (!visible) {
- enterable = false;
- } else {
- switch (style.position) {
- case 'absolute':
- case 'fixed':
- case 'sticky':
- newlines = 2;
- break;
- }
- if (newlines === 0 && DOMTextScanner.doesCSSDisplayChangeLayout(display)) {
- newlines = 1;
- }
- }
-
- return [enterable, newlines];
- }
-
- /**
- * Gets information about how whitespace characters are treated.
- * @param textNode The Text node to check.
- * @returns [preserveNewlines: boolean, preserveWhitespace: boolean]
- * The value of preserveNewlines indicates whether or not newline characters are treated as line breaks.
- * The value of preserveWhitespace indicates whether or not sequences of whitespace characters are collapsed.
- */
- static getWhitespaceSettings(textNode) {
- const element = DOMTextScanner.getParentElement(textNode);
- if (element !== null) {
- const style = window.getComputedStyle(element);
- switch (style.whiteSpace) {
- case 'pre':
- case 'pre-wrap':
- case 'break-spaces':
- return [true, true];
- case 'pre-line':
- return [true, false];
- }
- }
- return [false, false];
- }
-
- /**
- * Gets attributes for the specified character.
- * @param character A string containing a single character.
- * @returns An integer representing the attributes of the character.
- * 0: Character should be ignored.
- * 1: Character is collapsable whitespace.
- * 2: Character should be added to the content.
- * 3: Character should be added to the content and is a newline.
- */
- static getCharacterAttributes(character, preserveNewlines, preserveWhitespace) {
- switch (character.charCodeAt(0)) {
- case 0x09: // Tab ('\t')
- case 0x0c: // Form feed ('\f')
- case 0x0d: // Carriage return ('\r')
- case 0x20: // Space (' ')
- return preserveWhitespace ? 2 : 1;
- case 0x0a: // Line feed ('\n')
- return preserveNewlines ? 3 : 1;
- case 0x200c: // Zero-width non-joiner ('\u200c')
- return 0;
- default: // Other
- return 2;
- }
- }
-
- /**
- * Checks whether a given style is visible or not.
- * This function does not check style.display === 'none'.
- * @param style An object implementing the CSSStyleDeclaration interface.
- * @returns true if the style should result in an element being visible, otherwise false.
- */
- static isStyleVisible(style) {
- return !(
- style.visibility === 'hidden' ||
- parseFloat(style.opacity) <= 0 ||
- parseFloat(style.fontSize) <= 0 ||
- (
- !DOMTextScanner.isStyleSelectable(style) &&
- (
- DOMTextScanner.isCSSColorTransparent(style.color) ||
- DOMTextScanner.isCSSColorTransparent(style.webkitTextFillColor)
- )
- )
- );
- }
-
- /**
- * Checks whether a given style is selectable or not.
- * @param style An object implementing the CSSStyleDeclaration interface.
- * @returns true if the style is selectable, otherwise false.
- */
- static isStyleSelectable(style) {
- return !(
- style.userSelect === 'none' ||
- style.webkitUserSelect === 'none' ||
- style.MozUserSelect === 'none' ||
- style.msUserSelect === 'none'
- );
- }
-
- /**
- * Checks whether a CSS color is transparent or not.
- * @param cssColor A CSS color string, expected to be encoded in rgb(a) form.
- * @returns true if the color is transparent, otherwise false.
- */
- static isCSSColorTransparent(cssColor) {
- return (
- typeof cssColor === 'string' &&
- cssColor.startsWith('rgba(') &&
- /,\s*0.?0*\)$/.test(cssColor)
- );
- }
-
- /**
- * Checks whether a CSS display value will cause a layout change for text.
- * @param cssDisplay A CSS string corresponding to the value of the display property.
- * @returns true if the layout is changed by this value, otherwise false.
- */
- static doesCSSDisplayChangeLayout(cssDisplay) {
- let pos = cssDisplay.indexOf(' ');
- if (pos >= 0) {
- // Truncate to <display-outside> part
- cssDisplay = cssDisplay.substring(0, pos);
- }
-
- pos = cssDisplay.indexOf('-');
- if (pos >= 0) {
- // Truncate to first part of kebab-case value
- cssDisplay = cssDisplay.substring(0, pos);
- }
-
- switch (cssDisplay) {
- case 'block':
- case 'flex':
- case 'grid':
- case 'list': // list-item
- case 'table': // table, table-*
- return true;
- case 'ruby': // rubt-*
- return (pos >= 0);
- default:
- return false;
- }
- }
-}