diff options
Diffstat (limited to 'ext/js/dom/simple-dom-parser.js')
-rw-r--r-- | ext/js/dom/simple-dom-parser.js | 121 |
1 files changed, 88 insertions, 33 deletions
diff --git a/ext/js/dom/simple-dom-parser.js b/ext/js/dom/simple-dom-parser.js index 3e84b783..bca1cd88 100644 --- a/ext/js/dom/simple-dom-parser.js +++ b/ext/js/dom/simple-dom-parser.js @@ -18,55 +18,91 @@ import * as parse5 from '../../lib/parse5.js'; +/** + * @augments import('simple-dom-parser').ISimpleDomParser + */ export class SimpleDOMParser { + /** + * @param {string} content + */ constructor(content) { - this._document = parse5.parse(content); + /** @type {import('parse5')} */ + // @ts-expect-error - parse5 global is not defined in typescript declaration + this._parse5Lib = /** @type {import('parse5')} */ (parse5); + /** @type {import('parse5').TreeAdapter<import('parse5').DefaultTreeAdapterMap>} */ + this._treeAdapter = this._parse5Lib.defaultTreeAdapter; + /** @type {import('simple-dom-parser').Parse5Document} */ + this._document = this._parse5Lib.parse(content, { + treeAdapter: this._treeAdapter + }); + /** @type {RegExp} */ this._patternHtmlWhitespace = /[\t\r\n\f ]+/g; } - getElementById(id, root=null) { + /** + * @param {string} id + * @param {import('simple-dom-parser').Element} [root] + * @returns {?import('simple-dom-parser').Element} + */ + getElementById(id, root) { for (const node of this._allNodes(root)) { - if (typeof node.tagName === 'string' && this.getAttribute(node, 'id') === id) { - return node; - } + if (!this._treeAdapter.isElementNode(node) || this.getAttribute(node, 'id') !== id) { continue; } + return node; } return null; } - getElementByTagName(tagName, root=null) { + /** + * @param {string} tagName + * @param {import('simple-dom-parser').Element} [root] + * @returns {?import('simple-dom-parser').Element} + */ + getElementByTagName(tagName, root) { for (const node of this._allNodes(root)) { - if (node.tagName === tagName) { - return node; - } + if (!this._treeAdapter.isElementNode(node) || node.tagName !== tagName) { continue; } + return node; } return null; } - getElementsByTagName(tagName, root=null) { + /** + * @param {string} tagName + * @param {import('simple-dom-parser').Element} [root] + * @returns {import('simple-dom-parser').Element[]} + */ + getElementsByTagName(tagName, root) { const results = []; for (const node of this._allNodes(root)) { - if (node.tagName === tagName) { - results.push(node); - } + if (!this._treeAdapter.isElementNode(node) || node.tagName !== tagName) { continue; } + results.push(node); } return results; } - getElementsByClassName(className, root=null) { + /** + * @param {string} className + * @param {import('simple-dom-parser').Element} [root] + * @returns {import('simple-dom-parser').Element[]} + */ + getElementsByClassName(className, root) { const results = []; for (const node of this._allNodes(root)) { - if (typeof node.tagName === 'string') { - const nodeClassName = this.getAttribute(node, 'class'); - if (nodeClassName !== null && this._hasToken(nodeClassName, className)) { - results.push(node); - } + if (!this._treeAdapter.isElementNode(node)) { continue; } + const nodeClassName = this.getAttribute(node, 'class'); + if (nodeClassName !== null && this._hasToken(nodeClassName, className)) { + results.push(node); } } return results; } + /** + * @param {import('simple-dom-parser').Element} element + * @param {string} attribute + * @returns {?string} + */ getAttribute(element, attribute) { - for (const attr of element.attrs) { + for (const attr of /** @type {import('simple-dom-parser').Parse5Element} */ (element).attrs) { if ( attr.name === attribute && typeof attr.namespace === 'undefined' @@ -77,43 +113,62 @@ export class SimpleDOMParser { return null; } + /** + * @param {import('simple-dom-parser').Element} element + * @returns {string} + */ getTextContent(element) { let source = ''; for (const node of this._allNodes(element)) { - if (node.nodeName === '#text') { + if (this._treeAdapter.isTextNode(node)) { source += node.value; } } return source; } + /** + * @returns {boolean} + */ static isSupported() { return typeof parse5 !== 'undefined'; } // Private + /** + * @param {import('simple-dom-parser').Element|undefined} root + * @returns {Generator<import('simple-dom-parser').Parse5ChildNode, void, unknown>} + * @yields {import('simple-dom-parser').Parse5ChildNode} + */ *_allNodes(root) { - if (root === null) { - root = this._document; - } - // Depth-first pre-order traversal - const nodeQueue = [root]; + /** @type {import('simple-dom-parser').Parse5ChildNode[]} */ + const nodeQueue = []; + if (typeof root !== 'undefined') { + nodeQueue.push(/** @type {import('simple-dom-parser').Parse5Element} */ (root)); + } else { + nodeQueue.push(...this._document.childNodes); + } while (nodeQueue.length > 0) { - const node = nodeQueue.pop(); - + const node = /** @type {import('simple-dom-parser').Parse5ChildNode} */ (nodeQueue.pop()); yield node; - - const childNodes = node.childNodes; - if (typeof childNodes !== 'undefined') { - for (let i = childNodes.length - 1; i >= 0; --i) { - nodeQueue.push(childNodes[i]); + if (this._treeAdapter.isElementNode(node)) { + const {childNodes} = node; + if (typeof childNodes !== 'undefined') { + for (let i = childNodes.length - 1; i >= 0; --i) { + nodeQueue.push(childNodes[i]); + } } } } } + /** + * @param {string} tokenListString + * @param {string} token + * @returns {boolean} + */ _hasToken(tokenListString, token) { let start = 0; const pattern = this._patternHtmlWhitespace; |