From 8b6f526dc69094963f7e0e17e72d3e0cdfd3b41a Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 14 May 2022 18:13:04 -0400 Subject: Regex optimizations (#2132) * Remove regex construction for SimpleDOMParser.getElementsByClassName * Remove regex construction for CssStyleApplier._getRulesForClass * Rename, add jsdoc for clarity --- ext/js/dom/sandbox/css-style-applier.js | 53 ++++++++++++++++++++++++++++++--- ext/js/dom/simple-dom-parser.js | 18 +++++++++-- 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/ext/js/dom/sandbox/css-style-applier.js b/ext/js/dom/sandbox/css-style-applier.js index 14564ed6..9952fa7d 100644 --- a/ext/js/dom/sandbox/css-style-applier.js +++ b/ext/js/dom/sandbox/css-style-applier.js @@ -20,6 +20,14 @@ * that is the same across different browsers. */ class CssStyleApplier { + /** + * @typedef {object} CssRule + * @property {string} selectorText A CSS selector string representing one or more selectors. + * @property {[string, string][]} styles A list of CSS property and value pairs. + * @property {string} styles[][0] The CSS property. + * @property {string} styles[][1] The CSS value. + */ + /** * Creates a new instance of the class. * @param styleDataUrl The local URL to the JSON file continaing the style rules. 
@@ -37,6 +45,9 @@ class CssStyleApplier { this._styleDataUrl = styleDataUrl; this._styleData = []; this._cachedRules = new Map(); + // eslint-disable-next-line no-control-regex + this._patternHtmlWhitespace = /[\t\r\n\x0C ]+/g; + this._patternClassNameCharacter = /[0-9a-zA-Z-_]/; } /** @@ -65,7 +76,7 @@ class CssStyleApplier { const className = element.getAttribute('class'); if (className.length === 0) { continue; } let cssTextNew = ''; - for (const {selectorText, styles} of this._getRulesForClass(className)) { + for (const {selectorText, styles} of this._getCandidateCssRulesForClass(className)) { if (!element.matches(selectorText)) { continue; } cssTextNew += this._getCssText(styles); } @@ -99,17 +110,22 @@ class CssStyleApplier { return await response.json(); } - _getRulesForClass(className) { + /** + * Gets an array of candidate CSS rules which might match a specific class. + * @param {string} className A whitespace-separated list of classes. + * @returns {CssRule[]} An array of candidate CSS rules. 
+ */ + _getCandidateCssRulesForClass(className) { let rules = this._cachedRules.get(className); if (typeof rules !== 'undefined') { return rules; } rules = []; this._cachedRules.set(className, rules); - const classNamePattern = new RegExp(`.${className}(?![0-9a-zA-Z-])`, ''); + const classList = this._getTokens(className); for (const {selectors, styles} of this._styleData) { const selectorText = selectors.join(','); - if (!classNamePattern.test(selectorText)) { continue; } + if (!this._selectorMatches(selectorText, classList)) { continue; } rules.push({selectorText, styles}); } @@ -123,4 +139,33 @@ class CssStyleApplier { } return cssText; } + + _selectorMatches(selectorText, classList) { + const pattern = this._patternClassNameCharacter; + for (const item of classList) { + const prefixedItem = `.${item}`; + let start = 0; + while (true) { + const index = selectorText.indexOf(prefixedItem, start); + if (index < 0) { break; } + start = index + prefixedItem.length; + if (start >= selectorText.length || !pattern.test(selectorText[start])) { return true; } + } + } + return false; + } + + _getTokens(tokenListString) { + let start = 0; + const pattern = this._patternHtmlWhitespace; + pattern.lastIndex = 0; + const result = []; + while (true) { + const match = pattern.exec(tokenListString); + const end = match === null ? 
tokenListString.length : match.index; + if (end > start) { result.push(tokenListString.substring(start, end)); } + if (match === null) { return result; } + start = end + match[0].length; + } + } } diff --git a/ext/js/dom/simple-dom-parser.js b/ext/js/dom/simple-dom-parser.js index 09f3e914..bc327f5e 100644 --- a/ext/js/dom/simple-dom-parser.js +++ b/ext/js/dom/simple-dom-parser.js @@ -22,6 +22,8 @@ class SimpleDOMParser { constructor(content) { this._document = parse5.parse(content); + // eslint-disable-next-line no-control-regex + this._patternHtmlWhitespace = /[\t\r\n\x0C ]+/g; } getElementById(id, root=null) { @@ -54,11 +56,10 @@ class SimpleDOMParser { getElementsByClassName(className, root=null) { const results = []; - const classNamePattern = new RegExp(`(^|\\s)${escapeRegExp(className)}(\\s|$)`); for (const node of this._allNodes(root)) { if (typeof node.tagName === 'string') { const nodeClassName = this.getAttribute(node, 'class'); - if (nodeClassName !== null && classNamePattern.test(nodeClassName)) { + if (nodeClassName !== null && this._hasToken(nodeClassName, className)) { results.push(node); } } @@ -114,4 +115,17 @@ class SimpleDOMParser { } } } + + _hasToken(tokenListString, token) { + let start = 0; + const pattern = this._patternHtmlWhitespace; + pattern.lastIndex = 0; + while (true) { + const match = pattern.exec(tokenListString); + const end = match === null ? tokenListString.length : match.index; + if (end > start && tokenListString.substring(start, end) === token) { return true; } + if (match === null) { return false; } + start = end + match[0].length; + } + } } -- cgit v1.2.3