summaryrefslogtreecommitdiff
path: root/ext/js/dom/simple-dom-parser.js
blob: 3e84b7830e6cdd11746689fa75c6e1d107da66d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*
 * Copyright (C) 2023  Yomitan Authors
 * Copyright (C) 2020-2022  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

import * as parse5 from '../../lib/parse5.js';

/**
 * Small read-only query helper over the document tree returned by `parse5.parse`.
 * Nodes are inspected through the plain properties this class reads:
 * `tagName`, `nodeName`, `attrs`, `childNodes` and `value`.
 */
export class SimpleDOMParser {
    /**
     * Parses the given markup and stores the resulting document tree.
     * @param {string} content The HTML source handed to `parse5.parse`.
     */
    constructor(content) {
        this._document = parse5.parse(content);
        // Global (stateful) regex shared by every _hasToken call; _hasToken
        // resets lastIndex before each scan.
        this._patternHtmlWhitespace = /[\t\r\n\f ]+/g;
    }

    /**
     * Finds the first node (in document order) that has a string `tagName`
     * and whose un-namespaced `id` attribute equals `id`.
     * @param {string} id The id value to look for.
     * @param {?object} [root] Node to start from; `null` searches the whole document.
     * @returns {?object} The matching node, or `null` when there is none.
     */
    getElementById(id, root=null) {
        for (const node of this._allNodes(root)) {
            if (typeof node.tagName === 'string' && this.getAttribute(node, 'id') === id) {
                return node;
            }
        }
        return null;
    }

    /**
     * Finds the first node (in document order) whose `tagName` is strictly equal to `tagName`.
     * @param {string} tagName The tag name to compare against; the comparison is exact.
     * @param {?object} [root] Node to start from; `null` searches the whole document.
     * @returns {?object} The matching node, or `null` when there is none.
     */
    getElementByTagName(tagName, root=null) {
        for (const node of this._allNodes(root)) {
            if (node.tagName === tagName) {
                return node;
            }
        }
        return null;
    }

    /**
     * Collects every node whose `tagName` is strictly equal to `tagName`, in document order.
     * @param {string} tagName The tag name to compare against; the comparison is exact.
     * @param {?object} [root] Node to start from; `null` searches the whole document.
     * @returns {object[]} The matching nodes (possibly empty).
     */
    getElementsByTagName(tagName, root=null) {
        const results = [];
        for (const node of this._allNodes(root)) {
            if (node.tagName === tagName) {
                results.push(node);
            }
        }
        return results;
    }

    /**
     * Collects every node with a string `tagName` whose `class` attribute
     * contains `className` as one of its whitespace-separated tokens.
     * @param {string} className A single class token; it is compared exactly against each token.
     * @param {?object} [root] Node to start from; `null` searches the whole document.
     * @returns {object[]} The matching nodes in document order (possibly empty).
     */
    getElementsByClassName(className, root=null) {
        const results = [];
        for (const node of this._allNodes(root)) {
            if (typeof node.tagName === 'string') {
                const nodeClassName = this.getAttribute(node, 'class');
                if (nodeClassName !== null && this._hasToken(nodeClassName, className)) {
                    results.push(node);
                }
            }
        }
        return results;
    }

    /**
     * Reads the value of an attribute that has no `namespace` property.
     * @param {object} element The node to inspect.
     * @param {string} attribute The exact attribute name to look up.
     * @returns {?string} The attribute value, or `null` when the node has no
     *   such attribute or carries no attribute list at all.
     */
    getAttribute(element, attribute) {
        const attrs = element.attrs;
        // Nodes such as text nodes or the document root have no attribute
        // list; report "no such attribute" instead of throwing on iteration.
        if (typeof attrs === 'undefined' || attrs === null) {
            return null;
        }
        for (const attr of attrs) {
            if (
                attr.name === attribute &&
                typeof attr.namespace === 'undefined'
            ) {
                return attr.value;
            }
        }
        return null;
    }

    /**
     * Concatenates the `value` of every `#text` node found at or below `element`,
     * in document order.
     * @param {?object} element The subtree root; `null` means the whole document.
     * @returns {string} The combined text (empty string when there are no text nodes).
     */
    getTextContent(element) {
        let source = '';
        for (const node of this._allNodes(element)) {
            if (node.nodeName === '#text') {
                source += node.value;
            }
        }
        return source;
    }

    /**
     * Reports whether the `parse5` binding this class relies on is defined.
     * @returns {boolean} `true` when `parse5` is available.
     */
    static isSupported() {
        return typeof parse5 !== 'undefined';
    }

    // Private

    /**
     * Yields `root` and all of its descendants in depth-first pre-order.
     * @param {?object} root The subtree root; `null` means the parsed document.
     * @yields {object} Each visited node.
     */
    *_allNodes(root) {
        if (root === null) {
            root = this._document;
        }

        // Depth-first pre-order traversal using an explicit stack.
        const pending = [root];
        while (pending.length > 0) {
            const node = pending.pop();

            yield node;

            const childNodes = node.childNodes;
            if (typeof childNodes !== 'undefined') {
                // Push children last-to-first so the first child is popped next.
                for (let i = childNodes.length - 1; i >= 0; --i) {
                    pending.push(childNodes[i]);
                }
            }
        }
    }

    /**
     * Tests whether `token` occurs as a complete token in a whitespace-separated list.
     * Scans the string in place rather than splitting it into an array.
     * @param {string} tokenListString The list to scan (e.g. a `class` attribute value).
     * @param {string} token The token to look for; empty segments never match.
     * @returns {boolean} `true` when some non-empty segment equals `token` exactly.
     */
    _hasToken(tokenListString, token) {
        let start = 0;
        const pattern = this._patternHtmlWhitespace;
        // The pattern is global and shared, so always restart from the beginning.
        pattern.lastIndex = 0;
        while (true) {
            const match = pattern.exec(tokenListString);
            // The current segment ends at the next whitespace run, or at the end of the string.
            const end = match === null ? tokenListString.length : match.index;
            if (end > start && tokenListString.substring(start, end) === token) { return true; }
            if (match === null) { return false; }
            start = end + match[0].length;
        }
    }
}