From 17ebe6a754d53fad97ab607f17e2bba8d8565361 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Thu, 10 Sep 2020 11:30:01 -0400 Subject: Create abstraction class for parsing DOM (#798) --- ext/bg/js/audio-uri-builder.js | 31 ++++++++++++++++++++-------- ext/bg/js/simple-dom-parser.js | 46 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 8 deletions(-) create mode 100644 ext/bg/js/simple-dom-parser.js (limited to 'ext/bg/js') diff --git a/ext/bg/js/audio-uri-builder.js b/ext/bg/js/audio-uri-builder.js index a6b563d8..1c64e008 100644 --- a/ext/bg/js/audio-uri-builder.js +++ b/ext/bg/js/audio-uri-builder.js @@ -16,6 +16,7 @@ */ /* global + * SimpleDOMParser * jp */ @@ -99,12 +100,23 @@ class AudioUriBuilder { }); const responseText = await response.text(); - const dom = new DOMParser().parseFromString(responseText, 'text/html'); + const dom = new SimpleDOMParser(responseText); for (const row of dom.getElementsByClassName('dc-result-row')) { try { - const url = row.querySelector('audio>source[src]').getAttribute('src'); - const reading = row.getElementsByClassName('dc-vocab_kana').item(0).textContent; - if (url && reading && (!definition.reading || definition.reading === reading)) { + const audio = dom.getElementByTagName('audio', row); + if (audio === null) { continue; } + + const source = dom.getElementByTagName('source', audio); + if (source === null) { continue; } + + const url = dom.getAttribute(source, 'src'); + if (url === null) { continue; } + + const readings = dom.getElementsByClassName('dc-vocab_kana'); + if (readings.length === 0) { continue; } + + const reading = dom.getTextContent(readings[0]); + if (reading && (!definition.reading || definition.reading === reading)) { return this.normalizeUrl(url, 'https://www.japanesepod101.com', '/learningcenter/reference/'); } } catch (e) { @@ -127,13 +139,16 @@ class AudioUriBuilder { }); const responseText = await response.text(); - const dom = new 
DOMParser().parseFromString(responseText, 'text/html'); + const dom = new SimpleDOMParser(responseText); try { const audio = dom.getElementById(`audio_${definition.expression}:${definition.reading}`); if (audio !== null) { - const url = audio.getElementsByTagName('source').item(0).getAttribute('src'); - if (url) { - return this.normalizeUrl(url, 'https://jisho.org', '/search/'); + const source = dom.getElementByTagName('source', audio); + if (source !== null) { + const url = dom.getAttribute(source, 'src'); + if (url !== null) { + return this.normalizeUrl(url, 'https://jisho.org', '/search/'); + } } } } catch (e) { diff --git a/ext/bg/js/simple-dom-parser.js b/ext/bg/js/simple-dom-parser.js new file mode 100644 index 00000000..258b1f76 --- /dev/null +++ b/ext/bg/js/simple-dom-parser.js @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2020 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
/**
 * Small abstraction over an HTML document parsed with the platform
 * `DOMParser`. Callers query and read nodes through this object's methods
 * (passing the node back in as `root`/`element`) instead of calling DOM
 * methods on the nodes directly.
 */
class SimpleDOMParser {
    /**
     * Parses `content` as a `text/html` document and keeps the result.
     * @param {string} content HTML source text to parse.
     */
    constructor(content) {
        this._document = new DOMParser().parseFromString(content, 'text/html');
    }

    /**
     * Finds the first element whose `id` attribute equals `id`.
     * The lookup uses an attribute selector so that it also works when
     * scoped to an element `root`.
     * @param {string} id The id value to match exactly.
     * @param {?Element} root Node to search within; the whole document when null.
     * @returns {?Element} The first match, or null if there is none.
     */
    getElementById(id, root=null) {
        // The id is embedded in a quoted CSS string, so it must be escaped;
        // otherwise a quote or backslash in the id produces a broken selector.
        return (root || this._document).querySelector(`[id='${this._escapeSelectorString(id)}']`);
    }

    /**
     * Finds the first descendant matching `tagName`.
     * `tagName` is passed to `querySelector` verbatim.
     * @param {string} tagName Tag name to look for.
     * @param {?Element} root Node to search within; the whole document when null.
     * @returns {?Element} The first match, or null if there is none.
     */
    getElementByTagName(tagName, root=null) {
        return (root || this._document).querySelector(tagName);
    }

    /**
     * Finds every descendant matching `tagName`.
     * `tagName` is passed to `querySelectorAll` verbatim.
     * @param {string} tagName Tag name to look for.
     * @param {?Element} root Node to search within; the whole document when null.
     * @returns {Element[]} A new array of the matches (empty when none).
     */
    getElementsByTagName(tagName, root=null) {
        return [...(root || this._document).querySelectorAll(tagName)];
    }

    /**
     * Finds every descendant carrying the class `className`.
     * The name is inserted verbatim after a `.` in the selector.
     * @param {string} className A single class name.
     * @param {?Element} root Node to search within; the whole document when null.
     * @returns {Element[]} A new array of the matches (empty when none).
     */
    getElementsByClassName(className, root=null) {
        return [...(root || this._document).querySelectorAll(`.${className}`)];
    }

    /**
     * Reads an attribute from an element.
     * @param {Element} element Element to read from.
     * @param {string} attribute Attribute name.
     * @returns {?string} The attribute value, or null when the attribute is absent.
     */
    getAttribute(element, attribute) {
        return element.hasAttribute(attribute) ? element.getAttribute(attribute) : null;
    }

    /**
     * Returns the `textContent` of an element.
     * @param {Element} element Element to read from.
     * @returns {?string} The element's text content.
     */
    getTextContent(element) {
        return element.textContent;
    }

    /**
     * Escapes a value for use inside a single-quoted CSS string.
     * Backslashes and single quotes are backslash-escaped; line breaks, which
     * may not appear raw inside a CSS string, become hex escapes.
     * @param {string} value Raw value (coerced to a string).
     * @returns {string} The escaped text, without surrounding quotes.
     */
    _escapeSelectorString(value) {
        return `${value}`.replace(/[\\'\n\r\f]/g, (character) => {
            switch (character) {
                case '\n': return '\\a ';
                case '\r': return '\\d ';
                case '\f': return '\\c ';
                default: return `\\${character}`;
            }
        });
    }
}