/*
 * Copyright (C) 2017-2021  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/* global
 * JsonSchemaValidator
 * NativeSimpleDOMParser
 * SimpleDOMParser
 */

/**
 * Resolves audio sources for an expression/reading pair and downloads the audio data.
 *
 * Each named source ('jpod101', 'jpod101-alternate', 'jisho', 'text-to-speech',
 * 'text-to-speech-reading', 'custom') maps to a handler which returns a list of
 * info objects of the form `{type: 'url', url, [name]}` or `{type: 'tts', text, voice}`.
 */
class AudioDownloader {
    /**
     * @param {object} details
     * @param {object} details.japaneseUtil Object providing `isStringEntirelyKana(text)`.
     * @param {object} details.requestBuilder Object providing `fetchAnonymous(url, init)`.
     */
    constructor({japaneseUtil, requestBuilder}) {
        this._japaneseUtil = japaneseUtil;
        this._requestBuilder = requestBuilder;
        // Both of these are created lazily on the first custom JSON request.
        this._customAudioListSchema = null;
        this._schemaValidator = null;
        this._getInfoHandlers = new Map([
            ['jpod101', this._getInfoJpod101.bind(this)],
            ['jpod101-alternate', this._getInfoJpod101Alternate.bind(this)],
            ['jisho', this._getInfoJisho.bind(this)],
            ['text-to-speech', this._getInfoTextToSpeech.bind(this)],
            ['text-to-speech-reading', this._getInfoTextToSpeechReading.bind(this)],
            ['custom', this._getInfoCustom.bind(this)]
        ]);
    }

    /**
     * Gets the list of audio info objects for a single source.
     * This never rejects: an unknown source or a failing handler yields an empty list.
     * @param {string} source The source name; one of the keys registered in the constructor.
     * @param {string} expression The expression to look up.
     * @param {string} reading The reading of the expression.
     * @param {object} details Source-specific options which are forwarded to the handler.
     * @returns {Promise<object[]>} The info objects, or `[]` on failure.
     */
    async getExpressionAudioInfoList(source, expression, reading, details) {
        const handler = this._getInfoHandlers.get(source);
        if (typeof handler === 'function') {
            try {
                return await handler(expression, reading, details);
            } catch (e) {
                // NOP: handler failures are deliberately treated as "no audio from this source".
            }
        }
        return [];
    }

    /**
     * Tries each source in order and returns the first audio that downloads successfully.
     * Only info objects of type 'url' are attempted; other types are skipped.
     * @param {string[]} sources The source names to try, in priority order.
     * @param {string} expression The expression to look up.
     * @param {string} reading The reading of the expression.
     * @param {object} details Source-specific options which are forwarded to the handlers.
     * @returns {Promise<{data: string, contentType: ?string}>} Base64 encoded audio data and
     *   the value of the response's Content-Type header.
     * @throws {Error} If no source yields downloadable audio.
     */
    async downloadExpressionAudio(sources, expression, reading, details) {
        for (const source of sources) {
            const infoList = await this.getExpressionAudioInfoList(source, expression, reading, details);
            for (const info of infoList) {
                switch (info.type) {
                    case 'url':
                        try {
                            return await this._downloadAudioFromUrl(info.url, source);
                        } catch (e) {
                            // NOP: fall through to the next candidate.
                        }
                        break;
                }
            }
        }

        throw new Error('Could not download audio');
    }

    // Private

    /**
     * Resolves a possibly relative URL against a base URL.
     * @param {string} url The URL to resolve.
     * @param {string} base The base URL.
     * @returns {string} The absolute URL.
     */
    _normalizeUrl(url, base) {
        return new URL(url, base).href;
    }

    /**
     * Builds the languagepod101 audio URL for an expression. No request is made here.
     * @param {string} expression The expression, sent as the `kanji` parameter.
     * @param {string} reading The reading, sent as the `kana` parameter.
     * @returns {Promise<object[]>} A single-element list containing the URL info.
     */
    async _getInfoJpod101(expression, reading) {
        let kana = reading;
        let kanji = expression;

        // When there is no reading and the expression is itself kana, send it as the kana only.
        if (!kana && this._japaneseUtil.isStringEntirelyKana(kanji)) {
            kana = kanji;
            kanji = null;
        }

        const params = [];
        if (kanji) {
            params.push(`kanji=${encodeURIComponent(kanji)}`);
        }
        if (kana) {
            params.push(`kana=${encodeURIComponent(kana)}`);
        }

        const url = `https://assets.languagepod101.com/dictionary/japanese/audiomp3.php?${params.join('&')}`;
        return [{type: 'url', url}];
    }

    /**
     * Looks up the audio URL by posting a search to the japanesepod101 dictionary page
     * and scanning the returned HTML.
     * @param {string} expression The expression used as the search query.
     * @param {string} reading The reading a result must match; if falsy, any reading is accepted.
     * @returns {Promise<object[]>} A single-element list containing the URL info.
     * @throws {Error} If no matching audio URL is found.
     */
    async _getInfoJpod101Alternate(expression, reading) {
        const fetchUrl = 'https://www.japanesepod101.com/learningcenter/reference/dictionary_post';
        const data = `post=dictionary_reference&match_type=exact&search_query=${encodeURIComponent(expression)}&vulgar=true`;
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, {
            method: 'POST',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded'
            },
            body: data
        });
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        for (const row of dom.getElementsByClassName('dc-result-row')) {
            try {
                const audio = dom.getElementByTagName('audio', row);
                if (audio === null) { continue; }

                const source = dom.getElementByTagName('source', audio);
                if (source === null) { continue; }

                let url = dom.getAttribute(source, 'src');
                if (url === null) { continue; }

                // NOTE(review): unlike the tag lookups above, this lookup is not passed `row`,
                // so the reading compared below may come from a different result row.
                // Confirm whether the parser's getElementsByClassName accepts a root element.
                const htmlReadings = dom.getElementsByClassName('dc-vocab_kana');
                if (htmlReadings.length === 0) { continue; }

                const htmlReading = dom.getTextContent(htmlReadings[0]);
                if (htmlReading && (!reading || reading === htmlReading)) {
                    url = this._normalizeUrl(url, response.url);
                    return [{type: 'url', url}];
                }
            } catch (e) {
                // NOP: a malformed row must not prevent the remaining rows from being checked.
            }
        }

        throw new Error('Failed to find audio URL');
    }

    /**
     * Looks up the audio URL by fetching the jisho.org search page and locating the
     * element whose id is `audio_<expression>:<reading>`.
     * @param {string} expression The expression to search for.
     * @param {string} reading The reading of the expression.
     * @returns {Promise<object[]>} A single-element list containing the URL info.
     * @throws {Error} If no audio URL is found.
     */
    async _getInfoJisho(expression, reading) {
        // The expression is a single path segment, so it must be encoded; otherwise
        // characters such as '/', '?' or '#' would change which URL is requested.
        const fetchUrl = `https://jisho.org/search/${encodeURIComponent(expression)}`;
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        try {
            const audio = dom.getElementById(`audio_${expression}:${reading}`);
            if (audio !== null) {
                const source = dom.getElementByTagName('source', audio);
                if (source !== null) {
                    let url = dom.getAttribute(source, 'src');
                    if (url !== null) {
                        url = this._normalizeUrl(url, response.url);
                        return [{type: 'url', url}];
                    }
                }
            }
        } catch (e) {
            // NOP: any lookup failure is reported by the generic error below.
        }

        throw new Error('Failed to find audio URL');
    }

    /**
     * Creates a text-to-speech info object which speaks the expression.
     * @param {string} expression The text to speak.
     * @param {string} reading Unused.
     * @param {object} details
     * @param {string} details.textToSpeechVoice The voice to use.
     * @returns {Promise<object[]>} A single-element list containing the TTS info.
     * @throws {Error} If no voice is specified.
     */
    async _getInfoTextToSpeech(expression, reading, {textToSpeechVoice}) {
        if (!textToSpeechVoice) {
            throw new Error('No voice');
        }
        return [{type: 'tts', text: expression, voice: textToSpeechVoice}];
    }

    /**
     * Creates a text-to-speech info object which speaks the reading,
     * falling back to the expression when the reading is falsy.
     * @param {string} expression The fallback text to speak.
     * @param {string} reading The preferred text to speak.
     * @param {object} details
     * @param {string} details.textToSpeechVoice The voice to use.
     * @returns {Promise<object[]>} A single-element list containing the TTS info.
     * @throws {Error} If no voice is specified.
     */
    async _getInfoTextToSpeechReading(expression, reading, {textToSpeechVoice}) {
        if (!textToSpeechVoice) {
            throw new Error('No voice');
        }
        return [{type: 'tts', text: reading || expression, voice: textToSpeechVoice}];
    }

    /**
     * Resolves audio info from a user-defined URL template.
     * `{expression}` and `{reading}` placeholders are replaced with the corresponding
     * values, inserted as-is; any other `{...}` placeholder is left untouched.
     * @param {string} expression The expression to substitute.
     * @param {string} reading The reading to substitute.
     * @param {object} details
     * @param {string} details.customSourceUrl The URL template.
     * @param {string} details.customSourceType 'json' to fetch a JSON list of audio sources
     *   from the URL; any other value treats the URL as the audio itself.
     * @returns {Promise<object[]>} The list of URL info objects.
     * @throws {Error} If no custom URL is defined or the JSON source fails.
     */
    async _getInfoCustom(expression, reading, {customSourceUrl, customSourceType}) {
        if (typeof customSourceUrl !== 'string') {
            throw new Error('No custom URL defined');
        }
        const data = {expression, reading};
        const url = customSourceUrl.replace(/\{([^}]*)\}/g, (m0, m1) => (Object.prototype.hasOwnProperty.call(data, m1) ? `${data[m1]}` : m0));

        switch (customSourceType) {
            case 'json':
                return await this._getInfoCustomJson(url);
            default:
                return [{type: 'url', url}];
        }
    }

    /**
     * Fetches a JSON audio source list, validates it against the bundled schema
     * and converts its `audioSources` entries to URL info objects.
     * @param {string} url The URL of the JSON document.
     * @returns {Promise<object[]>} The list of URL info objects; `name` is included
     *   only for entries where it is a string.
     * @throws {Error} If the response is not ok. Validation is delegated to
     *   JsonSchemaValidator, which is presumably where schema failures are raised.
     */
    async _getInfoCustomJson(url) {
        const response = await this._requestBuilder.fetchAnonymous(url, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });

        if (!response.ok) {
            throw new Error(`Invalid response: ${response.status}`);
        }

        const responseJson = await response.json();

        const schema = await this._getCustomAudioListSchema();
        if (this._schemaValidator === null) {
            this._schemaValidator = new JsonSchemaValidator();
        }
        this._schemaValidator.validate(responseJson, schema);

        const results = [];
        for (const {url: url2, name} of responseJson.audioSources) {
            const info = {type: 'url', url: url2};
            if (typeof name === 'string') { info.name = name; }
            results.push(info);
        }
        return results;
    }

    /**
     * Downloads audio from a URL and returns it as base64 encoded data.
     * @param {string} url The URL to download.
     * @param {string} source The source name, used to select source-specific validation.
     * @returns {Promise<{data: string, contentType: ?string}>} The encoded audio and
     *   the value of the response's Content-Type header.
     * @throws {Error} If the response is not ok or the audio fails validation.
     */
    async _downloadAudioFromUrl(url, source) {
        const response = await this._requestBuilder.fetchAnonymous(url, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });

        if (!response.ok) {
            throw new Error(`Invalid response: ${response.status}`);
        }

        const arrayBuffer = await response.arrayBuffer();

        if (!await this._isAudioBinaryValid(arrayBuffer, source)) {
            throw new Error('Could not retrieve audio');
        }

        const data = this._arrayBufferToBase64(arrayBuffer);
        const contentType = response.headers.get('Content-Type');
        return {data, contentType};
    }

    /**
     * Checks whether downloaded audio data should be accepted for the given source.
     * For 'jpod101', data whose SHA-256 digest matches a known value is rejected;
     * every other source accepts any data.
     * @param {ArrayBuffer} arrayBuffer The downloaded data.
     * @param {string} source The source name.
     * @returns {Promise<boolean>} `true` if the data is accepted.
     */
    async _isAudioBinaryValid(arrayBuffer, source) {
        switch (source) {
            case 'jpod101':
            {
                const digest = await this._arrayBufferDigest(arrayBuffer);
                switch (digest) {
                    case 'ae6398b5a27bc8c0a771df6c907ade794be15518174773c58c7c7ddd17098906': // Invalid audio
                        return false;
                    default:
                        return true;
                }
            }
            default:
                return true;
        }
    }

    /**
     * Computes the SHA-256 digest of a buffer.
     * @param {ArrayBuffer} arrayBuffer The data to hash.
     * @returns {Promise<string>} The digest as a lowercase hexadecimal string.
     */
    async _arrayBufferDigest(arrayBuffer) {
        const hash = new Uint8Array(await crypto.subtle.digest('SHA-256', new Uint8Array(arrayBuffer)));
        let digest = '';
        for (const byte of hash) {
            digest += byte.toString(16).padStart(2, '0');
        }
        return digest;
    }

    /**
     * Encodes a buffer as a base64 string.
     * @param {ArrayBuffer} arrayBuffer The data to encode.
     * @returns {string} The base64 encoded data; an empty string for an empty buffer.
     */
    _arrayBufferToBase64(arrayBuffer) {
        const bytes = new Uint8Array(arrayBuffer);
        // Spreading the entire buffer into one String.fromCharCode call passes one argument
        // per byte, which throws a RangeError for large audio files. Converting in bounded
        // chunks keeps the argument count well below engine limits.
        const chunkSize = 0x8000;
        let binary = '';
        for (let i = 0; i < bytes.length; i += chunkSize) {
            binary += String.fromCharCode(...bytes.subarray(i, i + chunkSize));
        }
        return btoa(binary);
    }

    /**
     * Creates a DOM parser for HTML content, preferring NativeSimpleDOMParser
     * over SimpleDOMParser when both are available and supported.
     * @param {string} content The HTML content to parse.
     * @returns {object} The parser instance.
     * @throws {Error} If neither parser is available and supported.
     */
    _createSimpleDOMParser(content) {
        if (typeof NativeSimpleDOMParser !== 'undefined' && NativeSimpleDOMParser.isSupported()) {
            return new NativeSimpleDOMParser(content);
        } else if (typeof SimpleDOMParser !== 'undefined' && SimpleDOMParser.isSupported()) {
            return new SimpleDOMParser(content);
        } else {
            throw new Error('DOM parsing not supported');
        }
    }

    /**
     * Gets the JSON schema used to validate custom audio lists.
     * The schema is fetched from the extension's bundled data on first use and then cached.
     * @returns {Promise<object>} The parsed schema.
     */
    async _getCustomAudioListSchema() {
        let schema = this._customAudioListSchema;
        if (schema === null) {
            const url = chrome.runtime.getURL('/data/schemas/custom-audio-list-schema.json');
            const response = await fetch(url, {
                method: 'GET',
                mode: 'no-cors',
                cache: 'default',
                credentials: 'omit',
                redirect: 'follow',
                referrerPolicy: 'no-referrer'
            });
            schema = await response.json();
            this._customAudioListSchema = schema;
        }
        return schema;
    }
}