aboutsummaryrefslogtreecommitdiff
path: root/ext/js/media/audio-downloader.js
diff options
context:
space:
mode:
Diffstat (limited to 'ext/js/media/audio-downloader.js')
-rw-r--r--ext/js/media/audio-downloader.js317
1 files changed, 317 insertions, 0 deletions
diff --git a/ext/js/media/audio-downloader.js b/ext/js/media/audio-downloader.js
new file mode 100644
index 00000000..4e77419b
--- /dev/null
+++ b/ext/js/media/audio-downloader.js
@@ -0,0 +1,317 @@
+/*
+ * Copyright (C) 2017-2021 Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/* global
+ * JsonSchemaValidator
+ * NativeSimpleDOMParser
+ * SimpleDOMParser
+ */
+
/**
 * Resolves audio sources for a dictionary expression and downloads the audio data.
 * Each named source ('jpod101', 'jisho', 'custom', ...) maps to a handler which
 * produces a list of audio info objects (`{type: 'url', url}` or `{type: 'tts', text, voice}`).
 */
class AudioDownloader {
    /**
     * @param {object} details
     * @param {object} details.japaneseUtil Object providing `isStringEntirelyKana(text)`.
     * @param {object} details.requestBuilder Object providing `fetchAnonymous(url, init)`;
     *   used for every remote request made by this class.
     */
    constructor({japaneseUtil, requestBuilder}) {
        this._japaneseUtil = japaneseUtil;
        this._requestBuilder = requestBuilder;
        // Both are created lazily the first time a custom JSON source is used.
        this._customAudioListSchema = null;
        this._schemaValidator = null;
        this._getInfoHandlers = new Map([
            ['jpod101', this._getInfoJpod101.bind(this)],
            ['jpod101-alternate', this._getInfoJpod101Alternate.bind(this)],
            ['jisho', this._getInfoJisho.bind(this)],
            ['text-to-speech', this._getInfoTextToSpeech.bind(this)],
            ['text-to-speech-reading', this._getInfoTextToSpeechReading.bind(this)],
            ['custom', this._getInfoCustom.bind(this)]
        ]);
    }

    /**
     * Gets the list of audio info objects which a single source offers for an expression.
     * This is best-effort: an unknown source or a failing handler yields an empty list
     * rather than an error.
     * @param {string} source Name of the audio source, e.g. 'jpod101' or 'custom'.
     * @param {string} expression The expression text.
     * @param {string} reading The reading of the expression; may be empty.
     * @param {object} details Source-specific options (`textToSpeechVoice`, `customSourceUrl`, `customSourceType`).
     * @returns {Promise<object[]>} Audio info objects; empty when nothing could be resolved.
     */
    async getExpressionAudioInfoList(source, expression, reading, details) {
        const handler = this._getInfoHandlers.get(source);
        if (typeof handler === 'function') {
            try {
                return await handler(expression, reading, details);
            } catch (e) {
                // Deliberately ignored: a failing source is treated as having no audio.
            }
        }
        return [];
    }

    /**
     * Downloads audio for an expression, trying each source in order and returning
     * the first successful download. Only 'url' infos are downloadable; any other
     * info type (such as 'tts') is skipped.
     * @param {string[]} sources Source names in order of preference.
     * @param {string} expression The expression text.
     * @param {string} reading The reading of the expression; may be empty.
     * @param {object} details Source-specific options, forwarded to the source handlers.
     * @returns {Promise<{data: string, contentType: ?string}>} Base64 encoded audio and its Content-Type header.
     * @throws {Error} If no source produced downloadable audio.
     */
    async downloadExpressionAudio(sources, expression, reading, details) {
        for (const source of sources) {
            const infoList = await this.getExpressionAudioInfoList(source, expression, reading, details);
            for (const info of infoList) {
                switch (info.type) {
                    case 'url':
                        try {
                            return await this._downloadAudioFromUrl(info.url, source);
                        } catch (e) {
                            // Deliberately ignored: fall through to the next candidate.
                        }
                        break;
                }
            }
        }

        throw new Error('Could not download audio');
    }

    // Private

    /**
     * Resolves a possibly relative URL against a base URL.
     * @param {string} url The URL to resolve.
     * @param {string} base The base URL.
     * @returns {string} The absolute URL.
     */
    _normalizeUrl(url, base) {
        return new URL(url, base).href;
    }

    /**
     * Builds the languagepod101 audio URL for an expression. No request is made here.
     * @param {string} expression The expression text.
     * @param {string} reading The reading of the expression; may be empty.
     * @returns {Promise<object[]>} A single 'url' info.
     */
    async _getInfoJpod101(expression, reading) {
        let kana = reading;
        let kanji = expression;

        // With no reading and a kana-only expression, send the expression as the kana parameter.
        if (!kana && this._japaneseUtil.isStringEntirelyKana(kanji)) {
            kana = kanji;
            kanji = null;
        }

        const params = [];
        if (kanji) {
            params.push(`kanji=${encodeURIComponent(kanji)}`);
        }
        if (kana) {
            params.push(`kana=${encodeURIComponent(kana)}`);
        }

        const url = `https://assets.languagepod101.com/dictionary/japanese/audiomp3.php?${params.join('&')}`;
        return [{type: 'url', url}];
    }

    /**
     * Queries the japanesepod101 dictionary search and extracts the audio URL of the
     * first result row whose reading matches.
     * @param {string} expression The expression text, used as the search query.
     * @param {string} reading The required reading; when empty, the first row with audio and a reading is used.
     * @returns {Promise<object[]>} A single 'url' info.
     * @throws {Error} If no matching row with audio was found.
     */
    async _getInfoJpod101Alternate(expression, reading) {
        const fetchUrl = 'https://www.japanesepod101.com/learningcenter/reference/dictionary_post';
        const data = `post=dictionary_reference&match_type=exact&search_query=${encodeURIComponent(expression)}&vulgar=true`;
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, {
            method: 'POST',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded'
            },
            body: data
        });
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        for (const row of dom.getElementsByClassName('dc-result-row')) {
            try {
                const audio = dom.getElementByTagName('audio', row);
                if (audio === null) { continue; }

                const source = dom.getElementByTagName('source', audio);
                if (source === null) { continue; }

                let url = dom.getAttribute(source, 'src');
                if (url === null) { continue; }

                // Search for the reading inside the current row only; searching the whole
                // document would compare every row against the first row's reading.
                // NOTE(review): assumes getElementsByClassName accepts a root element in the
                // same way getElementByTagName does - confirm against the parser classes.
                const htmlReadings = dom.getElementsByClassName('dc-vocab_kana', row);
                if (htmlReadings.length === 0) { continue; }

                const htmlReading = dom.getTextContent(htmlReadings[0]);
                if (htmlReading && (!reading || reading === htmlReading)) {
                    // The src attribute may be relative to the (possibly redirected) response URL.
                    url = this._normalizeUrl(url, response.url);
                    return [{type: 'url', url}];
                }
            } catch (e) {
                // Deliberately ignored: a malformed row must not prevent checking later rows.
            }
        }

        throw new Error('Failed to find audio URL');
    }

    /**
     * Loads the jisho.org search page for an expression and extracts the audio URL
     * of the element whose id is `audio_<expression>:<reading>`.
     * @param {string} expression The expression text.
     * @param {string} reading The reading of the expression.
     * @returns {Promise<object[]>} A single 'url' info.
     * @throws {Error} If the page contains no matching audio element.
     */
    async _getInfoJisho(expression, reading) {
        // Encode the expression so that characters such as '/', '?', '#' or '%' cannot
        // alter the path or query of the request URL.
        const fetchUrl = `https://jisho.org/search/${encodeURIComponent(expression)}`;
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        try {
            const audio = dom.getElementById(`audio_${expression}:${reading}`);
            if (audio !== null) {
                const source = dom.getElementByTagName('source', audio);
                if (source !== null) {
                    let url = dom.getAttribute(source, 'src');
                    if (url !== null) {
                        url = this._normalizeUrl(url, response.url);
                        return [{type: 'url', url}];
                    }
                }
            }
        } catch (e) {
            // Deliberately ignored: reported below as a missing audio URL.
        }

        throw new Error('Failed to find audio URL');
    }

    /**
     * Creates a text-to-speech info which speaks the expression.
     * @param {string} expression The expression text.
     * @param {string} reading Unused.
     * @param {object} details
     * @param {string} details.textToSpeechVoice The voice to use.
     * @returns {Promise<object[]>} A single 'tts' info.
     * @throws {Error} If no voice is configured.
     */
    async _getInfoTextToSpeech(expression, reading, {textToSpeechVoice}) {
        if (!textToSpeechVoice) {
            throw new Error('No voice');
        }
        return [{type: 'tts', text: expression, voice: textToSpeechVoice}];
    }

    /**
     * Creates a text-to-speech info which speaks the reading, falling back to the
     * expression when the reading is empty.
     * @param {string} expression The expression text.
     * @param {string} reading The reading of the expression; may be empty.
     * @param {object} details
     * @param {string} details.textToSpeechVoice The voice to use.
     * @returns {Promise<object[]>} A single 'tts' info.
     * @throws {Error} If no voice is configured.
     */
    async _getInfoTextToSpeechReading(expression, reading, {textToSpeechVoice}) {
        if (!textToSpeechVoice) {
            throw new Error('No voice');
        }
        return [{type: 'tts', text: reading || expression, voice: textToSpeechVoice}];
    }

    /**
     * Resolves audio from a user-defined URL template. The placeholders `{expression}`
     * and `{reading}` are replaced verbatim (no URL encoding is applied); any other
     * `{...}` placeholder is left untouched.
     * @param {string} expression The expression text.
     * @param {string} reading The reading of the expression.
     * @param {object} details
     * @param {string} details.customSourceUrl The URL template.
     * @param {string} details.customSourceType 'json' to treat the URL as a JSON audio list;
     *   any other value treats the URL as the audio itself.
     * @returns {Promise<object[]>} The resolved 'url' infos.
     * @throws {Error} If no custom URL is defined.
     */
    async _getInfoCustom(expression, reading, {customSourceUrl, customSourceType}) {
        if (typeof customSourceUrl !== 'string') {
            throw new Error('No custom URL defined');
        }
        const data = {expression, reading};
        const url = customSourceUrl.replace(/\{([^}]*)\}/g, (m0, m1) => (Object.prototype.hasOwnProperty.call(data, m1) ? `${data[m1]}` : m0));

        switch (customSourceType) {
            case 'json':
                return await this._getInfoCustomJson(url);
            default:
                return [{type: 'url', url}];
        }
    }

    /**
     * Fetches a JSON audio list, validates it against the custom audio list schema and
     * converts its `audioSources` entries into 'url' infos.
     * @param {string} url The URL of the JSON document.
     * @returns {Promise<object[]>} One 'url' info per entry; `name` is copied when it is a string.
     * @throws {Error} If the response status is not ok. Validation failures are presumably
     *   reported by the validator throwing - confirm against JsonSchemaValidator.
     */
    async _getInfoCustomJson(url) {
        const response = await this._requestBuilder.fetchAnonymous(url, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });

        if (!response.ok) {
            throw new Error(`Invalid response: ${response.status}`);
        }

        const responseJson = await response.json();

        const schema = await this._getCustomAudioListSchema();
        if (this._schemaValidator === null) {
            this._schemaValidator = new JsonSchemaValidator();
        }
        this._schemaValidator.validate(responseJson, schema);

        const results = [];
        for (const {url: url2, name} of responseJson.audioSources) {
            const info = {type: 'url', url: url2};
            if (typeof name === 'string') { info.name = name; }
            results.push(info);
        }
        return results;
    }

    /**
     * Downloads audio from a URL and returns it base64 encoded.
     * @param {string} url The audio URL.
     * @param {string} source The source name which produced the URL; selects source-specific validation.
     * @returns {Promise<{data: string, contentType: ?string}>} The encoded audio and its Content-Type header.
     * @throws {Error} If the response status is not ok or the downloaded data is rejected as invalid.
     */
    async _downloadAudioFromUrl(url, source) {
        const response = await this._requestBuilder.fetchAnonymous(url, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });

        if (!response.ok) {
            throw new Error(`Invalid response: ${response.status}`);
        }

        const arrayBuffer = await response.arrayBuffer();

        if (!await this._isAudioBinaryValid(arrayBuffer, source)) {
            throw new Error('Could not retrieve audio');
        }

        const data = this._arrayBufferToBase64(arrayBuffer);
        const contentType = response.headers.get('Content-Type');
        return {data, contentType};
    }

    /**
     * Checks whether downloaded data is usable audio for the given source.
     * Only 'jpod101' is checked: its data is rejected when the SHA-256 digest equals
     * a specific digest marked as invalid audio.
     * @param {ArrayBuffer} arrayBuffer The downloaded data.
     * @param {string} source The source name.
     * @returns {Promise<boolean>} `false` if the data is known to be invalid, otherwise `true`.
     */
    async _isAudioBinaryValid(arrayBuffer, source) {
        switch (source) {
            case 'jpod101':
            {
                const digest = await this._arrayBufferDigest(arrayBuffer);
                switch (digest) {
                    case 'ae6398b5a27bc8c0a771df6c907ade794be15518174773c58c7c7ddd17098906': // Invalid audio
                        return false;
                    default:
                        return true;
                }
            }
            default:
                return true;
        }
    }

    /**
     * Computes the SHA-256 digest of a buffer.
     * @param {ArrayBuffer} arrayBuffer The data to hash.
     * @returns {Promise<string>} The digest as a lowercase hexadecimal string.
     */
    async _arrayBufferDigest(arrayBuffer) {
        const hash = new Uint8Array(await crypto.subtle.digest('SHA-256', new Uint8Array(arrayBuffer)));
        let digest = '';
        for (const byte of hash) {
            digest += byte.toString(16).padStart(2, '0');
        }
        return digest;
    }

    /**
     * Encodes a buffer as base64.
     * @param {ArrayBuffer} arrayBuffer The data to encode.
     * @returns {string} The base64 string; empty for an empty buffer.
     */
    _arrayBufferToBase64(arrayBuffer) {
        const bytes = new Uint8Array(arrayBuffer);
        // Convert in fixed-size chunks: spreading a whole audio file into a single
        // String.fromCharCode call exceeds the engine's argument limit and throws a RangeError.
        const chunkSize = 0x8000;
        let binary = '';
        for (let i = 0; i < bytes.length; i += chunkSize) {
            binary += String.fromCharCode(...bytes.subarray(i, i + chunkSize));
        }
        return btoa(binary);
    }

    /**
     * Creates a DOM parser for HTML content, preferring NativeSimpleDOMParser and
     * falling back to SimpleDOMParser.
     * @param {string} content The HTML text to parse.
     * @returns {object} The parser instance.
     * @throws {Error} If neither parser is defined and supported.
     */
    _createSimpleDOMParser(content) {
        if (typeof NativeSimpleDOMParser !== 'undefined' && NativeSimpleDOMParser.isSupported()) {
            return new NativeSimpleDOMParser(content);
        } else if (typeof SimpleDOMParser !== 'undefined' && SimpleDOMParser.isSupported()) {
            return new SimpleDOMParser(content);
        } else {
            throw new Error('DOM parsing not supported');
        }
    }

    /**
     * Loads the JSON schema for custom audio lists from the extension's bundled data.
     * The parsed schema is cached on the instance after the first successful load.
     * @returns {Promise<object>} The parsed schema.
     */
    async _getCustomAudioListSchema() {
        let schema = this._customAudioListSchema;
        if (schema === null) {
            const url = chrome.runtime.getURL('/data/schemas/custom-audio-list-schema.json');
            const response = await fetch(url, {
                method: 'GET',
                mode: 'no-cors',
                cache: 'default',
                credentials: 'omit',
                redirect: 'follow',
                referrerPolicy: 'no-referrer'
            });
            schema = await response.json();
            this._customAudioListSchema = schema;
        }
        return schema;
    }
}