/*
* Copyright (C) 2023-2024 Yomitan Authors
* Copyright (C) 2017-2022 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {RequestBuilder} from '../background/request-builder.js';
import {ExtensionError} from '../core/extension-error.js';
import {readResponseJson} from '../core/json.js';
import {arrayBufferToBase64} from '../data/array-buffer-util.js';
import {JsonSchema} from '../data/json-schema.js';
import {NativeSimpleDOMParser} from '../dom/native-simple-dom-parser.js';
import {SimpleDOMParser} from '../dom/simple-dom-parser.js';
import {isStringEntirelyKana} from '../language/ja/japanese.js';
/**
 * Baseline `fetch` options used by the requests in this file. Call sites
 * either pass this object directly or spread it and override individual
 * fields (for example `method`, `mode`, or `signal`).
 * @type {RequestInit}
 */
const DEFAULT_REQUEST_INIT_PARAMS = {
    cache: 'default',
    credentials: 'omit',
    method: 'GET',
    mode: 'cors',
    redirect: 'follow',
    referrerPolicy: 'no-referrer',
};
/**
 * Looks up and downloads pronunciation audio for dictionary terms from the
 * supported audio sources (remote dictionaries, Wikimedia Commons,
 * text-to-speech descriptors and user-defined URLs).
 */
export class AudioDownloader {
    /**
     * @param {RequestBuilder} requestBuilder Object whose `fetchAnonymous` method is used for the requests to audio providers.
     */
    constructor(requestBuilder) {
        /** @type {RequestBuilder} */
        this._requestBuilder = requestBuilder;
        /** @type {?JsonSchema} */
        this._customAudioListSchema = null;
        /** @type {Map<import('settings').AudioSourceType, import('audio-downloader').GetInfoHandler>} */
        this._getInfoHandlers = new Map(/** @type {[name: import('settings').AudioSourceType, handler: import('audio-downloader').GetInfoHandler][]} */ ([
            ['jpod101', this._getInfoJpod101.bind(this)],
            ['jpod101-alternate', this._getInfoJpod101Alternate.bind(this)],
            ['jisho', this._getInfoJisho.bind(this)],
            ['lingua-libre', this._getInfoLinguaLibre.bind(this)],
            ['wiktionary', this._getInfoWiktionary.bind(this)],
            ['text-to-speech', this._getInfoTextToSpeech.bind(this)],
            ['text-to-speech-reading', this._getInfoTextToSpeechReading.bind(this)],
            ['custom', this._getInfoCustom.bind(this)],
            ['custom-json', this._getInfoCustomJson.bind(this)],
        ]));
        /** @type {Intl.DisplayNames} */
        this._regionNames = new Intl.DisplayNames(['en'], {type: 'region'});
    }

    /**
     * Resolves the list of audio descriptors that a single source offers for a term.
     * This is best-effort: an unknown source type or a failing handler yields an empty list.
     * @param {import('audio').AudioSourceInfo} source
     * @param {string} term
     * @param {string} reading
     * @param {import('language').LanguageSummary} languageSummary
     * @returns {Promise<import('audio-downloader').Info[]>}
     */
    async getTermAudioInfoList(source, term, reading, languageSummary) {
        const handler = this._getInfoHandlers.get(source.type);
        if (typeof handler === 'function') {
            try {
                return await handler(term, reading, source, languageSummary);
            } catch (e) {
                // Deliberately ignored: a failing source simply contributes no audio.
            }
        }
        return [];
    }

    /**
     * Downloads the first audio file that can be retrieved for a term.
     * The given sources are tried in order, followed by the language's required
     * sources that are not already listed. Only `url` descriptors are downloaded.
     * @param {import('audio').AudioSourceInfo[]} sources
     * @param {?number} preferredAudioIndex When a number, only the descriptor at this index of each source's list is tried.
     * @param {string} term
     * @param {string} reading
     * @param {?number} idleTimeout Idle timeout in milliseconds for the download, or `null` for none.
     * @param {import('language').LanguageSummary} languageSummary
     * @returns {Promise<{data: string, contentType: ?string}>} The encoded audio data and the response's `Content-Type` header.
     * @throws {ExtensionError} When no audio could be downloaded; the download errors are stored in `data.errors`.
     */
    async downloadTermAudio(sources, preferredAudioIndex, term, reading, idleTimeout, languageSummary) {
        const errors = [];
        const requiredAudioSources = this._getRequiredAudioSources(languageSummary.iso, sources);
        for (const source of [...sources, ...requiredAudioSources]) {
            let infoList = await this.getTermAudioInfoList(source, term, reading, languageSummary);
            if (typeof preferredAudioIndex === 'number') {
                infoList = (preferredAudioIndex >= 0 && preferredAudioIndex < infoList.length ? [infoList[preferredAudioIndex]] : []);
            }
            for (const info of infoList) {
                switch (info.type) {
                    case 'url':
                        try {
                            return await this._downloadAudioFromUrl(info.url, source.type, idleTimeout);
                        } catch (e) {
                            errors.push(e);
                        }
                        break;
                }
            }
        }

        const error = new ExtensionError('Could not download audio');
        error.data = {errors};
        throw error;
    }

    // Private

    /**
     * Returns the sources that are always tried for a language, excluding the
     * types that are already present in `sources`.
     * @param {string} language
     * @param {import('audio').AudioSourceInfo[]} sources
     * @returns {import('audio').AudioSourceInfo[]}
     */
    _getRequiredAudioSources(language, sources) {
        /** @type {Set<import('settings').AudioSourceType>} */
        const requiredSources = language === 'ja' ?
            new Set([
                'jpod101',
                'jpod101-alternate',
                'jisho',
            ]) :
            new Set([
                'lingua-libre',
                'wiktionary',
            ]);

        for (const {type} of sources) {
            requiredSources.delete(type);
        }

        return [...requiredSources].map((type) => ({type, url: '', voice: ''}));
    }

    /**
     * Resolves a possibly relative URL against a base URL.
     * @param {string} url
     * @param {string} base
     * @returns {string}
     */
    _normalizeUrl(url, base) {
        return new URL(url, base).href;
    }

    /**
     * Escapes the characters that have a special meaning in a regular expression
     * so that `text` can be embedded in a pattern and matched literally.
     * @param {string} text
     * @returns {string}
     */
    _escapeRegExp(text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    /** @type {import('audio-downloader').GetInfoHandler} */
    async _getInfoJpod101(term, reading) {
        // A kana-only term that equals its reading is sent only as `kana`.
        const isKanaOnly = (reading === term && isStringEntirelyKana(term));
        const kanji = isKanaOnly ? '' : term;

        const params = new URLSearchParams();
        if (kanji.length > 0) {
            params.set('kanji', kanji);
        }
        if (reading.length > 0) {
            params.set('kana', reading);
        }

        const url = `https://assets.languagepod101.com/dictionary/japanese/audiomp3.php?${params.toString()}`;
        return [{type: 'url', url}];
    }

    /** @type {import('audio-downloader').GetInfoHandler} */
    async _getInfoJpod101Alternate(term, reading) {
        const fetchUrl = 'https://www.japanesepod101.com/learningcenter/reference/dictionary_post';
        const data = new URLSearchParams({
            post: 'dictionary_reference',
            match_type: 'exact',
            search_query: term,
            vulgar: 'true',
        });
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, {
            ...DEFAULT_REQUEST_INIT_PARAMS,
            method: 'POST',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
            },
            body: data,
        });
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        for (const row of dom.getElementsByClassName('dc-result-row')) {
            try {
                const audio = dom.getElementByTagName('audio', row);
                if (audio === null) { continue; }

                const source = dom.getElementByTagName('source', audio);
                if (source === null) { continue; }

                let url = dom.getAttribute(source, 'src');
                if (url === null) { continue; }

                // Look the reading up inside this row only. An unscoped lookup always
                // yields the first reading of the whole document, so rows after the
                // first one could never be matched by their own reading.
                // NOTE(review): assumes getElementsByClassName accepts a root element
                // like getElementByTagName does - confirm against the parser interface.
                const htmlReadings = dom.getElementsByClassName('dc-vocab_kana', row);
                if (htmlReadings.length === 0) { continue; }

                const htmlReading = dom.getTextContent(htmlReadings[0]);
                if (htmlReading && (reading === term || reading === htmlReading)) {
                    url = this._normalizeUrl(url, response.url);
                    return [{type: 'url', url}];
                }
            } catch (e) {
                // Deliberately ignored: a malformed row is skipped.
            }
        }

        throw new Error('Failed to find audio URL');
    }

    /** @type {import('audio-downloader').GetInfoHandler} */
    async _getInfoJisho(term, reading) {
        // Encode the term so that characters such as `#`, `?` or `/` stay part of the search path segment.
        const fetchUrl = `https://jisho.org/search/${encodeURIComponent(term)}`;
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, DEFAULT_REQUEST_INIT_PARAMS);
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        try {
            const audio = dom.getElementById(`audio_${term}:${reading}`);
            if (audio !== null) {
                const source = dom.getElementByTagName('source', audio);
                if (source !== null) {
                    let url = dom.getAttribute(source, 'src');
                    if (url !== null) {
                        url = this._normalizeUrl(url, response.url);
                        return [{type: 'url', url}];
                    }
                }
            }
        } catch (e) {
            // Deliberately ignored: reported through the error thrown below.
        }

        throw new Error('Failed to find audio URL');
    }

    /** @type {import('audio-downloader').GetInfoHandler} */
    async _getInfoLinguaLibre(term, _reading, _details, languageSummary) {
        if (typeof languageSummary !== 'object' || languageSummary === null) {
            throw new Error('Invalid arguments');
        }
        const {iso639_3} = languageSummary;
        const searchCategory = `incategory:"Lingua_Libre_pronunciation-${iso639_3}"`;
        const searchString = `-${term}.wav`;
        // URLSearchParams percent-encodes the values, so characters such as `&`, `#`
        // or `+` in the term cannot alter the structure of the query string.
        const params = new URLSearchParams({
            action: 'query',
            format: 'json',
            list: 'search',
            srsearch: `intitle:/${searchString}/i ${searchCategory}`,
            srnamespace: '6',
            origin: '*',
        });
        const fetchUrl = `https://commons.wikimedia.org/w/api.php?${params.toString()}`;

        const languagePattern = this._escapeRegExp(iso639_3);
        const termPattern = this._escapeRegExp(term);

        /**
         * @param {string} filename
         * @param {string} fileUser
         * @returns {boolean}
         */
        const validateFilename = (filename, fileUser) => {
            // All interpolated parts are escaped so that they are matched literally.
            const validFilenameTest = new RegExp(`^File:LL-Q\\d+\\s+\\(${languagePattern}\\)-${this._escapeRegExp(fileUser)}-${termPattern}\\.wav$`, 'i');
            return validFilenameTest.test(filename);
        };

        return await this.getInfoWikimediaCommons(fetchUrl, validateFilename);
    }

    /** @type {import('audio-downloader').GetInfoHandler} */
    async _getInfoWiktionary(term, _reading, _details, languageSummary) {
        if (typeof languageSummary !== 'object' || languageSummary === null) {
            throw new Error('Invalid arguments');
        }
        const {iso} = languageSummary;
        const searchString = `${iso}(-[a-zA-Z]{2})?-${term}[0123456789]*.ogg`;
        // URLSearchParams percent-encodes the values, so characters such as `&`, `#`
        // or `+` in the term cannot alter the structure of the query string.
        const params = new URLSearchParams({
            action: 'query',
            format: 'json',
            list: 'search',
            srsearch: `intitle:/${searchString}/i`,
            srnamespace: '6',
            origin: '*',
        });
        const fetchUrl = `https://commons.wikimedia.org/w/api.php?${params.toString()}`;

        // The interpolated parts are escaped so that they are matched literally.
        const languagePattern = this._escapeRegExp(iso);
        const termPattern = this._escapeRegExp(term);
        const validFilenameTest = new RegExp(`^File:${languagePattern}(-\\w\\w)?-${termPattern}\\d*\\.ogg$`, 'i');
        const regionTest = new RegExp(`^File:${languagePattern}(-\\w\\w)-${termPattern}`, 'i');

        /**
         * @param {string} filename
         * @returns {boolean}
         */
        const validateFilename = (filename) => validFilenameTest.test(filename);

        /**
         * Prefixes the uploader's name with the region encoded in the file name, if any.
         * @param {string} filename
         * @param {string} fileUser
         * @returns {string}
         */
        const displayName = (filename, fileUser) => {
            const match = regionTest.exec(filename);
            if (match === null) {
                return fileUser;
            }
            const region = match[1].substring(1).toUpperCase();
            try {
                const regionName = this._regionNames.of(region);
                return `(${regionName}) ${fileUser}`;
            } catch (e) {
                // `Intl.DisplayNames.prototype.of` throws a RangeError for a malformed
                // region code (e.g. two digits); fall back to the plain user name
                // instead of failing the whole lookup.
                return fileUser;
            }
        };

        return await this.getInfoWikimediaCommons(fetchUrl, validateFilename, displayName);
    }

    /**
     * Runs a Wikimedia Commons search request and then fetches the uploader and
     * file URL of every search hit, keeping the files accepted by `validateFilename`.
     * @param {string} fetchUrl URL of the search request.
     * @param {(filename: string, fileUser: string) => boolean} validateFilename
     * @param {(filename: string, fileUser: string) => string} [displayName] Produces the `name` of a result; defaults to the uploader's name.
     * @returns {Promise<import('audio-downloader').Info1[]>}
     */
    async getInfoWikimediaCommons(fetchUrl, validateFilename, displayName = (_filename, fileUser) => fileUser) {
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, DEFAULT_REQUEST_INIT_PARAMS);

        /** @type {import('audio-downloader').WikimediaCommonsLookupResponse} */
        const lookupResponse = await readResponseJson(response);
        const lookupResults = lookupResponse.query.search;

        const fetchFileInfos = lookupResults.map(async ({title}) => {
            // Titles may contain spaces, `&`, `+`, etc., so they must be percent-encoded.
            const params = new URLSearchParams({
                action: 'query',
                format: 'json',
                titles: title,
                prop: 'imageinfo',
                iiprop: 'user|url',
                origin: '*',
            });
            const fileInfoURL = `https://commons.wikimedia.org/w/api.php?${params.toString()}`;
            const response2 = await this._requestBuilder.fetchAnonymous(fileInfoURL, DEFAULT_REQUEST_INIT_PARAMS);

            /** @type {import('audio-downloader').WikimediaCommonsFileResponse} */
            const fileResponse = await readResponseJson(response2);
            const fileResults = fileResponse.query.pages;
            const results = [];
            for (const page of Object.values(fileResults)) {
                const {imageinfo} = page;
                // Skip pages without file information rather than throwing, which
                // would reject `Promise.all` and discard every other result.
                if (!Array.isArray(imageinfo) || imageinfo.length === 0) { continue; }
                const {url: fileUrl, user: fileUser} = imageinfo[0];
                if (validateFilename(title, fileUser)) {
                    results.push({type: 'url', url: fileUrl, name: displayName(title, fileUser)});
                }
            }
            return /** @type {import('audio-downloader').Info1[]} */ (results);
        });

        return (await Promise.all(fetchFileInfos)).flat();
    }

    /** @type {import('audio-downloader').GetInfoHandler} */
    async _getInfoTextToSpeech(term, reading, details) {
        if (typeof details !== 'object' || details === null) {
            throw new Error('Invalid arguments');
        }

        const {voice} = details;
        if (typeof voice !== 'string') {
            throw new Error('Invalid voice');
        }

        return [{type: 'tts', text: term, voice: voice}];
    }

    /** @type {import('audio-downloader').GetInfoHandler} */
    async _getInfoTextToSpeechReading(term, reading, details) {
        if (typeof details !== 'object' || details === null) {
            throw new Error('Invalid arguments');
        }

        const {voice} = details;
        if (typeof voice !== 'string') {
            throw new Error('Invalid voice');
        }

        return [{type: 'tts', text: reading, voice: voice}];
    }

    /** @type {import('audio-downloader').GetInfoHandler} */
    async _getInfoCustom(term, reading, details) {
        if (typeof details !== 'object' || details === null) {
            throw new Error('Invalid arguments');
        }

        let {url} = details;
        if (typeof url !== 'string') {
            throw new Error('Invalid url');
        }

        url = this._getCustomUrl(term, reading, url);
        return [{type: 'url', url}];
    }

    /** @type {import('audio-downloader').GetInfoHandler} */
    async _getInfoCustomJson(term, reading, details) {
        if (typeof details !== 'object' || details === null) {
            throw new Error('Invalid arguments');
        }

        let {url} = details;
        if (typeof url !== 'string') {
            throw new Error('Invalid url');
        }

        url = this._getCustomUrl(term, reading, url);

        const response = await this._requestBuilder.fetchAnonymous(url, DEFAULT_REQUEST_INIT_PARAMS);

        if (!response.ok) {
            throw new Error(`Invalid response: ${response.status}`);
        }

        /** @type {import('audio-downloader').CustomAudioList} */
        const responseJson = await readResponseJson(response);

        // The schema is loaded on first use and then kept for later calls.
        if (this._customAudioListSchema === null) {
            const schema = await this._getCustomAudioListSchema();
            this._customAudioListSchema = new JsonSchema(/** @type {import('ext/json-schema').Schema} */ (schema));
        }
        this._customAudioListSchema.validate(responseJson);

        /** @type {import('audio-downloader').Info[]} */
        const results = [];
        for (const {url: url2, name} of responseJson.audioSources) {
            /** @type {import('audio-downloader').Info1} */
            const info = {type: 'url', url: url2};
            if (typeof name === 'string') { info.name = name; }
            results.push(info);
        }
        return results;
    }

    /**
     * Substitutes the `{term}` and `{reading}` placeholders of a URL template.
     * Other `{...}` sequences are left untouched.
     * @param {string} term
     * @param {string} reading
     * @param {string} url
     * @returns {string}
     * @throws {Error} When `url` is not a string.
     */
    _getCustomUrl(term, reading, url) {
        if (typeof url !== 'string') {
            throw new Error('No custom URL defined');
        }
        const data = {term, reading};
        /**
         * @param {string} m0
         * @param {string} m1
         * @returns {string}
         */
        const replacer = (m0, m1) => (
            Object.prototype.hasOwnProperty.call(data, m1) ?
            `${data[/** @type {'term'|'reading'} */ (m1)]}` :
            m0
        );
        return url.replace(/\{([^}]*)\}/g, replacer);
    }

    /**
     * Downloads an audio file and returns it encoded by `arrayBufferToBase64`.
     * When `idleTimeout` is a number, the request is aborted if no progress is
     * reported for that many milliseconds.
     * @param {string} url
     * @param {import('settings').AudioSourceType} sourceType
     * @param {?number} idleTimeout
     * @returns {Promise<{data: string, contentType: ?string}>}
     * @throws {Error} When the response is not ok or the audio is rejected as invalid.
     */
    async _downloadAudioFromUrl(url, sourceType, idleTimeout) {
        let signal;
        /** @type {?import('request-builder.js').ProgressCallback} */
        let onProgress = null;
        /** @type {?import('core').Timeout} */
        let idleTimer = null;
        const clearIdleTimer = () => {
            if (idleTimer !== null) {
                clearTimeout(idleTimer);
                idleTimer = null;
            }
        };
        if (typeof idleTimeout === 'number') {
            const abortController = new AbortController();
            ({signal} = abortController);
            const onIdleTimeout = () => {
                abortController.abort('Idle timeout');
            };
            onProgress = (done) => {
                // Every progress notification restarts the idle countdown.
                clearIdleTimer();
                idleTimer = done ? null : setTimeout(onIdleTimeout, idleTimeout);
            };
            idleTimer = setTimeout(onIdleTimeout, idleTimeout);
        }

        let response;
        let arrayBuffer;
        try {
            response = await this._requestBuilder.fetchAnonymous(url, {
                ...DEFAULT_REQUEST_INIT_PARAMS,
                signal,
            });

            if (!response.ok) {
                throw new Error(`Invalid response: ${response.status}`);
            }

            arrayBuffer = await RequestBuilder.readFetchResponseArrayBuffer(response, onProgress);
        } finally {
            // Always release the timer, including when the request or the body read
            // fails; otherwise it would stay pending after this method has returned.
            clearIdleTimer();
        }

        if (!await this._isAudioBinaryValid(arrayBuffer, sourceType)) {
            throw new Error('Could not retrieve audio');
        }

        const data = arrayBufferToBase64(arrayBuffer);
        const contentType = response.headers.get('Content-Type');
        return {data, contentType};
    }

    /**
     * Checks whether downloaded data is usable audio. For the `jpod101` source,
     * data whose SHA-256 digest equals the known "invalid audio" file is rejected;
     * every other source type is accepted as-is.
     * @param {ArrayBuffer} arrayBuffer
     * @param {import('settings').AudioSourceType} sourceType
     * @returns {Promise<boolean>}
     */
    async _isAudioBinaryValid(arrayBuffer, sourceType) {
        switch (sourceType) {
            case 'jpod101':
            {
                const digest = await this._arrayBufferDigest(arrayBuffer);
                switch (digest) {
                    case 'ae6398b5a27bc8c0a771df6c907ade794be15518174773c58c7c7ddd17098906': // Invalid audio
                        return false;
                    default:
                        return true;
                }
            }
            default:
                return true;
        }
    }

    /**
     * Computes the SHA-256 digest of a buffer as a lowercase hexadecimal string.
     * @param {ArrayBuffer} arrayBuffer
     * @returns {Promise<string>}
     */
    async _arrayBufferDigest(arrayBuffer) {
        const hash = new Uint8Array(await crypto.subtle.digest('SHA-256', new Uint8Array(arrayBuffer)));
        let digest = '';
        for (const byte of hash) {
            digest += byte.toString(16).padStart(2, '0');
        }
        return digest;
    }

    /**
     * Creates a DOM parser for `content`, preferring the native implementation.
     * @param {string} content
     * @returns {import('simple-dom-parser').ISimpleDomParser}
     * @throws {Error} When neither parser implementation is supported.
     */
    _createSimpleDOMParser(content) {
        if (typeof NativeSimpleDOMParser !== 'undefined' && NativeSimpleDOMParser.isSupported()) {
            return new NativeSimpleDOMParser(content);
        } else if (typeof SimpleDOMParser !== 'undefined' && SimpleDOMParser.isSupported()) {
            return new SimpleDOMParser(content);
        } else {
            throw new Error('DOM parsing not supported');
        }
    }

    /**
     * Loads the JSON schema used to validate `custom-json` responses from the extension's bundled data.
     * @returns {Promise<unknown>}
     */
    async _getCustomAudioListSchema() {
        const url = chrome.runtime.getURL('/data/schemas/custom-audio-list-schema.json');
        const response = await fetch(url, {
            ...DEFAULT_REQUEST_INIT_PARAMS,
            mode: 'no-cors',
        });
        return await readResponseJson(response);
    }
}