diff options
Diffstat (limited to 'ext/js/media')
| -rw-r--r-- | ext/js/media/audio-downloader.js | 317 | ||||
| -rw-r--r-- | ext/js/media/media-utility.js | 132 | 
2 files changed, 449 insertions, 0 deletions
// ---------------------------------------------------------------------------
// File: ext/js/media/audio-downloader.js
// ---------------------------------------------------------------------------

/*
 * Copyright (C) 2017-2021  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/* global
 * JsonSchemaValidator
 * NativeSimpleDOMParser
 * SimpleDOMParser
 */

/**
 * AudioDownloader maps audio source names to handlers which produce lists of
 * audio info objects, and downloads audio data for `url` type infos.
 */
class AudioDownloader {
    /**
     * Creates a new instance.
     * @param japaneseUtil An object providing `isStringEntirelyKana(text)`.
     * @param requestBuilder An object providing `fetchAnonymous(url, init)`.
     */
    constructor({japaneseUtil, requestBuilder}) {
        this._japaneseUtil = japaneseUtil;
        this._requestBuilder = requestBuilder;
        this._customAudioListSchema = null;
        this._schemaValidator = null;
        this._getInfoHandlers = new Map([
            ['jpod101', this._getInfoJpod101.bind(this)],
            ['jpod101-alternate', this._getInfoJpod101Alternate.bind(this)],
            ['jisho', this._getInfoJisho.bind(this)],
            ['text-to-speech', this._getInfoTextToSpeech.bind(this)],
            ['text-to-speech-reading', this._getInfoTextToSpeechReading.bind(this)],
            ['custom', this._getInfoCustom.bind(this)]
        ]);
    }

    /**
     * Gets the list of audio infos for a single source.
     * @param source The name of the audio source (a key of the handler map).
     * @param expression The expression text.
     * @param reading The reading text.
     * @param details Source-specific options passed through to the handler.
     * @returns An array of info objects. An empty array is returned when the
     *   source is unknown or when the handler throws.
     */
    async getExpressionAudioInfoList(source, expression, reading, details) {
        const handler = this._getInfoHandlers.get(source);
        if (typeof handler !== 'function') { return []; }
        try {
            return await handler(expression, reading, details);
        } catch (e) {
            // Deliberate best-effort: a failing source yields no results
            // so that callers can move on to the next source.
            return [];
        }
    }

    /**
     * Attempts to download audio by trying each source in order, and each
     * `url` type info of a source in order, returning the first success.
     * @param sources An iterable of source names.
     * @param expression The expression text.
     * @param reading The reading text.
     * @param details Source-specific options passed through to the handlers.
     * @returns An object of the form `{data, contentType}` where `data` is
     *   the base64 encoded audio content.
     * @throws An Error if no source produced downloadable audio.
     */
    async downloadExpressionAudio(sources, expression, reading, details) {
        for (const source of sources) {
            const infoList = await this.getExpressionAudioInfoList(source, expression, reading, details);
            for (const info of infoList) {
                // Only URL infos can be downloaded; other types are skipped.
                if (info.type !== 'url') { continue; }
                try {
                    return await this._downloadAudioFromUrl(info.url, source);
                } catch (e) {
                    // Deliberate best-effort: try the next candidate.
                }
            }
        }

        throw new Error('Could not download audio');
    }

    // Private

    /**
     * Resolves a possibly relative URL against a base URL.
     * @param url The URL to resolve.
     * @param base The base URL.
     * @returns The absolute URL string.
     */
    _normalizeUrl(url, base) {
        return new URL(url, base).href;
    }

    /**
     * Builds the languagepod101 audio URL for an expression/reading pair.
     * When no reading is given and the expression is entirely kana, the
     * expression is sent as the kana parameter instead of the kanji parameter.
     * @returns An array containing a single `url` info.
     */
    async _getInfoJpod101(expression, reading) {
        let kana = reading;
        let kanji = expression;

        if (!kana && this._japaneseUtil.isStringEntirelyKana(kanji)) {
            kana = kanji;
            kanji = null;
        }

        const params = [];
        if (kanji) {
            params.push(`kanji=${encodeURIComponent(kanji)}`);
        }
        if (kana) {
            params.push(`kana=${encodeURIComponent(kana)}`);
        }

        const url = `https://assets.languagepod101.com/dictionary/japanese/audiomp3.php?${params.join('&')}`;
        return [{type: 'url', url}];
    }

    /**
     * Searches the japanesepod101 dictionary page for a result row which has
     * an audio source and whose reading matches.
     * @returns An array containing a single `url` info.
     * @throws An Error if no matching audio URL is found.
     */
    async _getInfoJpod101Alternate(expression, reading) {
        const fetchUrl = 'https://www.japanesepod101.com/learningcenter/reference/dictionary_post';
        const data = `post=dictionary_reference&match_type=exact&search_query=${encodeURIComponent(expression)}&vulgar=true`;
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, {
            method: 'POST',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded'
            },
            body: data
        });
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        for (const row of dom.getElementsByClassName('dc-result-row')) {
            try {
                const audio = dom.getElementByTagName('audio', row);
                if (audio === null) { continue; }

                const source = dom.getElementByTagName('source', audio);
                if (source === null) { continue; }

                let url = dom.getAttribute(source, 'src');
                if (url === null) { continue; }

                // The reading must be looked up within the current row;
                // searching the whole document would compare every row
                // against the first reading on the page.
                // NOTE(review): assumes getElementsByClassName accepts a root
                // element like getElementByTagName does - confirm.
                const htmlReadings = dom.getElementsByClassName('dc-vocab_kana', row);
                if (htmlReadings.length === 0) { continue; }

                const htmlReading = dom.getTextContent(htmlReadings[0]);
                if (htmlReading && (!reading || reading === htmlReading)) {
                    url = this._normalizeUrl(url, response.url);
                    return [{type: 'url', url}];
                }
            } catch (e) {
                // Deliberate best-effort: a malformed row is skipped.
            }
        }

        throw new Error('Failed to find audio URL');
    }

    /**
     * Searches the jisho.org result page for the audio element whose id is
     * `audio_<expression>:<reading>`.
     * @returns An array containing a single `url` info.
     * @throws An Error if no matching audio URL is found.
     */
    async _getInfoJisho(expression, reading) {
        // The expression is encoded so that characters such as '?', '#', '/'
        // and '%' cannot alter the structure of the request URL.
        const fetchUrl = `https://jisho.org/search/${encodeURIComponent(expression)}`;
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        try {
            const audio = dom.getElementById(`audio_${expression}:${reading}`);
            if (audio !== null) {
                const source = dom.getElementByTagName('source', audio);
                if (source !== null) {
                    let url = dom.getAttribute(source, 'src');
                    if (url !== null) {
                        url = this._normalizeUrl(url, response.url);
                        return [{type: 'url', url}];
                    }
                }
            }
        } catch (e) {
            // Deliberate best-effort: fall through to the error below.
        }

        throw new Error('Failed to find audio URL');
    }

    /**
     * Creates a text-to-speech info which speaks the expression.
     * @returns An array containing a single `tts` info.
     * @throws An Error if no voice is specified.
     */
    async _getInfoTextToSpeech(expression, reading, {textToSpeechVoice} = {}) {
        if (!textToSpeechVoice) {
            throw new Error('No voice');
        }
        return [{type: 'tts', text: expression, voice: textToSpeechVoice}];
    }

    /**
     * Creates a text-to-speech info which speaks the reading, falling back to
     * the expression when the reading is empty.
     * @returns An array containing a single `tts` info.
     * @throws An Error if no voice is specified.
     */
    async _getInfoTextToSpeechReading(expression, reading, {textToSpeechVoice} = {}) {
        if (!textToSpeechVoice) {
            throw new Error('No voice');
        }
        return [{type: 'tts', text: reading || expression, voice: textToSpeechVoice}];
    }

    /**
     * Gets infos from a user-defined URL template. The placeholders
     * `{expression}` and `{reading}` are substituted; any other `{...}`
     * sequence is left untouched.
     * @returns For the `json` source type, the list described by the fetched
     *   JSON document; otherwise an array containing a single `url` info.
     * @throws An Error if the custom URL is not a string.
     */
    async _getInfoCustom(expression, reading, {customSourceUrl, customSourceType} = {}) {
        if (typeof customSourceUrl !== 'string') {
            throw new Error('No custom URL defined');
        }
        const data = {expression, reading};
        const url = customSourceUrl.replace(/\{([^}]*)\}/g, (m0, m1) => (Object.prototype.hasOwnProperty.call(data, m1) ? `${data[m1]}` : m0));

        switch (customSourceType) {
            case 'json':
                return await this._getInfoCustomJson(url);
            default:
                return [{type: 'url', url}];
        }
    }

    /**
     * Fetches a JSON audio list, validates it against the custom audio list
     * schema, and converts its `audioSources` entries into `url` infos.
     * @param url The URL of the JSON document.
     * @returns An array of `url` infos; `name` is included when it is a string.
     * @throws An Error if the response status is not OK. Errors raised by the
     *   schema validator are propagated.
     */
    async _getInfoCustomJson(url) {
        const response = await this._requestBuilder.fetchAnonymous(url, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });

        if (!response.ok) {
            throw new Error(`Invalid response: ${response.status}`);
        }

        const responseJson = await response.json();

        const schema = await this._getCustomAudioListSchema();
        if (this._schemaValidator === null) {
            this._schemaValidator = new JsonSchemaValidator();
        }
        this._schemaValidator.validate(responseJson, schema);

        const results = [];
        for (const {url: url2, name} of responseJson.audioSources) {
            const info = {type: 'url', url: url2};
            if (typeof name === 'string') { info.name = name; }
            results.push(info);
        }
        return results;
    }

    /**
     * Downloads audio from a URL and encodes it as base64.
     * @param url The URL to download.
     * @param source The name of the source which produced the URL.
     * @returns An object of the form `{data, contentType}`.
     * @throws An Error if the response status is not OK or the content is
     *   recognized as invalid audio for the source.
     */
    async _downloadAudioFromUrl(url, source) {
        const response = await this._requestBuilder.fetchAnonymous(url, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });

        if (!response.ok) {
            throw new Error(`Invalid response: ${response.status}`);
        }

        const arrayBuffer = await response.arrayBuffer();

        if (!await this._isAudioBinaryValid(arrayBuffer, source)) {
            throw new Error('Could not retrieve audio');
        }

        const data = this._arrayBufferToBase64(arrayBuffer);
        const contentType = response.headers.get('Content-Type');
        return {data, contentType};
    }

    /**
     * Checks whether downloaded content is usable audio. Only the `jpod101`
     * source is checked: content whose SHA-256 digest matches a known
     * invalid-audio file is rejected.
     * @returns `false` for the known invalid file, otherwise `true`.
     */
    async _isAudioBinaryValid(arrayBuffer, source) {
        switch (source) {
            case 'jpod101':
            {
                const digest = await this._arrayBufferDigest(arrayBuffer);
                switch (digest) {
                    case 'ae6398b5a27bc8c0a771df6c907ade794be15518174773c58c7c7ddd17098906': // Invalid audio
                        return false;
                    default:
                        return true;
                }
            }
            default:
                return true;
        }
    }

    /**
     * Computes the SHA-256 digest of a buffer.
     * @returns The digest as a lowercase hexadecimal string.
     */
    async _arrayBufferDigest(arrayBuffer) {
        const hash = new Uint8Array(await crypto.subtle.digest('SHA-256', new Uint8Array(arrayBuffer)));
        let digest = '';
        for (const byte of hash) {
            digest += byte.toString(16).padStart(2, '0');
        }
        return digest;
    }

    /**
     * Encodes the content of a buffer as a base64 string.
     * @param arrayBuffer The buffer to encode.
     * @returns The base64 string.
     */
    _arrayBufferToBase64(arrayBuffer) {
        const bytes = new Uint8Array(arrayBuffer);
        // Converting in bounded chunks avoids passing one argument per byte
        // to a single call, which exceeds the engine's argument limit (and
        // throws a RangeError) for buffers the size of typical audio files.
        const chunkSize = 0x8000;
        let binary = '';
        for (let i = 0; i < bytes.length; i += chunkSize) {
            binary += String.fromCharCode(...bytes.subarray(i, i + chunkSize));
        }
        return btoa(binary);
    }

    /**
     * Creates a DOM parser for HTML content, preferring NativeSimpleDOMParser
     * and falling back to SimpleDOMParser.
     * @throws An Error if neither parser is defined and supported.
     */
    _createSimpleDOMParser(content) {
        if (typeof NativeSimpleDOMParser !== 'undefined' && NativeSimpleDOMParser.isSupported()) {
            return new NativeSimpleDOMParser(content);
        } else if (typeof SimpleDOMParser !== 'undefined' && SimpleDOMParser.isSupported()) {
            return new SimpleDOMParser(content);
        } else {
            throw new Error('DOM parsing not supported');
        }
    }

    /**
     * Gets the custom audio list schema, fetching it from the extension's
     * data directory on first use and caching it on the instance afterwards.
     * @returns The parsed schema object.
     */
    async _getCustomAudioListSchema() {
        let schema = this._customAudioListSchema;
        if (schema === null) {
            const url = chrome.runtime.getURL('/data/schemas/custom-audio-list-schema.json');
            const response = await fetch(url, {
                method: 'GET',
                mode: 'no-cors',
                cache: 'default',
                credentials: 'omit',
                redirect: 'follow',
                referrerPolicy: 'no-referrer'
            });
            schema = await response.json();
            this._customAudioListSchema = schema;
        }
        return schema;
    }
}

// ---------------------------------------------------------------------------
// File: ext/js/media/media-utility.js
// ---------------------------------------------------------------------------

/*
 * Copyright (C) 2020-2021  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/**
 * MediaUtility is a class containing helper methods related to media processing.
 */
class MediaUtility {
    /**
     * Gets the file extension of a file path. URL search queries and hash
     * fragments are not handled.
     * @param path The path to the file.
     * @returns The file extension, including the '.', or an empty string
     *   if there is no file extension.
     */
    getFileNameExtension(path) {
        const match = /\.[^./\\]*$/.exec(path);
        return match !== null ? match[0] : '';
    }

    /**
     * Gets an image file's media type using a file path. The extension is
     * compared case-insensitively.
     * @param path The path to the file.
     * @returns The media type string if it can be determined from the file path,
     *   otherwise null.
     */
    getImageMediaTypeFromFileName(path) {
        switch (this.getFileNameExtension(path).toLowerCase()) {
            case '.apng':
                return 'image/apng';
            case '.bmp':
                return 'image/bmp';
            case '.gif':
                return 'image/gif';
            case '.ico':
            case '.cur':
                return 'image/x-icon';
            case '.jpg':
            case '.jpeg':
            case '.jfif':
            case '.pjpeg':
            case '.pjp':
                return 'image/jpeg';
            case '.png':
                return 'image/png';
            case '.svg':
                return 'image/svg+xml';
            case '.tif':
            case '.tiff':
                return 'image/tiff';
            case '.webp':
                return 'image/webp';
            default:
                return null;
        }
    }

    /**
     * Gets the file extension for a corresponding image media type.
     * The media type is compared exactly (case-sensitive, no parameters).
     * @param mediaType The media type to use.
     * @returns A file extension including the dot for the media type,
     *   otherwise null.
     */
    getFileExtensionFromImageMediaType(mediaType) {
        switch (mediaType) {
            case 'image/apng':
                return '.apng';
            case 'image/bmp':
                return '.bmp';
            case 'image/gif':
                return '.gif';
            case 'image/x-icon':
                return '.ico';
            case 'image/jpeg':
                return '.jpeg';
            case 'image/png':
                return '.png';
            case 'image/svg+xml':
                return '.svg';
            case 'image/tiff':
                return '.tiff';
            case 'image/webp':
                return '.webp';
            default:
                return null;
        }
    }

    /**
     * Gets the file extension for a corresponding audio media type.
     * The media type is compared exactly (case-sensitive, no parameters).
     * @param mediaType The media type to use.
     * @returns A file extension including the dot for the media type,
     *   otherwise null.
     */
    getFileExtensionFromAudioMediaType(mediaType) {
        switch (mediaType) {
            case 'audio/mpeg':
            case 'audio/mp3':
                return '.mp3';
            case 'audio/mp4':
                return '.mp4';
            case 'audio/ogg':
            case 'audio/vorbis':
                return '.ogg';
            case 'audio/vnd.wav':
            case 'audio/wave':
            case 'audio/wav':
            case 'audio/x-wav':
            case 'audio/x-pn-wav':
                return '.wav';
            case 'audio/flac':
                return '.flac';
            case 'audio/webm':
                return '.webm';
            default:
                return null;
        }
    }
}