From a4e236f13426f9df5db864edda63eecf26c386c7 Mon Sep 17 00:00:00 2001
From: lonkaars
Date: Sun, 8 Oct 2023 20:15:44 +0200
Subject: add NHK 日本語発音アクセント辞典 audio source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (not part of the commit message):
- urlencode: od starts a new output line every 16 input bytes, so keywords
  longer than 16 bytes produced a value with embedded newlines. od now runs
  with -v and all separators are stripped before '%' is re-inserted.
- nhk: $KANJI is quoted when passed to urlencode so it is not word-split.
- Line breaks and indentation of this patch were restored by hand (tabs
  assumed); use `git apply --ignore-whitespace` if the context does not match.

 bulk-audio/bulk-audio.py |  1 +
 bulk-audio/get           | 24 +++++++++++++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/bulk-audio/bulk-audio.py b/bulk-audio/bulk-audio.py
index 1fa84bb..15ab33f 100755
--- a/bulk-audio/bulk-audio.py
+++ b/bulk-audio/bulk-audio.py
@@ -12,6 +12,7 @@ from time import sleep
 real_stdout = sys.stdout
 class TrashFileIO(object):
 	def write(self, x): pass
+	def flush(self): pass
 trash_out = TrashFileIO()
 sys.stdout = trash_out
 
diff --git a/bulk-audio/get b/bulk-audio/get
index bc3839d..21dda29 100755
--- a/bulk-audio/get
+++ b/bulk-audio/get
@@ -1,10 +1,17 @@
 #!/bin/sh
 
 SCRIPT_NAME="$0"
-SOURCES="lp101,lp101_alt,jisho"
+SOURCES="lp101,lp101_alt,jisho,nhk"
 KANJI=""
 KANA=""
 
+# percent-encode every byte of $1, e.g. 日 becomes %E6%97%A5
+urlencode() {
+	# od starts a new output line every 16 input bytes and, without -v, prints
+	# '*' for repeated lines, so strip all separators before inserting '%'
+	printf '%s' "$1" | od -An -v -tx1 | tr -d ' \n' | sed 's/../%&/g' | tr '[:lower:]' '[:upper:]'
+}
+
 lp101() {
 	URL="https://assets.languagepod101.com/dictionary/japanese/audiomp3.php?kanji=$KANJI&kana=$KANA"
 	# 52288 is the content-length of the "the audio for this clip is currently
@@ -34,8 +41,19 @@ jisho() {
 	[ $? -ne 0 ] && return
 	URL="$(echo "$HTML" | pup "audio[id=\"audio_$KANJI:$KANA\"] source attr{src}" | head -n1)"
 	[ -z "$URL" ] && return
-	URL="https:$URL"
-	curl -so - "$URL"
+	curl -so - "https:$URL"
+	exit 0
+}
+
+# NHK 日本語発音アクセント辞典 as served by sakura-paris.org
+nhk() {
+	BASE_URL="https://sakura-paris.org"
+	HTML="$(curl -s "$BASE_URL/dict/NHK日本語発音アクセント辞典/prefix/$KANJI")"
+	[ $? -ne 0 ] && return
+	# first .mp3 <source> under the element whose id is the percent-encoded $KANJI
+	URL="$(echo "$HTML" | pup "#$(urlencode "$KANJI") audio source[src\$=\".mp3\"] attr{src}" | head -n1)"
+	[ -z "$URL" ] && return
+	curl -so - "$BASE_URL$URL"
 	exit 0
 }
 
-- 
cgit v1.2.3