aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-10-08 20:15:44 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-10-08 20:15:44 +0200
commit a4e236f13426f9df5db864edda63eecf26c386c7 (patch)
tree 3b4927cea1c861303c95b7534e44d4dc913eeba8
parent 711b0913548ebb8b91779633024f51e3133fd162 (diff)
add NHK 日本語発音アクセント辞典 audio source
-rwxr-xr-x bulk-audio/bulk-audio.py 1
-rwxr-xr-x bulk-audio/get 19
2 files changed, 17 insertions, 3 deletions
diff --git a/bulk-audio/bulk-audio.py b/bulk-audio/bulk-audio.py
index 1fa84bb..15ab33f 100755
--- a/bulk-audio/bulk-audio.py
+++ b/bulk-audio/bulk-audio.py
@@ -12,6 +12,7 @@ from time import sleep
real_stdout = sys.stdout
# Minimal file-like object whose write() and flush() both do nothing; an
# instance of it is assigned to sys.stdout right below this class.
class TrashFileIO(object):
def write(self, x): pass
# NOTE(review): presumably added because something calls sys.stdout.flush()
# while stdout is replaced -- confirm against the caller
+ def flush(self): pass
trash_out = TrashFileIO()
sys.stdout = trash_out
diff --git a/bulk-audio/get b/bulk-audio/get
index bc3839d..21dda29 100755
--- a/bulk-audio/get
+++ b/bulk-audio/get
@@ -1,10 +1,14 @@
#!/bin/sh
SCRIPT_NAME="$0"
-SOURCES="lp101,lp101_alt,jisho"
+SOURCES="lp101,lp101_alt,jisho,nhk"
KANJI=""
KANA=""
+urlencode() {
+ printf '%s' "$1" | od -An -tx1 | tr ' ' '%' | tr '[:lower:]' '[:upper:]'
+}
+
lp101() {
URL="https://assets.languagepod101.com/dictionary/japanese/audiomp3.php?kanji=$KANJI&kana=$KANA"
# 52288 is the content-length of the "the audio for this clip is currently
@@ -34,8 +38,17 @@ jisho() {
[ $? -ne 0 ] && return
URL="$(echo "$HTML" | pup "audio[id=\"audio_$KANJI:$KANA\"] source attr{src}" | head -n1)"
[ -z "$URL" ] && return
- URL="https:$URL"
- curl -so - "$URL"
+ curl -so - "https:$URL"
+ exit 0
+}
+
+nhk() {
+ BASE_URL="https://sakura-paris.org"
+ HTML="$(curl -s "$BASE_URL/dict/NHK日本語発音アクセント辞典/prefix/$KANJI")"
+ [ $? -ne 0 ] && return
+ URL="$(echo "$HTML" | pup "#$(urlencode $KANJI) audio source[src\$=\".mp3\"] attr{src}" | head -n1)"
+ [ -z "$URL" ] && return
+ curl -so - "$BASE_URL$URL"
exit 0
}