aboutsummaryrefslogtreecommitdiff
path: root/bulk-audio
diff options
context:
space:
mode:
authorlonkaars <loek@pipeframe.xyz>2023-10-08 11:20:49 +0200
committerlonkaars <loek@pipeframe.xyz>2023-10-08 11:20:49 +0200
commit855483401c04f4e741733736de7958b8d88db849 (patch)
tree0e3cdeabc621e153a1290ff5f80851a66ab7e7eb /bulk-audio
parentb3e77c6773b3033cc6e35098026b850a93c6841b (diff)
auto-detect Anki profile and media dir using Anki Python API, and add CLI parameters
Diffstat (limited to 'bulk-audio')
-rwxr-xr-xbulk-audio/bulk-audio.py59
1 files changed, 35 insertions, 24 deletions
diff --git a/bulk-audio/bulk-audio.py b/bulk-audio/bulk-audio.py
index f3c8ef6..9d34cdc 100755
--- a/bulk-audio/bulk-audio.py
+++ b/bulk-audio/bulk-audio.py
@@ -5,19 +5,10 @@ import subprocess
import hashlib
import os
import re
+import argparse
from math import floor, log10
-
import aqt
-# change these variables
-AUDIO_FILENAME_PREFIX = "refold-tools-"
-# the anki user to which notes of type NOTE_TYPE belong
-ANKI_USER = "ルーク"
-# the note type name of notes that should get audio fields filled in (see Tools > Manage note types)
-NOTE_TYPE = "Sentence mining"
-# field name to be filled with "[audio:...]" or "noaudio"
-AUDIO_FIELD_NAME = "Audio"
-
# this function only works for refold-tools sentence mining card template
pattern = re.compile("^([^[、 【]+)[^【]*(【(.+)】)?")
def note2kanji_kana(note):
@@ -31,13 +22,32 @@ def note2kanji_kana(note):
kana = kana.replace("・", "")
return (kanji, kana)
-def main():
- ANKI_PATH = os.path.join(os.environ["XDG_DATA_HOME"], "Anki2", ANKI_USER)
- ANKI_COLLECTION = os.path.join(ANKI_PATH, "collection.anki2")
- ANKI_MEDIA = os.path.join(ANKI_PATH, "collection.media")
- col = aqt.Collection(ANKI_COLLECTION)
+def parse_args(argv):
+ parser = argparse.ArgumentParser(
+ description="Bulk Japanese audio downloader (refold-tools)",
+ epilog="""This program calls Anki internally, so any CLI options supported
+ by Anki are forwarded. Run `anki -h` to see available options""",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ )
+ parser.usage = f"{argv[0]} [options] [anki options]"
+ parser.add_argument("-n", "--note-type", help="note type to add audio to", default="Sentence mining")
+ parser.add_argument("-a", "--audio-field", help="field name to modify with audio", default="Audio")
+ parser.add_argument("-f", "--filename-prefix", help="download filename prefix", default="refold-tools-")
+ return parser.parse_known_args(argv[1:])
- model = col.models.by_name(NOTE_TYPE)
+def main():
+ options, args = parse_args(sys.argv)
+ args.insert(0, sys.argv[0]) # restore first index of argv (QT crashes if argv[] is empty)
+
+ # forward remaining CLI parameters to Anki
+ app = aqt._run(args, False)
+ # load last open profile if no profile was specified on command line (option parsed by Anki)
+ if not aqt.mw.pm.name:
+ aqt.mw.pm.load(aqt.mw.pm.last_loaded_profile_name())
+ col = aqt.Collection(aqt.mw.pm.collectionPath())
+ media_dir = col.media.dir()
+
+ model = col.models.by_name(options.note_type)
note_ids = col.models.nids(model)
edited_notes = 0
@@ -45,19 +55,19 @@ def main():
for note_index, note_id in enumerate(note_ids):
note = col.get_note(note_id)
note_index_format = ("{:0" + str(floor(log10(len(note_ids))) + 1) + "d}/{:d}").format(note_index + 1, len(note_ids))
- print(f"[{note_index_format}] ", end="")
+ print(f"[nid:{note_id}] ({note_index_format}) ", end="")
# bulk clear audio field (dev only)
- # note[AUDIO_FIELD_NAME] = ""
+ # note[options.audio_field] = ""
# note.flush()
- # print(f"cleared \"{AUDIO_FIELD_NAME}\" field!")
+ # print(f"cleared \"{options.audio_field}\" field!")
# continue
# autosave deck every 20 cards
if note_index % 20 == 0: col.save()
# skip any notes that already have audio
- if len(note[AUDIO_FIELD_NAME]) > 0:
+ if len(note[options.audio_field]) > 0:
print("skipped -> audio field not empty")
continue
@@ -71,22 +81,22 @@ def main():
# attempt to download audio
exit_code, data = get(kanji, kana)
if exit_code != 0:
- note[AUDIO_FIELD_NAME] = "noaudio"
+ note[options.audio_field] = "noaudio"
note.flush()
print("skipped -> no recording available, marked as 'noaudio'")
continue
# save audio if download was successful
digest = hashlib.sha1(data).hexdigest()
- filename = f"{AUDIO_FILENAME_PREFIX}{digest}.mp3"
- output_path = os.path.join(ANKI_MEDIA, filename)
+ filename = f"{options.filename_prefix}{digest}.mp3"
+ output_path = os.path.join(media_dir, filename)
with open(output_path, "wb+") as f:
f.write(data)
f.close()
# set audio field to audio filename
audio_str = f"[sound:{filename}]"
- note[AUDIO_FIELD_NAME] = audio_str
+ note[options.audio_field] = audio_str
note.flush()
print(f"written audio as {audio_str}")
edited_notes += 1
@@ -97,6 +107,7 @@ def main():
print("-- Done: no edits --")
else:
print(f"-- Done: succesfully edited {edited_notes} note{'' if edited_notes == 1 else 's'} --")
+ print("TODO: circumvent below error message (anki python api problems, notes were edited succesfully though):")
# run ./get to get audio data from stdout
# returns (exit_code, stdout_data)