auto-detect Anki profile and media dir using Anki Python API, and add CLI parameters

author: lonkaars <loek@pipeframe.xyz> 2023-10-08 11:20:49 +0200
committer: lonkaars <loek@pipeframe.xyz> 2023-10-08 11:20:49 +0200
commit: 855483401c04f4e741733736de7958b8d88db849 (patch)
tree: 0e3cdeabc621e153a1290ff5f80851a66ab7e7eb /bulk-audio/bulk-audio.py
parent: b3e77c6773b3033cc6e35098026b850a93c6841b (diff)
1 files changed, 35 insertions, 24 deletions
diff --git a/bulk-audio/bulk-audio.py b/bulk-audio/bulk-audio.py
index f3c8ef6..9d34cdc 100755
--- a/bulk-audio/bulk-audio.py
+++ b/bulk-audio/bulk-audio.py
@@ -5,19 +5,10 @@ import subprocess
 import hashlib
 import os
 import re
+import argparse
 from math import floor, log10
-
 import aqt
 
-# change these variables
-AUDIO_FILENAME_PREFIX = "refold-tools-"
-# the anki user to which notes of type NOTE_TYPE belong
-ANKI_USER = "ルーク"
-# the note type name of notes that should get audio fields filled in (see Tools > Manage note types)
-NOTE_TYPE = "Sentence mining"
-# field name to be filled with "[audio:...]" or "noaudio"
-AUDIO_FIELD_NAME = "Audio"
-
 # this function only works for refold-tools sentence mining card template
 pattern = re.compile("^([^[、 【]+)[^【]*(【(.+)】)?")
 def note2kanji_kana(note):
@@ -31,13 +22,32 @@ def note2kanji_kana(note):
   kana = kana.replace("・", "")
   return (kanji, kana)
 
-def main():
-  ANKI_PATH = os.path.join(os.environ["XDG_DATA_HOME"], "Anki2", ANKI_USER)
-  ANKI_COLLECTION = os.path.join(ANKI_PATH, "collection.anki2")
-  ANKI_MEDIA = os.path.join(ANKI_PATH, "collection.media")
-  col = aqt.Collection(ANKI_COLLECTION)
+def parse_args(argv):
+  parser = argparse.ArgumentParser(
+    description="Bulk Japanese audio downloader (refold-tools)",
+    epilog="""This program calls Anki internally, so any CLI options supported
+    by Anki are forwarded. Run `anki -h` to see available options""",
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+  )
+  parser.usage = f"{argv[0]} [options] [anki options]"
+  parser.add_argument("-n", "--note-type", help="note type to add audio to", default="Sentence mining")
+  parser.add_argument("-a", "--audio-field", help="field name to modify with audio", default="Audio")
+  parser.add_argument("-f", "--filename-prefix", help="download filename prefix", default="refold-tools-")
+  return parser.parse_known_args(argv[1:])
 
-  model = col.models.by_name(NOTE_TYPE)
+def main():
+  options, args = parse_args(sys.argv)
+  args.insert(0, sys.argv[0]) # restore first index of argv (QT crashes if argv[] is empty)
+
+  # forward remaining CLI parameters to Anki
+  app = aqt._run(args, False)
+  # load last open profile if no profile was specified on command line (option parsed by Anki)
+  if not aqt.mw.pm.name:
+    aqt.mw.pm.load(aqt.mw.pm.last_loaded_profile_name())
+  col = aqt.Collection(aqt.mw.pm.collectionPath())
+  media_dir = col.media.dir()
+
+  model = col.models.by_name(options.note_type)
   note_ids = col.models.nids(model)
 
   edited_notes = 0
@@ -45,19 +55,19 @@ def main():
   for note_index, note_id in enumerate(note_ids):
     note = col.get_note(note_id)
     note_index_format = ("{:0" + str(floor(log10(len(note_ids))) + 1) + "d}/{:d}").format(note_index + 1, len(note_ids))
-    print(f"[{note_index_format}] ", end="")
+    print(f"[nid:{note_id}] ({note_index_format}) ", end="")
 
     # bulk clear audio field (dev only)
-    # note[AUDIO_FIELD_NAME] = ""
+    # note[options.audio_field] = ""
     # note.flush()
-    # print(f"cleared \"{AUDIO_FIELD_NAME}\" field!")
+    # print(f"cleared \"{options.audio_field}\" field!")
     # continue
 
     # autosave deck every 20 cards
     if note_index % 20 == 0: col.save()
 
     # skip any notes that already have audio
-    if len(note[AUDIO_FIELD_NAME]) > 0:
+    if len(note[options.audio_field]) > 0:
       print("skipped -> audio field not empty")
       continue
 
@@ -71,22 +81,22 @@ def main():
     # attempt to download audio
     exit_code, data = get(kanji, kana)
     if exit_code != 0:
-      note[AUDIO_FIELD_NAME] = "noaudio"
+      note[options.audio_field] = "noaudio"
       note.flush()
       print("skipped -> no recording available, marked as 'noaudio'")
       continue
 
     # save audio if download was successful
     digest = hashlib.sha1(data).hexdigest()
-    filename = f"{AUDIO_FILENAME_PREFIX}{digest}.mp3"
-    output_path = os.path.join(ANKI_MEDIA, filename)
+    filename = f"{options.filename_prefix}{digest}.mp3"
+    output_path = os.path.join(media_dir, filename)
     with open(output_path, "wb+") as f:
       f.write(data)
       f.close()
 
     # set audio field to audio filename
     audio_str = f"[sound:{filename}]"
-    note[AUDIO_FIELD_NAME] = audio_str
+    note[options.audio_field] = audio_str
     note.flush()
     print(f"written audio as {audio_str}")
     edited_notes += 1
@@ -97,6 +107,7 @@ def main():
     print("-- Done: no edits --")
   else:
     print(f"-- Done: succesfully edited {edited_notes} note{'' if edited_notes == 1 else 's'} --")
+  print("TODO: circumvent below error message (anki python api problems, notes were edited succesfully though):")
 
 # run ./get to get audio data from stdout
 # returns (exit_code, stdout_data)
author	lonkaars <loek@pipeframe.xyz>	2023-10-08 11:20:49 +0200
committer	lonkaars <loek@pipeframe.xyz>	2023-10-08 11:20:49 +0200
commit	855483401c04f4e741733736de7958b8d88db849 (patch)
tree	0e3cdeabc621e153a1290ff5f80851a66ab7e7eb /bulk-audio/bulk-audio.py
parent	b3e77c6773b3033cc6e35098026b850a93c6841b (diff)