From aa1d328a857da0da037f450e89fd0b77424aaaf6 Mon Sep 17 00:00:00 2001 From: Loek Le Blansch Date: Thu, 15 Aug 2024 13:56:14 +0200 Subject: clean up TW TL field + fix escape --- anki-card-template/bulk-cleanup.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/anki-card-template/bulk-cleanup.py b/anki-card-template/bulk-cleanup.py index 5c2c437..8bf3e5c 100755 --- a/anki-card-template/bulk-cleanup.py +++ b/anki-card-template/bulk-cleanup.py @@ -43,7 +43,7 @@ def recurseplainify(soup): continue if el.name == 'ruby': - output += f'[{el.text}]({el.rt.text})' + output += f'[{escape(el.text)}]({escape(el.rt.text)})' continue output += recurseplainify(el) @@ -81,17 +81,25 @@ def main(): note = col.get_note(note_id) print(f"[nid:{note_id}] ({note_index_format.format(note_index + 1, len(note_ids))})", end="") - if note['Complete sentence'].find('<') >= 0: + field = 'Complete sentence' + if note[field].find('<') >= 0: print(" -> sentence HTML to plain", end="") - note['Complete sentence'] = html2cardtemplate(note['Complete sentence']) + note[field] = html2cardtemplate(note[field]) edited = True - if note['Target word reading'].find('<') >= 0: - soup = BeautifulSoup(note['Target word reading']) - note['Target word reading'] = soup.get_text() + field = 'Target word reading' + if note[field].find('<') >= 0: + soup = BeautifulSoup(note[field]) + note[field] = soup.get_text() print(" -> stripped HTML from TW reading", end="") edited = True + field = 'Target word translation' + if note[field].find('<') >= 0: + print(" -> TW TL to plain", end="") + note[field] = html2cardtemplate(note[field]) + edited = True + if not edited: print("unmodified", end="\r") else: -- cgit v1.2.3