Add ability to specify multiple metapads

This commit is contained in:
Nick Hahn 2022-09-15 19:31:00 +02:00
parent 226a9c4a38
commit 7c770166a7
2 changed files with 15 additions and 15 deletions

View file

@ -36,12 +36,12 @@ pip install -r requirements.txt
Start the extractor Start the extractor
``` ```
python hedgedoc-image.py meta_pad new_netloc python hedgedoc-image.py --replace new_netloc meta_pad1 meta_pad2 ...
``` ```
For example: For example:
``` ```
python hedgedoc-image.py https://md.margau.net/dbk-meta pad.hacknang.de python hedgedoc-image.py --replace pad.hacknang.de https://md.margau.net/dbk-meta
``` ```
### Produced files ### Produced files

View file

@ -2,7 +2,6 @@ import requests
import json import json
import re import re
import argparse import argparse
import os import os
from urllib.parse import urlparse from urllib.parse import urlparse
@ -12,7 +11,7 @@ from os.path import exists
PATH = "images/" PATH = "images/"
NEW_NETLOC = "" NEW_NETLOC = ""
seen_pads = [] seen_pads = set()
pads_json = [] pads_json = []
@ -67,7 +66,7 @@ class PadPage:
def download_and_process_content(self): def download_and_process_content(self):
print(f"Downloading pad at {self.old_url}") print(f"Downloading pad at {self.old_url}")
seen_pads.append(self.old_url) seen_pads.add(self.old_url)
r = requests.get(self.old_url + "/download") r = requests.get(self.old_url + "/download")
if r.status_code == 200: if r.status_code == 200:
self.content = r.text self.content = r.text
@ -109,9 +108,9 @@ class PadPage:
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Recursivly downloads all images from a hedgedoc pad") parser = argparse.ArgumentParser(description="Recursivly downloads all images from a hedgedoc pad")
parser.add_argument("pad_url", metavar="N", type=str, nargs=1, parser.add_argument("pad_url", metavar="N", type=str, nargs="+",
help="url of the pad to start searching") help="urls of the pad to start searching")
parser.add_argument("new_pad_url", metavar="N", type=str, nargs=1, parser.add_argument("--replace", dest="new_pad_url", metavar="B", type=str, nargs=1,
help="url of the new pad for generating the json") help="url of the new pad for generating the json")
args = parser.parse_args() args = parser.parse_args()
@ -123,19 +122,20 @@ if __name__ == "__main__":
old_file = json.load(f) old_file = json.load(f)
for entry in old_file: for entry in old_file:
seen_pads.append(entry["old_url"]) seen_pads.add(entry["old_url"])
pads_json.extend(old_file) pads_json.extend(old_file)
print(f"Seen Pads: {seen_pads}") print(f"Seen Pads: {seen_pads}")
NEW_NETLOC = args.new_pad_url[0] NEW_NETLOC = args.new_pad_url[0]
pad = PadPage(args.pad_url[0]) for pad_url in args.pad_url:
pad.download_and_process_content() pad = PadPage(pad_url)
pad.download_images() pad.download_and_process_content()
pad.gather_linked_pads() pad.download_images()
pads_json.append(pad.to_dict()) pad.gather_linked_pads()
pad.recursive() pads_json.append(pad.to_dict())
pad.recursive()
print(f"We have seen {len(seen_pads)} which were {seen_pads}") print(f"We have seen {len(seen_pads)} which were {seen_pads}")