Add ability to specify multiple metapads
This commit is contained in:
parent
226a9c4a38
commit
7c770166a7
2 changed files with 15 additions and 15 deletions
|
@@ -36,12 +36,12 @@ pip install -r requirements.txt
|
|||
|
||||
Start the extractor
|
||||
```
|
||||
python hedgedoc-image.py meta_pad new_netloc
|
||||
python hedgedoc-image.py --replace new_netloc meta_pad1 meta_pad2 ...
|
||||
```
|
||||
|
||||
For example:
|
||||
```
|
||||
python hedgedoc-image.py https://md.margau.net/dbk-meta pad.hacknang.de
|
||||
python hedgedoc-image.py --replace pad.hacknang.de https://md.margau.net/dbk-meta
|
||||
```
|
||||
|
||||
### Produced files
|
||||
|
|
|
@@ -2,7 +2,6 @@ import requests
|
|||
import json
|
||||
import re
|
||||
import argparse
|
||||
|
||||
import os
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
@@ -12,7 +11,7 @@ from os.path import exists
|
|||
PATH = "images/"
|
||||
NEW_NETLOC = ""
|
||||
|
||||
seen_pads = []
|
||||
seen_pads = set()
|
||||
|
||||
pads_json = []
|
||||
|
||||
|
@@ -67,7 +66,7 @@ class PadPage:
|
|||
|
||||
def download_and_process_content(self):
|
||||
print(f"Downloading pad at {self.old_url}")
|
||||
seen_pads.append(self.old_url)
|
||||
seen_pads.add(self.old_url)
|
||||
r = requests.get(self.old_url + "/download")
|
||||
if r.status_code == 200:
|
||||
self.content = r.text
|
||||
|
@@ -109,9 +108,9 @@ class PadPage:
|
|||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Recursivly downloads all images from a hedgedoc pad")
|
||||
parser.add_argument("pad_url", metavar="N", type=str, nargs=1,
|
||||
help="url of the pad to start searching")
|
||||
parser.add_argument("new_pad_url", metavar="N", type=str, nargs=1,
|
||||
parser.add_argument("pad_url", metavar="N", type=str, nargs="+",
|
||||
help="urls of the pad to start searching")
|
||||
parser.add_argument("--replace", dest="new_pad_url", metavar="B", type=str, nargs=1,
|
||||
help="url of the new pad for generating the json")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
@@ -123,19 +122,20 @@ if __name__ == "__main__":
|
|||
old_file = json.load(f)
|
||||
|
||||
for entry in old_file:
|
||||
seen_pads.append(entry["old_url"])
|
||||
seen_pads.add(entry["old_url"])
|
||||
|
||||
pads_json.extend(old_file)
|
||||
print(f"Seen Pads: {seen_pads}")
|
||||
|
||||
|
||||
NEW_NETLOC = args.new_pad_url[0]
|
||||
pad = PadPage(args.pad_url[0])
|
||||
pad.download_and_process_content()
|
||||
pad.download_images()
|
||||
pad.gather_linked_pads()
|
||||
pads_json.append(pad.to_dict())
|
||||
pad.recursive()
|
||||
for pad_url in args.pad_url:
|
||||
pad = PadPage(pad_url)
|
||||
pad.download_and_process_content()
|
||||
pad.download_images()
|
||||
pad.gather_linked_pads()
|
||||
pads_json.append(pad.to_dict())
|
||||
pad.recursive()
|
||||
|
||||
print(f"We have seen {len(seen_pads)} which were {seen_pads}")
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue