From 11b5541c0f9feac4a3f7e808a4afdeee3b10c7e5 Mon Sep 17 00:00:00 2001 From: Nick Hahn Date: Fri, 16 Sep 2022 23:06:07 +0200 Subject: [PATCH] Also save pad content on crawl --- hedgedoc-image.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hedgedoc-image.py b/hedgedoc-image.py index 753fd44..05dd570 100644 --- a/hedgedoc-image.py +++ b/hedgedoc-image.py @@ -8,15 +8,17 @@ from urllib.parse import urlparse from typing import List from os.path import exists -PATH = "images/" +IMG_PATH = "images/" +PAD_PATH = "pads/" NEW_NETLOC = "" seen_pads = set() pads_json = [] -os.makedirs(PATH, exist_ok=True) -os.makedirs(PATH + "uploads", exist_ok=True) +os.makedirs(PAD_PATH, exist_ok=True) +os.makedirs(IMG_PATH, exist_ok=True) +os.makedirs(IMG_PATH + "uploads", exist_ok=True) class Image: @@ -27,7 +29,7 @@ class Image: def download(self): r = requests.get(self.old_url) - p = PATH + urlparse(self.old_url).path + p = IMG_PATH + urlparse(self.old_url).path with open(p, 'wb') as f: f.write(r.content) print(f"Downloaded image {self.old_url} to {p}") @@ -70,6 +72,8 @@ class PadPage: r = requests.get(self.old_url + "/download") if r.status_code == 200: self.content = r.text + with open(PAD_PATH + urlparse(self.old_url).path + ".md", "w") as f: + f.write(self.content) num = self._find_images() print(f"Found {num} images") else: