Also save pad content on crawl

This commit is contained in:
Nick Hahn 2022-09-16 23:06:07 +02:00
parent 7c770166a7
commit 11b5541c0f

View file

@@ -8,15 +8,17 @@ from urllib.parse import urlparse
from typing import List
from os.path import exists
PATH = "images/"
IMG_PATH = "images/"
PAD_PATH = "pads/"
NEW_NETLOC = ""
seen_pads = set()
pads_json = []
os.makedirs(PATH, exist_ok=True)
os.makedirs(PATH + "uploads", exist_ok=True)
os.makedirs(PAD_PATH, exist_ok=True)
os.makedirs(IMG_PATH, exist_ok=True)
os.makedirs(IMG_PATH + "uploads", exist_ok=True)
class Image:
@@ -27,7 +29,7 @@ class Image:
def download(self):
r = requests.get(self.old_url)
p = PATH + urlparse(self.old_url).path
p = IMG_PATH + urlparse(self.old_url).path
with open(p, 'wb') as f:
f.write(r.content)
print(f"Downloaded image {self.old_url} to {p}")
@@ -70,6 +72,8 @@ class PadPage:
r = requests.get(self.old_url + "/download")
if r.status_code == 200:
self.content = r.text
with open(PAD_PATH + urlparse(self.old_url).path + ".md", "w") as f:
f.write(self.content)
num = self._find_images()
print(f"Found {num} images")
else: