Also save pad content on crawl
This commit is contained in:
parent
7c770166a7
commit
11b5541c0f
1 changed file with 8 additions and 4 deletions
|
@@ -8,15 +8,17 @@ from urllib.parse import urlparse
|
|||
from typing import List
|
||||
from os.path import exists
|
||||
|
||||
PATH = "images/"
|
||||
IMG_PATH = "images/"
|
||||
PAD_PATH = "pads/"
|
||||
NEW_NETLOC = ""
|
||||
|
||||
seen_pads = set()
|
||||
|
||||
pads_json = []
|
||||
|
||||
os.makedirs(PATH, exist_ok=True)
|
||||
os.makedirs(PATH + "uploads", exist_ok=True)
|
||||
os.makedirs(PAD_PATH, exist_ok=True)
|
||||
os.makedirs(IMG_PATH, exist_ok=True)
|
||||
os.makedirs(IMG_PATH + "uploads", exist_ok=True)
|
||||
|
||||
|
||||
class Image:
|
||||
|
@@ -27,7 +29,7 @@ class Image:
|
|||
|
||||
def download(self):
|
||||
r = requests.get(self.old_url)
|
||||
p = PATH + urlparse(self.old_url).path
|
||||
p = IMG_PATH + urlparse(self.old_url).path
|
||||
with open(p, 'wb') as f:
|
||||
f.write(r.content)
|
||||
print(f"Downloaded image {self.old_url} to {p}")
|
||||
|
@@ -70,6 +72,8 @@ class PadPage:
|
|||
r = requests.get(self.old_url + "/download")
|
||||
if r.status_code == 200:
|
||||
self.content = r.text
|
||||
with open(PAD_PATH + urlparse(self.old_url).path + ".md", "w") as f:
|
||||
f.write(self.content)
|
||||
num = self._find_images()
|
||||
print(f"Found {num} images")
|
||||
else:
|
||||
|
|
Loading…
Add table
Reference in a new issue