From 2b30b277e70d471890a9015867b3df8ec0b41a55 Mon Sep 17 00:00:00 2001
From: Patrick Schwarz <ps@p-schwarz.de>
Date: Tue, 13 Sep 2022 21:25:02 +0200
Subject: [PATCH 1/9] Unsuccessfully try to change cypress test to migrate a
 bunch of pads using json file

---
 cypress/e2e/hedgedoc-migrator.cy.js | 71 +++++++++++++++++++----------
 cypress/fixtures/pads.json          | 10 ++++
 2 files changed, 58 insertions(+), 23 deletions(-)
 create mode 100644 cypress/fixtures/pads.json

diff --git a/cypress/e2e/hedgedoc-migrator.cy.js b/cypress/e2e/hedgedoc-migrator.cy.js
index 82c5430..faa1003 100644
--- a/cypress/e2e/hedgedoc-migrator.cy.js
+++ b/cypress/e2e/hedgedoc-migrator.cy.js
@@ -1,31 +1,56 @@
+String.prototype.replaceLast = function (what, replacement) {
+    var pcs = this.split(what);
+    var lastPc = pcs.pop();
+    return pcs.join(what) + replacement + lastPc;
+}
+
 async function getContent(url) {
   const res = await fetch(url.concat('/download'));
   return res.text();
 }
 
-async function migrateDocument(url, baseUrl) {
-  const content = await getContent(url);
-  cy.request({
-    url: baseUrl.concat('/new'),
-    method: 'POST',
-    headers: {
-      'Content-Type': 'text/markdown',
-      'Access-Control-Allow-Origin': new URL(baseUrl).hostname,
-    },
-    body: content,
-  }).then((res) => {
-    const redirect = res.redirects[0].split(' ')[1];
-    cy.visit(url);
-    cy.get('#view-mode-toggle-edit').click({force: true});
-    cy.get('.CodeMirror-scroll').type('{ctrl}a{backspace}');
-    cy.get('.CodeMirror-scroll').type(`Moved to [${redirect}](${redirect})`);
-  });
+async function migrateDocument(oldUrl, newUrl) {
+  // get content of old pad
+  var content = await getContent(oldUrl);
+  
+  // replace URLs
+  content = content.replaceAll(new URL(oldUrl).hostname, new URL(newUrl).hostname);
+  
+  // visit new pad url (not possible via post api request because pad may already exists)
+  // Caution: Content of new pad url will be overwritten!
+  // Caution: History will not be moved to new pad url
+  cy.visit(newUrl.concat('?edit'));
+  cy.window().then((win) => {
+  
+  	// Write Content
+  	cy.get('.CodeMirror-scroll').type('{ctrl}a{backspace}');
+  	cy.get('.CodeMirror-scroll').type(content);
+  	
+  	// Visit old pad and replace content
+  	cy.visit(oldUrl);
+   	cy.get('#view-mode-toggle-edit').click({force: true});
+  	cy.get('.CodeMirror-scroll').type('{ctrl}a{backspace}');
+  	cy.get('.CodeMirror-scroll').type(`# 301 Pad moved{enter}=> [${newUrl}](${newUrl})`);
+  	
+  })
+  
 }
 
 describe('Migrate document', () => {
-  it('passes', async () => {
-    const baseUrl = 'https://md.margau.net';
-    const url = 'https://md.margau.net/H0JO3L5DS-6Yhv4RrdS-tw';
-    migrateDocument(url, baseUrl);
-  });
-});
+	it('passes', async () => {
+  
+    	// Read list of pads to migrate
+    	cy.fixture('pads').then( pads => {
+
+	    	if(pads.length === 0) {
+    			console.log("Didn't find any pad urls to migrate");
+    		}
+    	
+    		for (const pad of pads) {
+   				migrateDocument(pad.oldUrl, pad.newUrl);
+	   		}  	
+
+    	})
+	
+	})
+})
diff --git a/cypress/fixtures/pads.json b/cypress/fixtures/pads.json
new file mode 100644
index 0000000..98ca924
--- /dev/null
+++ b/cypress/fixtures/pads.json
@@ -0,0 +1,10 @@
+[
+   {
+      "oldUrl":"https://md.margau.net/test",
+      "newUrl":"https://pad.hacknang.de/test"
+   },
+   {
+      "oldUrl":"https://md.margau.net/test3",
+      "newUrl":"https://pad.hacknang.de/test2"
+   }
+]

From d10cf37bfd61b7b84013ab8e54ca52b74498339e Mon Sep 17 00:00:00 2001
From: Patrick Schwarz <ps@p-schwarz.de>
Date: Wed, 14 Sep 2022 00:14:31 +0200
Subject: [PATCH 2/9] Fix non-working migration script with cypress
 async magic

---
 cypress/e2e/hedgedoc-migrator.cy.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cypress/e2e/hedgedoc-migrator.cy.js b/cypress/e2e/hedgedoc-migrator.cy.js
index faa1003..6c671ff 100644
--- a/cypress/e2e/hedgedoc-migrator.cy.js
+++ b/cypress/e2e/hedgedoc-migrator.cy.js
@@ -40,14 +40,14 @@ describe('Migrate document', () => {
 	it('passes', async () => {
   
     	// Read list of pads to migrate
-    	cy.fixture('pads').then( pads => {
+    	cy.fixture('pads').then(async(pads) => {
 
 	    	if(pads.length === 0) {
     			console.log("Didn't find any pad urls to migrate");
     		}
     	
     		for (const pad of pads) {
-   				migrateDocument(pad.oldUrl, pad.newUrl);
+   				await migrateDocument(pad.oldUrl, pad.newUrl);
 	   		}  	
 
     	})

From 7a422ef89bb09df9b00466ce8586376632e17bce Mon Sep 17 00:00:00 2001
From: Nick Hahn <nick.hahn@posteo.de>
Date: Wed, 14 Sep 2022 14:04:35 +0200
Subject: [PATCH 3/9] Add python extractor

---
 .gitignore        |   5 ++
 README.md         |  31 +++++++++++
 hedgedoc-image.py | 130 ++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt  |   8 +++
 4 files changed, 174 insertions(+)
 create mode 100644 hedgedoc-image.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
index ceaea36..fde6bf5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
+# Python
+__pycache__
+venv/
+images/
+pads.json
 # ---> Node
 # Logs
 logs
diff --git a/README.md b/README.md
index ef10214..319aef6 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,37 @@ Follow the guide over [here](https://docs.cypress.io/guides/getting-started/inst
 
 You also require NodeJS. Run `npm ci` to install the required packages.
 
+Create new python venv:
+```
+python -m venv venv
+```
+
+Activate the new environment (linux)
+
+```
+source venv/bin/activate
+```
+
+Install the requirements
+```
+pip -r requirements.txt
+```
+
 ## Execution
 
 To start cypress, simply execute `npx cypress open`. Then click `E2E Testing` and run using electron. This step could be automated using the `cypress` [API](https://docs.cypress.io/guides/guides/module-api).
+
+Start the extractor
+```
+python hedgedoc-image.py meta_pad new_netloc
+```
+
+For example:
+```
+python hedgedoc-image.py https://md.margau.net/dbk-meta pad.hacknang.de
+```
+
+## Produced files
+
+The python scripts produces a `pads.json` which contains the mapping from `old_url` to `new_url`.
+All images land in `images/uploads`. Only images hosted on the `old_pads` URL are saved
diff --git a/hedgedoc-image.py b/hedgedoc-image.py
new file mode 100644
index 0000000..184b275
--- /dev/null
+++ b/hedgedoc-image.py
@@ -0,0 +1,130 @@
+import requests
+import json
+import re
+import argparse
+
+import os
+
+from urllib.parse import urlparse
+from typing import List
+
+PATH = "images/"
+NEW_NETLOC = ""
+
+seen_pads = []
+
+pads_json = []
+
+os.makedirs(PATH, exist_ok=True)
+os.makedirs(PATH + "uploads", exist_ok=True)
+# TODO: Loop detection
+# TODO: Recursion
+class Image:
+    def __init__(self, _old_url: str) -> None:
+        self.old_url = _old_url
+        old = urlparse(self.old_url)
+        self.new_url = "." + old.path;
+
+    def download(self):
+        print("HERE")
+        r = requests.get(self.old_url)
+        p = PATH + urlparse(self.old_url).path
+        with open(p, 'wb') as f:
+            f.write(r.content)
+            print(f"Downloaded image {self.old_url} to {p}")
+
+
+class PadPage:
+    def __init__(self, _old_url: str):
+        self.old_url: str = _old_url
+        #self.name: str = _name
+        self.images: List = []
+        self.linked_pads = []
+        self.content: str = ""
+
+    def gather_linked_pads(self):
+        regex = r'https://[\w\d.]+/(?!upload)[\w\-_]+'
+        matches = re.findall(regex, self.content)
+
+        full_url = urlparse(self.old_url)
+        for match in matches:
+            print(f"match: {match}")
+            url = urlparse(match)
+            if url.netloc == full_url.netloc:
+                self.linked_pads.append(PadPage(match))
+            else:
+                print("Dropped pad, wrong netloc")
+
+    def to_dict(self) -> dict:
+        old = urlparse(self.old_url)
+        new = old._replace(netloc="pad.hacknang.de")
+        return {"old_url": self.old_url, "new_url": new.geturl()}
+
+    def to_json(self) -> str:
+        old = urlparse(self.old_url)
+        new = old._replace(netloc=NEW_NETLOC)
+        return json.dumps({"old_url": self.old_url, "new_url": new.geturl()})
+
+    def download_and_process_content(self):
+        print(f"Downloading pad at {self.old_url}")
+        seen_pads.append(self.old_url)
+        r = requests.get(self.old_url + "/download")
+        if r.status_code == 200:
+            self.content = r.text
+            num = self._find_images()
+            print(f"Found {num} images")
+        else:
+            print(f"Error downloading Pad {self.old_url}, got HTTP status code {r.status_code}")
+
+    # returns number of images found
+    def _find_images(self) -> int:
+        regex = r'https://[\w\d.]+/uploads/[\w\d]+\.(?:png|jpg|jpeg|webp)'
+        matches = re.findall(regex, self.content)
+
+        full_url = urlparse(self.old_url)
+        for match in matches:
+            print(f"match: {match}")
+            url = urlparse(match)
+            if url.netloc == full_url.netloc:
+                i = Image(match)
+                self.images.append(i)
+            else:
+                print("Dropped pad, wrong netloc")
+        return len(matches)
+
+    def download_images(self):
+        for i in self.images:
+            i.download()
+
+    def recursive(self):
+        for pad in self.linked_pads:
+            if pad.old_url not in seen_pads:
+                print(f"New pad found: {pad.old_url}")
+                pad.download_and_process_content()
+                pad.download_images()
+                pad.gather_linked_pads()
+                pads_json.append(pad.to_dict())
+                pad.recursive()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Recursivly downloads all images from a hedgedoc pad")
+    parser.add_argument("pad_url", metavar="N", type=str, nargs=1,
+                        help="url of the pad to start searching")
+    parser.add_argument("new_pad_url", metavar="N", type=str, nargs=1,
+                        help="url of the new pad for generating the json")
+
+    args = parser.parse_args()
+
+    NEW_NETLOC = args.new_pad_url[0]
+    pad = PadPage(args.pad_url[0])
+    pad.download_and_process_content()
+    pad.download_images()
+    pad.gather_linked_pads()
+    pads_json.append(pad.to_dict())
+    pad.recursive()
+
+    print(f"We have seen {len(seen_pads)} which were {seen_pads}")
+
+    with open("pads.json", "w") as f:
+        f.write(json.dumps(pads_json))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a20d73b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+beautifulsoup4==4.11.1
+bs4==0.0.1
+certifi==2022.6.15.1
+charset-normalizer==2.1.1
+idna==3.3
+requests==2.28.1
+soupsieve==2.3.2.post1
+urllib3==1.26.12

From 425b13d7a4df033d00469c77fd21b91cc955bf45 Mon Sep 17 00:00:00 2001
From: Nick Hahn <nick.hahn@posteo.de>
Date: Thu, 15 Sep 2022 18:34:42 +0200
Subject: [PATCH 4/9] Preserve old runs in pads.json

---
 hedgedoc-image.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/hedgedoc-image.py b/hedgedoc-image.py
index 184b275..dcb5d90 100644
--- a/hedgedoc-image.py
+++ b/hedgedoc-image.py
@@ -7,6 +7,7 @@ import os
 
 from urllib.parse import urlparse
 from typing import List
+from os.path import exists
 
 PATH = "images/"
 NEW_NETLOC = ""
@@ -17,8 +18,8 @@ pads_json = []
 
 os.makedirs(PATH, exist_ok=True)
 os.makedirs(PATH + "uploads", exist_ok=True)
-# TODO: Loop detection
-# TODO: Recursion
+
+
 class Image:
     def __init__(self, _old_url: str) -> None:
         self.old_url = _old_url
@@ -26,7 +27,6 @@ class Image:
         self.new_url = "." + old.path;
 
     def download(self):
-        print("HERE")
         r = requests.get(self.old_url)
         p = PATH + urlparse(self.old_url).path
         with open(p, 'wb') as f:
@@ -116,6 +116,19 @@ if __name__ == "__main__":
 
     args = parser.parse_args()
 
+    old_file = []
+
+    if exists("pads.json"):
+        with open("pads.json", "r") as f:
+            old_file = json.load(f)
+
+        for entry in old_file:
+            seen_pads.append(entry["old_url"])
+
+        pads_json.extend(old_file)
+        print(f"Seen Pads: {seen_pads}")
+
+
     NEW_NETLOC = args.new_pad_url[0]
     pad = PadPage(args.pad_url[0])
     pad.download_and_process_content()

From 226a9c4a38b0a1b1b07e47067429ef08fd26316e Mon Sep 17 00:00:00 2001
From: Nick Hahn <nick.hahn@posteo.de>
Date: Thu, 15 Sep 2022 18:36:16 +0200
Subject: [PATCH 5/9] Cleaned up readme

---
 README.md | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 319aef6..7215df9 100644
--- a/README.md
+++ b/README.md
@@ -2,12 +2,20 @@
 
 Simple script to migrate content from one Hedgedoc instance to another.
 
-## Setup
+## Cypress
+
 
 Follow the guide over [here](https://docs.cypress.io/guides/getting-started/installing-cypress#Linux-Prerequisites).
 
 You also require NodeJS. Run `npm ci` to install the required packages.
 
+### Execution
+
+To start cypress, simply execute `npx cypress open`. Then click `E2E Testing` and run using electron. This step could be automated using the `cypress` [API](https://docs.cypress.io/guides/guides/module-api).
+
+
+## Python
+
 Create new python venv:
 ```
 python -m venv venv
@@ -21,12 +29,10 @@ source venv/bin/activate
 
 Install the requirements
 ```
-pip -r requirements.txt
+pip install -r requirements.txt
 ```
 
-## Execution
-
-To start cypress, simply execute `npx cypress open`. Then click `E2E Testing` and run using electron. This step could be automated using the `cypress` [API](https://docs.cypress.io/guides/guides/module-api).
+### Execution
 
 Start the extractor
 ```
@@ -38,7 +44,7 @@ For example:
 python hedgedoc-image.py https://md.margau.net/dbk-meta pad.hacknang.de
 ```
 
-## Produced files
+### Produced files
 
 The python scripts produces a `pads.json` which contains the mapping from `old_url` to `new_url`.
 All images land in `images/uploads`. Only images hosted on the `old_pads` URL are saved

From 7c770166a75b2930292e0c80ac466c21299ac7d3 Mon Sep 17 00:00:00 2001
From: Nick Hahn <nick.hahn@posteo.de>
Date: Thu, 15 Sep 2022 19:31:00 +0200
Subject: [PATCH 6/9] Add ability to specify multiple metapads

---
 README.md         |  4 ++--
 hedgedoc-image.py | 26 +++++++++++++-------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 7215df9..64ccbb3 100644
--- a/README.md
+++ b/README.md
@@ -36,12 +36,12 @@ pip install -r requirements.txt
 
 Start the extractor
 ```
-python hedgedoc-image.py meta_pad new_netloc
+python hedgedoc-image.py --replace new_netloc meta_pad1 meta_pad2 ...
 ```
 
 For example:
 ```
-python hedgedoc-image.py https://md.margau.net/dbk-meta pad.hacknang.de
+python hedgedoc-image.py --replace pad.hacknang.de https://md.margau.net/dbk-meta
 ```
 
 ### Produced files
diff --git a/hedgedoc-image.py b/hedgedoc-image.py
index dcb5d90..753fd44 100644
--- a/hedgedoc-image.py
+++ b/hedgedoc-image.py
@@ -2,7 +2,6 @@ import requests
 import json
 import re
 import argparse
-
 import os
 
 from urllib.parse import urlparse
@@ -12,7 +11,7 @@ from os.path import exists
 PATH = "images/"
 NEW_NETLOC = ""
 
-seen_pads = []
+seen_pads = set()
 
 pads_json = []
 
@@ -67,7 +66,7 @@ class PadPage:
 
     def download_and_process_content(self):
         print(f"Downloading pad at {self.old_url}")
-        seen_pads.append(self.old_url)
+        seen_pads.add(self.old_url)
         r = requests.get(self.old_url + "/download")
         if r.status_code == 200:
             self.content = r.text
@@ -109,9 +108,9 @@ class PadPage:
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Recursivly downloads all images from a hedgedoc pad")
-    parser.add_argument("pad_url", metavar="N", type=str, nargs=1,
-                        help="url of the pad to start searching")
-    parser.add_argument("new_pad_url", metavar="N", type=str, nargs=1,
+    parser.add_argument("pad_url", metavar="N", type=str, nargs="+",
+                        help="urls of the pad to start searching")
+    parser.add_argument("--replace", dest="new_pad_url", metavar="B", type=str, nargs=1,
                         help="url of the new pad for generating the json")
 
     args = parser.parse_args()
@@ -123,19 +122,20 @@ if __name__ == "__main__":
             old_file = json.load(f)
 
         for entry in old_file:
-            seen_pads.append(entry["old_url"])
+            seen_pads.add(entry["old_url"])
 
         pads_json.extend(old_file)
         print(f"Seen Pads: {seen_pads}")
 
 
     NEW_NETLOC = args.new_pad_url[0]
-    pad = PadPage(args.pad_url[0])
-    pad.download_and_process_content()
-    pad.download_images()
-    pad.gather_linked_pads()
-    pads_json.append(pad.to_dict())
-    pad.recursive()
+    for pad_url in args.pad_url:
+        pad = PadPage(pad_url)
+        pad.download_and_process_content()
+        pad.download_images()
+        pad.gather_linked_pads()
+        pads_json.append(pad.to_dict())
+        pad.recursive()
 
     print(f"We have seen {len(seen_pads)} which were {seen_pads}")
 

From 11b5541c0f9feac4a3f7e808a4afdeee3b10c7e5 Mon Sep 17 00:00:00 2001
From: Nick Hahn <nick.hahn@posteo.de>
Date: Fri, 16 Sep 2022 23:06:07 +0200
Subject: [PATCH 7/9] Also save pad content on crawl

---
 hedgedoc-image.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hedgedoc-image.py b/hedgedoc-image.py
index 753fd44..05dd570 100644
--- a/hedgedoc-image.py
+++ b/hedgedoc-image.py
@@ -8,15 +8,17 @@ from urllib.parse import urlparse
 from typing import List
 from os.path import exists
 
-PATH = "images/"
+IMG_PATH = "images/"
+PAD_PATH = "pads/"
 NEW_NETLOC = ""
 
 seen_pads = set()
 
 pads_json = []
 
-os.makedirs(PATH, exist_ok=True)
-os.makedirs(PATH + "uploads", exist_ok=True)
+os.makedirs(PAD_PATH, exist_ok=True)
+os.makedirs(IMG_PATH, exist_ok=True)
+os.makedirs(IMG_PATH + "uploads", exist_ok=True)
 
 
 class Image:
@@ -27,7 +29,7 @@ class Image:
 
     def download(self):
         r = requests.get(self.old_url)
-        p = PATH + urlparse(self.old_url).path
+        p = IMG_PATH + urlparse(self.old_url).path
         with open(p, 'wb') as f:
             f.write(r.content)
             print(f"Downloaded image {self.old_url} to {p}")
@@ -70,6 +72,8 @@ class PadPage:
         r = requests.get(self.old_url + "/download")
         if r.status_code == 200:
             self.content = r.text
+            with open(PAD_PATH + urlparse(self.old_url).path + ".md", "w") as f:
+                f.write(self.content)
             num = self._find_images()
             print(f"Found {num} images")
         else:

From dea3fa9fe65f64870399af0cb0708d5b7902e2a8 Mon Sep 17 00:00:00 2001
From: Nick Hahn <nick.hahn@posteo.de>
Date: Fri, 16 Sep 2022 23:24:28 +0200
Subject: [PATCH 8/9] New json format (snake_case to camelCase)

---
 hedgedoc-image.py   | 4 ++--
 pads/19IN-2Sem-Java | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 pads/19IN-2Sem-Java

diff --git a/hedgedoc-image.py b/hedgedoc-image.py
index 05dd570..d1ab53c 100644
--- a/hedgedoc-image.py
+++ b/hedgedoc-image.py
@@ -59,12 +59,12 @@ class PadPage:
     def to_dict(self) -> dict:
         old = urlparse(self.old_url)
         new = old._replace(netloc="pad.hacknang.de")
-        return {"old_url": self.old_url, "new_url": new.geturl()}
+        return {"oldUrl": self.old_url, "newUrl": new.geturl()}
 
     def to_json(self) -> str:
         old = urlparse(self.old_url)
         new = old._replace(netloc=NEW_NETLOC)
-        return json.dumps({"old_url": self.old_url, "new_url": new.geturl()})
+        return json.dumps({"oldUrl": self.old_url, "newUrl": new.geturl()})
 
     def download_and_process_content(self):
         print(f"Downloading pad at {self.old_url}")
diff --git a/pads/19IN-2Sem-Java b/pads/19IN-2Sem-Java
new file mode 100644
index 0000000..526e9b1
--- /dev/null
+++ b/pads/19IN-2Sem-Java
@@ -0,0 +1 @@
+# Java
\ No newline at end of file

From 6f7c991948526ece8ff474da1092563bc4092558 Mon Sep 17 00:00:00 2001
From: Nick Hahn <nick.hahn@posteo.de>
Date: Sat, 17 Sep 2022 11:40:16 +0200
Subject: [PATCH 9/9] Add script that checks if pad is already moved

---
 .gitignore           |  2 ++
 hedgedoc-is-moved.py | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100644 hedgedoc-is-moved.py

diff --git a/.gitignore b/.gitignore
index fde6bf5..61a7128 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,9 @@
 __pycache__
 venv/
 images/
+pads/
 pads.json
+new_pads.json
 # ---> Node
 # Logs
 logs
diff --git a/hedgedoc-is-moved.py b/hedgedoc-is-moved.py
new file mode 100644
index 0000000..eb2057e
--- /dev/null
+++ b/hedgedoc-is-moved.py
@@ -0,0 +1,27 @@
+import requests
+import json
+import os
+
+from os.path import exists
+
+pads_json = []
+new_json= []
+
+if exists("pads.json"):
+    with open("pads.json", "r") as f:
+        old_file = json.load(f)
+
+    pads_json.extend(old_file)
+
+    for pad in pads_json:
+        print("Downloading: " + pad['oldUrl'] + "/download")
+        r = requests.head(pad['oldUrl'] + "/download")
+        if int(r.headers['content-length']) > 150:
+            new_json.append(pad)
+
+
+        with open("new_pads.json", "w") as f:
+            f.write(json.dumps(new_json))
+
+else:
+    print("Give me pads.json")