diff --git a/__pycache__/scraper.cpython-312.pyc b/__pycache__/scraper.cpython-312.pyc
index 98f2a8fe0cb9814e7543b4291b1c61344efe2b16..f74863a53ea0e12effc8b81a5c15b218f1ef3cc2 100644
Binary files a/__pycache__/scraper.cpython-312.pyc and b/__pycache__/scraper.cpython-312.pyc differ
diff --git a/crisis_events.db b/crisis_events.db
index c0173a3a9bd6203f6fd569ab359d40f6b60276f0..20d733224bf5bbaa1bdec121901d4ff43d8fe84c 100644
Binary files a/crisis_events.db and b/crisis_events.db differ
diff --git a/crisis_events_database_metadata.json b/crisis_events_database_metadata.json
index 96f6edc7c9c9db46747cabb70150c3deb5df1add..34450736630c8dabcaab761cb7933ef8d69ebd50 100644
--- a/crisis_events_database_metadata.json
+++ b/crisis_events_database_metadata.json
@@ -1 +1 @@
-{"Users": 2, "Collections": 2}
\ No newline at end of file
+{"Users": 2, "Collections": 5}
\ No newline at end of file
diff --git a/flask_backend.py b/flask_backend.py
index 98cfa6dbbd71cdcf8a4c7f379f39c138eea29096..de564cc907f0ee630e034383c49b862f9bc8f7b2 100644
--- a/flask_backend.py
+++ b/flask_backend.py
@@ -13,6 +13,7 @@ import glob
 import summarizer_implementations.t5 as t5
 import summarizer_implementations.nltk_summarizer as nltk
 import summarizer_implementations.bert as bert
+import scraper
 
 app = Flask(__name__)
 app.config['JWT_SECRET_KEY'] = 'PI'
@@ -227,7 +228,7 @@ def v1_summarize_t5():
 
     glob = ""
 
-    if collection["type"] == "text":
+    if collection["type"] == "text" or collection["type"] == "url" or collection["type"] == "html":
         for file_path in items:
             with open(file_path, "r", encoding="utf8") as f:
                 glob += f.read()
@@ -264,7 +265,7 @@ def v1_summarize_nltk():
     # files = database.get_raw_text_files(collection_info["collection_id"])
 
     glob = ""
-    if collection["type"] == "text":
+    if collection["type"] == "text" or collection["type"] == "url" or collection["type"] == "html":
         for file_path in items:
             with open(file_path, "r", encoding="utf8") as f:
                 glob += f.read()
@@ -302,7 +303,7 @@ def v1_summarize_bert():
     # files = database.get_raw_text_files(collection_info["collection_id"])
 
     glob = ""
-    if collection["type"] == "text":
+    if collection["type"] == "text" or collection["type"] == "url" or collection["type"] == "html":
         for file_path in items:
             with open(file_path, "r", encoding="utf8") as f:
                 glob += f.read()
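The T5, NLTK, and BERT handlers in the three hunks above now read every stored .txt file whether the collection's type is "text", "url", or "html", since all three types end up as plain-text files on disk after this change. A minimal sketch of an equivalent membership test for the repeated chained comparison (a possible simplification, not part of this patch):

    # Equivalent to the chained == checks: all three collection types are
    # read the same way once their contents exist as .txt files.
    if collection["type"] in ("text", "url", "html"):
        for file_path in items:
            with open(file_path, "r", encoding="utf8") as f:
                glob += f.read()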
@@ -323,10 +324,9 @@ def v1_summarize_bert():
         "status":"failure",
     }, 500
 
-
-@app.route('/api/v1/upload_raw_text', methods=['POST'])
-def v1_upload_raw_text():
-    "http://127.0.0.1:5000//api/v1/upload_raw_text?collection=0"
+@app.route('/api/v1/upload_url_file', methods=['POST'])
+def v1_upload_url_file():
+    "http://127.0.0.1:5000//api/v1/upload_url_file?collection=0"
 
     collection_id = request.args.get("collection")
     if not collection_id:
@@ -336,19 +336,34 @@ def v1_upload_raw_text():
 
     os.makedirs(f"./backend/storage/{collection_id}",exist_ok=True)
 
-    for zip_file in request.files.keys():
-        path = f"./backend/storage/{collection_id}/{zip_file}"
-        request.files[zip_file].save(path)
+    for url_file in request.files.keys():
+        path = f"./backend/storage/{collection_id}/{url_file}"
+        request.files[url_file].save(path)
+        print(f"Saved: {path}")
 
-        with zipfile.ZipFile(path, 'r') as zip_ref:
-            zip_ref.extractall(f"./backend/storage/{collection_id}")
+        with open(path, 'r', encoding='utf-8') as file:
+            urls = [line.strip() for line in file]
+
+        for url in urls:  # iterate through the list of URLs
+            try:
+                text = scraper.scrape_url(url)
+                if text.lower().startswith('error'):  # scrape_url returned an error string
+                    continue
+            except Exception as e:
+                print(f"Error: {e}")
+                continue  # disregard this URL
+
+            file_name = url.split('/')[-1].strip() + '.txt'  # derive the file name from the last URL segment
+            if file_name == '.txt':  # URL ended with a trailing slash
+                file_name = url.split('/')[-2].strip() + '.txt'
+            file_path = f'./backend/storage/{collection_id}/{file_name}'
+            with open(file_path, 'w', encoding='utf-8') as text_file:  # write the scraped text into storage
+                text_file.write(text)
 
     glob_path = r'./backend/storage/' + collection_id + r'/*.txt'
-
     for file_path in glob.glob(glob_path):
         database.create_raw_text_file(collection_id, file_path)
-
     return {"status":"success"}, 200
 
 @app.route('/api/v1/upload_raw_html', methods=['POST'])
@@ -368,20 +383,25 @@ def v1_upload_raw_html():
         request.files[zip_file].save(path)
 
         with zipfile.ZipFile(path, 'r') as zip_ref:
-            zip_ref.extractall(f"./backend/storage/{collection_id}")
-
-    glob_path = r'./backend/storage/' + collection_id + r'/*.html'
+            #zip_ref.extractall(f"./backend/storage/{collection_id}")
+            for file_name in zip_ref.namelist():  # loop over each entry in the uploaded zip
+                with zip_ref.open(file_name) as file:
+                    content = file.read()
+                    text = scraper.scrape_html(content)
+                    file_path = f'./backend/storage/{collection_id}/{file_name}'
+                    with open(file_path, 'w', encoding='utf-8') as text_file:
+                        text_file.write(text)
 
-    ##deprecated code
+    glob_path = r'./backend/storage/' + collection_id + r'/*.txt'
     for file_path in glob.glob(glob_path):
         database.create_raw_text_file(collection_id, file_path)
-    ##
 
     return {"status":"success"}, 200
+
 
-@app.route('/api/v1/upload_url_file', methods=['POST'])
-def v1_upload_url_file():
-    "http://127.0.0.1:5000//api/v1/upload_url_file?collection=0"
+@app.route('/api/v1/upload_raw_text', methods=['POST'])
+def v1_upload_raw_text():
+    "http://127.0.0.1:5000//api/v1/upload_raw_text?collection=0"
 
     collection_id = request.args.get("collection")
     if not collection_id:
@@ -391,10 +411,17 @@ def v1_upload_url_file():
 
     os.makedirs(f"./backend/storage/{collection_id}",exist_ok=True)
 
-    for url_file in request.files.keys():
-        path = f"./backend/storage/{collection_id}/{url_file}"
-        request.files[url_file].save(path)
-        print(f"Saved: {path}")
+    for zip_file in request.files.keys():
+        path = f"./backend/storage/{collection_id}/{zip_file}"
+        request.files[zip_file].save(path)
+
+        with zipfile.ZipFile(path, 'r') as zip_ref:
+            zip_ref.extractall(f"./backend/storage/{collection_id}")
+
+    glob_path = r'./backend/storage/' + collection_id + r'/*.txt'
+
+    for file_path in glob.glob(glob_path):
+        database.create_raw_text_file(collection_id, file_path)
 
     return {"status":"success"}, 200
 
@@ -415,10 +442,11 @@ def __get_items(id):
     elif c_type == "url":
         glob_path = r'./backend/storage/' + id + r'/*.txt'
         for file_path in glob.glob(glob_path):
-            with open(file_path) as file:
-                items = [line.rstrip() for line in file]
+            items.append(file_path)
+            # with open(file_path) as file:
+            #     items = [line.rstrip() for line in file]
     elif c_type == "html":
-        glob_path = r'./backend/storage/' + id + r'/*.html'
+        glob_path = r'./backend/storage/' + id + r'/*.txt'
         for file_path in glob.glob(glob_path):
             items.append(file_path)
 
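The /api/v1/upload_url_file handler above expects a plain-text upload with one URL per line; each URL is scraped with scraper.scrape_url and written into the collection's storage directory as its own .txt file before being registered in the database. A minimal client-side sketch of that flow, assuming the Flask app is running locally on port 5000 as in the handler docstrings; the urls.txt name is only an example:

    import requests

    # The key in the files dict becomes the saved file name on the server.
    with open("urls.txt", "rb") as f:
        resp = requests.post(
            "http://127.0.0.1:5000/api/v1/upload_url_file?collection=0",
            files={"urls.txt": f},
        )
    print(resp.status_code, resp.json())  # expect 200 and {"status": "success"}
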
diff --git a/scraper.py b/scraper.py
index 0cb74c07205fa69bd5e081f57acc0add9ef5f7c4..aec3fe1b09b02c1253851ef25cbd1b86b815989c 100644
--- a/scraper.py
+++ b/scraper.py
@@ -64,6 +64,28 @@ def scrape_multi_webpage(filename):
         all_text += result + "\n"
     return json.dumps({"URLs": urls, "Text": all_text})
 
+def scrape_url(link):
+    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
+
+    try:
+        resp = r.get(link, headers=headers)
+        resp.raise_for_status()
+    except requests.exceptions.RequestException as exception:
+        return f"Error identified: {exception}"
+
+    soupObject = BeautifulSoup(resp.content, "html.parser")
+    text = ""
+    for paragraph in soupObject.find_all("p"):
+        text += paragraph.text + "\n"
+    return text
+
+def scrape_html(htmlText):
+    soupObject = BeautifulSoup(htmlText, 'html.parser')
+    text = ""
+    for paragraph in soupObject.find_all("p"):
+        text += paragraph.text + "\n"
+    return text
+
 # webpage = "https://www.cbsnews.com/news/enrique-marquez-san-bernardino-shooter-friend-pleads-guilty-to-supplying-weapons/"
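scrape_url and scrape_html share the same BeautifulSoup step of concatenating the text of every <p> tag; scrape_url reports request failures by returning an "Error identified: ..." string rather than raising, which is why the upload handler skips results that start with "error". A quick smoke test of both helpers, assuming scraper.py already imports requests (referenced both as r and as requests above) and BeautifulSoup at module level:

    import scraper

    # URL borrowed from the commented-out example at the bottom of scraper.py.
    page_text = scraper.scrape_url(
        "https://www.cbsnews.com/news/enrique-marquez-san-bernardino-shooter-friend-pleads-guilty-to-supplying-weapons/"
    )
    if page_text.lower().startswith("error"):
        print(page_text)          # the request failed; the message says why
    else:
        print(page_text[:300])    # first few hundred characters of extracted paragraph text

    print(scraper.scrape_html("<p>first paragraph</p><p>second paragraph</p>"))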