Skip to content
Snippets Groups Projects
Commit 82f5fcf1 authored by Farhan Mohammed
Browse files

Update scraper.py

parent a7d79b10
No related branches found
No related tags found
No related merge requests found
import requests as r
from bs4 import BeautifulSoup
import textwrap
# Returns text of web page
def scrape_webpage(link, timeout=10):
    """Fetch a web page and return the text of its paragraphs.

    Only the contents of ``<p>`` elements are collected; text that lives
    outside a ``<p>`` element is not part of the result.

    Args:
        link: URL of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of every ``<p>`` element in document order, each one
        followed by a newline. An empty string if the page has no ``<p>``
        elements.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with a 4xx/5xx status.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    resp = r.get(link, timeout=timeout)
    # Fail loudly instead of scraping the body of an error page.
    resp.raise_for_status()
    # Hand over the raw bytes so BeautifulSoup performs encoding detection.
    soup = BeautifulSoup(resp.content, "html.parser")
    # Each paragraph keeps its trailing newline, including the last one.
    return "".join(paragraph.text + "\n" for paragraph in soup.find_all("p"))


webpage = "https://www.cbsnews.com/news/enrique-marquez-san-bernardino-shooter-friend-pleads-guilty-to-supplying-weapons/"
print(scrape_webpage(webpage))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment