Skip to content
Snippets Groups Projects
Commit b791620d authored by Farhan Mohammed
Browse files

Added API endpoint

parent 68b6940a
No related branches found
No related tags found
No related merge requests found
import requests as r
import requests.exceptions
from flask import Flask, request, jsonify
from requests.exceptions import RequestException
from bs4 import BeautifulSoup
import textwrap
import json
# Flask application instance; the /scrape route below is registered on it.
app = Flask(__name__)
@app.route('/scrape', methods=['GET'])
def scrape_webpage_api():
    """Fetch a web page and return the text of its ``<p>`` elements as JSON.

    Query parameters:
        link: URL of the page to scrape (required).

    Returns:
        * ``{"link": ..., "text": ...}`` on success, where ``text`` is the
          text of every ``<p>`` element, each followed by a newline.
        * ``{"error": "No link provided"}`` with HTTP 400 when ``link`` is
          missing or empty.
        * ``{"error": <message>}`` with HTTP 500 when the request fails,
          times out, or the remote server answers with an error status.
    """
    link = request.args.get('link')  # 'link' parameter from the query string
    if not link:
        return jsonify({"error": "No link provided"}), 400

    # NOTE(review): ``link`` is caller-controlled and fetched server-side, so
    # this endpoint can be pointed at internal hosts (SSRF). Consider an
    # allow-list or host validation before exposing it publicly.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # requests has no default timeout; without one an unresponsive host
        # would block this worker forever. A timeout raises
        # requests.exceptions.Timeout, which is a RequestException and is
        # therefore reported through the same error path below.
        resp = r.get(link, headers=headers, timeout=10)
        resp.raise_for_status()
    except RequestException as exception:
        return jsonify({"error": str(exception)}), 500  # Return the error as JSON

    soup = BeautifulSoup(resp.content, "html.parser")
    # Join once instead of repeated string concatenation in a loop.
    text = "".join(f"{paragraph.text}\n" for paragraph in soup.find_all("p"))
    return jsonify({"link": link, "text": text})  # Return the response as JSON
# Returns text of web page
def scrape_webpage(link):
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment