From 320edd22c468ce36f093cbb86cb8fcffdf2d54e0 Mon Sep 17 00:00:00 2001
From: starek21 <starek21@vt.edu>
Date: Wed, 1 May 2024 15:18:56 -0400
Subject: [PATCH] added scraper unit test

---
 test-scraper.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 test-scraper.py

diff --git a/test-scraper.py b/test-scraper.py
new file mode 100644
index 0000000..2686ef0
--- /dev/null
+++ b/test-scraper.py
@@ -0,0 +1,31 @@
+import unittest
+
+from scraper import scrape_webpage
+
+
+class TestWebScraping(unittest.TestCase):
+    """Tests for scrape_webpage against real URLs (likely needs network)."""
+
+    def test_scrape_valid_page(self):
+        """
+        Test scraping a valid webpage to ensure it retrieves text.
+        """
+        # This link should be replaced with a known, stable webpage for testing
+        test_link = "http://example.com"
+        result = scrape_webpage(test_link)
+        self.assertIn("link", result)
+        self.assertIn("text", result)
+        self.assertNotEqual(result["text"], "", "The scraped text should not be empty.")
+
+    def test_scrape_invalid_page(self):
+        """
+        Test scraping an invalid webpage to check error handling.
+        """
+        # This should be a link that will likely return a 404 or other client/server error
+        test_link = "http://example.com/nonexistentpage"
+        result = scrape_webpage(test_link)
+        self.assertIn("Error identified", result)
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab