Skip to content
Snippets Groups Projects
test-scraper.py 997 B
Newer Older
  • Learn to ignore specific revisions
  • Tarek Shah's avatar
    Tarek Shah committed
    import unittest
    
    Tarek Shah's avatar
    Tarek Shah committed
    from scraper import scrape_single_webpage
    
    Tarek Shah's avatar
    Tarek Shah committed
    
    class TestWebScraping(unittest.TestCase):
        """Checks for ``scrape_single_webpage`` on one good and one bad URL.

        NOTE(review): both tests pass a real address to the scraper, so they
        presumably need network access and depend on example.com staying
        reachable -- confirm, and consider a local fixture if this is flaky.
        """

        def test_scrape_valid_page(self):
            """
            Test scraping a valid webpage to ensure it retrieves text.
            """
            # This link should be replaced with a known, stable webpage for testing
            test_link = "http://example.com"

            result = scrape_single_webpage(test_link)

            # The result must expose "link" and "text" (presumably dict keys;
            # the scraper's return type is not visible from this file).
            self.assertIn("link", result)
            self.assertIn("text", result)
            self.assertNotEqual(result["text"], "", "The scraped text should not be empty.")

        def test_scrape_invalid_page(self):
            """
            Test scraping an invalid webpage to check error handling.
            """
            # This should be a link that will likely return a 404 or other client/server error
            test_link = "http://example.com/nonexistentpage"

            result = scrape_single_webpage(test_link)

            # assertIn (rather than assertTrue(x in y)) reports both operands
            # when the check fails, which makes a failing run diagnosable.
            self.assertIn("Error identified", result)
    
    # Run the test suite when this file is executed directly as a script.
    if __name__ == '__main__':
        unittest.main()