Advertisement
Jackspade9624

crawler1.py

May 30th, 2025 (edited)
16
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.17 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. import requests
  4. from bs4 import BeautifulSoup
  5.  
  6. def crawl_page(url):
  7. """Fetches and parses a webpage, extracting links."""
  8. try:
  9. response = requests.get(url)
  10. response.raise_for_status() # Raise an exception for bad status codes
  11. soup = BeautifulSoup(response.text, 'html.parser')
  12. links = soup.find_all('a', href=True)
  13. return links
  14. except requests.exceptions.RequestException as e:
  15. print(f"Error fetching {url}: {e}")
  16. return []
  17.  
  18. def main():
  19. """Prompts the user for a URL and crawls the page."""
  20. while True:
  21. url = input("Enter the URL to crawl (or type 'exit' to quit): ")
  22. if url.lower() == 'exit':
  23. break
  24. if not url.startswith("http://") and not url.startswith("https://"):
  25. print("Please enter a valid URL (e.g., http://example.com)")
  26. continue
  27.  
  28. print(f"Crawling: {url}")
  29. links = crawl_page(url)
  30.  
  31. if links:
  32. for link in links:
  33. print(link['href'])
  34. else:
  35. print("No links found or error occurred.")
  36.  
# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement