Advertisement
Jackspade9624

crawler2.py

May 30th, 2025 (edited)
13
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.01 KB | None | 0 0
#!/usr/bin/env python3

import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
  5.  
  6. def crawl(url):
  7. try:
  8. response = requests.get(url)
  9. response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
  10. except requests.exceptions.RequestException as e:
  11. print(f"Error fetching URL: {e}")
  12. return
  13.  
  14. soup = BeautifulSoup(response.content, 'html.parser')
  15. links = []
  16. for a_tag in soup.find_all('a', href=True):
  17. link = a_tag['href']
  18. if link.startswith('http'):
  19. links.append(link)
  20. else:
  21. links.append(url + link)
  22.  
  23.  
  24. if links:
  25. print("Links found:")
  26. for link in links:
  27. print(link)
  28. else:
  29. print("No links found on this page.")
  30.  
  31. if __name__ == "__main__":
  32. if len(sys.argv) != 2:
  33. print("Usage: python krawler.py <url>")
  34. sys.exit(1)
  35.  
  36. url = sys.argv[1]
  37. crawl(url)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement