#!/usr/bin/env python3
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


def crawl(url):
    try:
        # Timeout keeps the script from hanging on an unresponsive host.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    links = []
    for a_tag in soup.find_all('a', href=True):
        link = a_tag['href']
        if link.startswith('http'):
            links.append(link)
        else:
            # Resolve relative hrefs against the page URL; naive string
            # concatenation breaks on paths like "/about" or "../page".
            links.append(urljoin(url, link))

    if links:
        print("Links found:")
        for link in links:
            print(link)
    else:
        print("No links found on this page.")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python krawler.py <url>")
        sys.exit(1)
    url = sys.argv[1]
    crawl(url)
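
# Example invocation, assuming the script is saved as krawler.py and the
# target URL is reachable (the URL and output below are illustrative, not
# from the paste; actual output depends on the page's anchor tags):
#
#     $ python3 krawler.py https://example.com
#     Links found:
#     https://www.iana.org/domains/example
#
# Relative hrefs are resolved with urllib.parse.urljoin rather than string
# concatenation, so "/about" found on https://example.com/docs/ becomes
# https://example.com/about instead of https://example.com/docs//about.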