#!/usr/bin/env python3
import requests
from bs4 import BeautifulSoup


def crawl_page(url):
    """Fetches and parses a webpage, extracting the links it contains."""
    try:
        # A timeout keeps the request from hanging on an unresponsive host.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an exception for bad status codes
        soup = BeautifulSoup(response.text, 'html.parser')
        links = soup.find_all('a', href=True)
        return links
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return []
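
# A minimal sketch: crawl_page() returns hrefs exactly as written in the
# page, which are often relative paths such as "/about". The standard-library
# urljoin can resolve them against the page URL. The name absolute_links is
# a hypothetical helper, shown here only for illustration; usage would be
# e.g. absolute_links(url, crawl_page(url)).
from urllib.parse import urljoin


def absolute_links(base_url, links):
    """Resolve each extracted href against base_url (relative -> absolute)."""
    return [urljoin(base_url, link['href']) for link in links]
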
def main():
    """Prompts the user for a URL and crawls the page."""
    while True:
        url = input("Enter the URL to crawl (or type 'exit' to quit): ")
        if url.lower() == 'exit':
            break
        # startswith() accepts a tuple, checking both schemes in one call.
        if not url.startswith(("http://", "https://")):
            print("Please enter a valid URL (e.g., http://example.com)")
            continue
        print(f"Crawling: {url}")
        links = crawl_page(url)
        if links:
            for link in links:
                print(link['href'])
        else:
            print("No links found or error occurred.")


if __name__ == "__main__":
    main()