Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python3
"""Download the PDF attached to a Waters application note using Selenium.

The script opens the application-note page in Chrome, dismisses the cookie
banner when one is shown, clicks the "Download PDF" link and then waits until
Chrome has finished writing the file before closing the browser.

The original author ran this with Selenium 4.19.0; recent Selenium releases
locate/download a matching chromedriver on their own, so no ``service=``
argument is passed to ``webdriver.Chrome``.
"""
# date: 2025.07.02
# [web scraping - How to use Python to download a pdf file from a link (not button!) - Stack Overflow](https://stackoverflow.com/questions/79686767/how-to-use-python-to-download-a-pdf-file-from-a-link-not-button?noredirect=1#comment140558944_79686767)

import sys
import time
from pathlib import Path
from typing import Iterable

import selenium
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

PAGE_URL = "https://www.waters.com/nextgen/en/library/application-notes/2004/bioanalytical-strategy-for-in-vitro-metabolite-screening-with-exact-mass-using-the-q-tof-micro.html#:~:text=In%20this%20paper%2C%20we%20present%20an%20automated%20bioanalytical,in%20microsomes%20at%205%20%CE%BCM%20will%20be%20shown"

# The directory is set explicitly (instead of relying on Chrome's default) so
# the script knows where to look when it waits for the download to finish.
DOWNLOAD_DIR = Path.home() / "Downloads"

COOKIE_REJECT_ID = "onetrust-reject-all-handler"
PDF_LINK_XPATH = "//a[@title='Download PDF']"

# File-name suffixes Chrome uses while a download is still being written.
PARTIAL_SUFFIXES = (".crdownload", ".tmp")


def _list_names(directory: Path) -> set:
    """Return the names of all entries currently present in *directory*."""
    return {entry.name for entry in directory.iterdir()}


def _wait_for_download(
    directory: Path,
    known_names: Iterable[str],
    timeout: float = 60.0,
    poll_interval: float = 0.5,
) -> Path:
    """Block until a new, fully written file shows up in *directory*.

    Args:
        directory: Folder Chrome downloads into.
        known_names: Entry names that existed before the download started;
            they are ignored when looking for the new file.
        timeout: Maximum number of seconds to wait.
        poll_interval: Seconds to sleep between directory scans.

    Returns:
        Path of the downloaded file.

    Raises:
        TimeoutError: If no completed new file appears within *timeout*.
    """
    known = set(known_names)
    deadline = time.monotonic() + timeout
    while True:
        new_names = _list_names(directory) - known
        finished = sorted(
            name for name in new_names if not name.endswith(PARTIAL_SUFFIXES)
        )
        # Done only when something new exists and no partial file is left.
        if finished and len(finished) == len(new_names):
            return directory / finished[0]
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"download did not finish within {timeout:g} s in {directory}"
            )
        time.sleep(poll_interval)


def _dismiss_cookie_banner(driver, timeout: float = 5.0) -> bool:
    """Click the cookie banner's "reject all" button if it shows up.

    This is best-effort: a missing banner (e.g. cookies already handled) must
    not prevent the download, so a timeout is reported via the return value
    instead of an exception.

    Returns:
        True when the button was clicked and then disappeared, False when the
        wait timed out at either step.
    """
    locator = (By.ID, COOKIE_REJECT_ID)
    wait = WebDriverWait(driver, timeout)
    try:
        wait.until(EC.element_to_be_clickable(locator)).click()
        # Wait for the banner to go away so it cannot intercept later clicks.
        wait.until(EC.invisibility_of_element_located(locator))
    except TimeoutException:
        return False
    return True


def main() -> int:
    """Run the download and return a process exit status (0 on success)."""
    print('Selenium:', selenium.__version__)  # Selenium: 4.19.0 # or newer

    DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)

    options = webdriver.ChromeOptions()
    options.add_experimental_option('prefs', {
        "download.default_directory": str(DOWNLOAD_DIR),
        "download.prompt_for_download": False,  # download without a dialog
        "plugins.always_open_pdf_externally": True,  # don't show the PDF in Chrome
    })

    # newer Selenium can automatically download driver - so it doesn't need `service=`
    driver = webdriver.Chrome(options=options)
    try:
        # Navigation is inside the try so the browser is closed even when the
        # page fails to load.
        driver.get(PAGE_URL)

        _dismiss_cookie_banner(driver)

        # find PDF for download
        pdf_link_element = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.XPATH, PDF_LINK_XPATH))
        )
        print(pdf_link_element)

        # Snapshot the folder first so the new file can be told apart from
        # whatever was already there.
        existing_names = _list_names(DOWNLOAD_DIR)
        pdf_link_element.click()

        saved_path = _wait_for_download(DOWNLOAD_DIR, existing_names)
        print("Downloaded:", saved_path)
    except (WebDriverException, TimeoutError) as e:
        print("Error:", e)
        return 1
    finally:
        driver.quit()
    return 0


if __name__ == "__main__":
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement