Advertisement
furas

Python - Selenium - download PDF (Stackoverflow)

Jul 2nd, 2025 (edited)
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.94 KB | None | 0 0
  #!/usr/bin/env python3

# date: 2025.07.02
# [web scraping - How to use Python to download a pdf file from a link (not button!) - Stack Overflow](https://stackoverflow.com/questions/79686767/how-to-use-python-to-download-a-pdf-file-from-a-link-not-button?noredirect=1#comment140558944_79686767)

"""Open a waters.com application note in Chrome and download its PDF.

Steps: start Chrome configured to save PDFs instead of displaying them,
open the page, dismiss the cookie message when it is shown, click the
"Download PDF" link, and keep the browser open until the new PDF shows up
in the download folder.
"""

import time
from pathlib import Path

import selenium
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# ---

print('Selenium:', selenium.__version__)  # Selenium: 4.19.0  # or newer

# ---

# NOTE(review): assumes Chrome saves into its default "~/Downloads" folder,
# because "download.default_directory" is not set below - confirm on systems
# with a localized or customized download folder.
DOWNLOAD_DIR = Path.home() / "Downloads"

PAGE_TIMEOUT = 10       # seconds to wait for page elements to become usable
DOWNLOAD_TIMEOUT = 60   # seconds to wait for the PDF to be fully written
FALLBACK_PAUSE = 5      # seconds to wait when the download folder is unknown


def pdf_files(directory):
    """Return the set of ``*.pdf`` paths currently present in *directory*."""
    if not directory.is_dir():
        return set()
    return {path for path in directory.iterdir() if path.suffix.lower() == ".pdf"}


def wait_for_new_pdf(directory, already_present, timeout, poll_interval=0.5):
    """Wait until a PDF that is not in *already_present* appears in *directory*.

    Chrome writes a download under a temporary name and renames it to the
    final name only when the transfer is complete, so a new ``.pdf`` entry
    means the file is finished.

    Returns the path of the new file, or ``None`` when *timeout* seconds
    passed without one appearing.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        new_files = pdf_files(directory) - already_present
        if new_files:
            return sorted(new_files)[0]
        time.sleep(poll_interval)
    return None


options = webdriver.ChromeOptions()
options.add_experimental_option('prefs', {
    # Optional settings, kept for reference:
    # "download.default_directory": "C:/Users/517/Download",  # Change default directory for downloads
    # "download.prompt_for_download": False,  # To auto download the file
    # "download.directory_upgrade": True,
    "plugins.always_open_pdf_externally": True,  # It will not show PDF directly in Chrome
})

# newer Selenium can automatically download driver - so it doesn't need `service=`
driver = webdriver.Chrome(options=options)

page_url = "https://www.waters.com/nextgen/en/library/application-notes/2004/bioanalytical-strategy-for-in-vitro-metabolite-screening-with-exact-mass-using-the-q-tof-micro.html#:~:text=In%20this%20paper%2C%20we%20present%20an%20automated%20bioanalytical,in%20microsomes%20at%205%20%CE%BCM%20will%20be%20shown"

# Everything after the browser is started runs inside try/finally, so Chrome
# is closed even when loading the page itself fails.
try:
    driver.get(page_url)
    wait = WebDriverWait(driver, PAGE_TIMEOUT)

    # close Cookie message - it is not always displayed, and a missing
    # message must not prevent the download
    try:
        cookie_locator = (By.ID, 'onetrust-reject-all-handler')
        wait.until(EC.element_to_be_clickable(cookie_locator)).click()
        # let the message disappear so it cannot intercept the next click
        wait.until(EC.invisibility_of_element_located(cookie_locator))
    except TimeoutException:
        print("Cookie message not handled - continuing")

    # find PDF for download
    pdf_link_element = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//a[@title='Download PDF']"))
    )
    print(pdf_link_element)

    # remember existing PDFs so the new one can be recognized afterwards
    pdfs_before = pdf_files(DOWNLOAD_DIR)

    # download it
    pdf_link_element.click()

    # keep the browser open until the download has finished
    if DOWNLOAD_DIR.is_dir():
        downloaded = wait_for_new_pdf(DOWNLOAD_DIR, pdfs_before, DOWNLOAD_TIMEOUT)
        if downloaded is None:
            print("Error: no new PDF appeared in", DOWNLOAD_DIR)
        else:
            print("Downloaded:", downloaded)
    else:
        # download folder unknown - fall back to a fixed pause
        time.sleep(FALLBACK_PAUSE)

except Exception as e:
    # top-level boundary of the script: report the problem, then clean up
    print("Error:", e)
finally:
    driver.quit()

Advertisement
Add Comment
Please, Sign In to add comment
Advertisement