Advertisement
Najeebsk

EXTRACTOR-LINKS.pyw

May 2nd, 2025
333
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.88 KB | None | 0 0
  1. import tkinter as tk
  2. from tkinter import messagebox, filedialog
  3. from tkinter import scrolledtext
  4. from tkinter import INSERT
  5. from PIL import Image, ImageTk
  6. import requests
  7. from bs4 import BeautifulSoup
  8. from urllib.parse import urljoin, urlparse
  9. import re
  10. import os
  11. import io  # Required for image preview
  12.  
  13. # Define image extensions for preview
  14. image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
  15.  
  16. def extract_links(url, exclude_words=None):
  17.     try:
  18.         response = requests.get(url)
  19.         response.raise_for_status()
  20.         url = response.url  # Update URL to final location after redirect
  21.         soup = BeautifulSoup(response.text, 'html.parser')
  22.        
  23.         links = []
  24.         for link in soup.find_all('a', href=True):
  25.             full_link = urljoin(url, link['href'])
  26.             if exclude_words and any(excluded_word in full_link for excluded_word in exclude_words):
  27.                 continue
  28.             links.append(full_link)
  29.        
  30.         return links
  31.     except requests.exceptions.RequestException as e:
  32.         print(f"Error fetching the URL {url}: {e}")
  33.         return []
  34.  
  35. def extract_username(url):
  36.     parsed_url = urlparse(url)
  37.     path_parts = parsed_url.path.split('/')
  38.    
  39.     username_patterns = [
  40.         r'/user/(\w+)',
  41.         r'/profile/(\w+)',
  42.         r'/(\w+)$'
  43.     ]
  44.    
  45.     for pattern in username_patterns:
  46.         match = re.search(pattern, parsed_url.path)
  47.         if match:
  48.             return match.group(1)
  49.    
  50.     for part in reversed(path_parts):
  51.         if part:
  52.             return part
  53.    
  54.     return None
  55.  
  56. def clean_and_log_links(links, filename, mode='a'):
  57.     usernames = [extract_username(link) for link in links]
  58.    
  59.     with open(filename, mode) as f:
  60.         for link, username in zip(links, usernames):
  61.             cleaned_link = re.sub(r',user$', '', link)
  62.             f.write(f"{cleaned_link}\n")
  63.  
  64. def overwrite_links_file(filename):
  65.     if os.path.exists(filename):
  66.         os.remove(filename)
  67.     print(f"Cleared existing content in {filename}")
  68.  
  69. def extract_and_log_links():
  70.     urls = text_input.get("1.0", "end-1c").split()
  71.     exclude_words_input = exclude_input.get("1.0", "end-1c").splitlines()
  72.    
  73.     if not urls:
  74.         messagebox.showwarning("Input Error", "Please enter at least one URL.")
  75.         return
  76.    
  77.     exclude_words = [word.strip() for word in exclude_words_input if word.strip()]
  78.  
  79.     overwrite_links_file('links.txt')
  80.  
  81.     all_links = []
  82.     for i, url in enumerate(urls):
  83.         base_url = url.split('?')[0]
  84.         print(f"Extracting links from: {base_url}")
  85.         links = extract_links(base_url, exclude_words)
  86.         all_links.extend(links)
  87.        
  88.         if links:
  89.             mode = 'w' if i == 0 else 'a'
  90.             clean_and_log_links(links, 'links.txt', mode)
  91.         else:
  92.             messagebox.showerror("Error", f"No links found or an error occurred for {url}")
  93.  
  94.     messagebox.showinfo("Done", f"Total links extracted and cleaned: {len(all_links)}")
  95.     display_log()
  96.  
  97. def display_log():
  98.     with open('links.txt', 'r') as file:
  99.         log_content = file.read()
  100.         log_text.delete("1.0", tk.END)
  101.         log_text.insert(tk.INSERT, log_content)
  102.  
  103. def save_as_settings():
  104.     urls = text_input.get("1.0", "end-1c").strip()
  105.     exclude_words = exclude_input.get("1.0", "end-1c").strip()
  106.  
  107.     file_path = filedialog.asksaveasfilename(defaultextension=".txt", filetypes=[("Text files", "*.txt"), ("All files", "*.*")])
  108.    
  109.     if not file_path:
  110.         return
  111.  
  112.     with open(file_path, "w") as file:
  113.         file.write(f"URLs:\n{urls}\n")
  114.         if exclude_words:
  115.             file.write(f"Exclude Words:\n{exclude_words}\n")
  116.    
  117.     messagebox.showinfo("Settings Saved", f"Your settings have been saved to {file_path}.")
  118.  
  119. def load_settings():
  120.     file_path = filedialog.askopenfilename(defaultextension=".txt", filetypes=[("Text files", "*.txt"), ("All files", "*.*")])
  121.    
  122.     if not file_path or not os.path.exists(file_path):
  123.         messagebox.showwarning("Load Error", "The selected file does not exist.")
  124.         return
  125.  
  126.     with open(file_path, "r") as file:
  127.         content = file.read().splitlines()
  128.  
  129.     if len(content) >= 2:
  130.         urls = "\n".join(content[1:content.index("Exclude Words:")]).strip() if "Exclude Words:" in content else "\n".join(content[1:]).strip()
  131.         exclude_words = "\n".join(content[content.index("Exclude Words:") + 1:]).strip() if "Exclude Words:" in content else ""
  132.  
  133.         text_input.delete("1.0", tk.END)
  134.         exclude_input.delete("1.0", tk.END)
  135.        
  136.         text_input.insert(tk.END, urls)
  137.         exclude_input.insert(tk.END, exclude_words)
  138.  
  139.     messagebox.showinfo("Settings Loaded", "Your settings have been loaded successfully.")
  140.  
  141. def preview_image_popup():
  142.     try:
  143.         line_index = log_text.index(tk.INSERT).split(".")[0]
  144.         selected_url = log_text.get(f"{line_index}.0", f"{line_index}.end").strip()
  145.         if not any(selected_url.lower().endswith(ext) for ext in image_exts):
  146.             raise Exception("Selected link is not an image.")
  147.  
  148.         response = requests.get(selected_url, timeout=10)
  149.         image = Image.open(io.BytesIO(response.content))
  150.  
  151.         popup = tk.Toplevel(root)
  152.         popup.title("Image Preview")
  153.         popup.geometry("600x600")
  154.  
  155.         img_resized = image.resize((500, 500), Image.LANCZOS)
  156.         img_tk = ImageTk.PhotoImage(img_resized)
  157.  
  158.         label = tk.Label(popup, image=img_tk)
  159.         label.image = img_tk  # Keep a reference to prevent garbage collection
  160.         label.pack()
  161.  
  162.     except Exception as e:
  163.         messagebox.showerror("Preview Error", str(e))    
  164.  
  165. root = tk.Tk()
  166. root.title("Najeeb Shah Khan Link Extractor")
  167. root.configure(bg="#2c3e50")
  168.  
  169. frame = tk.Frame(root, padx=10, pady=10)
  170. frame.pack(padx=10, pady=10)
  171.  
  172. lbl = tk.Label(frame, text="Enter URLs (one per line):")
  173. lbl.pack(anchor="w")
  174.  
  175. text_input = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=10)
  176. text_input.pack(pady=5)
  177.  
  178. exclude_lbl = tk.Label(frame, text="Enter words to exclude (one per line) [Optional]:")
  179. exclude_lbl.pack(anchor="w")
  180.  
  181. exclude_input = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=4)
  182. exclude_input.pack(pady=5)
  183.  
  184. # Context menus
  185. context_menu = tk.Menu(root, tearoff=0)
  186. context_menu.add_command(label="Copy", command=lambda: text_input.event_generate("<<Copy>>"))
  187. context_menu.add_command(label="Paste", command=lambda: text_input.event_generate("<<Paste>>"))
  188.  
  189. context_menu_exclude = tk.Menu(root, tearoff=0)
  190. context_menu_exclude.add_command(label="Copy", command=lambda: exclude_input.event_generate("<<Copy>>"))
  191. context_menu_exclude.add_command(label="Paste", command=lambda: exclude_input.event_generate("<<Paste>>"))
  192.  
  193. # Bindings for context menus
  194. text_input.bind("<Button-3>", lambda e: context_menu.tk_popup(e.x_root, e.y_root))
  195. exclude_input.bind("<Button-3>", lambda e: context_menu_exclude.tk_popup(e.x_root, e.y_root))
  196.  
  197. # Buttons
  198. button_frame = tk.Frame(root, bg="#2c3e50")
  199. button_frame.pack(pady=5)
  200.  
  201. tk.Button(button_frame, text="Extract Links", command=extract_and_log_links, bg="#3498db", fg="white", font=("Arial", 12, "bold"), width=12).pack(side=tk.LEFT, padx=5)
  202. tk.Button(button_frame, text="Save Settings As", command=save_as_settings, bg="#27ae60", fg="white", font=("Arial", 12, "bold"), width=12).pack(side=tk.LEFT, padx=5)
  203. tk.Button(button_frame, text="Load Settings", command=load_settings, bg="#e74c3c", fg="white", font=("Arial", 12, "bold"), width=12).pack(side=tk.LEFT, padx=5)
  204. tk.Button(button_frame, text="Preview Image", command=preview_image_popup, bg="#f0c1c1", fg="white", font=("Arial", 12, "bold"), width=12).pack(side=tk.LEFT, padx=5)
  205.  
  206. # Log output
  207. log_label = tk.Label(frame, text="Log Output:")
  208. log_label.pack(anchor="w")
  209.  
  210. log_text = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=10)
  211. log_text.pack(pady=5)
  212.  
  213. root.mainloop()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement