Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import tkinter as tk
- from tkinter import messagebox, filedialog
- from tkinter import scrolledtext
- from tkinter import INSERT
- from PIL import Image, ImageTk
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import urljoin, urlparse
- import re
- import os
- import io # Required for image preview
# File suffixes (lower-case, leading dot included) accepted by the image preview.
image_exts = [
    '.jpg',
    '.jpeg',
    '.png',
    '.gif',
    '.bmp',
    '.webp',
]
def extract_links(url, exclude_words=None, timeout=10):
    """Fetch a page and return the absolute target of every ``<a href=...>`` on it.

    Args:
        url: Address of the page to scan.
        exclude_words: Optional collection of substrings; a link containing
            any of them is left out of the result.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the caller indefinitely.

    Returns:
        A list of absolute link URLs in document order (duplicates are kept).
        An empty list is returned when the request fails; the error is
        printed to stdout.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL {url}: {e}")
        return []

    # Resolve relative hrefs against the final location after any redirect.
    base_url = response.url
    soup = BeautifulSoup(response.text, 'html.parser')

    links = []
    for anchor in soup.find_all('a', href=True):
        full_link = urljoin(base_url, anchor['href'])
        if exclude_words and any(word in full_link for word in exclude_words):
            continue
        links.append(full_link)
    return links
def extract_username(url):
    """Guess the username that a profile-style URL refers to.

    The URL path is checked, in order, for ``/user/<name>``,
    ``/profile/<name>`` and a final ``/<name>`` segment made only of word
    characters. If none of these match, the last non-empty path segment is
    returned as-is.

    Args:
        url: The URL to inspect.

    Returns:
        The extracted name, or ``None`` when the path has no non-empty segment.
    """
    path = urlparse(url).path

    for regex in (r'/user/(\w+)', r'/profile/(\w+)', r'/(\w+)$'):
        found = re.search(regex, path)
        if found is not None:
            return found.group(1)

    # Nothing matched (e.g. trailing slash or a dotted file name):
    # fall back to the right-most segment that is not empty.
    segments = [segment for segment in path.split('/') if segment]
    return segments[-1] if segments else None
def clean_and_log_links(links, filename, mode='a'):
    """Write links to a text file, one per line, dropping a trailing ``,user``.

    Args:
        links: The link strings to log.
        filename: Path of the file to write to.
        mode: Mode passed to ``open()``; ``'a'`` (default) appends,
            ``'w'`` replaces the file's content.
    """
    # The links are written verbatim apart from the suffix clean-up; nothing
    # here needs to parse them, so a malformed URL cannot abort the logging.
    with open(filename, mode) as f:
        f.writelines(f"{re.sub(r',user$', '', link)}\n" for link in links)
def overwrite_links_file(filename):
    """Delete ``filename`` if it exists and report the removal on stdout."""
    if not os.path.exists(filename):
        return
    os.remove(filename)
    print(f"Cleared existing content in {filename}")
def extract_and_log_links():
    """Handle the "Extract Links" button.

    Reads the whitespace-separated URLs and the per-line exclusion terms from
    the input boxes, scrapes each URL (with its query string removed), writes
    the links found to ``links.txt`` and finally refreshes the log view.
    An error dialog is shown for every URL that yields no links.
    """
    raw_urls = text_input.get("1.0", "end-1c")
    raw_excludes = exclude_input.get("1.0", "end-1c")

    urls = raw_urls.split()
    if not urls:
        messagebox.showwarning("Input Error", "Please enter at least one URL.")
        return

    # Keep only non-blank exclusion terms, trimmed of surrounding whitespace.
    exclude_words = []
    for line in raw_excludes.splitlines():
        term = line.strip()
        if term:
            exclude_words.append(term)

    overwrite_links_file('links.txt')

    all_links = []
    for position, url in enumerate(urls):
        # Everything from the first '?' onwards is dropped before fetching.
        base_url = url.partition('?')[0]
        print(f"Extracting links from: {base_url}")

        links = extract_links(base_url, exclude_words)
        all_links += links

        if not links:
            messagebox.showerror("Error", f"No links found or an error occurred for {url}")
            continue

        # The first URL starts the file afresh; later ones append to it.
        clean_and_log_links(links, 'links.txt', 'a' if position else 'w')

    messagebox.showinfo("Done", f"Total links extracted and cleaned: {len(all_links)}")
    display_log()
def display_log():
    """Replace the content of the log box with the content of ``links.txt``.

    ``links.txt`` is removed before each extraction and only written again
    when links are found, so it may legitimately be absent; in that case the
    log box is simply left empty instead of raising ``FileNotFoundError``.
    """
    try:
        with open('links.txt', 'r') as file:
            log_content = file.read()
    except FileNotFoundError:
        log_content = ""

    log_text.delete("1.0", tk.END)
    log_text.insert(tk.INSERT, log_content)
def save_as_settings():
    """Handle the "Save Settings As" button.

    Asks for a destination file and writes the current URLs under a ``URLs:``
    header, followed by an ``Exclude Words:`` section when any exclusion terms
    were entered. Returns without doing anything if the dialog is cancelled.
    """
    urls = text_input.get("1.0", "end-1c").strip()
    exclude_words = exclude_input.get("1.0", "end-1c").strip()

    file_path = filedialog.asksaveasfilename(
        defaultextension=".txt",
        filetypes=[("Text files", "*.txt"), ("All files", "*.*")],
    )
    if not file_path:
        return

    # Assemble the whole document first, then write it in one go.
    sections = [f"URLs:\n{urls}\n"]
    if exclude_words:
        sections.append(f"Exclude Words:\n{exclude_words}\n")

    with open(file_path, "w") as file:
        file.write("".join(sections))

    messagebox.showinfo("Settings Saved", f"Your settings have been saved to {file_path}.")
def load_settings():
    """Handle the "Load Settings" button.

    Asks for a settings file and loads it into the two input boxes. The first
    line of the file is treated as a header and skipped; the lines up to an
    ``Exclude Words:`` line are the URLs and the lines after it are the
    exclusion terms. When that line is absent, everything after the header is
    taken as URLs and the exclusion box is cleared.
    """
    file_path = filedialog.askopenfilename(
        defaultextension=".txt",
        filetypes=[("Text files", "*.txt"), ("All files", "*.*")],
    )
    if not file_path:
        # The dialog was cancelled; that is not an error, so return quietly
        # (same behavior as cancelling the save dialog).
        return
    if not os.path.exists(file_path):
        messagebox.showwarning("Load Error", "The selected file does not exist.")
        return

    with open(file_path, "r") as file:
        content = file.read().splitlines()

    if len(content) < 2:
        # Header only (or empty file): nothing to load, tell the user instead
        # of silently leaving the form untouched.
        messagebox.showwarning("Load Error", "The selected file does not contain any settings.")
        return

    marker = "Exclude Words:"
    if marker in content:
        split_at = content.index(marker)
        urls = "\n".join(content[1:split_at]).strip()
        exclude_words = "\n".join(content[split_at + 1:]).strip()
    else:
        urls = "\n".join(content[1:]).strip()
        exclude_words = ""

    text_input.delete("1.0", tk.END)
    exclude_input.delete("1.0", tk.END)
    text_input.insert(tk.END, urls)
    exclude_input.insert(tk.END, exclude_words)
    messagebox.showinfo("Settings Loaded", "Your settings have been loaded successfully.")
def preview_image_popup():
    """Handle the "Preview Image" button.

    Takes the log line the text cursor is on, and if it ends with one of the
    extensions in ``image_exts`` downloads it and shows it in a new window.
    The picture is resized to 500x500 pixels regardless of its aspect ratio.
    Any failure (not an image link, download error, undecodable data) is
    reported in an error dialog.
    """
    try:
        # Text indices look like "<line>.<column>"; only the line is needed.
        line_index = log_text.index(tk.INSERT).split(".")[0]
        selected_url = log_text.get(f"{line_index}.0", f"{line_index}.end").strip()

        if not selected_url.lower().endswith(tuple(image_exts)):
            raise ValueError("Selected link is not an image.")

        response = requests.get(selected_url, timeout=10)
        # Fail on 4xx/5xx here so the user sees the HTTP error rather than a
        # confusing decoder error caused by feeding an error page to PIL.
        response.raise_for_status()
        image = Image.open(io.BytesIO(response.content))

        popup = tk.Toplevel(root)
        popup.title("Image Preview")
        popup.geometry("600x600")

        img_resized = image.resize((500, 500), Image.LANCZOS)
        img_tk = ImageTk.PhotoImage(img_resized)
        label = tk.Label(popup, image=img_tk)
        label.image = img_tk  # Keep a reference to prevent garbage collection
        label.pack()
    except Exception as e:
        # GUI boundary: surface every failure to the user instead of crashing.
        messagebox.showerror("Preview Error", str(e))
# ---- Main window ----------------------------------------------------------
root = tk.Tk()
root.title("Najeeb Shah Khan Link Extractor")
root.configure(bg="#2c3e50")

# Container holding the two input boxes and, further down, the log view.
frame = tk.Frame(root, padx=10, pady=10)
frame.pack(padx=10, pady=10)

# ---- Input boxes ----------------------------------------------------------
lbl = tk.Label(frame, text="Enter URLs (one per line):")
lbl.pack(anchor="w")
text_input = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=10)
text_input.pack(pady=5)

exclude_lbl = tk.Label(frame, text="Enter words to exclude (one per line) [Optional]:")
exclude_lbl.pack(anchor="w")
exclude_input = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=4)
exclude_input.pack(pady=5)

# ---- Right-click Copy/Paste menus, one per input box ------------------------
context_menu = tk.Menu(root, tearoff=0)
context_menu_exclude = tk.Menu(root, tearoff=0)
menu_actions = (("Copy", "<<Copy>>"), ("Paste", "<<Paste>>"))
for menu, target in ((context_menu, text_input), (context_menu_exclude, exclude_input)):
    for caption, virtual_event in menu_actions:
        # Defaults bind the current widget/event to this menu entry.
        menu.add_command(
            label=caption,
            command=lambda w=target, ev=virtual_event: w.event_generate(ev),
        )
    target.bind("<Button-3>", lambda e, m=menu: m.tk_popup(e.x_root, e.y_root))

# ---- Action buttons -------------------------------------------------------
button_frame = tk.Frame(root, bg="#2c3e50")
button_frame.pack(pady=5)
button_specs = (
    ("Extract Links", extract_and_log_links, "#3498db"),
    ("Save Settings As", save_as_settings, "#27ae60"),
    ("Load Settings", load_settings, "#e74c3c"),
    ("Preview Image", preview_image_popup, "#f0c1c1"),
)
for caption, handler, colour in button_specs:
    tk.Button(
        button_frame,
        text=caption,
        command=handler,
        bg=colour,
        fg="white",
        font=("Arial", 12, "bold"),
        width=12,
    ).pack(side=tk.LEFT, padx=5)

# ---- Log output (lives in `frame`, below the input boxes) -------------------
log_label = tk.Label(frame, text="Log Output:")
log_label.pack(anchor="w")
log_text = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=10)
log_text.pack(pady=5)

root.mainloop()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement