EXTRACTOR-LINKS.pyw

Najeebsk

May 2nd, 2025

333

Never

Add comment

Not a member of Pastebin yet? Sign Up, it unlocks many cool features!

Python 7.88 KB | None | 0 0

raw download clone embed print report

import tkinter as tk
from tkinter import messagebox, filedialog
from tkinter import scrolledtext
from tkinter import INSERT
from PIL import Image, ImageTk
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import re
import os
import io # Required for image preview
# File extensions (lower-case, with leading dot) that preview_image_popup()
# accepts as previewable images when it checks the selected link.
image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
def extract_links(url, exclude_words=None):
    """Download *url* and return the absolute target of every anchor with an href.

    Args:
        url: Address of the page to fetch.
        exclude_words: Optional iterable of substrings. A link is skipped when
            its absolute URL contains any of them.

    Returns:
        A list of absolute URLs in document order (duplicates are kept), or an
        empty list when the request fails; the failure is printed to stdout.
    """
    try:
        # A request without a timeout can block forever on a stalled server,
        # which would freeze the GUI callback that calls this function.
        # 10 seconds matches the timeout used by preview_image_popup().
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL {url}: {e}")
        return []

    # Relative hrefs must be resolved against the final location after any
    # redirects, not against the address that was originally requested.
    base_url = response.url
    soup = BeautifulSoup(response.text, 'html.parser')
    links = []
    for link in soup.find_all('a', href=True):
        full_link = urljoin(base_url, link['href'])
        if exclude_words and any(excluded_word in full_link for excluded_word in exclude_words):
            continue
        links.append(full_link)
    return links
def extract_username(url):
    """Guess a user name from the path component of *url*.

    The path is tried against three patterns in order: ``/user/<name>``,
    ``/profile/<name>`` and a final ``/<name>`` segment made only of word
    characters. If none matches, the last non-empty path segment is returned.

    Returns:
        The extracted string, or ``None`` when the path has no non-empty segment.
    """
    path = urlparse(url).path

    for regex in (r'/user/(\w+)', r'/profile/(\w+)', r'/(\w+)$'):
        hit = re.search(regex, path)
        if hit is not None:
            return hit.group(1)

    # Fallback: last non-empty segment, whatever characters it contains.
    segments = [segment for segment in path.split('/') if segment]
    return segments[-1] if segments else None
def clean_and_log_links(links, filename, mode='a'):
    """Write *links* to *filename*, one per line, dropping a trailing ``,user``.

    Args:
        links: Iterable of link strings to record.
        filename: Path of the text file to write.
        mode: Mode passed to ``open``; ``'a'`` (default) appends, ``'w'``
            replaces the file's contents.

    The file is opened (and therefore created or truncated, depending on
    *mode*) even when *links* is empty.
    """
    # The previous version also derived a user name for every link but never
    # used the result; that dead work has been removed.
    with open(filename, mode) as f:
        f.writelines(re.sub(r',user$', '', link) + "\n" for link in links)
def overwrite_links_file(filename):
    """Remove *filename* when it exists and report that on stdout; otherwise do nothing."""
    if not os.path.exists(filename):
        return
    os.remove(filename)
    print(f"Cleared existing content in {filename}")
def extract_and_log_links():
    """Button callback: extract links from every entered URL into ``links.txt``.

    URLs are read from ``text_input`` (split on any whitespace) and exclusion
    substrings from ``exclude_input`` (one per line, blanks ignored). Any
    existing ``links.txt`` is removed first. Each URL has its query string cut
    off before it is fetched. An error dialog is shown for every URL that
    yields no links, a summary dialog at the end, and finally the log widget
    is refreshed from the file.
    """
    entered_urls = text_input.get("1.0", "end-1c").split()
    exclude_lines = exclude_input.get("1.0", "end-1c").splitlines()
    if not entered_urls:
        messagebox.showwarning("Input Error", "Please enter at least one URL.")
        return

    exclude_words = []
    for line in exclude_lines:
        stripped = line.strip()
        if stripped:
            exclude_words.append(stripped)

    overwrite_links_file('links.txt')

    collected = []
    for position, url in enumerate(entered_urls):
        # Everything from the first '?' onwards is discarded before fetching.
        base_url = url.partition('?')[0]
        print(f"Extracting links from: {base_url}")
        found = extract_links(base_url, exclude_words)
        collected.extend(found)
        if not found:
            messagebox.showerror("Error", f"No links found or an error occurred for {url}")
            continue
        # The first URL starts the file afresh; later ones append to it.
        write_mode = 'w' if position == 0 else 'a'
        clean_and_log_links(found, 'links.txt', write_mode)

    messagebox.showinfo("Done", f"Total links extracted and cleaned: {len(collected)}")
    display_log()
def display_log():
    """Replace the contents of the ``log_text`` widget with the text of ``links.txt``.

    If ``links.txt`` does not exist the widget is cleared instead of raising.
    """
    try:
        with open('links.txt', 'r') as file:
            log_content = file.read()
    except FileNotFoundError:
        # extract_and_log_links() deletes links.txt before each run and only
        # recreates it when a URL yields links, so the file is legitimately
        # absent after a run that found nothing.
        log_content = ""
    log_text.delete("1.0", tk.END)
    log_text.insert(tk.INSERT, log_content)
def save_as_settings():
    """Button callback: save the entered URLs and exclusion words to a text file.

    The user picks the destination in a save dialog; cancelling the dialog
    returns without writing. The file starts with a ``URLs:`` header followed
    by the URL text; an ``Exclude Words:`` section is added only when
    exclusion words were entered. A confirmation dialog is shown afterwards.
    """
    url_text = text_input.get("1.0", "end-1c").strip()
    excluded_text = exclude_input.get("1.0", "end-1c").strip()

    file_path = filedialog.asksaveasfilename(
        defaultextension=".txt",
        filetypes=[("Text files", "*.txt"), ("All files", "*.*")],
    )
    if file_path:
        sections = [f"URLs:\n{url_text}\n"]
        if excluded_text:
            sections.append(f"Exclude Words:\n{excluded_text}\n")
        with open(file_path, "w") as file:
            for section in sections:
                file.write(section)
        messagebox.showinfo("Settings Saved", f"Your settings have been saved to {file_path}.")
def load_settings():
    """Button callback: load URLs and exclusion words from a saved settings file.

    The expected layout is the one written by save_as_settings(): a header
    line, the URL lines, and optionally an ``Exclude Words:`` line followed by
    the exclusion words. The first line is always treated as the header and
    skipped. On success both input widgets are replaced with the loaded text
    and a confirmation dialog is shown.
    """
    file_path = filedialog.askopenfilename(
        defaultextension=".txt",
        filetypes=[("Text files", "*.txt"), ("All files", "*.*")],
    )
    if not file_path:
        # The dialog was cancelled. Nothing was selected, so warning that
        # "the selected file does not exist" would be misleading; return
        # quietly, as save_as_settings() does.
        return
    if not os.path.exists(file_path):
        messagebox.showwarning("Load Error", "The selected file does not exist.")
        return

    with open(file_path, "r") as file:
        content = file.read().splitlines()

    if len(content) < 2:
        # A file with fewer than two lines cannot hold a header plus data.
        # Tell the user instead of silently doing nothing.
        messagebox.showwarning("Load Error", "The selected file does not contain any saved settings.")
        return

    marker = "Exclude Words:"
    if marker in content:
        split_at = content.index(marker)  # looked up once, used for both halves
        urls = "\n".join(content[1:split_at]).strip()
        exclude_words = "\n".join(content[split_at + 1:]).strip()
    else:
        urls = "\n".join(content[1:]).strip()
        exclude_words = ""

    text_input.delete("1.0", tk.END)
    exclude_input.delete("1.0", tk.END)
    text_input.insert(tk.END, urls)
    exclude_input.insert(tk.END, exclude_words)
    messagebox.showinfo("Settings Loaded", "Your settings have been loaded successfully.")
def preview_image_popup():
    """Button callback: preview the image whose URL is on the log's cursor line.

    The line containing the insertion cursor in ``log_text`` is taken as the
    URL. It must end with one of ``image_exts``, either as a whole or in its
    path component. The image is downloaded, resized to 500x500 and shown in
    a new 600x600 top-level window. Any failure is reported in an error
    dialog rather than raised.
    """
    try:
        line_index = log_text.index(tk.INSERT).split(".")[0]
        selected_url = log_text.get(f"{line_index}.0", f"{line_index}.end").strip()

        # Check the full URL (as before) and also the bare path, so that image
        # links carrying a query string or fragment (".../a.jpg?size=large")
        # are accepted too.
        candidates = (selected_url.lower(), urlparse(selected_url).path.lower())
        extensions = tuple(image_exts)
        if not any(candidate.endswith(extensions) for candidate in candidates):
            raise ValueError("Selected link is not an image.")

        response = requests.get(selected_url, timeout=10)
        # Fail with the HTTP error itself rather than letting PIL choke on the
        # body of a 404/500 error page.
        response.raise_for_status()
        image = Image.open(io.BytesIO(response.content))

        popup = tk.Toplevel(root)
        popup.title("Image Preview")
        popup.geometry("600x600")
        img_resized = image.resize((500, 500), Image.LANCZOS)
        img_tk = ImageTk.PhotoImage(img_resized)
        label = tk.Label(popup, image=img_tk)
        label.image = img_tk  # Keep a reference to prevent garbage collection
        label.pack()
    except Exception as e:
        # GUI boundary: every failure (bad link, network, decoding) is shown
        # to the user instead of propagating into the Tk event loop.
        messagebox.showerror("Preview Error", str(e))
# ---------------------------------------------------------------------------
# Main window
# ---------------------------------------------------------------------------
root = tk.Tk()
root.title("Najeeb Shah Khan Link Extractor")
root.configure(bg="#2c3e50")

frame = tk.Frame(root, padx=10, pady=10)
frame.pack(padx=10, pady=10)

# URL input area
lbl = tk.Label(frame, text="Enter URLs (one per line):")
lbl.pack(anchor="w")
text_input = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=10)
text_input.pack(pady=5)

# Exclusion-word input area
exclude_lbl = tk.Label(frame, text="Enter words to exclude (one per line) [Optional]:")
exclude_lbl.pack(anchor="w")
exclude_input = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=4)
exclude_input.pack(pady=5)

# Right-click Copy/Paste menus, one per input widget
context_menu = tk.Menu(root, tearoff=0)
context_menu.add_command(
    label="Copy", command=lambda: text_input.event_generate("<<Copy>>")
)
context_menu.add_command(
    label="Paste", command=lambda: text_input.event_generate("<<Paste>>")
)

context_menu_exclude = tk.Menu(root, tearoff=0)
context_menu_exclude.add_command(
    label="Copy", command=lambda: exclude_input.event_generate("<<Copy>>")
)
context_menu_exclude.add_command(
    label="Paste", command=lambda: exclude_input.event_generate("<<Paste>>")
)

text_input.bind(
    "<Button-3>",
    lambda event: context_menu.tk_popup(event.x_root, event.y_root),
)
exclude_input.bind(
    "<Button-3>",
    lambda event: context_menu_exclude.tk_popup(event.x_root, event.y_root),
)

# Action buttons: (caption, callback, background colour), left to right
button_frame = tk.Frame(root, bg="#2c3e50")
button_frame.pack(pady=5)
for _caption, _callback, _colour in (
    ("Extract Links", extract_and_log_links, "#3498db"),
    ("Save Settings As", save_as_settings, "#27ae60"),
    ("Load Settings", load_settings, "#e74c3c"),
    ("Preview Image", preview_image_popup, "#f0c1c1"),
):
    tk.Button(
        button_frame,
        text=_caption,
        command=_callback,
        bg=_colour,
        fg="white",
        font=("Arial", 12, "bold"),
        width=12,
    ).pack(side=tk.LEFT, padx=5)

# Log output area (filled by display_log)
log_label = tk.Label(frame, text="Log Output:")
log_label.pack(anchor="w")
log_text = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=10)
log_text.pack(pady=5)

root.mainloop()

Add Comment

Please, Sign In to add comment