DriveThruRPG library.dat title extractor

vindree

Dec 5th, 2024

Never

Add comment

Not a member of Pastebin yet? Sign Up, it unlocks many cool features!

Python 4.51 KB | None | 0 0

raw download clone embed print report

import os
import time
import logging
import re
import sys
# Configure logging once at import time: records at INFO level and above are
# formatted as "<time> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def count_title_opener(file_path, title_opener=b"title"):
    """Count occurrences of a byte marker in a binary file.

    The whole file is read into memory and scanned with ``bytes.count``, so
    the result is the number of non-overlapping occurrences of the marker
    anywhere in the file (including inside longer words such as
    ``b"subtitle"``).

    Args:
        file_path: Path of the file to scan.
        title_opener: Byte sequence to count. Defaults to ``b"title"``, the
            marker this script looks for.

    Returns:
        The number of non-overlapping occurrences of ``title_opener``.

    Raises:
        ValueError: If ``title_opener`` is empty.
        FileNotFoundError: If ``file_path`` does not exist (logged first).
        Exception: Any other error raised while reading is logged and
            re-raised unchanged.
    """
    # bytes.count(b"") returns len(content) + 1, which is never a meaningful
    # marker count, so reject an empty marker up front.
    if not title_opener:
        raise ValueError("title_opener must be a non-empty byte sequence.")
    try:
        with open(file_path, 'rb') as file:
            return file.read().count(title_opener)
    except FileNotFoundError:
        logging.error("The file at %s was not found.", file_path)
        raise
    except Exception as e:
        logging.error("An unexpected error occurred: %s", e)
        raise
# Marker that precedes every title record in the scanned file.
_TITLE_OPENER = b"title"
# Byte run treated as the end of a title's text.
_END_PATTERN = b'\x00\x00\x00'
# Bytes skipped between the marker and the title text.
# NOTE(review): presumably a length/type prefix -- the format is not documented here.
_TITLE_HEADER_LEN = 3
# Matches every character outside printable ASCII (space through tilde).
# Compiled once so the scan loop does not look the pattern up per title.
_NON_PRINTABLE_ASCII = re.compile(r'[^\x20-\x7E]')


def _scan_titles(content):
    """Return ``(titles, errors)`` collected from every marker in *content*."""
    titles = []
    errors = []
    start_index = 0
    while start_index < len(content):
        start_index = content.find(_TITLE_OPENER, start_index)
        if start_index == -1:
            break
        start_index += len(_TITLE_OPENER)
        if start_index + _TITLE_HEADER_LEN > len(content):
            errors.append(
                f"Not enough data after index {start_index}. Skipping this occurrence. "
                f"Bytes: {content[start_index:start_index + 30].hex()}"
            )
            start_index += 1
            continue
        start_index += _TITLE_HEADER_LEN
        end_index = content.find(_END_PATTERN, start_index)
        if end_index == -1:
            errors.append(
                f"End pattern not found after index {start_index}. Skipping this occurrence. "
                f"Bytes: {content[start_index:start_index + 30].hex()}"
            )
            start_index += 1
            continue
        # errors='replace' substitutes U+FFFD for undecodable bytes instead of
        # raising, so no UnicodeDecodeError handling is needed here. The
        # substitution below then drops those and any other character
        # outside printable ASCII.
        decoded = content[start_index:end_index].strip().decode('utf-8', errors='replace')
        titles.append(_NON_PRINTABLE_ASCII.sub('', decoded))
        # Resume after the terminator so marker bytes inside an already
        # extracted title are not treated as a new record.
        start_index = end_index + len(_END_PATTERN)
    return titles, errors


def _write_report(output_file, titles, errors):
    """Write the run timestamp, up to five errors, and all titles to *output_file*."""
    with open(output_file, 'w', encoding='utf-8') as output:
        output.write(f"Script run at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        if errors:
            output.write("Errors:\n")
            # Only the first five errors are reported to keep the file short.
            for error in errors[:5]:
                output.write(f"- {error}\n")
            output.write("\n")
        output.write("Titles:\n")
        for title in titles:
            output.write(title.strip() + '\n')


def extract_titles(file_path, output_file=None):
    """Extract titles from a binary file and write them to a text report.

    Each occurrence of ``b"title"`` is followed by three skipped bytes; the
    bytes from there up to the next ``b"\\x00\\x00\\x00"`` are decoded as
    UTF-8 and reduced to printable ASCII. If the file contains no marker at
    all, a warning is logged and nothing is written.

    Args:
        file_path: Path of the binary file to scan (read fully into memory).
        output_file: Path of the report to write. When ``None``, the report
            is written next to the input as
            ``<input stem>_titles_<YYYYmmdd_HHMMSS>.txt``.

    Returns:
        None.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist (logged as a
            missing input file), or if the report location cannot be opened
            (logged as an unexpected error).
        Exception: Any other error is logged and re-raised unchanged.
    """
    # Read the input in its own try block so that a FileNotFoundError raised
    # later for the *output* path is not reported as a missing input file.
    try:
        with open(file_path, 'rb') as file:
            content = file.read()
    except FileNotFoundError:
        logging.error("The file at %s was not found.", file_path)
        raise
    except Exception as e:
        logging.error("An unexpected error occurred: %s", e)
        raise

    try:
        title_count = content.count(_TITLE_OPENER)
        if title_count == 0:
            logging.warning("No 'title' openers found. Check the file format.")
            return
        logging.info("Found %s occurrences of 'title' in the file.", title_count)

        titles, errors = _scan_titles(content)
        if not titles:
            # The report is still written so any recorded errors are visible.
            logging.warning("No valid titles found after scraping.")

        if output_file is None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            output_file = os.path.splitext(file_path)[0] + f"_titles_{timestamp}.txt"
        _write_report(output_file, titles, errors)

        logging.info("Titles successfully written to %s.", output_file)
        logging.info("Number of 'title' openers found: %s", title_count)
        logging.info("Number of titles extracted: %s", len(titles))
    except Exception as e:
        logging.error("An unexpected error occurred: %s", e)
        raise
if __name__ == "__main__":
    # The script expects exactly one argument: the path of the .dat file.
    if len(sys.argv) == 2:
        input_file = sys.argv[1]
        if os.path.isfile(input_file):
            try:
                # Cheap pre-check: only run the extraction when at least one
                # marker is present in the file.
                title_count = count_title_opener(input_file)
                if title_count:
                    extract_titles(input_file)
                else:
                    logging.warning("No 'title' openers found. Check if the file format is correct or the data is not as expected.")
            except Exception as e:
                # Top-level boundary: report the failure instead of crashing
                # with a traceback.
                logging.error(f"An error occurred during processing: {e}")
        else:
            logging.error("The specified file does not exist.")
        # Wait for the user before exiting, whatever the outcome above.
        input("Press Enter to exit...")
    else:
        logging.error("Usage: python title_extractor.py <path_to_file.dat>")
        input("Press Enter to exit...")

Tags: Python Script

Add Comment

Please, Sign In to add comment