Advertisement
vindree

DriveThruRPG library.dat title extractor

Dec 5th, 2024
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.51 KB | None | 0 0
  1. import os
  2. import time
  3. import logging
  4. import re
  5. import sys
  6.  
  7. # Configure logging
  8. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  9.  
  10. def count_title_opener(file_path):
  11.     """Counts occurrences of the 'title' opener in the binary file."""
  12.     title_opener = b"title"
  13.     try:
  14.         with open(file_path, 'rb') as file:
  15.             content = file.read()
  16.         return content.count(title_opener)
  17.     except FileNotFoundError:
  18.         logging.error(f"The file at {file_path} was not found.")
  19.         raise
  20.     except Exception as e:
  21.         logging.error(f"An unexpected error occurred: {e}")
  22.         raise
  23.  
  24. def extract_titles(file_path, output_file=None):
  25.     """Extracts titles from the binary file and writes them to an output file."""
  26.     title_opener = b"title"
  27.     end_pattern = b'\x00\x00\x00'
  28.     errors = []
  29.  
  30.     try:
  31.         with open(file_path, 'rb') as file:
  32.             content = file.read()
  33.  
  34.         title_count = content.count(title_opener)
  35.         if title_count == 0:
  36.             logging.warning("No 'title' openers found. Check the file format.")
  37.             return
  38.  
  39.         logging.info(f"Found {title_count} occurrences of 'title' in the file.")
  40.  
  41.         start_index = 0
  42.         titles = []
  43.  
  44.         while start_index < len(content):
  45.             start_index = content.find(title_opener, start_index)
  46.             if start_index == -1:
  47.                 break
  48.             start_index += len(title_opener)
  49.  
  50.             if start_index + 3 > len(content):
  51.                 errors.append(f"Not enough data after index {start_index}. Skipping this occurrence. Bytes: {content[start_index:start_index+30].hex()}")
  52.                 start_index += 1
  53.                 continue
  54.  
  55.             start_index += 3
  56.             end_index = content.find(end_pattern, start_index)
  57.  
  58.             if end_index == -1:
  59.                 errors.append(f"End pattern not found after index {start_index}. Skipping this occurrence. Bytes: {content[start_index:start_index+30].hex()}")
  60.                 start_index += 1
  61.                 continue
  62.  
  63.             title_bytes = content[start_index:end_index].strip()
  64.             try:
  65.                 title = title_bytes.decode('utf-8', errors='replace')
  66.                 title = re.sub(r'[^\x20-\x7E]', '', title)
  67.                 titles.append(title)
  68.             except UnicodeDecodeError:
  69.                 errors.append(f"Error decoding bytes at index {start_index} to {end_index}. Bytes: {content[start_index:end_index].hex()}")
  70.  
  71.             start_index = end_index + len(end_pattern)
  72.  
  73.         if not titles:
  74.             logging.warning("No valid titles found after scraping.")
  75.  
  76.         timestamp = time.strftime("%Y%m%d_%H%M%S")
  77.         if output_file is None:
  78.             output_file = os.path.splitext(file_path)[0] + f"_titles_{timestamp}.txt"
  79.  
  80.         with open(output_file, 'w', encoding='utf-8') as output:
  81.             output.write(f"Script run at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
  82.             if errors:
  83.                 output.write("Errors:\n")
  84.                 for error in errors[:5]:
  85.                     output.write(f"- {error}\n")
  86.                 output.write("\n")
  87.             output.write("Titles:\n")
  88.             for title in titles:
  89.                 output.write(title.strip() + '\n')
  90.  
  91.         logging.info(f"Titles successfully written to {output_file}.")
  92.         logging.info(f"Number of 'title' openers found: {title_count}")
  93.         logging.info(f"Number of titles extracted: {len(titles)}")
  94.  
  95.     except FileNotFoundError:
  96.         logging.error(f"The file at {file_path} was not found.")
  97.         raise
  98.     except Exception as e:
  99.         logging.error(f"An unexpected error occurred: {e}")
  100.         raise
  101.  
  102. if __name__ == "__main__":
  103.     if len(sys.argv) != 2:
  104.         logging.error("Usage: python title_extractor.py <path_to_file.dat>")
  105.         input("Press Enter to exit...")
  106.     else:
  107.         input_file = sys.argv[1]
  108.         if not os.path.isfile(input_file):
  109.             logging.error("The specified file does not exist.")
  110.         else:
  111.             try:
  112.                 title_count = count_title_opener(input_file)
  113.                 if title_count == 0:
  114.                     logging.warning("No 'title' openers found. Check if the file format is correct or the data is not as expected.")
  115.                 else:
  116.                     extract_titles(input_file)
  117.             except Exception as e:
  118.                 logging.error(f"An error occurred during processing: {e}")
  119.  
  120.         input("Press Enter to exit...")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement