# import_requests.py
# This script fetches metadata for Rust crates from crates.io, enriches it with AI insights using a local LLM,
# and performs dependency analysis. It also handles retries, caching, and logging.

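# Usage (a sketch, assuming the GGUF model exists at MODEL_PATH below; GITHUB_TOKEN
# is optional and only used to fetch GitHub star counts):
#
#   GITHUB_TOKEN=<your token> python import_requests.py
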
import json
import logging
import time
import os
import re
import shutil
import tarfile
import sys
from functools import wraps
from typing import Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
import tiktoken
import requests_cache
from datetime import datetime
from llama_cpp import Llama

# Constants
MODEL_PATH = os.path.expanduser("~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf")
MAX_TOKENS = 256  # Default completion budget per LLM call
MODEL_TOKEN_LIMIT = 4096  # Context window requested from llama.cpp
PROMPT_TOKEN_MARGIN = 3000  # Prompts above this are truncated before inference
CHECKPOINT_INTERVAL = 5  # Save intermediary results every N crates
MAX_RETRIES = 3

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(f"crate_enrichment_{time.strftime('%Y%m%d-%H%M%S')}.log")
    ]
)

# GitHub Token
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")

# Caching
session = requests_cache.CachedSession('crate_cache', expire_after=3600)

# Retry decorator
def retry_with_backoff(max_retries=3, backoff_factor=1.0):
    """Retry decorator with exponential backoff"""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Pop the fallback value so it is not forwarded to the wrapped function
            default_return = kwargs.pop('default_return', None)
            retries, wait_time = 0, backoff_factor
            while retries < max_retries:
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    retries += 1
                    if retries == max_retries:
                        logging.error(f"All {max_retries} retries failed: {str(e)}")
                        return default_return
                    logging.warning(f"Attempt {retries} failed, retrying in {wait_time:.1f}s: {str(e)}")
                    time.sleep(wait_time)
                    wait_time *= 2
            return default_return
        return wrapper
    return decorator

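# Example call: yields a fallback value instead of raising once all retries fail
# (the `default_return` keyword is consumed by the wrapper, not the function):
#
#   @retry_with_backoff(max_retries=MAX_RETRIES)
#   def fetch(url):
#       r = session.get(url)
#       r.raise_for_status()
#       return r.json()
#
#   data = fetch("https://crates.io/api/v1/crates/serde", default_return={})
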
def estimate_tokens(prompt: str) -> int:
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(prompt))

def truncate_content(content: str, max_tokens: int = 1000) -> str:
    # Guard against None (e.g. a failed readme_summary) as well as empty strings
    if not content:
        return ""
    paragraphs = content.split("\n\n")
    result, current_tokens = "", 0
    encoding = tiktoken.get_encoding("cl100k_base")
    for para in paragraphs:
        tokens = len(encoding.encode(para))
        if current_tokens + tokens <= max_tokens:
            result += para + "\n\n"
            current_tokens += tokens
        else:
            break
    return result.strip()

def clean_output(output: str, task: str = "general") -> str:
    """Task-specific output cleaning"""
    if not output:
        return ""

    # Remove any remaining prompt artifacts
    output = output.split("<|end|>")[0].strip()

    if task == "classification":
        # For classification tasks, extract just the category
        categories = ["AI", "Database", "Web Framework", "Networking", "Serialization",
                      "Utilities", "DevTools", "ML", "Cryptography", "Unknown"]
        for category in categories:
            if re.search(r'\b' + re.escape(category) + r'\b', output, re.IGNORECASE):
                return category
        return "Unknown"

    elif task == "factual_pairs":
        # For factual pairs, ensure proper formatting
        pairs = []
        facts = re.findall(r'✅\s*Factual:?\s*(.*?)(?=❌|\Z)', output, re.DOTALL)
        counterfacts = re.findall(r'❌\s*Counterfactual:?\s*(.*?)(?=✅|\Z)', output, re.DOTALL)

        # Pair them up
        for i in range(min(len(facts), len(counterfacts))):
            pairs.append(f"✅ Factual: {facts[i].strip()}\n❌ Counterfactual: {counterfacts[i].strip()}")

        return "\n\n".join(pairs)

    else:
        # General cleaning - more permissive than before
        lines = [line.strip() for line in output.splitlines() if line.strip()]
        return "\n".join(lines)

# Load model ONCE at script startup
model = Llama(
    model_path=MODEL_PATH,
    n_ctx=MODEL_TOKEN_LIMIT,  # Must fit PROMPT_TOKEN_MARGIN plus the completion budget
    n_gpu_layers=32,  # Use as many GPU layers as possible
    verbose=False
)

# Then use model for inference
def run_llama(prompt: str, temp: float = 0.2, max_tokens: int = MAX_TOKENS, task: str = "general") -> Optional[str]:
    """Run the LLM with customizable parameters per task"""
    try:
        token_count = estimate_tokens(prompt)
        if token_count > PROMPT_TOKEN_MARGIN:
            logging.warning(f"Prompt too long ({token_count} tokens). Truncating.")
            prompt = truncate_content(prompt, PROMPT_TOKEN_MARGIN - 100)

        output = model(
            prompt,
            max_tokens=max_tokens,
            temperature=temp,
            stop=["<|end|>", "<|user|>", "<|system|>"]  # Stop at these tokens
        )

        raw_text = output["choices"][0]["text"]
        return clean_output(raw_text, task=task)
    except Exception as e:
        logging.error(f"Model inference failed: {str(e)}")
        raise

def summarize_feature(crate_name: str, features: list) -> Optional[str]:
    """Generate summaries for crate features with better prompting"""
    try:
        if not features:
            return "No features documented for this crate."

        # Format features with their dependencies
        feature_text = ""
        for f in features[:8]:  # Limit to 8 features for context size
            feature_name = f.get("name", "")
            deps = f.get("dependencies", [])
            deps_str = ", ".join(deps) if deps else "none"
            feature_text += f"- {feature_name} (dependencies: {deps_str})\n"

        prompt = (
            f"<|system|>You are a Rust programming expert analyzing crate features.\n"
            f"<|user|>For the Rust crate `{crate_name}`, explain these features and what functionality they provide:\n\n"
            f"{feature_text}\n\n"
            f"Provide a concise explanation of each feature's purpose and when a developer would enable it.\n"
            f"<|end|>"
        )

        # Use moderate temperature for informative but natural explanation
        return run_llama(prompt, temp=0.2, max_tokens=350)
    except Exception as e:
        logging.warning(f"Feature summarization failed for {crate_name}: {str(e)}")
        return "Feature summary not available."

def classify_use_case(crate_name: str, desc: str, keywords: list, readme_summary: str = "", key_deps: list = None) -> Optional[str]:
    """Classify the use case of a crate with rich context"""
    key_deps = key_deps or []
    try:
        joined = ", ".join(keywords[:10]) if keywords else "None"
        key_deps_str = ", ".join(key_deps[:5]) if key_deps else "None"
        desc = truncate_content(desc, 300)
        readme_summary = truncate_content(readme_summary, 300)

        # Few-shot prompting with examples
        prompt = (
            f"<|system|>You are a Rust expert classifying crates into the most appropriate category.\n"
            f"<|user|>\n"
            f"# Example 1\n"
            f"Crate: `tokio`\n"
            f"Description: An asynchronous runtime for the Rust programming language\n"
            f"Keywords: async, runtime, futures\n"
            f"Key Dependencies: mio, bytes, parking_lot\n"
            f"Category: Networking\n\n"

            f"# Example 2\n"
            f"Crate: `serde`\n"
            f"Description: A generic serialization/deserialization framework\n"
            f"Keywords: serde, serialization\n"
            f"Key Dependencies: serde_derive\n"
            f"Category: Serialization\n\n"

            f"# Crate to Classify\n"
            f"Crate: `{crate_name}`\n"
            f"Description: {desc}\n"
            f"Keywords: {joined}\n"
            f"README Summary: {readme_summary}\n"
            f"Key Dependencies: {key_deps_str}\n\n"
            f"Category (pick only one): [AI, Database, Web Framework, Networking, Serialization, Utilities, DevTools, ML, Cryptography, Unknown]\n"
            f"<|end|>"
        )

        # Use lower temperature for classification tasks
        result = run_llama(prompt, temp=0.1, max_tokens=20, task="classification")

        # Extract just the category name using regex (belt and braces on top of clean_output)
        categories = ["AI", "Database", "Web Framework", "Networking", "Serialization",
                      "Utilities", "DevTools", "ML", "Cryptography", "Unknown"]
        for category in categories:
            if re.search(r'\b' + re.escape(category) + r'\b', result, re.IGNORECASE):
                return category

        return "Unknown"  # Default if no category is found
    except Exception as e:
        logging.warning(f"Classification failed for {crate_name}: {str(e)}")
        return "Unknown"

def score_crate(data: dict) -> float:
    score = (data.get("downloads", 0) / 1000) + (data.get("github_stars", 0) * 10)
    score += len(truncate_content(data.get("readme", ""), 1000)) / 500
    return round(score, 2)

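# Worked example of the formula above: 1,000,000 downloads, 5,000 stars, and a
# truncated README of 4,000 characters score
# (1_000_000 / 1000) + (5_000 * 10) + (4_000 / 500) = 51008.0
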
def factual_pairs(crate: dict) -> Optional[str]:
    try:
        desc = truncate_content(crate.get("description", ""), 300)
        readme_summary = truncate_content(crate.get("readme_summary", ""), 300)

        prompt = (
            f"<|system|>Create 5 factual/counterfactual pairs for the Rust crate. "
            f"Factual statements must be true. Counterfactuals should be plausible but incorrect - "
            f"make them subtle and convincing rather than simple negations.\n"
            f"<|user|>\n"
            f"Crate: {crate['name']}\n"
            f"Description: {desc}\n"
            f"Repo: {crate.get('repository', '')}\n"
            f"README Summary: {readme_summary}\n"
            f"Key Features: {', '.join([f['name'] for f in crate.get('features', [])][:5])}\n\n"
            f"Format each pair as:\n"
            f"✅ Factual: [true statement about the crate]\n"
            f"❌ Counterfactual: [plausible but false statement]\n\n"
            f"Create 5 pairs.\n"
            f"<|end|>"
        )

        # Use higher temperature for creative outputs
        return run_llama(prompt, temp=0.6, max_tokens=400, task="factual_pairs")
    except Exception as e:
        logging.warning(f"Factual pairs generation failed for {crate['name']}: {str(e)}")
        return None

def extract_code_snippets(readme: str) -> list:
    """Extract code snippets from markdown README"""
    snippets = []
    if not readme:
        return snippets

    # Match fenced blocks that are unlabeled or labeled rust (including info
    # strings like ```rust,no_run), capturing only the code body
    pattern = r"```(?:rust[^\n]*)?\s*\n([\s\S]*?)```"
    matches = re.findall(pattern, readme)

    for code in matches:
        if len(code.strip()) > 10:  # Only include non-trivial snippets
            snippets.append(code.strip())

    return snippets[:5]  # Limit to 5 snippets

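# NOTE: fetch_crate_metadata() below relies on extract_readme_sections(), which the
# script never defines. A minimal sketch, assuming the intent is to keep the leading
# heading-delimited README sections within a token budget:
def extract_readme_sections(readme: str, max_tokens: int = 2000) -> str:
    """Keep markdown sections (headings plus their text) up to a token budget"""
    if not readme:
        return ""
    # Split on level 1-3 markdown headings, keeping the heading lines themselves
    parts = re.split(r"(?m)^(#{1,3}\s.*)$", readme)
    encoding = tiktoken.get_encoding("cl100k_base")
    kept, used = [], 0
    for part in parts:
        part = part.strip()
        if not part:
            continue
        tokens = len(encoding.encode(part))
        if used + tokens > max_tokens:
            break
        kept.append(part)
        used += tokens
    return "\n\n".join(kept)
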
def download_crate_source(crate_name: str, version: str, temp_dir: str) -> Optional[str]:
    """Download and extract crate source code"""
    try:
        url = f"https://crates.io/api/v1/crates/{crate_name}/{version}/download"
        download_path = os.path.join(temp_dir, f"{crate_name}-{version}.tar.gz")

        with session.get(url, stream=True) as r:
            r.raise_for_status()
            with open(download_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        extract_path = os.path.join(temp_dir, f"{crate_name}-{version}")
        os.makedirs(extract_path, exist_ok=True)

        with tarfile.open(download_path) as tar:
            # On Python 3.12+, pass filter="data" here to guard against path traversal
            tar.extractall(path=extract_path)

        return extract_path
    except Exception as e:
        logging.error(f"Failed to download source for {crate_name}: {str(e)}")
        return None

def analyze_dependencies(crates_data: list) -> dict:
    """Analyze dependencies between crates"""
    dependency_graph = {}
    crate_names = {crate["name"] for crate in crates_data}

    for crate in crates_data:
        deps = []
        for dep in crate.get("dependencies", []):
            if dep.get("crate_id") in crate_names:
                deps.append(dep.get("crate_id"))
        dependency_graph[crate["name"]] = deps

    # Find most depended-upon crates
    reverse_deps = {}
    for crate, deps in dependency_graph.items():
        for dep in deps:
            if dep not in reverse_deps:
                reverse_deps[dep] = []
            reverse_deps[dep].append(crate)

    return {
        "dependency_graph": dependency_graph,
        "reverse_dependencies": reverse_deps,
        "most_depended": sorted(reverse_deps.items(), key=lambda x: len(x[1]), reverse=True)[:10]
    }

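# Illustrative return shape, assuming a single edge a -> b within the fetched set:
#   {"dependency_graph": {"a": ["b"], "b": []},
#    "reverse_dependencies": {"b": ["a"]},
#    "most_depended": [("b", ["a"])]}
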
def save_checkpoint(data: list, filename_prefix: str = "checkpoint") -> str:
    """Save intermediary results with timestamp"""
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    filename = f"{filename_prefix}_{timestamp}.jsonl"
    with open(filename, "w") as out:
        for item in data:
            out.write(json.dumps(item) + "\n")
    logging.info(f"Saved checkpoint to {filename}")
    return filename

@retry_with_backoff(max_retries=MAX_RETRIES)
def fetch_crate_metadata(crate: str) -> Optional[dict]:
    try:
        r = session.get(f"https://crates.io/api/v1/crates/{crate}")
        r.raise_for_status()
        data = r.json()
        crate_data = data["crate"]
        latest = crate_data["newest_version"]

        # Get readme
        readme_response = session.get(f"https://crates.io/api/v1/crates/{crate}/readme")
        readme = readme_response.text if readme_response.ok else ""

        # Get dependencies
        deps_response = session.get(f"https://crates.io/api/v1/crates/{crate}/{latest}/dependencies")
        deps = deps_response.json().get("dependencies", []) if deps_response.ok else []

        # Get features - using the versions endpoint
        features = []
        versions_response = session.get(f"https://crates.io/api/v1/crates/{crate}/{latest}")
        if versions_response.ok:
            version_data = versions_response.json().get("version", {})
            features_dict = version_data.get("features", {})
            features = [{"name": k, "dependencies": v} for k, v in features_dict.items()]

        # Repository info and GitHub stars
        repo = crate_data.get("repository", "")
        gh_stars = 0

        # Check if it's a GitHub repo (only query the API when a token is available)
        if "github.com" in repo and GITHUB_TOKEN:
            match = re.search(r"github\.com/([^/]+)/([^/]+)", repo)
            if match:
                owner, repo_name = match.groups()
                repo_name = re.sub(r"\.git$", "", repo_name)  # Handle .git extensions
                gh_url = f"https://api.github.com/repos/{owner}/{repo_name}"
                gh_headers = {"Authorization": f"token {GITHUB_TOKEN}"}
                gh = session.get(gh_url, headers=gh_headers)
                if gh.ok:
                    gh_data = gh.json()
                    gh_stars = gh_data.get("stargazers_count", 0)

        # Check if it's hosted on lib.rs
        lib_rs_data = {}
        if "lib.rs" in repo:
            lib_rs_url = f"https://lib.rs/crates/{crate}"
            lib_rs_response = session.get(lib_rs_url)
            if lib_rs_response.ok:
                soup = BeautifulSoup(lib_rs_response.text, 'html.parser')
                # Get README from lib.rs if not already available
                if not readme:
                    readme_div = soup.find('div', class_='readme')
                    if readme_div:
                        readme = readme_div.get_text(strip=True)

                # Get lib.rs specific stats
                stats_div = soup.find('div', class_='crate-stats')
                if stats_div:
                    downloads_text = stats_div.find(string=re.compile(r'[\d,]+ downloads'))
                    if downloads_text:
                        lib_rs_data["librs_downloads"] = int(re.sub(r'[^\d]', '', downloads_text))

        # Extract code snippets from readme
        code_snippets = extract_code_snippets(readme)

        # Extract sections from readme
        readme_sections = extract_readme_sections(readme) if readme else ""

        result = {
            "name": crate,
            "version": latest,
            "description": crate_data.get("description", ""),
            "repository": repo,
            "keywords": crate_data.get("keywords", []),
            "categories": crate_data.get("categories", []),
            "readme": readme,
            "downloads": crate_data.get("downloads", 0),
            "github_stars": gh_stars,
            "dependencies": deps,
            "code_snippets": code_snippets,
            "features": features,  # Now populated with actual features
            "readme_sections": readme_sections,
            **lib_rs_data
        }

        return result

    except Exception as e:
        logging.error(f"Failed fetching metadata for {crate}: {str(e)}")
        raise

def enrich_crate(crate: dict) -> dict:
    """Apply AI enrichments to crate data"""
    try:
        # First generate a README summary to use in other prompts
        if crate["readme"]:
            try:
                readme_content = crate.get("readme_sections", "") or truncate_content(crate.get("readme", ""), 2000)
                prompt = f"<|system|>Extract key features from README.\n<|user|>Summarize key aspects of this Rust crate from its README:\n{readme_content}\n<|end|>"
                crate["readme_summary"] = run_llama(prompt, temp=0.3, max_tokens=300)
            except Exception as e:
                logging.warning(f"README summary failed for {crate['name']}: {str(e)}")
                crate["readme_summary"] = None

        # Extract key dependencies for context
        key_deps = [dep.get("crate_id") for dep in crate.get("dependencies", [])[:5] if dep.get("kind") == "normal"]

        # Now use this enriched context for classification
        crate["feature_summary"] = summarize_feature(crate["name"], crate["features"])
        crate["use_case"] = classify_use_case(
            crate["name"],
            crate["description"],
            crate["keywords"],
            crate.get("readme_summary", ""),
            key_deps
        )
        crate["score"] = score_crate(crate)
        crate["factual_counterfactual"] = factual_pairs(crate)

        return crate
    except Exception as e:
        logging.error(f"Failed to enrich {crate['name']}: {str(e)}")
        return crate

def get_crate_list() -> list:
    return [
        # Original crates
        "serde", "tokio", "reqwest", "rand", "clap", "rayon", "uuid", "actix-web", "sqlx", "candle-core", "onnxruntime",

        # ML/AI crates
        "tokenizers", "safetensors", "linfa", "ndarray", "smartcore", "burn", "tract", "tch",
        # Add more crates here...

        # Other specialized crates
        "movingai", "ug-metal", "surrealml-core", "tauri",
        # Add more crates here...
    ]

def main():
    start_time = time.time()
    crates = get_crate_list()
    logging.info(f"Fetching and enriching {len(crates)} crates...")

    # Create timestamped output directory
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    output_dir = f"crate_data_{timestamp}"
    os.makedirs(output_dir, exist_ok=True)

    enriched = []

    # Step 1: Fetch metadata for all crates
    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = {pool.submit(fetch_crate_metadata, name): name for name in crates}

        for i, future in enumerate(as_completed(futures)):
            crate_name = futures[future]
            try:
                data = future.result()
                if data:
                    enriched.append(data)
                    logging.info(f"Fetched metadata for {crate_name} ({i+1}/{len(crates)})")

                # Save checkpoint periodically
                if (i+1) % CHECKPOINT_INTERVAL == 0 or i+1 == len(crates):
                    save_checkpoint(enriched, f"{output_dir}/metadata_checkpoint")

            except Exception as e:
                logging.error(f"Failed processing {crate_name}: {str(e)}")

    # Step 2: Enrich crates with AI insights
    for i, crate in enumerate(enriched):
        try:
            logging.info(f"Enriching {crate['name']} ({i+1}/{len(enriched)})")
            enriched[i] = enrich_crate(crate)

            # Save checkpoint periodically
            if (i+1) % CHECKPOINT_INTERVAL == 0 or i+1 == len(enriched):
                save_checkpoint(enriched, f"{output_dir}/ai_enriched_checkpoint")

        except Exception as e:
            logging.error(f"Failed to enrich {crate['name']}: {str(e)}")

    # Step 3: Perform dependency analysis
    logging.info("Analyzing crate dependencies...")
    dependency_analysis = analyze_dependencies(enriched)

    # Save final results
    final_output = f"{output_dir}/enriched_crate_metadata_{timestamp}.jsonl"
    with open(final_output, "w") as out:
        for item in enriched:
            out.write(json.dumps(item) + "\n")

    # Save dependency analysis
    with open(f"{output_dir}/dependency_analysis_{timestamp}.json", "w") as out:
        json.dump(dependency_analysis, out, indent=2)

    # Generate summary report
    summary = {
        "total_crates": len(enriched),
        "total_time": f"{time.time() - start_time:.2f}s",
        "timestamp": datetime.now().isoformat(),
        "most_popular": sorted(enriched, key=lambda x: x.get("score", 0), reverse=True)[:5],
        "most_depended_upon": dependency_analysis["most_depended"][:5]
    }

    with open(f"{output_dir}/summary_report_{timestamp}.json", "w") as out:
        json.dump(summary, out, indent=2)

    logging.info(f"✅ Done. Enriched {len(enriched)} crates in {time.time() - start_time:.2f}s")
    logging.info(f"Results saved to {output_dir}/")

if __name__ == "__main__":
    # Check disk space before starting (outputs are written to the working directory)
    if shutil.disk_usage(".").free < 1_000_000_000:  # 1GB
        logging.warning("Low disk space! This may affect performance.")

    try:
        main()
    except Exception as e:
        logging.critical(f"Script failed: {str(e)}")
        sys.exit(1)