Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import zipfile
- from pathlib import Path
- from typing import Iterator # Still need to import Iterator, but not List or Union
- # Define type alias for clarity
- type FilePath = str | Path # Using 'type' keyword for type aliases (PEP 695 in Python 3.12+)
def extract_wav_files_from_zip(
    zip_path: FilePath,
    extract_to_dir: FilePath
) -> list[Path]:
    """
    Extract every .wav file from a ZIP archive into a target directory.

    The archive's internal directory structure is preserved, so a member
    named ``subdir/clip.wav`` ends up at ``<extract_to_dir>/subdir/clip.wav``.
    The suffix match is case-insensitive. Directory entries are ignored.

    Args:
        zip_path: The path to the ZIP archive (str or Path object).
        extract_to_dir: The directory where the .wav files should be
            extracted (str or Path object). Created if it does not exist.

    Returns:
        A list of resolved Path objects pointing at the files actually
        written to disk, in archive order.

    Raises:
        FileNotFoundError: If the specified zip_path does not exist.
        zipfile.BadZipFile: If the file is not a valid ZIP archive.
        PermissionError: If there are issues writing to the extract_to_dir
            (any other OSError is propagated unchanged as well).
        Exception: For any other unexpected failure during extraction.
    """
    zip_archive_path = Path(zip_path)
    target_directory = Path(extract_to_dir)

    # Ensure the ZIP file exists
    if not zip_archive_path.is_file():
        raise FileNotFoundError(f"ZIP archive not found at: {zip_archive_path}")

    # Ensure the target directory exists, create if not
    target_directory.mkdir(parents=True, exist_ok=True)
    # Resolve once: every member is checked against this root.
    target_root = target_directory.resolve()

    extracted_files: list[Path] = []
    try:
        with zipfile.ZipFile(zip_archive_path, 'r') as zf:
            for info in zf.infolist():
                member = info.filename
                # A directory entry may itself be called "something.wav/".
                if info.is_dir() or Path(member).suffix.lower() != '.wav':
                    continue

                # Check the member's *full* archive path (not just its file
                # name) so "../" components and absolute names are detected.
                intended_path = (target_root / member).resolve()
                if not intended_path.is_relative_to(target_root):
                    print(f"Skipping potentially malicious path outside target directory: {member}")
                    continue

                # extract() returns the path it really wrote; report that
                # rather than guessing, so nested members are listed correctly.
                extracted_file_full_path = Path(zf.extract(info, target_root)).resolve()
                extracted_files.append(extracted_file_full_path)
                print(f"Extracted: {member} to {extracted_file_full_path}")
    except zipfile.BadZipFile as e:
        raise zipfile.BadZipFile(f"Error: Not a valid ZIP file. {e}") from e
    except OSError:
        # Keep PermissionError & co. intact, as promised in the docstring.
        raise
    except Exception as e:
        raise Exception(f"An error occurred during extraction: {e}") from e

    return extracted_files
def create_dummy_zip_with_wav(
    zip_name: FilePath = "test_audio_archive.zip",
    num_wav_files: int = 2,
    num_other_files: int = 1
) -> Path:
    """
    Create a dummy ZIP archive containing .wav files and other files for testing.

    The archive holds, in this order:
      * ``audio_clip_<i>.wav`` for i in 1..num_wav_files (four zero bytes each),
      * ``document_<i>.txt`` for i in 1..num_other_files (a short text line),
      * one nested member ``subdir/nested_audio.wav`` (four 0x01 bytes).

    Members are written straight into the archive, so no scratch directory
    is created on disk and nothing is left behind if writing fails part-way.

    Args:
        zip_name: Where to create the archive (str or Path object). An
            existing file at that location is overwritten.
        num_wav_files: Number of top-level .wav members to add.
        num_other_files: Number of top-level .txt members to add.

    Returns:
        The Path of the archive that was written.
    """
    zip_path = Path(zip_name)

    with zipfile.ZipFile(zip_path, 'w') as zf:
        for i in range(num_wav_files):
            zf.writestr(f"audio_clip_{i + 1}.wav", b'\x00\x00\x00\x00')

        for i in range(num_other_files):
            zf.writestr(f"document_{i + 1}.txt", f"This is a dummy text file {i + 1}.")

        # ZIP member names always use forward slashes, on every platform.
        zf.writestr("subdir/nested_audio.wav", b'\x01\x01\x01\x01')

    print(f"Created dummy ZIP: {zip_path}")
    return zip_path
- # --- Main execution example (remains the same) ---
if __name__ == "__main__":
    # Demo / smoke test: build a sample archive, extract its .wav members,
    # then exercise the two documented failure modes. Everything the demo
    # creates is removed again, even when a step fails.
    import shutil  # local import: only this demo's cleanup needs it

    dummy_zip = create_dummy_zip_with_wav(num_wav_files=3, num_other_files=2)
    extract_dir = Path("extracted_wav_files")
    # Only delete the directory afterwards if this run is what created it.
    owns_extract_dir = not extract_dir.exists()
    try:
        extracted_wavs = extract_wav_files_from_zip(dummy_zip, extract_dir)
        print(f"\nSuccessfully extracted {len(extracted_wavs)} .wav files to: {extract_dir}")
        for wav_file in extracted_wavs:
            print(f"- {wav_file}")
        for wav_file in extracted_wavs:
            wav_file.unlink(missing_ok=True)
    except Exception as e:
        print(f"Operation failed: {e}")
    finally:
        # The archive contains a nested member, so extraction leaves a
        # sub-directory behind; a plain rmdir() would fail on it.
        if owns_extract_dir and extract_dir.exists():
            shutil.rmtree(extract_dir)
            print(f"Cleaned up extraction directory: {extract_dir}")
        if dummy_zip.exists():
            dummy_zip.unlink()
            print(f"Cleaned up dummy ZIP: {dummy_zip}")

    temp_output = Path("temp_output")
    owns_temp_output = not temp_output.exists()

    print("\n--- Testing with a non-existent ZIP ---")
    try:
        extract_wav_files_from_zip("non_existent.zip", temp_output)
    except FileNotFoundError as e:
        print(f"Caught expected error: {e}")

    print("\n--- Testing with a bad ZIP file (e.g., a text file renamed to .zip) ---")
    bad_zip_path = Path("bad_archive.zip")
    bad_zip_path.write_text("This is not a zip file.")
    try:
        extract_wav_files_from_zip(bad_zip_path, temp_output)
    except zipfile.BadZipFile as e:
        print(f"Caught expected error: {e}")
    finally:
        if bad_zip_path.exists():
            bad_zip_path.unlink()
            print(f"Cleaned up bad ZIP file: {bad_zip_path}")
        # The output directory is created before the archive is opened, so
        # it exists even though extraction failed.
        if owns_temp_output and temp_output.exists():
            shutil.rmtree(temp_output)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement