Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #
- # A virtualenv and pip install pytest is needed to run the tests
- # python filename.py # run performance measurements
- # pytest filename.py # run the tests, one should fail until fixed
- #
- import time
- import shutil
- import os
- import tempfile
- import re
- from functools import wraps
- from pathlib import Path
- import pytest
class TempDir:
    """Context manager that runs the enclosed block inside a fresh
    temporary directory.

    On entry a uniquely named temporary directory is created and made
    the current working directory; on exit the previous working
    directory is restored and the temporary directory is removed
    together with all of its contents.
    """

    def __enter__(self):
        # Remember where we came from so __exit__ can restore it.
        self.cwd = os.getcwd()
        # mkdtemp() creates a directory with a random, unique name.
        self.temp_dir = tempfile.mkdtemp()
        os.chdir(self.temp_dir)
        return self.temp_dir

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Leave the doomed directory first: rmtree() can fail on some
        # platforms while the target is still the current directory.
        os.chdir(self.cwd)
        shutil.rmtree(self.temp_dir)
        # Implicit None return: exceptions from the block propagate.
- class DataLookup:
- DEFAULT_PAGE = 0
- def __init__(self, data: str, pagesize: int | None = None):
- self.data = data
- self.page_size = pagesize
- self.item_count = len(data)
- self.page_count = self.item_count // self.page_size + 1
- self.compiled_pattern = None
- def lookup_brute(self, data):
- result = []
- for i in range(self.page_count):
- start_item = i * self.page_size
- stop_item = min(self.item_count, (i + 1) * self.page_size)
- page = self.data[start_item:stop_item]
- if data in page:
- result.append(i)
- if not result:
- raise Exception(f"'{data} is missing on the pages")
- return result
- def compile_pattern(self, pattern: str):
- if not pattern:
- return
- if not self.compiled_pattern:
- self.compiled_pattern = re.compile(pattern)
- def lookup(self, fully_on_single: bool = False):
- if not self.data:
- return set()
- if not self.compiled_pattern:
- raise ValueError("Pattern must be compiled first! Use compile_pattern()")
- if not self.page_size:
- raise ValueError("Page size is not set!")
- pages = set()
- for match in self.compiled_pattern.finditer(self.data):
- start_pos, end_pos = match.span()
- start_page = self.DEFAULT_PAGE if (start_pos == 0) else (start_pos - 1) // self.page_size
- end_page = self.DEFAULT_PAGE if (end_pos == 0) else (end_pos - 1) // self.page_size
- if fully_on_single and start_page != end_page:
- continue
- pages.update([start_page, end_page])
- return pages
def test_temp_dir_differs():
    """Inside the context the cwd is a new, really existing directory."""
    original = str(Path(os.getcwd()).resolve().absolute())
    with TempDir() as temp_dir:
        inside = str(Path(os.getcwd()).resolve().absolute())
        # isdir() already implies existence for a real directory.
        assert os.path.exists(temp_dir) and os.path.isdir(temp_dir)
        assert inside != original
def test_temp_dir_removed():
    """After leaving the context the temporary directory is gone."""
    with TempDir() as created:
        created = str(Path(created).resolve().absolute())
    assert not os.path.exists(created)
@pytest.mark.parametrize('data, pagesize, lookup, expected', [
    ('Your data', 5, 'data', [1]),
    ('Your data data', 5, 'data', [1, 2]),
    ('Your good data good', 5, 'good', [1, 3]),
    ('Your good data', 7, 'good', [0, 1]),
])
def test_data_lookup(data, pagesize, lookup, expected):
    """Brute-force lookup reports the pages that contain the needle."""
    assert DataLookup(data, pagesize).lookup_brute(lookup) == expected
@pytest.mark.parametrize('data, pagesize, lookup', [
    ('Your data', 5, 'missing'),
    ('Your data data', 3, 'Your'),
])
def test_data_lookup_missing(data, pagesize, lookup):
    """A needle absent from every single page raises an Exception."""
    with pytest.raises(Exception):
        DataLookup(data, pagesize).lookup_brute(lookup)
def measure_time(iter_count: int):
    """Decorator factory: repeat the wrapped call ``iter_count`` times,
    print the total wall-clock time, and return the last call's result."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            result = None
            started = time.perf_counter()
            for _ in range(iter_count):
                result = func(*args, **kwargs)
            elapsed = time.perf_counter() - started
            print(f"Function '{func.__name__}' executed {iter_count} times, taking {elapsed:.4f} seconds in total.")
            return result
        return wrapper
    return decorator
@measure_time(1000000)
def brute_force_lookup(target: DataLookup, lookup_for):
    """Benchmark helper: page lookup via the brute-force strategy.

    Returns the pages found so the decorator (which captures and
    returns the last iteration's result) hands back something useful.
    """
    return target.lookup_brute(lookup_for)
@measure_time(1000000)
def regexp_lookup(target: DataLookup):
    """Benchmark helper: page lookup via the compiled-pattern strategy.

    Returns the pages found so the decorator (which captures and
    returns the last iteration's result) hands back something useful.
    """
    return target.lookup()
if __name__ == '__main__':
    # Build one DataLookup over a sample text and time both strategies.
    searcher = DataLookup("""
Your data beautiful data for all of the data where data was not really a correct data.
The invaliddata was not really a valid data. It was neither prepared nor cleaned up before using the data
as a data science examination functiondata.
""", 6)
    needle = 'data'
    brute_force_lookup(searcher, needle)
    searcher.compile_pattern(needle)
    regexp_lookup(searcher)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement