Advertisement
dzooli

Lookup and tests

Feb 9th, 2025 (edited)
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.52 KB | Source Code | 0 0
  1. #
  2. # A virtualenv with pytest installed (pip install pytest) is needed to run the tests
  3. # python filename.py # run performance measurements
  4. # pytest filename.py # run the tests, one should fail until fixed
  5. #
  6. import time
  7. import shutil
  8. import os
  9. import tempfile
  10. import re
  11. from functools import wraps
  12.  
  13. from pathlib import Path
  14.  
  15. import pytest
  16.  
  17.  
  18. class TempDir:
  19.     # ## Magic methods. task 5
  20.     # ***
  21.     # Create a context manager `TempDir` (Use Context Manager protocol - methods `__enter__`, `__exit__`):
  22.     # 1. When entering the context, a new temporary directory is created with random, unique name.
  23.     # Use `os.mkdir` to create the directory.
  24.     # 2. Until exiting this context the new created directory becomes current one and all actions are executed
  25.     # in scope of this new directory.
  26.     # 3. When exiting this context, the temporary directory is removed with all files in it.
  27.     # Use `rmtree` from `shutil` to remove whole directory.
  28.     # 4. The new working directory becomes the same as before entering context.
  29.     #
  30.     def __enter__(self):
  31.         self.cwd = os.getcwd()  # Store current dir
  32.         print(self.cwd)
  33.         self.temp_dir = tempfile.mkdtemp()  # Make temporary dir
  34.         print(self.temp_dir)
  35.         os.chdir(self.temp_dir)  # Change to temporary
  36.         return self.temp_dir
  37.  
  38.     def __exit__(self, exc_type, exc_val, exc_tb):
  39.         os.chdir(self.cwd)  # Go back to the original dir
  40.         shutil.rmtree(self.temp_dir)  # Clean up garbage...
  41.  
  42.  
  43. class DataLookup:
  44.     DEFAULT_PAGE = 0
  45.  
  46.     def __init__(self, data: str, pagesize: int | None = None):
  47.         self.data = data
  48.         self.page_size = pagesize
  49.         self.item_count = len(data)
  50.         self.page_count = self.item_count // self.page_size + 1
  51.         self.compiled_pattern = None
  52.  
  53.     def lookup_brute(self, data):
  54.         result = []
  55.         for i in range(self.page_count):
  56.             start_item = i * self.page_size
  57.             stop_item = min(self.item_count, (i + 1) * self.page_size)
  58.             page = self.data[start_item:stop_item]
  59.  
  60.             if data in page:
  61.                 result.append(i)
  62.  
  63.         if not result:
  64.             raise Exception(f"'{data} is missing on the pages")
  65.  
  66.         return result
  67.  
  68.     def compile_pattern(self, pattern: str):
  69.         if not pattern:
  70.             return
  71.         if not self.compiled_pattern:
  72.             self.compiled_pattern = re.compile(pattern)
  73.  
  74.     def lookup(self, fully_on_single: bool = False):
  75.         if not self.data:
  76.             return set()
  77.  
  78.         if not self.compiled_pattern:
  79.             raise ValueError("Pattern must be compiled first! Use compile_pattern()")
  80.         if not self.page_size:
  81.             raise ValueError("Page size is not set!")
  82.  
  83.         pages = set()
  84.         for match in self.compiled_pattern.finditer(self.data):
  85.             start_pos, end_pos = match.span()
  86.             start_page = self.DEFAULT_PAGE if (start_pos == 0) else (start_pos - 1) // self.page_size
  87.             end_page = self.DEFAULT_PAGE if (end_pos == 0) else (end_pos - 1) // self.page_size
  88.  
  89.             if fully_on_single and start_page != end_page:
  90.                 continue
  91.  
  92.             pages.update([start_page, end_page])
  93.  
  94.         return pages
  95.  
  96.  
  97. def test_temp_dir_differs():
  98.     cwd = str(Path(os.getcwd()).resolve().absolute())
  99.     with TempDir() as temp_dir:
  100.         created_dir = str(Path(os.getcwd()).resolve().absolute())
  101.         assert os.path.exists(temp_dir) and os.path.isdir(temp_dir)
  102.         assert created_dir != cwd
  103.  
  104.  
  105. def test_temp_dir_removed():
  106.     with TempDir() as temp_dir:
  107.         temp_dir = str(Path(temp_dir).resolve().absolute())
  108.     assert not os.path.exists(temp_dir)
  109.  
  110.  
  111. @pytest.mark.parametrize('data, pagesize, lookup, expected', [
  112.     ('Your data', 5, 'data', [1]),
  113.     ('Your data data', 5, 'data', [1, 2]),
  114.     ('Your good data good', 5, 'good', [1, 3]),
  115.     ('Your good data', 7, 'good', [0, 1]),
  116. ])
  117. def test_data_lookup(data, pagesize, lookup, expected):
  118.     finder = DataLookup(data, pagesize)
  119.     pages = finder.lookup_brute(lookup)
  120.     assert pages == expected
  121.  
  122.  
  123. @pytest.mark.parametrize('data, pagesize, lookup', [
  124.     ('Your data', 5, 'missing'),
  125.     ('Your data data', 3, 'Your'),
  126. ])
  127. def test_data_lookup_missing(data, pagesize, lookup):
  128.     finder = DataLookup(data, pagesize)
  129.     with pytest.raises(Exception):
  130.         finder.lookup_brute(lookup)
  131.  
  132.  
  133. def measure_time(iter_count: int):
  134.  
  135.     def decorator(func):
  136.         @wraps(func)
  137.         def wrapper(*args, **kwargs):
  138.             result = None
  139.             start = time.perf_counter()
  140.             for _ in range(iter_count):
  141.                 result = func(*args, **kwargs)
  142.             end = time.perf_counter()
  143.             print(f"Function '{func.__name__}' executed {iter_count} times, taking {end - start:.4f} seconds in total.")
  144.             return result
  145.  
  146.         return wrapper
  147.  
  148.     return decorator
  149.  
  150.  
  151. @measure_time(1000000)
  152. def brute_force_lookup(target: DataLookup, lookup_for):
  153.     target.lookup_brute(lookup_for)
  154.  
  155.  
  156. @measure_time(1000000)
  157. def regexp_lookup(target: DataLookup):
  158.     target.lookup()
  159.  
  160.  
  161. if __name__ == '__main__':
  162.     finder = DataLookup("""
  163.    Your data beautiful data for all of the data where data was not really a correct data.
  164.    The invaliddata was not really a valid data. It was neither prepared nor cleaned up before using the data
  165.    as a data science examination functiondata.
  166.    """, 6)
  167.  
  168.     lookup = 'data'
  169.  
  170.     brute_force_lookup(finder, lookup)
  171.  
  172.     finder.compile_pattern(lookup)
  173.     regexp_lookup(finder)
  174.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement