cuda_numba_example
by Danila_lipatov, May 13th, 2025

An explicit finite-difference solver for the 2D heat equation, run on the GPU with Numba's CUDA JIT and timed for a given thread-block size.
import time

import numpy as np
from numba import cuda


@cuda.jit
def heat2d_kernel(u, u_new, n, alpha, dt, dx):
    # One explicit Euler step of the 2D heat equation on interior points;
    # the boundary rows/columns stay fixed at zero.
    i, j = cuda.grid(2)

    if 0 < i < n - 1 and 0 < j < n - 1:
        u_new[i, j] = u[i, j] + alpha * dt / (dx * dx) * (
            u[i + 1, j] + u[i - 1, j] + u[i, j + 1] + u[i, j - 1] - 4 * u[i, j])


def solve_heat2d_numba(n=100, steps=1000, dt=None, block_size=(16, 16)):
    alpha = 1.0
    dx = 1.0 / (n - 1)

    # The explicit scheme is stable only for dt <= dx**2 / (4 * alpha).
    # The previous fixed default of dt=0.0001 violates this bound for grids
    # finer than about n = 51 and makes the solution blow up, so default to
    # half the stability bound instead.
    if dt is None:
        dt = dx * dx / (8.0 * alpha)

    # Initialize: u = 1 in the interior, u = 0 on the boundary
    u = np.ones((n, n), dtype=np.float64)
    u[0, :] = u[-1, :] = u[:, 0] = u[:, -1] = 0.0
    u_new = np.copy(u)

    # Allocate device memory
    d_u = cuda.to_device(u)
    d_u_new = cuda.to_device(u_new)

    # Configure grid size dynamically based on block size
    threads_per_block = block_size
    blocks_per_grid_x = (n + block_size[0] - 1) // block_size[0]
    blocks_per_grid_y = (n + block_size[1] - 1) // block_size[1]
    blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y)

    # Warm-up launch (dt=0.0 leaves the state unchanged) so that JIT
    # compilation is not counted in the timing below
    heat2d_kernel[blocks_per_grid, threads_per_block](
        d_u, d_u_new, n, alpha, 0.0, dx)
    cuda.synchronize()

    # Time measurement
    start = time.time()

    # Main loop: launch one step, then swap the role of the two buffers
    for step in range(steps):
        heat2d_kernel[blocks_per_grid, threads_per_block](
            d_u, d_u_new, n, alpha, dt, dx)
        d_u, d_u_new = d_u_new, d_u

    cuda.synchronize()
    elapsed = time.time() - start
    print(f"Time (block_size={block_size}): {elapsed:.4f} s")

    # Copy the result back to the host
    result = d_u.copy_to_host()
    return result


if __name__ == "__main__":
    solve_heat2d_numba(n=128, steps=1000, block_size=(16, 16))
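Since solve_heat2d_numba takes the thread-block shape as a parameter and prints the elapsed time for it, a small sweep is a natural way to compare configurations. A minimal sketch; the block shapes below are arbitrary examples rather than tuned recommendations (note that 32 x 32 = 1024 threads is the per-block limit on most NVIDIA GPUs):

# Sweep a few thread-block shapes; values are illustrative only.
for bs in [(8, 8), (16, 16), (32, 8), (32, 32)]:
    solve_heat2d_numba(n=512, steps=1000, block_size=bs)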
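To check that the GPU kernel computes the right thing, the same explicit scheme can be run on the CPU with vectorized NumPy and compared against the device result. A minimal sketch; solve_heat2d_numpy is a hypothetical helper introduced here, using the same initial condition, boundary condition, and default time step as solve_heat2d_numba:

import numpy as np

def solve_heat2d_numpy(n=128, steps=1000, dt=None, alpha=1.0):
    # Hypothetical CPU reference for validating the CUDA result.
    dx = 1.0 / (n - 1)
    if dt is None:
        dt = dx * dx / (8.0 * alpha)  # same stable default as the GPU version
    u = np.ones((n, n), dtype=np.float64)
    u[0, :] = u[-1, :] = u[:, 0] = u[:, -1] = 0.0
    for _ in range(steps):
        u_new = u.copy()
        u_new[1:-1, 1:-1] = u[1:-1, 1:-1] + alpha * dt / (dx * dx) * (
            u[2:, 1:-1] + u[:-2, 1:-1] + u[1:-1, 2:] + u[1:-1, :-2]
            - 4 * u[1:-1, 1:-1])
        u = u_new
    return u

# Example check (both use the same default dt, so the results should
# agree to floating-point rounding):
# gpu = solve_heat2d_numba(n=128, steps=1000)
# cpu = solve_heat2d_numpy(n=128, steps=1000)
# print(np.allclose(gpu, cpu))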