import numpy as np
from numba import cuda
import time


@cuda.jit
def heat2d_kernel(u, u_new, n, alpha, dt, dx):
    # One thread per grid point; only interior points are updated, so the
    # Dirichlet boundary values set at initialization are preserved.
    i, j = cuda.grid(2)
    if 0 < i < n - 1 and 0 < j < n - 1:
        u_new[i, j] = u[i, j] + alpha * dt / (dx * dx) * (
            u[i + 1, j] + u[i - 1, j] + u[i, j + 1] + u[i, j - 1] - 4 * u[i, j])


def solve_heat2d_numba(n=100, steps=1000, dt=0.0001, block_size=(16, 16)):
    alpha = 1.0
    dx = 1.0 / (n - 1)
    # Note: this explicit (FTCS) scheme is stable only for
    # dt <= dx**2 / (4 * alpha); reduce dt accordingly for larger n.

    # Initialize: interior at 1.0, boundaries held at 0.0
    u = np.ones((n, n), dtype=np.float64)
    u[0, :] = u[-1, :] = u[:, 0] = u[:, -1] = 0.0
    u_new = np.copy(u)

    # Allocate device memory
    d_u = cuda.to_device(u)
    d_u_new = cuda.to_device(u_new)

    # Configure grid size dynamically based on block size
    threads_per_block = block_size
    blocks_per_grid_x = (n + block_size[0] - 1) // block_size[0]
    blocks_per_grid_y = (n + block_size[1] - 1) // block_size[1]
    blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y)

    # Time measurement (the first call also pays Numba's one-time
    # JIT compilation cost)
    start = time.time()

    # Main loop: launch the kernel, then swap buffers so d_u always
    # refers to the most recent state
    for step in range(steps):
        heat2d_kernel[blocks_per_grid, threads_per_block](
            d_u, d_u_new, n, alpha, dt, dx)
        d_u, d_u_new = d_u_new, d_u
    cuda.synchronize()
    elapsed = time.time() - start
    print(f"Time (block_size={block_size}): {elapsed:.4f} s")

    # Get result back
    result = d_u.copy_to_host()
    return result
- if __name__ == "__main__":
- solve_heat2d_numba(n=128, steps=1000, dt=0.0001, block_size=(16, 16))
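    # A minimal sketch (not in the original paste): sweep a few candidate
    # block sizes to compare kernel timings. The tuples below are
    # illustrative assumptions, not tuned values; the call above doubles
    # as a JIT warm-up, so these later timings exclude compilation cost.
    for bs in [(8, 8), (32, 8), (32, 16), (32, 32)]:
        solve_heat2d_numba(n=128, steps=1000, dt=0.0001, block_size=bs)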