Advertisement
Danila_lipatov

pycuda_example

May 13th, 2025
348
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.22 KB | None | 0 0
  1. import pycuda.autoinit
  2. import pycuda.driver as drv
  3. import numpy as np
  4. import time
  5. from pycuda.compiler import SourceModule
  6. import matplotlib.pyplot as plt
  7.  
  8.  
# CUDA C source for a single explicit time step of the 2-D heat equation.
#
# heat_step(u, u_new, alpha, dt, dx2, dy2, nx, ny):
#   * u / u_new are flat double arrays addressed as i * ny + j.
#   * Each thread derives one cell (i, j) from its block/thread indices; the
#     "+ 1" offset skips index 0 and the `i < nx - 1 && j < ny - 1` guard
#     skips the last index, so only interior cells of u_new are written.
#   * The new value is u[idx] plus alpha * dt times the sum of the second
#     differences along i (divided by dx2) and along j (divided by dy2).
#
# NOTE(review): i is driven by threadIdx.x while the layout is i * ny + j, so
# threads adjacent in x touch elements ny apart. Mapping threadIdx.x to j
# would make neighbouring threads read neighbouring addresses, but the
# host-side grid dimensions would have to be swapped at the same time.
kernel_code = """
__global__ void heat_step(double *u, double *u_new, double alpha, double dt, double dx2, double dy2, int nx, int ny) {
   int i = blockIdx.x * blockDim.x + threadIdx.x + 1;
   int j = blockIdx.y * blockDim.y + threadIdx.y + 1;

   if (i < nx - 1 && j < ny - 1) {
       int idx = i * ny + j;
       int up = (i - 1) * ny + j;
       int down = (i + 1) * ny + j;
       int left = i * ny + (j - 1);
       int right = i * ny + (j + 1);

       u_new[idx] = u[idx] + alpha * dt * (
           (u[down] - 2.0 * u[idx] + u[up]) / dx2 +
           (u[right] - 2.0 * u[idx] + u[left]) / dy2
       );
   }
}
"""
  28.  
  29.  
  30. def solve_heat_pycuda(nx=128, ny=128, nt=100, alpha=0.01, dx=1.0, dy=1.0, dt=0.1):
  31.     dx2, dy2 = dx ** 2, dy ** 2
  32.     N = nx * ny
  33.  
  34.     u = np.zeros(N, dtype=np.float64)
  35.     u_new = np.zeros_like(u)
  36.  
  37.     # горячая точка по центру
  38.     u[(nx // 2) * ny + (ny // 2)] = 100.0
  39.  
  40.     # компиляция ядра
  41.     mod = SourceModule(kernel_code)
  42.     heat_step = mod.get_function("heat_step")
  43.  
  44.     # выделение памяти на GPU
  45.     u_gpu = drv.mem_alloc(u.nbytes)
  46.     u_new_gpu = drv.mem_alloc(u.nbytes)
  47.  
  48.     drv.memcpy_htod(u_gpu, u)
  49.     drv.memcpy_htod(u_new_gpu, u_new)
  50.  
  51.     block_size = (16, 16, 1)
  52.     grid_size = ((nx - 2) // 16 + 1, (ny - 2) // 16 + 1)
  53.  
  54.     start = time.time()
  55.     for _ in range(nt):
  56.         heat_step(u_gpu, u_new_gpu,
  57.                   np.float64(alpha), np.float64(dt),
  58.                   np.float64(dx2), np.float64(dy2),
  59.                   np.int32(nx), np.int32(ny),
  60.                   block=block_size, grid=grid_size)
  61.         u_gpu, u_new_gpu = u_new_gpu, u_gpu
  62.     drv.Context.synchronize()
  63.     end = time.time()
  64.  
  65.     # результат обратно на CPU
  66.     drv.memcpy_dtoh(u, u_gpu)
  67.     u = u.reshape((nx, ny))
  68.     return u, end - start
  69.  
  70.  
  71. if __name__ == "__main__":
  72.     for n in [64, 128, 256]:
  73.         print(f"\nGrid size: {n}x{n}")
  74.         _, T = solve_heat_pycuda(nx=n, ny=n, nt=200)
  75.         print(f"Time elapsed: {T:.4f} s")
  76.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement