-
Notifications
You must be signed in to change notification settings - Fork 79
Open
Description
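For this 5-point stencil kernel, loopy generates the following OpenCL code without a barrier, even though a barrier is required for correctness: each time step reads values of `u` at neighboring `x` positions that other work-items wrote in the previous time step.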
# Minimal reproducer: a stencil update over time steps t and positions x,
# parallelized over x, for which the generated code contains no barrier.
import loopy as lp
import numpy as np
# Loop domain: 0 <= x < nx and 0 <= t < nt, with nx and nt as parameters.
# The single instruction writes row t+2 of u from rows t+1 and t of the
# same array; the reads at x and x+2 in row t+1 are entries that were
# written for neighboring x values in the previous t iteration.
knl = lp.make_kernel(
"[nx,nt] -> {[x, t]: 0<=x<nx and 0<=t<nt}",
"""
u[t+2,x+1] = 2*u[t+1,x+1] + dt**2/dx**2 * (u[t+1,x+2] - 2*u[t+1,x+1] + u[t+1,x]) - u[t,x+1]
"""
)
# Declare u, dx and dt as float32.
knl = lp.add_dtypes(knl, {"u": np.float32, "dx": np.float32, "dt": np.float32})
# Split x into x_outer/x_inner with an inner length of 14.
knl = lp.split_iname(knl, "x", 14)
# Assume nx is a multiple of the split length and both extents are >= 1.
knl = lp.assume(knl, "nx % 14 = 0 and nt >= 1 and nx >= 1")
# Map x_outer to group axis 0 and x_inner to local axis 0, so the t loop
# runs sequentially inside each work-item.
knl = lp.tag_inames(knl, "x_outer:g.0, x_inner:l.0")
# Print the generated device code for inspection.
print(lp.generate_code_v2(knl).device_code())
Generated code:
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
__kernel void __attribute__ ((reqd_work_group_size(14, 1, 1))) loopy_kernel(float const dt, float const dx, int const nt, int const nx, __global float *__restrict__ u)
{
for (int t = 0; t <= -1 + nt; ++t)
u[(2 + nx) * (2 + t) + 1 + 14 * gid(0) + lid(0)] = 2.0f * u[(2 + nx) * (1 + t) + 1 + 14 * gid(0) + lid(0)] + ((dt * dt) / (dx * dx)) * (u[(2 + nx) * (1 + t) + 2 + 14 * gid(0) + lid(0)] + -1.0f * 2.0f * u[(2 + nx) * (1 + t) + 1 + 14 * gid(0) + lid(0)] + u[(2 + nx) * (1 + t) + 14 * gid(0) + lid(0)]) + -1.0f * u[(2 + nx) * t + 1 + 14 * gid(0) + lid(0)];
}
Metadata
Metadata
Assignees
Labels
No labels