Example
sph_dam_break_2d_udt
SPH dam-break organized around UDT-backed simulation state.
// ============================================================================
// 2D SPH Dam-Break Simulation (Parallel, UDT-backed)
//
// The hot loops still operate over arrays, but the simulation is organized
// around UDT-backed parameter and state blocks.
// ============================================================================
udt SimParams {
int64 n
real64 h
real64 edge
real64 width
real64 height
real64 m
real64 rho0
real64 c0
real64 gamma
real64 dt
real64 k
real64 g
int64 max_steps
int64 output_every
int64 progress_every
int64 quiet_steps_needed
real64 visc
real64 eps
real64 bdry_reflec
real64 bdry_proj_dist
real64 steady_ke_per_particle_tol
real64 x_min
real64 y_min
real64 x_max
real64 y_max
int64 init_nx
int64 init_ny
int64 grid_nx
int64 grid_ny
int64 num_cells
real64 h2
real64 poly6_coeff
real64 spiky_grad_coeff
real64 visc_lap_coeff
}
udt FluidState {
real64[:] x
real64[:] y
real64[:] u
real64[:] v
real64[:] accel_x
real64[:] accel_y
real64[:] rho
real64[:] p
}
udt GridScratch {
int64[:] particle_cell
int64[:] particle_order
int64[:] cell_counts
int64[:] cell_starts
int64[:] cell_offsets
real64[:] x_tmp
real64[:] y_tmp
real64[:] u_tmp
real64[:] v_tmp
real64[:] accel_x_tmp
real64[:] accel_y_tmp
real64[:] rho_tmp
real64[:] p_tmp
}
function clamp(real64 x, real64 lo, real64 hi) -> real64 {
if (x < lo) {
return lo
}
if (x > hi) {
return hi
}
return x
}
program main {
use array: full_int64, full_real64
string out_dir = "sph_dam_break_2d_udt_parallel_output"
ensure_dir(out_dir)
SimParams sim
sim.n = 1200
sim.h = 5.0
sim.edge = 5.0
sim.width = 400.0
sim.height = 400.0
sim.m = 1.0
sim.rho0 = 1.0
sim.c0 = 10.0
sim.gamma = 7.0
sim.dt = 0.05
sim.k = 20.0
sim.g = 0.2
sim.max_steps = 10000
sim.output_every = 50
sim.progress_every = 100
sim.quiet_steps_needed = 200
sim.visc = 0.5
sim.eps = 0.1
sim.bdry_reflec = 0.5
sim.bdry_proj_dist = 0.0
sim.steady_ke_per_particle_tol = 0.0002
sim.x_min = sim.edge
sim.y_min = sim.edge
sim.x_max = sim.width - sim.edge
sim.y_max = sim.height - sim.edge
sim.init_nx = 20
sim.init_ny = 60
sim.grid_nx = int64((sim.x_max - sim.x_min) / sim.h) + 1
sim.grid_ny = int64((sim.y_max - sim.y_min) / sim.h) + 1
sim.num_cells = sim.grid_nx * sim.grid_ny
sim.h2 = sim.h * sim.h
sim.poly6_coeff = 315.0 / (64.0 * 3.141592653589793 * sim.h * sim.h * sim.h * sim.h * sim.h * sim.h * sim.h * sim.h * sim.h)
sim.spiky_grad_coeff = -45.0 / (3.141592653589793 * sim.h * sim.h * sim.h * sim.h * sim.h * sim.h)
sim.visc_lap_coeff = 45.0 / (3.141592653589793 * sim.h * sim.h * sim.h * sim.h * sim.h * sim.h)
FluidState state
state.x = full_real64(sim.n, 0.0)
state.y = full_real64(sim.n, 0.0)
state.u = full_real64(sim.n, 0.0)
state.v = full_real64(sim.n, 0.0)
state.accel_x = full_real64(sim.n, 0.0)
state.accel_y = full_real64(sim.n, 0.0)
state.rho = full_real64(sim.n, sim.rho0)
state.p = full_real64(sim.n, 0.0)
GridScratch grid
grid.particle_cell = full_int64(sim.n, 0)
grid.particle_order = full_int64(sim.n, 0)
grid.cell_counts = full_int64(sim.num_cells, 0)
grid.cell_starts = full_int64(sim.num_cells, 0)
grid.cell_offsets = full_int64(sim.num_cells, 0)
grid.x_tmp = full_real64(sim.n, 0.0)
grid.y_tmp = full_real64(sim.n, 0.0)
grid.u_tmp = full_real64(sim.n, 0.0)
grid.v_tmp = full_real64(sim.n, 0.0)
grid.accel_x_tmp = full_real64(sim.n, 0.0)
grid.accel_y_tmp = full_real64(sim.n, 0.0)
grid.rho_tmp = full_real64(sim.n, sim.rho0)
grid.p_tmp = full_real64(sim.n, 0.0)
int64 quiet_steps = 0
int64 step = 0
int64 i = 0, ix = 0, iy = 0
int64 cell = 0, cx = 0, cy = 0, ncx = 0, ncy = 0
int64 sx0 = 0, sx1 = 0, sy0 = 0, sy1 = 0
int64 start = 0, count = 0, pos = 0, j = 0, running = 0
real64 dx = 0.0, dy = 0.0, rij = 0.0, r2 = 0.0, t = 0.0
real64 grad_fac = 0.0, lapv = 0.0
real64 grad_px = 0.0, grad_py = 0.0
real64 visc_x = 0.0, visc_y = 0.0
real64 rho_i = 0.0, avg_rho = 0.0, max_speed = 0.0, speed = 0.0
real64 kinetic_energy = 0.0, ke_per_particle = 0.0
real64 bx0 = 0.0, bx1 = 0.0, by0 = 0.0, by1 = 0.0
real64 jitter_x = 0.0, jitter_y = 0.0
string csv = ""
string frame_path = ""
do iy = 0, sim.init_ny - 1 {
do ix = 0, sim.init_nx - 1 {
i = ix + sim.init_nx * iy
if (i < sim.n) {
jitter_x = rand()
jitter_y = rand()
state.x[i] = sim.x_min + sim.h * real64(ix) + sim.h * sim.eps * jitter_x
state.y[i] = sim.y_min + sim.h * real64(iy) + sim.h * sim.eps * jitter_y
state.u[i] = 0.0
state.v[i] = 0.0
state.accel_x[i] = 0.0
state.accel_y[i] = 0.0
state.rho[i] = sim.rho0
state.p[i] = 0.0
}
}
}
string meta = "max_steps=" + to_string(sim.max_steps) + "\n"
meta += "output_every=" + to_string(sim.output_every) + "\n"
meta += "n=" + to_string(sim.n) + "\n"
meta += "h=" + to_string(sim.h) + "\n"
meta += "dt=" + to_string(sim.dt) + "\n"
meta += "steady_ke_per_particle_tol=" + to_string(sim.steady_ke_per_particle_tol) + "\n"
meta += "quiet_steps_needed=" + to_string(sim.quiet_steps_needed) + "\n"
meta += "x_min=" + to_string(sim.x_min) + "\n"
meta += "x_max=" + to_string(sim.x_max) + "\n"
meta += "y_min=" + to_string(sim.y_min) + "\n"
meta += "y_max=" + to_string(sim.y_max) + "\n"
write_file(out_dir + "/meta.txt", meta)
print("SPH 2D dam break UDT parallel: n=", sim.n, "h=", sim.h, "dt=", sim.dt, "steady_ke=", sim.steady_ke_per_particle_tol)
step = 0
while (step <= sim.max_steps && quiet_steps < sim.quiet_steps_needed) {
grid.cell_counts = full_int64(sim.num_cells, 0)
do i = 0, sim.n - 1 {
cx = int64(clamp((state.x[i] - sim.x_min) / sim.h, 0.0, real64(sim.grid_nx - 1)))
cy = int64(clamp((state.y[i] - sim.y_min) / sim.h, 0.0, real64(sim.grid_ny - 1)))
cell = cx + cy * sim.grid_nx
grid.particle_cell[i] = cell
grid.cell_counts[cell] += 1
}
running = 0
do cell = 0, sim.num_cells - 1 {
grid.cell_starts[cell] = running
grid.cell_offsets[cell] = running
running += grid.cell_counts[cell]
}
do i = 0, sim.n - 1 {
cell = grid.particle_cell[i]
grid.particle_order[grid.cell_offsets[cell]] = i
grid.cell_offsets[cell] += 1
}
do pos = 0, sim.n - 1 {
i = grid.particle_order[pos]
grid.x_tmp[pos] = state.x[i]
grid.y_tmp[pos] = state.y[i]
grid.u_tmp[pos] = state.u[i]
grid.v_tmp[pos] = state.v[i]
grid.accel_x_tmp[pos] = state.accel_x[i]
grid.accel_y_tmp[pos] = state.accel_y[i]
grid.rho_tmp[pos] = state.rho[i]
grid.p_tmp[pos] = state.p[i]
}
state.x = grid.x_tmp
state.y = grid.y_tmp
state.u = grid.u_tmp
state.v = grid.v_tmp
state.accel_x = grid.accel_x_tmp
state.accel_y = grid.accel_y_tmp
state.rho = grid.rho_tmp
state.p = grid.p_tmp
do parallel i = 0, sim.n - 1 {
real64 rho_local = 0.0
int64 cx_local = int64(clamp((state.x[i] - sim.x_min) / sim.h, 0.0, real64(sim.grid_nx - 1)))
int64 cy_local = int64(clamp((state.y[i] - sim.y_min) / sim.h, 0.0, real64(sim.grid_ny - 1)))
int64 sx0_local = max(0, cx_local - 1)
int64 sx1_local = min(sim.grid_nx - 1, cx_local + 1)
int64 sy0_local = max(0, cy_local - 1)
int64 sy1_local = min(sim.grid_ny - 1, cy_local + 1)
int64 ncy_local = 0
int64 ncx_local = 0
int64 cell_local = 0
int64 start_local = 0
int64 count_local = 0
int64 pos_local = 0
int64 j_local = 0
real64 dx_local = 0.0
real64 dy_local = 0.0
real64 r2_local = 0.0
real64 t_local = 0.0
do ncy_local = sy0_local, sy1_local {
do ncx_local = sx0_local, sx1_local {
cell_local = ncx_local + ncy_local * sim.grid_nx
start_local = grid.cell_starts[cell_local]
count_local = grid.cell_counts[cell_local]
do pos_local = 0, count_local - 1 {
j_local = start_local + pos_local
dx_local = state.x[i] - state.x[j_local]
dy_local = state.y[i] - state.y[j_local]
r2_local = dx_local * dx_local + dy_local * dy_local
if (r2_local <= sim.h2) {
t_local = sim.h2 - r2_local
rho_local += sim.m * sim.poly6_coeff * t_local * t_local * t_local
}
}
}
}
state.rho[i] = rho_local
}
do parallel i = 0, sim.n - 1 {
state.p[i] = sim.k * (state.rho[i] - sim.rho0)
}
do parallel i = 0, sim.n - 1 {
real64 ax_local_acc = 0.0
real64 ay_local_acc = -sim.g
int64 cx_local_acc = int64(clamp((state.x[i] - sim.x_min) / sim.h, 0.0, real64(sim.grid_nx - 1)))
int64 cy_local_acc = int64(clamp((state.y[i] - sim.y_min) / sim.h, 0.0, real64(sim.grid_ny - 1)))
int64 sx0_local_acc = max(0, cx_local_acc - 1)
int64 sx1_local_acc = min(sim.grid_nx - 1, cx_local_acc + 1)
int64 sy0_local_acc = max(0, cy_local_acc - 1)
int64 sy1_local_acc = min(sim.grid_ny - 1, cy_local_acc + 1)
int64 ncy_local_acc = 0
int64 ncx_local_acc = 0
int64 cell_local_acc = 0
int64 start_local_acc = 0
int64 count_local_acc = 0
int64 pos_local_acc = 0
int64 j_local_acc = 0
real64 dx_local_acc = 0.0
real64 dy_local_acc = 0.0
real64 rij_local_acc = 0.0
real64 r2_local_acc = 0.0
real64 grad_fac_local_acc = 0.0
real64 lapv_local_acc = 0.0
real64 grad_px_local_acc = 0.0
real64 grad_py_local_acc = 0.0
real64 visc_x_local_acc = 0.0
real64 visc_y_local_acc = 0.0
do ncy_local_acc = sy0_local_acc, sy1_local_acc {
do ncx_local_acc = sx0_local_acc, sx1_local_acc {
cell_local_acc = ncx_local_acc + ncy_local_acc * sim.grid_nx
start_local_acc = grid.cell_starts[cell_local_acc]
count_local_acc = grid.cell_counts[cell_local_acc]
do pos_local_acc = 0, count_local_acc - 1 {
j_local_acc = start_local_acc + pos_local_acc
if (j_local_acc != i) {
dx_local_acc = state.x[i] - state.x[j_local_acc]
dy_local_acc = state.y[i] - state.y[j_local_acc]
r2_local_acc = dx_local_acc * dx_local_acc + dy_local_acc * dy_local_acc
if (r2_local_acc > 0.0 && r2_local_acc <= sim.h2) {
rij_local_acc = sqrt(r2_local_acc)
grad_fac_local_acc = sim.spiky_grad_coeff * (sim.h - rij_local_acc) * (sim.h - rij_local_acc) / rij_local_acc
grad_px_local_acc = sim.m * (state.p[i] + state.p[j_local_acc]) * grad_fac_local_acc * dx_local_acc / (2.0 * state.rho[i] * state.rho[j_local_acc])
grad_py_local_acc = sim.m * (state.p[i] + state.p[j_local_acc]) * grad_fac_local_acc * dy_local_acc / (2.0 * state.rho[i] * state.rho[j_local_acc])
lapv_local_acc = sim.visc_lap_coeff * (sim.h - rij_local_acc)
visc_x_local_acc = sim.m * sim.visc * (state.u[j_local_acc] - state.u[i]) * lapv_local_acc / (state.rho[i] * state.rho[j_local_acc])
visc_y_local_acc = sim.m * sim.visc * (state.v[j_local_acc] - state.v[i]) * lapv_local_acc / (state.rho[i] * state.rho[j_local_acc])
ax_local_acc += grad_px_local_acc + visc_x_local_acc
ay_local_acc += grad_py_local_acc + visc_y_local_acc
}
}
}
}
}
state.accel_x[i] = ax_local_acc
state.accel_y[i] = ay_local_acc
}
do parallel i = 0, sim.n - 1 {
state.u[i] += sim.dt * state.accel_x[i]
state.v[i] += sim.dt * state.accel_y[i]
state.x[i] += sim.dt * state.u[i]
state.y[i] += sim.dt * state.v[i]
}
do parallel i = 0, sim.n - 1 {
if (state.x[i] < sim.x_min + sim.bdry_proj_dist) {
state.x[i] = sim.x_min + sim.bdry_proj_dist
state.u[i] = -sim.bdry_reflec * state.u[i]
}
if (state.x[i] > sim.x_max - sim.bdry_proj_dist) {
state.x[i] = sim.x_max - sim.bdry_proj_dist
state.u[i] = -sim.bdry_reflec * state.u[i]
}
if (state.y[i] < sim.y_min + sim.bdry_proj_dist) {
state.y[i] = sim.y_min + sim.bdry_proj_dist
state.v[i] = -sim.bdry_reflec * state.v[i]
}
if (state.y[i] > sim.y_max - sim.bdry_proj_dist) {
state.y[i] = sim.y_max - sim.bdry_proj_dist
state.v[i] = -sim.bdry_reflec * state.v[i]
}
}
avg_rho = 0.0
max_speed = 0.0
kinetic_energy = 0.0
bx0 = state.x[0]
bx1 = state.x[0]
by0 = state.y[0]
by1 = state.y[0]
do i = 0, sim.n - 1 {
avg_rho += state.rho[i]
speed = sqrt(state.u[i] * state.u[i] + state.v[i] * state.v[i])
if (speed > max_speed) {
max_speed = speed
}
kinetic_energy += 0.5 * sim.m * (state.u[i] * state.u[i] + state.v[i] * state.v[i])
if (state.x[i] < bx0) {
bx0 = state.x[i]
}
if (state.x[i] > bx1) {
bx1 = state.x[i]
}
if (state.y[i] < by0) {
by0 = state.y[i]
}
if (state.y[i] > by1) {
by1 = state.y[i]
}
}
avg_rho /= real64(sim.n)
ke_per_particle = kinetic_energy / real64(sim.n)
if (ke_per_particle < sim.steady_ke_per_particle_tol) {
quiet_steps += 1
} else {
quiet_steps = 0
}
if (step % sim.progress_every == 0) {
print("step", step, "/", sim.max_steps, "avg_rho", avg_rho, "max_speed", max_speed, "ke_per_particle", ke_per_particle, "quiet", quiet_steps)
}
if (step % sim.output_every == 0) {
csv = "x,y,u,v,rho,p\n"
do i = 0, sim.n - 1 {
csv += to_string(state.x[i]) + "," + to_string(state.y[i]) + "," + to_string(state.u[i]) + "," + to_string(state.v[i]) + "," + to_string(state.rho[i]) + "," + to_string(state.p[i]) + "\n"
}
frame_path = out_dir + "/frame_" + to_string(step) + ".csv"
write_file(frame_path, csv)
}
step += 1
}
string summary = "executed_steps=" + to_string(step) + "\n"
summary += "quiet_steps=" + to_string(quiet_steps) + "\n"
summary += "avg_rho=" + to_string(avg_rho) + "\n"
summary += "max_speed=" + to_string(max_speed) + "\n"
summary += "ke_per_particle=" + to_string(ke_per_particle) + "\n"
summary += "min_x=" + to_string(bx0) + "\n"
summary += "max_x=" + to_string(bx1) + "\n"
summary += "min_y=" + to_string(by0) + "\n"
summary += "max_y=" + to_string(by1) + "\n"
write_file(out_dir + "/summary.txt", summary)
print("Done:", sim.n, "particles, avg_rho=", avg_rho, "bounds=", bx0, bx1, by0, by1)
}