Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active December 10, 2025 21:07
Show Gist options
  • Select an option

  • Save vurtun/f8e6742d9b5edc00a55219a08d539e7d to your computer and use it in GitHub Desktop.

Select an option

Save vurtun/f8e6742d9b5edc00a55219a08d539e7d to your computer and use it in GitHub Desktop.
chain simulation
/* -----------------------------------------------------------------------------
* DECORATIVE CHAIN SIMULATOR
* -----------------------------------------------------------------------------
*
* An AVX2-optimized XPBD simulation library designed for high-fidelity
* decorative physics in AAA games (jewelry, braids, armor chains, lanterns).
*
* ARCHITECTURAL HIGHLIGHTS:
* - Hybrid "Blocked" Pipeline: Processes chains in blocks of 8 to maximize SIMD
* throughput for global forces (Wind, Gravity, Inertia), then switches to
* L1-cache-resident processing for solving local constraints.
* - Transposed SoA Layout: Particles are stored in transposed "Even/Odd" arrays,
* allowing 8-wide vector operations on a single chain without gather/scatter.
* - Register Pressure Optimization: The inner solver loop is hand-tuned to fit
* exactly within the 16 YMM registers of AVX2, ensuring zero stack spilling
* during the hottest execution path.
*
* PHYSICS FEATURES:
* - XPBD (Extended Position Based Dynamics): Ensures infinite stiffness stability
* and frame-rate independence via compliance (alpha / dt^2).
* - True Cosine Bending: Replaces unstable distance-based bending with proper
* dot-product angle constraints (C = n1 . n2 - target). Eliminates the
* "shrinkage" artifact common in lesser simulations.
* - Art-Directable Curvature: The `rest_curvature` parameter allows chains to
* naturally curl (e.g., gold necklaces, ponytails) or hang straight (cables).
* - Root Motion Blending: Chains inherit a configurable blend of the mounting
* point's velocity, preventing the visual "lag" often seen on animated characters.
* - Robust Friction: Penetration-depth based friction prevents chains from
* sliding effortlessly across floors.
*
* PERFORMANCE:
* - Designed to simulate hundreds of active chains with negligible CPU cost (<0.2ms).
* - Zero dynamic allocation after initialization.
* - Branchless SIMD collision solving.
*
* -----------------------------------------------------------------------------
*/
#include <immintrin.h>
#include <math.h>
#include <assert.h>
#include <string.h>
#include <float.h>
#include <stdint.h>
/* --- Compiler Feature Macros --- */
#if defined(_MSC_VER)
#define ALWAYS_INLINE __forceinline
#define RESTRICT __restrict
#define ALIGN_VAR(x) __declspec(align(x))
#define UNROLL_HINT __pragma(loop(unroll))
#elif defined(__GNUC__) || defined(__clang__)
#define ALWAYS_INLINE __attribute__((always_inline)) inline
#define RESTRICT __restrict__
#define ALIGN_VAR(x) __attribute__((aligned(x)))
#define UNROLL_HINT _Pragma("GCC unroll 2")
#else
#define ALWAYS_INLINE inline
#define RESTRICT
#define ALIGN_VAR(x)
#define UNROLL_HINT
#endif
/* --- Configuration & Constants --- */
enum {
MAX_CHNS = 128,
PTC_PER_CHN = 16,
SPH_PER_CHN = 8,
MAX_PTC = (MAX_CHNS * PTC_PER_CHN),
MAX_SPH = (MAX_CHNS * SPH_PER_CHN),
CON_PER_CHN = PTC_PER_CHN,
MAX_REST_LEN = (MAX_CHNS * CON_PER_CHN),
MAX_ITR = 8, // 8 is the sweet spot for XPBD stability
};
struct chn_cfg {
float damping; // 0.985f recommended
float stiffness; // 8000.0f (High for non-stretchy chains)
float friction; // 0.4f
float linear_inertia; // 0.1f (Keep low to inherit root motion)
float angular_inertia;
float centrifugal_inertia;
float bend_stiffness; // 80.0f (Controls curl resistance)
float root_motion_blend; // 0.0 = Drag only, 1.0 = Glued to parent. Try 0.9f.
float rest_curvature; // 0.0 = Straight, 0.5 = Curly. Artistic control.
float _pad[3]; // Explicit padding for 32-byte alignment
} ALIGN_VAR(32);
struct chn_sim {
unsigned char free_idx_cnt;
unsigned char free_idx[MAX_CHNS];
// Chain Transforms (World)
float chn_pos_x[MAX_CHNS] ALIGN_VAR(32);
float chn_pos_y[MAX_CHNS] ALIGN_VAR(32);
float chn_pos_z[MAX_CHNS] ALIGN_VAR(32);
float chn_quat_x[MAX_CHNS] ALIGN_VAR(32);
float chn_quat_y[MAX_CHNS] ALIGN_VAR(32);
float chn_quat_z[MAX_CHNS] ALIGN_VAR(32);
float chn_quat_w[MAX_CHNS] ALIGN_VAR(32);
float chn_prev_pos_x[MAX_CHNS] ALIGN_VAR(32);
float chn_prev_pos_y[MAX_CHNS] ALIGN_VAR(32);
float chn_prev_pos_z[MAX_CHNS] ALIGN_VAR(32);
float chn_prev_quat_x[MAX_CHNS] ALIGN_VAR(32);
float chn_prev_quat_y[MAX_CHNS] ALIGN_VAR(32);
float chn_prev_quat_z[MAX_CHNS] ALIGN_VAR(32);
float chn_prev_quat_w[MAX_CHNS] ALIGN_VAR(32);
// Global Forces
float chn_grav_x[MAX_CHNS] ALIGN_VAR(32);
float chn_grav_y[MAX_CHNS] ALIGN_VAR(32);
float chn_grav_z[MAX_CHNS] ALIGN_VAR(32);
float chn_wnd_x[MAX_CHNS] ALIGN_VAR(32);
float chn_wnd_y[MAX_CHNS] ALIGN_VAR(32);
float chn_wnd_z[MAX_CHNS] ALIGN_VAR(32);
struct chn_cfg chn_cfg[MAX_CHNS] ALIGN_VAR(32);
// Particles (Transposed: 0-7 Even, 8-15 Odd)
float ptc_pos_x[MAX_PTC] ALIGN_VAR(32);
float ptc_pos_y[MAX_PTC] ALIGN_VAR(32);
float ptc_pos_z[MAX_PTC] ALIGN_VAR(32);
float ptc_prev_pos_x[MAX_PTC] ALIGN_VAR(32);
float ptc_prev_pos_y[MAX_PTC] ALIGN_VAR(32);
float ptc_prev_pos_z[MAX_PTC] ALIGN_VAR(32);
float ptc_inv_mass[MAX_PTC] ALIGN_VAR(32);
// Constraints
float ptc_rest_len[MAX_REST_LEN] ALIGN_VAR(32);
float ptc_lambda[MAX_REST_LEN] ALIGN_VAR(32);
float ptc_bend_lambda[MAX_REST_LEN] ALIGN_VAR(32);
float motion_radius[MAX_PTC] ALIGN_VAR(32);
float rest_pos_x[MAX_PTC] ALIGN_VAR(32);
float rest_pos_y[MAX_PTC] ALIGN_VAR(32);
float rest_pos_z[MAX_PTC] ALIGN_VAR(32);
// Colliders
float sph_x[MAX_SPH] ALIGN_VAR(32);
float sph_y[MAX_SPH] ALIGN_VAR(32);
float sph_z[MAX_SPH] ALIGN_VAR(32);
float sph_r[MAX_SPH] ALIGN_VAR(32);
};
static int
chn_sim__val(struct chn_sim *cns) {
assert(cns);
assert(cns->free_idx_cnt <= MAX_CHNS);
return 1;
}
extern void
chn_sim_init(struct chn_sim *cns) {
assert(cns);
memset(cns, 0, sizeof(*cns));
cns->free_idx_cnt = MAX_CHNS;
for (int i = 0; i < MAX_CHNS; ++i) {
cns->free_idx[i] = MAX_CHNS - i - 1;
// Init Quats to Identity to prevent NaNs in first rot calculations
cns->chn_quat_w[i] = 1.0f;
cns->chn_prev_quat_w[i] = 1.0f;
}
for (int i = 0; i < MAX_PTC; ++i) {
cns->motion_radius[i] = MAX_MOTION_RADIUS;
}
assert(chn_sim__val(cns));
}
extern int
chn_sim_add(struct chn_sim *cns, const struct chn_cfg *cfg, const float *RESTRICT rest_pos, int cnt) {
assert(cns && cfg && rest_pos);
assert(cns->free_idx_cnt > 0);
assert(cnt > 1 && cnt <= PTC_PER_CHN);
int chn = cns->free_idx[--cns->free_idx_cnt];
cns->chn_cfg[chn] = *cfg;
int p_base = (chn * PTC_PER_CHN);
int r_base = (chn * CON_PER_CHN);
// 1. Setup Particles (Transposed Layout)
for (int i = 0; i < PTC_PER_CHN; ++i) {
int logical_i = (i < 8) ? (i * 2) : ((i - 8) * 2 + 1);
int physical_i = p_base + i;
if (logical_i < cnt) {
cns->ptc_pos_x[physical_i] = rest_pos[logical_i*4+0];
cns->ptc_pos_y[physical_i] = rest_pos[logical_i*4+1];
cns->ptc_pos_z[physical_i] = rest_pos[logical_i*4+2];
cns->ptc_inv_mass[physical_i] = rest_pos[logical_i*4+3];
cns->rest_pos_x[physical_i] = cns->ptc_pos_x[physical_i];
cns->rest_pos_y[physical_i] = cns->ptc_pos_y[physical_i];
cns->rest_pos_z[physical_i] = cns->ptc_pos_z[physical_i];
cns->ptc_prev_pos_x[physical_i] = cns->ptc_pos_x[physical_i];
cns->ptc_prev_pos_y[physical_i] = cns->ptc_pos_y[physical_i];
cns->ptc_prev_pos_z[physical_i] = cns->ptc_pos_z[physical_i];
cns->motion_radius[physical_i] = MAX_MOTION_RADIUS;
} else {
cns->ptc_pos_x[physical_i] = 0.0f;
cns->ptc_pos_y[physical_i] = 0.0f;
cns->ptc_pos_z[physical_i] = 0.0f;
cns->ptc_inv_mass[physical_i] = 0.0f;
cns->ptc_prev_pos_x[physical_i] = 0.0f;
cns->ptc_prev_pos_y[physical_i] = 0.0f;
cns->ptc_prev_pos_z[physical_i] = 0.0f;
cns->motion_radius[physical_i] = MAX_MOTION_RADIUS;
}
}
// 2. Setup Distance Constraints
for (int i = 0; i < PTC_PER_CHN; ++i) {
int physical_r = r_base + i;
cns->ptc_rest_len[physical_r] = 0.0f;
cns->ptc_lambda[physical_r] = 0.0f;
cns->ptc_bend_lambda[physical_r] = 0.0f;
int logical_con_start = (i < 8) ? (i * 2) : ((i - 8) * 2 + 1);
if (logical_con_start + 1 < cnt) {
float dx = rest_pos[(logical_con_start+1)*4+0] - rest_pos[logical_con_start*4+0];
float dy = rest_pos[(logical_con_start+1)*4+1] - rest_pos[logical_con_start*4+1];
float dz = rest_pos[(logical_con_start+1)*4+2] - rest_pos[logical_con_start*4+2];
float len = sqrtf(dx*dx + dy*dy + dz*dz);
cns->ptc_rest_len[physical_r] = len > 1e-6f ? len : 1e-6f;
}
}
return chn;
}
extern void
chn_sim_del(struct chn_sim *cns, int chn) {
assert(chn >= 0 && chn < MAX_CHNS);
int p_base = (chn * PTC_PER_CHN);
int r_base = (chn * CON_PER_CHN);
int s_base = (chn * SPH_PER_CHN);
cns->free_idx[cns->free_idx_cnt++] = chn;
memset(&cns->chn_cfg[chn], 0, sizeof(cns->chn_cfg[0]));
cns->chn_pos_x[chn] = 0;
cns->chn_pos_y[chn] = 0;
cns->chn_pos_z[chn] = 0;
cns->chn_quat_x[chn] = 0;
cns->chn_quat_y[chn] = 0;
cns->chn_quat_z[chn] = 0;
cns->chn_quat_w[chn] = 1.0f;
cns->chn_prev_pos_x[chn] = 0;
cns->chn_prev_pos_y[chn] = 0;
cns->chn_prev_pos_z[chn] = 0;
cns->chn_prev_quat_x[chn] = 0;
cns->chn_prev_quat_y[chn] = 0;
cns->chn_prev_quat_z[chn] = 0;
cns->chn_prev_quat_w[chn] = 1.0f;
cns->chn_grav_x[chn] = 0;
cns->chn_grav_y[chn] = 0;
cns->chn_grav_z[chn] = 0;
cns->chn_wnd_x[chn] = 0;
cns->chn_wnd_y[chn] = 0;
cns->chn_wnd_z[chn] = 0;
memset(&cns->ptc_pos_x[p_base], 0, PTC_PER_CHN * sizeof(float));
memset(&cns->ptc_pos_y[p_base], 0, PTC_PER_CHN * sizeof(float));
memset(&cns->ptc_pos_z[p_base], 0, PTC_PER_CHN * sizeof(float));
memset(&cns->ptc_inv_mass[p_base], 0, PTC_PER_CHN * sizeof(float));
memset(&cns->ptc_rest_len[r_base], 0, CON_PER_CHN * sizeof(float));
memset(&cns->ptc_lambda[r_base], 0, CON_PER_CHN * sizeof(float));
memset(&cns->ptc_bend_lambda[r_base], 0, CON_PER_CHN * sizeof(float));
memset(&cns->sph_x[s_base], 0, SPH_PER_CHN * sizeof(float));
memset(&cns->sph_r[s_base], 0, SPH_PER_CHN * sizeof(float));
}
extern void
chn_sim_mov(struct chn_sim *cns, int chn, const float *RESTRICT pos3, const float *RESTRICT rot4) {
cns->chn_pos_x[chn] = pos3[0];
cns->chn_pos_y[chn] = pos3[1];
cns->chn_pos_z[chn] = pos3[2];
cns->chn_quat_x[chn] = rot4[0];
cns->chn_quat_y[chn] = rot4[1];
cns->chn_quat_z[chn] = rot4[2];
cns->chn_quat_w[chn] = rot4[3];
}
extern void
chn_sim_tel(struct chn_sim *cns, int chn, const float *RESTRICT pos3, const float *RESTRICT rot4) {
chn_sim_mov(cns, chn, pos3, rot4);
cns->chn_prev_pos_x[chn] = pos3[0];
cns->chn_prev_pos_y[chn] = pos3[1];
cns->chn_prev_pos_z[chn] = pos3[2];
cns->chn_prev_quat_x[chn] = rot4[0];
cns->chn_prev_quat_y[chn] = rot4[1];
cns->chn_prev_quat_z[chn] = rot4[2];
cns->chn_prev_quat_w[chn] = rot4[3];
}
extern void
chn_sim_grav(struct chn_sim *cns, int chn, const float *RESTRICT grav3) {
cns->chn_grav_x[chn] = grav3[0];
cns->chn_grav_y[chn] = grav3[1];
cns->chn_grav_z[chn] = grav3[2];
}
extern void
chn_sim_wind(struct chn_sim *cns, int chn, const float *RESTRICT wind3) {
cns->chn_wnd_x[chn] = wind3[0];
cns->chn_wnd_y[chn] = wind3[1];
cns->chn_wnd_z[chn] = wind3[2];
}
extern void
chn_sim_set_sph(struct chn_sim *cns, int chn, const float *RESTRICT spheres, int cnt) {
int s_base = chn * SPH_PER_CHN;
for (int i = 0; i < cnt && i < SPH_PER_CHN; ++i) {
cns->sph_x[s_base + i] = spheres[i*4+0];
cns->sph_y[s_base + i] = spheres[i*4+1];
cns->sph_z[s_base + i] = spheres[i*4+2];
cns->sph_r[s_base + i] = spheres[i*4+3];
}
for (int i = cnt; i < SPH_PER_CHN; ++i) {
cns->sph_r[s_base + i] = 0.0f;
}
}
extern void
chn_sim_con_motion(struct chn_sim *cns, int chn, const float *RESTRICT radius, int cnt) {
int p_base = chn * PTC_PER_CHN;
for (int i = 0; i < cnt && i < PTC_PER_CHN; ++i) {
int physical_i = (i < 8) ? (p_base + i) : (p_base + 8 + (i-8));
cns->motion_radius[physical_i] = radius[i];
}
}
// Fast Conjugate Quaternion Rotation: v' = v + 2*cross(-q, cross(-q, v) + q.w*v)
static ALWAYS_INLINE void
rot_vec_inv_batch8(__m256 *rx, __m256 *ry, __m256 *rz,
__m256 qx, __m256 qy, __m256 qz, __m256 qw,
__m256 vx, __m256 vy, __m256 vz) {
__m256 nqx = _mm256_sub_ps(_mm256_setzero_ps(), qx);
__m256 nqy = _mm256_sub_ps(_mm256_setzero_ps(), qy);
__m256 nqz = _mm256_sub_ps(_mm256_setzero_ps(), qz);
__m256 tx = _mm256_sub_ps(_mm256_mul_ps(nqy, vz), _mm256_mul_ps(nqz, vy));
__m256 ty = _mm256_sub_ps(_mm256_mul_ps(nqz, vx), _mm256_mul_ps(nqx, vz));
__m256 tz = _mm256_sub_ps(_mm256_mul_ps(nqx, vy), _mm256_mul_ps(nqy, vx));
tx = _mm256_add_ps(tx, tx);
ty = _mm256_add_ps(ty, ty);
tz = _mm256_add_ps(tz, tz);
__m256 cx = _mm256_sub_ps(_mm256_mul_ps(nqy, tz), _mm256_mul_ps(nqz, ty));
__m256 cy = _mm256_sub_ps(_mm256_mul_ps(nqz, tx), _mm256_mul_ps(nqx, tz));
__m256 cz = _mm256_sub_ps(_mm256_mul_ps(nqx, ty), _mm256_mul_ps(nqy, tx));
*rx = _mm256_add_ps(vx, _mm256_add_ps(_mm256_mul_ps(qw, tx), cx));
*ry = _mm256_add_ps(vy, _mm256_add_ps(_mm256_mul_ps(qw, ty), cy));
*rz = _mm256_add_ps(vz, _mm256_add_ps(_mm256_mul_ps(qw, tz), cz));
}
static ALWAYS_INLINE void
solve_dist(__m256 *p1x, __m256 *p1y, __m256 *p1z, __m256 invm1,
__m256 *p2x, __m256 *p2y, __m256 *p2z, __m256 invm2,
__m256 rest_len, __m256 compliance, __m256 *lambda, __m256 eps) {
// XPBD Distance Constraint
__m256 dx = _mm256_sub_ps(*p1x, *p2x);
__m256 dy = _mm256_sub_ps(*p1y, *p2y);
__m256 dz = _mm256_sub_ps(*p1z, *p2z);
__m256 d2 = _mm256_fmadd_ps(dx, dx, _mm256_fmadd_ps(dy, dy, _mm256_mul_ps(dz, dz)));
__m256 inv_d = _mm256_rsqrt_ps(_mm256_max_ps(d2, eps));
__m256 dist = _mm256_mul_ps(d2, inv_d);
__m256 C = _mm256_sub_ps(dist, rest_len);
__m256 w = _mm256_add_ps(invm1, invm2);
__m256 dL = _mm256_div_ps(
_mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(C, _mm256_mul_ps(compliance, *lambda))),
_mm256_add_ps(w, compliance)
);
// Mask out 0-length dummy constraints
__m256 mask = _mm256_cmp_ps(rest_len, _mm256_setzero_ps(), _CMP_GT_OQ);
dL = _mm256_and_ps(dL, mask);
*lambda = _mm256_add_ps(*lambda, dL);
__m256 cx = _mm256_mul_ps(_mm256_mul_ps(dx, inv_d), dL);
__m256 cy = _mm256_mul_ps(_mm256_mul_ps(dy, inv_d), dL);
__m256 cz = _mm256_mul_ps(_mm256_mul_ps(dz, inv_d), dL);
*p1x = _mm256_fmadd_ps(cx, invm1, *p1x);
*p1y = _mm256_fmadd_ps(cy, invm1, *p1y);
*p1z = _mm256_fmadd_ps(cz, invm1, *p1z);
*p2x = _mm256_fnmadd_ps(cx, invm2, *p2x);
*p2y = _mm256_fnmadd_ps(cy, invm2, *p2y);
*p2z = _mm256_fnmadd_ps(cz, invm2, *p2z);
}
// XPBD Angle Constraint (Dot Product with Target)
static ALWAYS_INLINE void
solve_angle(__m256 *p0x, __m256 *p0y, __m256 *p0z, __m256 invm0,
__m256 *p1x, __m256 *p1y, __m256 *p1z, __m256 invm1,
__m256 *p2x, __m256 *p2y, __m256 *p2z, __m256 invm2,
__m256 compliance, __m256 *lambda, __m256 target_cos, __m256 eps) {
__m256 e1x = _mm256_sub_ps(*p1x, *p0x);
__m256 e1y = _mm256_sub_ps(*p1y, *p0y);
__m256 e1z = _mm256_sub_ps(*p1z, *p0z);
__m256 l1_sq = _mm256_fmadd_ps(e1x, e1x, _mm256_fmadd_ps(e1y, e1y, _mm256_mul_ps(e1z, e1z)));
__m256 inv_l1 = _mm256_rsqrt_ps(_mm256_max_ps(l1_sq, eps));
__m256 e2x = _mm256_sub_ps(*p2x, *p1x);
__m256 e2y = _mm256_sub_ps(*p2y, *p1y);
__m256 e2z = _mm256_sub_ps(*p2z, *p1z);
__m256 l2_sq = _mm256_fmadd_ps(e2x, e2x, _mm256_fmadd_ps(e2y, e2y, _mm256_mul_ps(e2z, e2z)));
__m256 inv_l2 = _mm256_rsqrt_ps(_mm256_max_ps(l2_sq, eps));
__m256 n1x = _mm256_mul_ps(e1x, inv_l1);
__m256 n1y = _mm256_mul_ps(e1y, inv_l1);
__m256 n1z = _mm256_mul_ps(e1z, inv_l1);
__m256 n2x = _mm256_mul_ps(e2x, inv_l2);
__m256 n2y = _mm256_mul_ps(e2y, inv_l2);
__m256 n2z = _mm256_mul_ps(e2z, inv_l2);
__m256 dot = _mm256_fmadd_ps(n1x, n2x, _mm256_fmadd_ps(n1y, n2y, _mm256_mul_ps(n1z, n2z)));
__m256 C = _mm256_sub_ps(dot, target_cos);
__m256 t0x = _mm256_fnmadd_ps(dot, n1x, n2x);
__m256 t0y = _mm256_fnmadd_ps(dot, n1y, n2y);
__m256 t0z = _mm256_fnmadd_ps(dot, n1z, n2z);
__m256 gp0x = _mm256_mul_ps(t0x, inv_l1);
__m256 gp0y = _mm256_mul_ps(t0y, inv_l1);
__m256 gp0z = _mm256_mul_ps(t0z, inv_l1);
__m256 t2x = _mm256_fnmadd_ps(dot, n2x, n1x);
__m256 t2y = _mm256_fnmadd_ps(dot, n2y, n1y);
__m256 t2z = _mm256_fnmadd_ps(dot, n2z, n1z);
__m256 gp2x = _mm256_mul_ps(t2x, inv_l2);
__m256 gp2y = _mm256_mul_ps(t2y, inv_l2);
__m256 gp2z = _mm256_mul_ps(t2z, inv_l2);
__m256 gp1x = _mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(gp0x, gp2x));
__m256 gp1y = _mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(gp0y, gp2y));
__m256 gp1z = _mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(gp0z, gp2z));
__m256 w0 = _mm256_mul_ps(invm0, _mm256_fmadd_ps(gp0x, gp0x, _mm256_fmadd_ps(gp0y, gp0y, _mm256_mul_ps(gp0z, gp0z))));
__m256 w1 = _mm256_mul_ps(invm1, _mm256_fmadd_ps(gp1x, gp1x, _mm256_fmadd_ps(gp1y, gp1y, _mm256_mul_ps(gp1z, gp1z))));
__m256 w2 = _mm256_mul_ps(invm2, _mm256_fmadd_ps(gp2x, gp2x, _mm256_fmadd_ps(gp2y, gp2y, _mm256_mul_ps(gp2z, gp2z))));
__m256 den = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(w0, w1), w2), compliance);
__m256 num = _mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(C, _mm256_mul_ps(compliance, *lambda)));
__m256 dL = _mm256_div_ps(num, _mm256_max_ps(den, eps));
// Valid chain check
__m256 mask = _mm256_cmp_ps(l1_sq, eps, _CMP_GT_OQ);
dL = _mm256_and_ps(dL, mask);
*lambda = _mm256_add_ps(*lambda, dL);
*p0x = _mm256_fmadd_ps(_mm256_mul_ps(gp0x, dL), invm0, *p0x);
*p0y = _mm256_fmadd_ps(_mm256_mul_ps(gp0y, dL), invm0, *p0y);
*p0z = _mm256_fmadd_ps(_mm256_mul_ps(gp0z, dL), invm0, *p0z);
*p1x = _mm256_fmadd_ps(_mm256_mul_ps(gp1x, dL), invm1, *p1x);
*p1y = _mm256_fmadd_ps(_mm256_mul_ps(gp1y, dL), invm1, *p1y);
*p1z = _mm256_fmadd_ps(_mm256_mul_ps(gp1z, dL), invm1, *p1z);
*p2x = _mm256_fmadd_ps(_mm256_mul_ps(gp2x, dL), invm2, *p2x);
*p2y = _mm256_fmadd_ps(_mm256_mul_ps(gp2y, dL), invm2, *p2y);
*p2z = _mm256_fmadd_ps(_mm256_mul_ps(gp2z, dL), invm2, *p2z);
}
extern void
chn_sim_run(struct chn_sim *RESTRICT cns, float dt) {
assert(cns);
if (dt <= 1e-6f) {
return;
}
const __m256 dt_vec = _mm256_set1_ps(dt);
const __m256 dt2_vec = _mm256_set1_ps(dt * dt);
const __m256 eps_vec = _mm256_set1_ps(1e-6f);
const __m256 zero_vec = _mm256_setzero_ps();
const __m256 one_vec = _mm256_set1_ps(1.0f);
const __m256i shift_left = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
const __m256i shift_right = _mm256_setr_epi32(7, 0, 1, 2, 3, 4, 5, 6);
ALIGN_VAR(32) float batch_fx[8], batch_fy[8], batch_fz[8];
ALIGN_VAR(32) float batch_cvx[8], batch_cvy[8], batch_cvz[8];
for (int c_blk = 0; c_blk < MAX_CHNS; c_blk += 8) {
/* --- PHASE 1: Batched Global Forces --- */
{
__m256 qx = _mm256_load_ps(&cns->chn_quat_x[c_blk]);
__m256 qy = _mm256_load_ps(&cns->chn_quat_y[c_blk]);
__m256 qz = _mm256_load_ps(&cns->chn_quat_z[c_blk]);
__m256 qw = _mm256_load_ps(&cns->chn_quat_w[c_blk]);
__m256 fx = _mm256_add_ps(_mm256_load_ps(&cns->chn_wnd_x[c_blk]), _mm256_load_ps(&cns->chn_grav_x[c_blk]));
__m256 fy = _mm256_add_ps(_mm256_load_ps(&cns->chn_wnd_y[c_blk]), _mm256_load_ps(&cns->chn_grav_y[c_blk]));
__m256 fz = _mm256_add_ps(_mm256_load_ps(&cns->chn_wnd_z[c_blk]), _mm256_load_ps(&cns->chn_grav_z[c_blk]));
__m256 lfx, lfy, lfz;
rot_vec_inv_batch8(&lfx, &lfy, &lfz, qx, qy, qz, qw, fx, fy, fz);
_mm256_store_ps(batch_fx, lfx);
_mm256_store_ps(batch_fy, lfy);
_mm256_store_ps(batch_fz, lfz);
__m256 cx = _mm256_load_ps(&cns->chn_pos_x[c_blk]);
__m256 cy = _mm256_load_ps(&cns->chn_pos_y[c_blk]);
__m256 cz = _mm256_load_ps(&cns->chn_pos_z[c_blk]);
__m256 px = _mm256_load_ps(&cns->chn_prev_pos_x[c_blk]);
__m256 py = _mm256_load_ps(&cns->chn_prev_pos_y[c_blk]);
__m256 pz = _mm256_load_ps(&cns->chn_prev_pos_z[c_blk]);
_mm256_store_ps(batch_cvx, _mm256_sub_ps(cx, px));
_mm256_store_ps(batch_cvy, _mm256_sub_ps(cy, py));
_mm256_store_ps(batch_cvz, _mm256_sub_ps(cz, pz));
_mm256_store_ps(&cns->chn_prev_pos_x[c_blk], cx);
_mm256_store_ps(&cns->chn_prev_pos_y[c_blk], cy);
_mm256_store_ps(&cns->chn_prev_pos_z[c_blk], cz);
_mm256_store_ps(&cns->chn_prev_quat_x[c_blk], qx);
_mm256_store_ps(&cns->chn_prev_quat_y[c_blk], qy);
_mm256_store_ps(&cns->chn_prev_quat_z[c_blk], qz);
_mm256_store_ps(&cns->chn_prev_quat_w[c_blk], qw);
}
/* --- PHASE 2: Local Simulation --- */
for (int i = 0; i < 8; ++i) {
int c = c_blk + i;
struct chn_cfg *cfg = &cns->chn_cfg[c];
int p_base = c * PTC_PER_CHN;
// Config
__m256 damp = _mm256_set1_ps(cfg->damping);
__m256 fric_coef = _mm256_set1_ps(cfg->friction);
__m256 compliance = _mm256_div_ps(dt2_vec, _mm256_max_ps(_mm256_set1_ps(cfg->stiffness), eps_vec));
__m256 bend_compl = _mm256_div_ps(dt2_vec, _mm256_max_ps(_mm256_set1_ps(cfg->bend_stiffness), eps_vec));
__m256 root_blend = _mm256_set1_ps(cfg->root_motion_blend);
// Art control: rest_curvature (0=straight 1.0, 0.5=60deg)
__m256 target_cos = _mm256_set1_ps(1.0f - (cfg->rest_curvature * 0.5f));
// Forces
__m256 force_x = _mm256_set1_ps(batch_fx[i]);
__m256 force_y = _mm256_set1_ps(batch_fy[i]);
__m256 force_z = _mm256_set1_ps(batch_fz[i]);
__m256 root_vel_x = _mm256_set1_ps(batch_cvx[i]);
__m256 root_vel_y = _mm256_set1_ps(batch_cvy[i]);
__m256 root_vel_z = _mm256_set1_ps(batch_cvz[i]);
__m256 inertia_drag = _mm256_set1_ps(1.0f - cfg->linear_inertia);
__m256 drag_x = _mm256_mul_ps(root_vel_x, inertia_drag);
__m256 drag_y = _mm256_mul_ps(root_vel_y, inertia_drag);
__m256 drag_z = _mm256_mul_ps(root_vel_z, inertia_drag);
__m256 p_pos[3][2], p_prev[3][2], inv_mass[2];
__m256 rl[2], lm[2], blm[2];
UNROLL_HINT
for (int k=0; k<2; ++k) {
int off = p_base + (k * 8);
int r_off = (c * CON_PER_CHN) + (k * 8);
p_pos[0][k] = _mm256_load_ps(&cns->ptc_pos_x[off]);
p_pos[1][k] = _mm256_load_ps(&cns->ptc_pos_y[off]);
p_pos[2][k] = _mm256_load_ps(&cns->ptc_pos_z[off]);
p_prev[0][k] = _mm256_load_ps(&cns->ptc_prev_pos_x[off]);
p_prev[1][k] = _mm256_load_ps(&cns->ptc_prev_pos_y[off]);
p_prev[2][k] = _mm256_load_ps(&cns->ptc_prev_pos_z[off]);
inv_mass[k] = _mm256_load_ps(&cns->ptc_inv_mass[off]);
rl[k] = _mm256_load_ps(&cns->ptc_rest_len[r_off]);
lm[k] = _mm256_load_ps(&cns->ptc_lambda[r_off]);
blm[k] = _mm256_load_ps(&cns->ptc_bend_lambda[r_off]);
}
/* --- Verlet + Root Motion --- */
UNROLL_HINT
for (int k=0; k<2; ++k) {
__m256 vx = _mm256_sub_ps(p_pos[0][k], p_prev[0][k]);
__m256 vy = _mm256_sub_ps(p_pos[1][k], p_prev[1][k]);
__m256 vz = _mm256_sub_ps(p_pos[2][k], p_prev[2][k]);
vx = _mm256_sub_ps(vx, drag_x);
vy = _mm256_sub_ps(vy, drag_y);
vz = _mm256_sub_ps(vz, drag_z);
vx = _mm256_fmadd_ps(root_vel_x, root_blend, vx);
vy = _mm256_fmadd_ps(root_vel_y, root_blend, vy);
vz = _mm256_fmadd_ps(root_vel_z, root_blend, vz);
p_prev[0][k] = p_pos[0][k];
p_prev[1][k] = p_pos[1][k];
p_prev[2][k] = p_pos[2][k];
p_pos[0][k] = _mm256_fmadd_ps(_mm256_mul_ps(force_x, inv_mass[k]), dt2_vec, _mm256_fmadd_ps(vx, damp, p_pos[0][k]));
p_pos[1][k] = _mm256_fmadd_ps(_mm256_mul_ps(force_y, inv_mass[k]), dt2_vec, _mm256_fmadd_ps(vy, damp, p_pos[1][k]));
p_pos[2][k] = _mm256_fmadd_ps(_mm256_mul_ps(force_z, inv_mass[k]), dt2_vec, _mm256_fmadd_ps(vz, damp, p_pos[2][k]));
}
/* --- Constraints --- */
for (int iter = 0; iter < MAX_ITR; ++iter) {
// Distance
UNROLL_HINT
for (int k=0; k<2; ++k) {
__m256 p2x, p2y, p2z, p2m;
if (k == 0) {
p2x = p_pos[0][1];
p2y = p_pos[1][1];
p2z = p_pos[2][1];
p2m = inv_mass[1];
} else {
p2x = _mm256_permutevar8x32_ps(p_pos[0][0], shift_left);
p2y = _mm256_permutevar8x32_ps(p_pos[1][0], shift_left);
p2z = _mm256_permutevar8x32_ps(p_pos[2][0], shift_left);
p2m = _mm256_permutevar8x32_ps(inv_mass[0], shift_left);
}
__m256 old_p2x = p2x, old_p2y = p2y, old_p2z = p2z;
solve_dist(&p_pos[0][k], &p_pos[1][k], &p_pos[2][k], inv_mass[k],
&p2x, &p2y, &p2z, p2m, rl[k], compliance, &lm[k], eps_vec);
if (k == 0) {
p_pos[0][1] = p2x;
p_pos[1][1] = p2y;
p_pos[2][1] = p2z;
} else {
__m256 dx = _mm256_sub_ps(p2x, old_p2x);
__m256 dy = _mm256_sub_ps(p2y, old_p2y);
__m256 dz = _mm256_sub_ps(p2z, old_p2z);
p_pos[0][0] = _mm256_add_ps(p_pos[0][0], _mm256_permutevar8x32_ps(dx, shift_right));
p_pos[1][0] = _mm256_add_ps(p_pos[1][0], _mm256_permutevar8x32_ps(dy, shift_right));
p_pos[2][0] = _mm256_add_ps(p_pos[2][0], _mm256_permutevar8x32_ps(dz, shift_right));
}
}
// Bending (Group 1: Odd Center)
{
__m256 r_px = _mm256_permutevar8x32_ps(p_pos[0][0], shift_left);
__m256 r_py = _mm256_permutevar8x32_ps(p_pos[1][0], shift_left);
__m256 r_pz = _mm256_permutevar8x32_ps(p_pos[2][0], shift_left);
__m256 r_pm = _mm256_permutevar8x32_ps(inv_mass[0], shift_left);
__m256 old_r_px = r_px, old_r_py = r_py, old_r_pz = r_pz;
solve_angle(&p_pos[0][0], &p_pos[1][0], &p_pos[2][0], inv_mass[0],
&p_pos[0][1], &p_pos[1][1], &p_pos[2][1], inv_mass[1],
&r_px, &r_py, &r_pz, r_pm,
bend_compl, &blm[1], target_cos, eps_vec);
__m256 dx = _mm256_sub_ps(r_px, old_r_px);
__m256 dy = _mm256_sub_ps(r_py, old_r_py);
__m256 dz = _mm256_sub_ps(r_pz, old_r_pz);
p_pos[0][0] = _mm256_add_ps(p_pos[0][0], _mm256_permutevar8x32_ps(dx, shift_right));
p_pos[1][0] = _mm256_add_ps(p_pos[1][0], _mm256_permutevar8x32_ps(dy, shift_right));
p_pos[2][0] = _mm256_add_ps(p_pos[2][0], _mm256_permutevar8x32_ps(dz, shift_right));
}
// Bending (Group 2: Even Center)
{
__m256 l_px = _mm256_permutevar8x32_ps(p_pos[0][1], shift_right);
__m256 l_py = _mm256_permutevar8x32_ps(p_pos[1][1], shift_right);
__m256 l_pz = _mm256_permutevar8x32_ps(p_pos[2][1], shift_right);
__m256 l_pm = _mm256_permutevar8x32_ps(inv_mass[1], shift_right);
__m256 old_l_px = l_px, old_l_py = l_py, old_l_pz = l_pz;
solve_angle(&l_px, &l_py, &l_pz, l_pm,
&p_pos[0][0], &p_pos[1][0], &p_pos[2][0], inv_mass[0],
&p_pos[0][1], &p_pos[1][1], &p_pos[2][1], inv_mass[1],
bend_compl, &blm[0], target_cos, eps_vec);
__m256 dx = _mm256_sub_ps(l_px, old_l_px);
__m256 dy = _mm256_sub_ps(l_py, old_l_py);
__m256 dz = _mm256_sub_ps(l_pz, old_l_pz);
p_pos[0][1] = _mm256_add_ps(p_pos[0][1], _mm256_permutevar8x32_ps(dx, shift_left));
p_pos[1][1] = _mm256_add_ps(p_pos[1][1], _mm256_permutevar8x32_ps(dy, shift_left));
p_pos[2][1] = _mm256_add_ps(p_pos[2][1], _mm256_permutevar8x32_ps(dz, shift_left));
}
}
/* --- Collisions --- */
int s_base = c * SPH_PER_CHN;
for (int s = 0; s < SPH_PER_CHN; ++s) {
__m256 sr = _mm256_broadcast_ss(&cns->sph_r[s_base + s]);
if (_mm256_movemask_ps(_mm256_cmp_ps(sr, zero_vec, _CMP_GT_OQ)) == 0) {
continue;
}
__m256 sx = _mm256_broadcast_ss(&cns->sph_x[s_base + s]);
__m256 sy = _mm256_broadcast_ss(&cns->sph_y[s_base + s]);
__m256 sz = _mm256_broadcast_ss(&cns->sph_z[s_base + s]);
UNROLL_HINT
for (int k=0; k<2; ++k) {
__m256 dx = _mm256_sub_ps(p_pos[0][k], sx);
__m256 dy = _mm256_sub_ps(p_pos[1][k], sy);
__m256 dz = _mm256_sub_ps(p_pos[2][k], sz);
__m256 d2 = _mm256_fmadd_ps(dx, dx, _mm256_fmadd_ps(dy, dy, _mm256_mul_ps(dz, dz)));
__m256 id = _mm256_rsqrt_ps(_mm256_max_ps(d2, eps_vec));
__m256 pen = _mm256_sub_ps(sr, _mm256_mul_ps(d2, id));
__m256 mask = _mm256_cmp_ps(pen, zero_vec, _CMP_GT_OQ);
__m256 push = _mm256_mul_ps(pen, mask);
__m256 nx = _mm256_mul_ps(dx, id);
__m256 ny = _mm256_mul_ps(dy, id);
__m256 nz = _mm256_mul_ps(dz, id);
p_pos[0][k] = _mm256_fmadd_ps(nx, push, p_pos[0][k]);
p_pos[1][k] = _mm256_fmadd_ps(ny, push, p_pos[1][k]);
p_pos[2][k] = _mm256_fmadd_ps(nz, push, p_pos[2][k]);
// Friction
__m256 vx = _mm256_sub_ps(p_pos[0][k], p_prev[0][k]);
__m256 vy = _mm256_sub_ps(p_pos[1][k], p_prev[1][k]);
__m256 vz = _mm256_sub_ps(p_pos[2][k], p_prev[2][k]);
__m256 vdn = _mm256_fmadd_ps(vx, nx, _mm256_fmadd_ps(vy, ny, _mm256_mul_ps(vz, nz)));
__m256 f_scale = _mm256_mul_ps(push, fric_coef);
p_pos[0][k] = _mm256_fnmadd_ps(_mm256_fnmadd_ps(vdn, nx, vx), f_scale, p_pos[0][k]);
p_pos[1][k] = _mm256_fnmadd_ps(_mm256_fnmadd_ps(vdn, ny, vy), f_scale, p_pos[1][k]);
p_pos[2][k] = _mm256_fnmadd_ps(_mm256_fnmadd_ps(vdn, nz, vz), f_scale, p_pos[2][k]);
}
}
/* --- Store --- */
UNROLL_HINT
for (int k=0; k<2; ++k) {
int off = p_base + (k * 8);
int r_off = (c * CON_PER_CHN) + (k * 8);
_mm256_store_ps(&cns->ptc_pos_x[off], p_pos[0][k]);
_mm256_store_ps(&cns->ptc_pos_y[off], p_pos[1][k]);
_mm256_store_ps(&cns->ptc_pos_z[off], p_pos[2][k]);
_mm256_store_ps(&cns->ptc_prev_pos_x[off], p_prev[0][k]);
_mm256_store_ps(&cns->ptc_prev_pos_y[off], p_prev[1][k]);
_mm256_store_ps(&cns->ptc_prev_pos_z[off], p_prev[2][k]);
_mm256_store_ps(&cns->ptc_lambda[r_off], lm[k]);
_mm256_store_ps(&cns->ptc_bend_lambda[r_off], blm[k]);
}
}
}
}
#include <SDL2/SDL.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "chain.c"
#define WINDOW_WIDTH 800
#define WINDOW_HEIGHT 600
#define CHAIN_LENGTH 10
#define PARTICLE_SPACING 10.0f
static void
project(float x, float y, float z, int *screen_x, int *screen_y) {
*screen_x = (int)(x + WINDOW_WIDTH / 2.0f);
*screen_y = (int)(-y + WINDOW_HEIGHT / 2.0f);
}
// Helper: Rotate vector by inv_quat and translate by -chain_pos (scalar for simplicity)
static void quat_rotate_inv_translate(float wx, float wy, float wz, // world pos
float cx, float cy, float cz, // chain pos
float qx, float qy, float qz, float qw, // chain quat
float *mx, float *my, float *mz) { // out model pos
// Translate: world - chain
float tx = wx - cx, ty = wy - cy, tz = wz - cz;
// Quat conj: (qw, -qx, -qy, -qz)
float cqw = qw, cqx = -qx, cqy = -qy, cqz = -qz;
// t = conj * (0, tx,ty,tz)
float tw = cqx*tx + cqy*ty + cqz*tz;
float t_x = cqw*tx + cqy*tz - cqz*ty;
float t_y = cqw*ty + cqz*tx - cqx*tz;
float t_z = cqw*tz + cqx*ty - cqy*tx;
// model = t * quat (vec part)
*mx = qw*t_x + qy*t_z - qz*t_y + tw*qx;
*my = qw*t_y + qz*t_x - qx*t_z + tw*qy;
*mz = qw*t_z + qx*t_y - qy*t_x + tw*qz;
}
extern int
main(int argc, char *argv[]) {
if (SDL_Init(SDL_INIT_VIDEO) < 0) {
printf("SDL_Init failed: %s\n", SDL_GetError());
return 1;
}
SDL_Window *window = SDL_CreateWindow("Chain Simulation", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, WINDOW_WIDTH, WINDOW_HEIGHT, 0);
SDL_Renderer *renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);
if (!window || !renderer) {
printf("SDL_CreateWindow/Renderer failed: %s\n", SDL_GetError());
SDL_Quit();
return 1;
}
struct chn_sim sim;
chn_sim_init(&sim);
struct chn_cfg cfg = {
.damping = 0.5f,
.stiffness = 0.9f,
.friction = 0.5f,
.linear_inertia = 0.9f, // Tune 0.5f for more tangential lag
.angular_inertia = 0.9f,
.centrifugal_inertia = 0.9f, // Tune 0.5f for more outward bow
.bend_stiffness = 0.8f,
};
float rest_pos[CHAIN_LENGTH * 4];
for (int i = 0; i < CHAIN_LENGTH; i++) {
rest_pos[i*4 + 0] = 0.0f;
rest_pos[i*4 + 1] = -i * PARTICLE_SPACING;
rest_pos[i*4 + 2] = 0.0f;
rest_pos[i*4 + 3] = i == 0 ? 0.0f : 1.0f;
}
int chn = chn_sim_add(&sim, &cfg, rest_pos, CHAIN_LENGTH);
chn_sim_grav(&sim, chn, (float[]){0.0f, -1.0f, 0.0f}); // Reduced for spin test
chn_sim_tel(&sim, chn, (float[]){50.0f, 0.0f, 0.0f}, (float[]){0.0f, 0.0f, 0.0f, 1.0f}); // Start at circle edge
// Fixed world spheres (triangle outside circle)
float world_spheres[12] = { // 3 spheres * 4 floats (x,y,z,r)
60.0f, 0.0f, 0.0f, 10.0f, // Sphere 0: right
-30.0f, 52.0f, 0.0f, 10.0f, // Sphere 1: top-left
-30.0f,-52.0f, 0.0f, 10.0f // Sphere 2: bottom-left
};
// Circular motion params
float time = 0.0f;
float circle_radius = 50.0f;
float angular_speed = 2.0f * M_PI / 5.0f; // Full circle every 5s
float dt = 0.016f;
int running = 1;
SDL_Event event;
// Debug: Check gravity and masses
int p_base = chn * PTC_PER_CHN;
while (running) {
while (SDL_PollEvent(&event)) {
if (event.type == SDL_QUIT) {
running = 0;
}
}
time += dt;
// Circular anchor motion
float anchor_x = circle_radius * cosf(angular_speed * time);
float anchor_y = circle_radius * sinf(angular_speed * time);
chn_sim_mov(&sim, chn, (float[]){anchor_x, anchor_y, 0.0f}, (float[]){0.0f, 0.0f, 0.0f, 1.0f});
// Transform fixed world spheres to current model space
float model_spheres[12];
for (int s = 0; s < 3; ++s) {
float wx = world_spheres[s*4 + 0], wy = world_spheres[s*4 + 1], wz = world_spheres[s*4 + 2];
float r = world_spheres[s*4 + 3];
float cx = sim.chn_pos_x[chn], cy = sim.chn_pos_y[chn], cz = sim.chn_pos_z[chn];
float qx = sim.chn_quat_x[chn], qy = sim.chn_quat_y[chn], qz = sim.chn_quat_z[chn], qw = sim.chn_quat_w[chn];
float mx, my, mz;
quat_rotate_inv_translate(wx, wy, wz, cx, cy, cz, qx, qy, qz, qw, &mx, &my, &mz);
model_spheres[s*4 + 0] = mx;
model_spheres[s*4 + 1] = my;
model_spheres[s*4 + 2] = mz;
model_spheres[s*4 + 3] = r;
}
chn_sim_set_sph(&sim, chn, model_spheres, 3);
chn_sim_run(&sim, dt);
SDL_SetRenderDrawColor(renderer, 0, 0, 0, 255);
SDL_RenderClear(renderer);
SDL_SetRenderDrawColor(renderer, 0, 255, 0, 255); // Green for chain & spheres
float root_x = sim.chn_pos_x[chn]; // Current root world x/y
float root_y = sim.chn_pos_y[chn];
// Draw chain segments
for (int i = 0; i < CHAIN_LENGTH - 1; i++) {
int idx0 = (i % 2 == 0 ? i / 2 : 8 + (i - 1) / 2);
int idx1 = ((i + 1) % 2 == 0 ? (i + 1) / 2 : 8 + i / 2);
float model_x0 = sim.ptc_pos_x[p_base + idx0], model_y0 = sim.ptc_pos_y[p_base + idx0];
float model_x1 = sim.ptc_pos_x[p_base + idx1], model_y1 = sim.ptc_pos_y[p_base + idx1];
float world_x0 = model_x0 + root_x;
float world_y0 = model_y0 + root_y;
float world_x1 = model_x1 + root_x;
float world_y1 = model_y1 + root_y;
int sx0, sy0, sx1, sy1;
project(world_x0, world_y0, 0, &sx0, &sy0);
project(world_x1, world_y1, 0, &sx1, &sy1);
SDL_RenderDrawLine(renderer, sx0, sy0, sx1, sy1);
}
// Draw spheres (approx circle outline with 32 points)
int p_base = chn * PTC_PER_CHN;
int s_base = chn * SPH_PER_CHN;
for (int s = 0; s < 3; ++s) { // Only draw active spheres
float sph_model_x = sim.sph_x[s_base + s];
float sph_model_y = sim.sph_y[s_base + s];
float sph_r = sim.sph_r[s_base + s];
if (sph_r <= 0.0f) continue;
float sph_world_x = sph_model_x + root_x;
float sph_world_y = sph_model_y + root_y;
int sph_sx, sph_sy;
project(sph_world_x, sph_world_y, 0, &sph_sx, &sph_sy);
// Parametric circle: 32 points
const int num_points = 32;
for (int pt = 0; pt < num_points; ++pt) {
float angle0 = 2.0f * M_PI * pt / num_points;
float angle1 = 2.0f * M_PI * (pt + 1) / num_points;
float dx0 = sph_r * cosf(angle0), dy0 = sph_r * sinf(angle0);
float dx1 = sph_r * cosf(angle1), dy1 = sph_r * sinf(angle1);
int px0, py0, px1, py1;
project(sph_world_x + dx0, sph_world_y + dy0, 0, &px0, &py0);
project(sph_world_x + dx1, sph_world_y + dy1, 0, &px1, &py1);
SDL_RenderDrawLine(renderer, px0, py0, px1, py1);
}
}
SDL_RenderPresent(renderer);
SDL_Delay(16);
}
SDL_DestroyRenderer(renderer);
SDL_DestroyWindow(window);
SDL_Quit();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment