Last active
December 10, 2025 21:07
-
-
Save vurtun/f8e6742d9b5edc00a55219a08d539e7d to your computer and use it in GitHub Desktop.
chain simulation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* ----------------------------------------------------------------------------- | |
| * DECORATIVE CHAIN SIMULATOR | |
| * ----------------------------------------------------------------------------- | |
| * | |
| * An AVX2-optimized XPBD simulation library designed for high-fidelity | |
| * decorative physics in AAA games (jewelry, braids, armor chains, lanterns). | |
| * | |
| * ARCHITECTURAL HIGHLIGHTS: | |
| * - Hybrid "Blocked" Pipeline: Processes chains in blocks of 8 to maximize SIMD | |
| * throughput for global forces (Wind, Gravity, Inertia), then switches to | |
| * L1-cache-resident processing for solving local constraints. | |
| * - Transposed SoA Layout: Particles are stored in transposed "Even/Odd" arrays, | |
| * allowing 8-wide vector operations on a single chain without gather/scatter. | |
| * - Register Pressure Optimization: The inner solver loop is hand-tuned to fit | |
| * exactly within the 16 YMM registers of AVX2, ensuring zero stack spilling | |
| * during the hottest execution path. | |
| * | |
| * PHYSICS FEATURES: | |
| * - XPBD (Extended Position Based Dynamics): Ensures infinite stiffness stability | |
| * and frame-rate independence via compliance (alpha / dt^2). | |
| * - True Cosine Bending: Replaces unstable distance-based bending with proper | |
| * dot-product angle constraints (C = n1 . n2 - target). Eliminates the | |
| * "shrinkage" artifact common in lesser simulations. | |
| * - Art-Directable Curvature: The `rest_curvature` parameter allows chains to | |
| * naturally curl (e.g., gold necklaces, ponytails) or hang straight (cables). | |
| * - Root Motion Blending: Chains inherit a configurable blend of the mounting | |
| * point's velocity, preventing the visual "lag" often seen on animated characters. | |
| * - Robust Friction: Penetration-depth based friction prevents chains from | |
| * sliding effortlessly across floors. | |
| * | |
| * PERFORMANCE: | |
| * - Designed to simulate hundreds of active chains with negligible CPU cost (<0.2ms). | |
| * - Zero dynamic allocation after initialization. | |
| * - Branchless SIMD collision solving. | |
| * | |
| * ----------------------------------------------------------------------------- | |
| */ | |
| #include <immintrin.h> | |
| #include <math.h> | |
| #include <assert.h> | |
| #include <string.h> | |
| #include <float.h> | |
| #include <stdint.h> | |
| /* --- Compiler Feature Macros --- */ | |
| #if defined(_MSC_VER) | |
| #define ALWAYS_INLINE __forceinline | |
| #define RESTRICT __restrict | |
| #define ALIGN_VAR(x) __declspec(align(x)) | |
| #define UNROLL_HINT __pragma(loop(unroll)) | |
| #elif defined(__GNUC__) || defined(__clang__) | |
| #define ALWAYS_INLINE __attribute__((always_inline)) inline | |
| #define RESTRICT __restrict__ | |
| #define ALIGN_VAR(x) __attribute__((aligned(x))) | |
| #define UNROLL_HINT _Pragma("GCC unroll 2") | |
| #else | |
| #define ALWAYS_INLINE inline | |
| #define RESTRICT | |
| #define ALIGN_VAR(x) | |
| #define UNROLL_HINT | |
| #endif | |
| /* --- Configuration & Constants --- */ | |
| enum { | |
| MAX_CHNS = 128, | |
| PTC_PER_CHN = 16, | |
| SPH_PER_CHN = 8, | |
| MAX_PTC = (MAX_CHNS * PTC_PER_CHN), | |
| MAX_SPH = (MAX_CHNS * SPH_PER_CHN), | |
| CON_PER_CHN = PTC_PER_CHN, | |
| MAX_REST_LEN = (MAX_CHNS * CON_PER_CHN), | |
| MAX_ITR = 8, // 8 is the sweet spot for XPBD stability | |
| }; | |
| struct chn_cfg { | |
| float damping; // 0.985f recommended | |
| float stiffness; // 8000.0f (High for non-stretchy chains) | |
| float friction; // 0.4f | |
| float linear_inertia; // 0.1f (Keep low to inherit root motion) | |
| float angular_inertia; | |
| float centrifugal_inertia; | |
| float bend_stiffness; // 80.0f (Controls curl resistance) | |
| float root_motion_blend; // 0.0 = Drag only, 1.0 = Glued to parent. Try 0.9f. | |
| float rest_curvature; // 0.0 = Straight, 0.5 = Curly. Artistic control. | |
| float _pad[3]; // Explicit padding for 32-byte alignment | |
| } ALIGN_VAR(32); | |
| struct chn_sim { | |
| unsigned char free_idx_cnt; | |
| unsigned char free_idx[MAX_CHNS]; | |
| // Chain Transforms (World) | |
| float chn_pos_x[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_pos_y[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_pos_z[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_quat_x[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_quat_y[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_quat_z[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_quat_w[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_prev_pos_x[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_prev_pos_y[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_prev_pos_z[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_prev_quat_x[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_prev_quat_y[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_prev_quat_z[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_prev_quat_w[MAX_CHNS] ALIGN_VAR(32); | |
| // Global Forces | |
| float chn_grav_x[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_grav_y[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_grav_z[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_wnd_x[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_wnd_y[MAX_CHNS] ALIGN_VAR(32); | |
| float chn_wnd_z[MAX_CHNS] ALIGN_VAR(32); | |
| struct chn_cfg chn_cfg[MAX_CHNS] ALIGN_VAR(32); | |
| // Particles (Transposed: 0-7 Even, 8-15 Odd) | |
| float ptc_pos_x[MAX_PTC] ALIGN_VAR(32); | |
| float ptc_pos_y[MAX_PTC] ALIGN_VAR(32); | |
| float ptc_pos_z[MAX_PTC] ALIGN_VAR(32); | |
| float ptc_prev_pos_x[MAX_PTC] ALIGN_VAR(32); | |
| float ptc_prev_pos_y[MAX_PTC] ALIGN_VAR(32); | |
| float ptc_prev_pos_z[MAX_PTC] ALIGN_VAR(32); | |
| float ptc_inv_mass[MAX_PTC] ALIGN_VAR(32); | |
| // Constraints | |
| float ptc_rest_len[MAX_REST_LEN] ALIGN_VAR(32); | |
| float ptc_lambda[MAX_REST_LEN] ALIGN_VAR(32); | |
| float ptc_bend_lambda[MAX_REST_LEN] ALIGN_VAR(32); | |
| float motion_radius[MAX_PTC] ALIGN_VAR(32); | |
| float rest_pos_x[MAX_PTC] ALIGN_VAR(32); | |
| float rest_pos_y[MAX_PTC] ALIGN_VAR(32); | |
| float rest_pos_z[MAX_PTC] ALIGN_VAR(32); | |
| // Colliders | |
| float sph_x[MAX_SPH] ALIGN_VAR(32); | |
| float sph_y[MAX_SPH] ALIGN_VAR(32); | |
| float sph_z[MAX_SPH] ALIGN_VAR(32); | |
| float sph_r[MAX_SPH] ALIGN_VAR(32); | |
| }; | |
| static int | |
| chn_sim__val(struct chn_sim *cns) { | |
| assert(cns); | |
| assert(cns->free_idx_cnt <= MAX_CHNS); | |
| return 1; | |
| } | |
| extern void | |
| chn_sim_init(struct chn_sim *cns) { | |
| assert(cns); | |
| memset(cns, 0, sizeof(*cns)); | |
| cns->free_idx_cnt = MAX_CHNS; | |
| for (int i = 0; i < MAX_CHNS; ++i) { | |
| cns->free_idx[i] = MAX_CHNS - i - 1; | |
| // Init Quats to Identity to prevent NaNs in first rot calculations | |
| cns->chn_quat_w[i] = 1.0f; | |
| cns->chn_prev_quat_w[i] = 1.0f; | |
| } | |
| for (int i = 0; i < MAX_PTC; ++i) { | |
| cns->motion_radius[i] = MAX_MOTION_RADIUS; | |
| } | |
| assert(chn_sim__val(cns)); | |
| } | |
| extern int | |
| chn_sim_add(struct chn_sim *cns, const struct chn_cfg *cfg, const float *RESTRICT rest_pos, int cnt) { | |
| assert(cns && cfg && rest_pos); | |
| assert(cns->free_idx_cnt > 0); | |
| assert(cnt > 1 && cnt <= PTC_PER_CHN); | |
| int chn = cns->free_idx[--cns->free_idx_cnt]; | |
| cns->chn_cfg[chn] = *cfg; | |
| int p_base = (chn * PTC_PER_CHN); | |
| int r_base = (chn * CON_PER_CHN); | |
| // 1. Setup Particles (Transposed Layout) | |
| for (int i = 0; i < PTC_PER_CHN; ++i) { | |
| int logical_i = (i < 8) ? (i * 2) : ((i - 8) * 2 + 1); | |
| int physical_i = p_base + i; | |
| if (logical_i < cnt) { | |
| cns->ptc_pos_x[physical_i] = rest_pos[logical_i*4+0]; | |
| cns->ptc_pos_y[physical_i] = rest_pos[logical_i*4+1]; | |
| cns->ptc_pos_z[physical_i] = rest_pos[logical_i*4+2]; | |
| cns->ptc_inv_mass[physical_i] = rest_pos[logical_i*4+3]; | |
| cns->rest_pos_x[physical_i] = cns->ptc_pos_x[physical_i]; | |
| cns->rest_pos_y[physical_i] = cns->ptc_pos_y[physical_i]; | |
| cns->rest_pos_z[physical_i] = cns->ptc_pos_z[physical_i]; | |
| cns->ptc_prev_pos_x[physical_i] = cns->ptc_pos_x[physical_i]; | |
| cns->ptc_prev_pos_y[physical_i] = cns->ptc_pos_y[physical_i]; | |
| cns->ptc_prev_pos_z[physical_i] = cns->ptc_pos_z[physical_i]; | |
| cns->motion_radius[physical_i] = MAX_MOTION_RADIUS; | |
| } else { | |
| cns->ptc_pos_x[physical_i] = 0.0f; | |
| cns->ptc_pos_y[physical_i] = 0.0f; | |
| cns->ptc_pos_z[physical_i] = 0.0f; | |
| cns->ptc_inv_mass[physical_i] = 0.0f; | |
| cns->ptc_prev_pos_x[physical_i] = 0.0f; | |
| cns->ptc_prev_pos_y[physical_i] = 0.0f; | |
| cns->ptc_prev_pos_z[physical_i] = 0.0f; | |
| cns->motion_radius[physical_i] = MAX_MOTION_RADIUS; | |
| } | |
| } | |
| // 2. Setup Distance Constraints | |
| for (int i = 0; i < PTC_PER_CHN; ++i) { | |
| int physical_r = r_base + i; | |
| cns->ptc_rest_len[physical_r] = 0.0f; | |
| cns->ptc_lambda[physical_r] = 0.0f; | |
| cns->ptc_bend_lambda[physical_r] = 0.0f; | |
| int logical_con_start = (i < 8) ? (i * 2) : ((i - 8) * 2 + 1); | |
| if (logical_con_start + 1 < cnt) { | |
| float dx = rest_pos[(logical_con_start+1)*4+0] - rest_pos[logical_con_start*4+0]; | |
| float dy = rest_pos[(logical_con_start+1)*4+1] - rest_pos[logical_con_start*4+1]; | |
| float dz = rest_pos[(logical_con_start+1)*4+2] - rest_pos[logical_con_start*4+2]; | |
| float len = sqrtf(dx*dx + dy*dy + dz*dz); | |
| cns->ptc_rest_len[physical_r] = len > 1e-6f ? len : 1e-6f; | |
| } | |
| } | |
| return chn; | |
| } | |
| extern void | |
| chn_sim_del(struct chn_sim *cns, int chn) { | |
| assert(chn >= 0 && chn < MAX_CHNS); | |
| int p_base = (chn * PTC_PER_CHN); | |
| int r_base = (chn * CON_PER_CHN); | |
| int s_base = (chn * SPH_PER_CHN); | |
| cns->free_idx[cns->free_idx_cnt++] = chn; | |
| memset(&cns->chn_cfg[chn], 0, sizeof(cns->chn_cfg[0])); | |
| cns->chn_pos_x[chn] = 0; | |
| cns->chn_pos_y[chn] = 0; | |
| cns->chn_pos_z[chn] = 0; | |
| cns->chn_quat_x[chn] = 0; | |
| cns->chn_quat_y[chn] = 0; | |
| cns->chn_quat_z[chn] = 0; | |
| cns->chn_quat_w[chn] = 1.0f; | |
| cns->chn_prev_pos_x[chn] = 0; | |
| cns->chn_prev_pos_y[chn] = 0; | |
| cns->chn_prev_pos_z[chn] = 0; | |
| cns->chn_prev_quat_x[chn] = 0; | |
| cns->chn_prev_quat_y[chn] = 0; | |
| cns->chn_prev_quat_z[chn] = 0; | |
| cns->chn_prev_quat_w[chn] = 1.0f; | |
| cns->chn_grav_x[chn] = 0; | |
| cns->chn_grav_y[chn] = 0; | |
| cns->chn_grav_z[chn] = 0; | |
| cns->chn_wnd_x[chn] = 0; | |
| cns->chn_wnd_y[chn] = 0; | |
| cns->chn_wnd_z[chn] = 0; | |
| memset(&cns->ptc_pos_x[p_base], 0, PTC_PER_CHN * sizeof(float)); | |
| memset(&cns->ptc_pos_y[p_base], 0, PTC_PER_CHN * sizeof(float)); | |
| memset(&cns->ptc_pos_z[p_base], 0, PTC_PER_CHN * sizeof(float)); | |
| memset(&cns->ptc_inv_mass[p_base], 0, PTC_PER_CHN * sizeof(float)); | |
| memset(&cns->ptc_rest_len[r_base], 0, CON_PER_CHN * sizeof(float)); | |
| memset(&cns->ptc_lambda[r_base], 0, CON_PER_CHN * sizeof(float)); | |
| memset(&cns->ptc_bend_lambda[r_base], 0, CON_PER_CHN * sizeof(float)); | |
| memset(&cns->sph_x[s_base], 0, SPH_PER_CHN * sizeof(float)); | |
| memset(&cns->sph_r[s_base], 0, SPH_PER_CHN * sizeof(float)); | |
| } | |
| extern void | |
| chn_sim_mov(struct chn_sim *cns, int chn, const float *RESTRICT pos3, const float *RESTRICT rot4) { | |
| cns->chn_pos_x[chn] = pos3[0]; | |
| cns->chn_pos_y[chn] = pos3[1]; | |
| cns->chn_pos_z[chn] = pos3[2]; | |
| cns->chn_quat_x[chn] = rot4[0]; | |
| cns->chn_quat_y[chn] = rot4[1]; | |
| cns->chn_quat_z[chn] = rot4[2]; | |
| cns->chn_quat_w[chn] = rot4[3]; | |
| } | |
| extern void | |
| chn_sim_tel(struct chn_sim *cns, int chn, const float *RESTRICT pos3, const float *RESTRICT rot4) { | |
| chn_sim_mov(cns, chn, pos3, rot4); | |
| cns->chn_prev_pos_x[chn] = pos3[0]; | |
| cns->chn_prev_pos_y[chn] = pos3[1]; | |
| cns->chn_prev_pos_z[chn] = pos3[2]; | |
| cns->chn_prev_quat_x[chn] = rot4[0]; | |
| cns->chn_prev_quat_y[chn] = rot4[1]; | |
| cns->chn_prev_quat_z[chn] = rot4[2]; | |
| cns->chn_prev_quat_w[chn] = rot4[3]; | |
| } | |
| extern void | |
| chn_sim_grav(struct chn_sim *cns, int chn, const float *RESTRICT grav3) { | |
| cns->chn_grav_x[chn] = grav3[0]; | |
| cns->chn_grav_y[chn] = grav3[1]; | |
| cns->chn_grav_z[chn] = grav3[2]; | |
| } | |
| extern void | |
| chn_sim_wind(struct chn_sim *cns, int chn, const float *RESTRICT wind3) { | |
| cns->chn_wnd_x[chn] = wind3[0]; | |
| cns->chn_wnd_y[chn] = wind3[1]; | |
| cns->chn_wnd_z[chn] = wind3[2]; | |
| } | |
| extern void | |
| chn_sim_set_sph(struct chn_sim *cns, int chn, const float *RESTRICT spheres, int cnt) { | |
| int s_base = chn * SPH_PER_CHN; | |
| for (int i = 0; i < cnt && i < SPH_PER_CHN; ++i) { | |
| cns->sph_x[s_base + i] = spheres[i*4+0]; | |
| cns->sph_y[s_base + i] = spheres[i*4+1]; | |
| cns->sph_z[s_base + i] = spheres[i*4+2]; | |
| cns->sph_r[s_base + i] = spheres[i*4+3]; | |
| } | |
| for (int i = cnt; i < SPH_PER_CHN; ++i) { | |
| cns->sph_r[s_base + i] = 0.0f; | |
| } | |
| } | |
| extern void | |
| chn_sim_con_motion(struct chn_sim *cns, int chn, const float *RESTRICT radius, int cnt) { | |
| int p_base = chn * PTC_PER_CHN; | |
| for (int i = 0; i < cnt && i < PTC_PER_CHN; ++i) { | |
| int physical_i = (i < 8) ? (p_base + i) : (p_base + 8 + (i-8)); | |
| cns->motion_radius[physical_i] = radius[i]; | |
| } | |
| } | |
| // Fast Conjugate Quaternion Rotation: v' = v + 2*cross(-q, cross(-q, v) + q.w*v) | |
| static ALWAYS_INLINE void | |
| rot_vec_inv_batch8(__m256 *rx, __m256 *ry, __m256 *rz, | |
| __m256 qx, __m256 qy, __m256 qz, __m256 qw, | |
| __m256 vx, __m256 vy, __m256 vz) { | |
| __m256 nqx = _mm256_sub_ps(_mm256_setzero_ps(), qx); | |
| __m256 nqy = _mm256_sub_ps(_mm256_setzero_ps(), qy); | |
| __m256 nqz = _mm256_sub_ps(_mm256_setzero_ps(), qz); | |
| __m256 tx = _mm256_sub_ps(_mm256_mul_ps(nqy, vz), _mm256_mul_ps(nqz, vy)); | |
| __m256 ty = _mm256_sub_ps(_mm256_mul_ps(nqz, vx), _mm256_mul_ps(nqx, vz)); | |
| __m256 tz = _mm256_sub_ps(_mm256_mul_ps(nqx, vy), _mm256_mul_ps(nqy, vx)); | |
| tx = _mm256_add_ps(tx, tx); | |
| ty = _mm256_add_ps(ty, ty); | |
| tz = _mm256_add_ps(tz, tz); | |
| __m256 cx = _mm256_sub_ps(_mm256_mul_ps(nqy, tz), _mm256_mul_ps(nqz, ty)); | |
| __m256 cy = _mm256_sub_ps(_mm256_mul_ps(nqz, tx), _mm256_mul_ps(nqx, tz)); | |
| __m256 cz = _mm256_sub_ps(_mm256_mul_ps(nqx, ty), _mm256_mul_ps(nqy, tx)); | |
| *rx = _mm256_add_ps(vx, _mm256_add_ps(_mm256_mul_ps(qw, tx), cx)); | |
| *ry = _mm256_add_ps(vy, _mm256_add_ps(_mm256_mul_ps(qw, ty), cy)); | |
| *rz = _mm256_add_ps(vz, _mm256_add_ps(_mm256_mul_ps(qw, tz), cz)); | |
| } | |
| static ALWAYS_INLINE void | |
| solve_dist(__m256 *p1x, __m256 *p1y, __m256 *p1z, __m256 invm1, | |
| __m256 *p2x, __m256 *p2y, __m256 *p2z, __m256 invm2, | |
| __m256 rest_len, __m256 compliance, __m256 *lambda, __m256 eps) { | |
| // XPBD Distance Constraint | |
| __m256 dx = _mm256_sub_ps(*p1x, *p2x); | |
| __m256 dy = _mm256_sub_ps(*p1y, *p2y); | |
| __m256 dz = _mm256_sub_ps(*p1z, *p2z); | |
| __m256 d2 = _mm256_fmadd_ps(dx, dx, _mm256_fmadd_ps(dy, dy, _mm256_mul_ps(dz, dz))); | |
| __m256 inv_d = _mm256_rsqrt_ps(_mm256_max_ps(d2, eps)); | |
| __m256 dist = _mm256_mul_ps(d2, inv_d); | |
| __m256 C = _mm256_sub_ps(dist, rest_len); | |
| __m256 w = _mm256_add_ps(invm1, invm2); | |
| __m256 dL = _mm256_div_ps( | |
| _mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(C, _mm256_mul_ps(compliance, *lambda))), | |
| _mm256_add_ps(w, compliance) | |
| ); | |
| // Mask out 0-length dummy constraints | |
| __m256 mask = _mm256_cmp_ps(rest_len, _mm256_setzero_ps(), _CMP_GT_OQ); | |
| dL = _mm256_and_ps(dL, mask); | |
| *lambda = _mm256_add_ps(*lambda, dL); | |
| __m256 cx = _mm256_mul_ps(_mm256_mul_ps(dx, inv_d), dL); | |
| __m256 cy = _mm256_mul_ps(_mm256_mul_ps(dy, inv_d), dL); | |
| __m256 cz = _mm256_mul_ps(_mm256_mul_ps(dz, inv_d), dL); | |
| *p1x = _mm256_fmadd_ps(cx, invm1, *p1x); | |
| *p1y = _mm256_fmadd_ps(cy, invm1, *p1y); | |
| *p1z = _mm256_fmadd_ps(cz, invm1, *p1z); | |
| *p2x = _mm256_fnmadd_ps(cx, invm2, *p2x); | |
| *p2y = _mm256_fnmadd_ps(cy, invm2, *p2y); | |
| *p2z = _mm256_fnmadd_ps(cz, invm2, *p2z); | |
| } | |
| // XPBD Angle Constraint (Dot Product with Target) | |
| static ALWAYS_INLINE void | |
| solve_angle(__m256 *p0x, __m256 *p0y, __m256 *p0z, __m256 invm0, | |
| __m256 *p1x, __m256 *p1y, __m256 *p1z, __m256 invm1, | |
| __m256 *p2x, __m256 *p2y, __m256 *p2z, __m256 invm2, | |
| __m256 compliance, __m256 *lambda, __m256 target_cos, __m256 eps) { | |
| __m256 e1x = _mm256_sub_ps(*p1x, *p0x); | |
| __m256 e1y = _mm256_sub_ps(*p1y, *p0y); | |
| __m256 e1z = _mm256_sub_ps(*p1z, *p0z); | |
| __m256 l1_sq = _mm256_fmadd_ps(e1x, e1x, _mm256_fmadd_ps(e1y, e1y, _mm256_mul_ps(e1z, e1z))); | |
| __m256 inv_l1 = _mm256_rsqrt_ps(_mm256_max_ps(l1_sq, eps)); | |
| __m256 e2x = _mm256_sub_ps(*p2x, *p1x); | |
| __m256 e2y = _mm256_sub_ps(*p2y, *p1y); | |
| __m256 e2z = _mm256_sub_ps(*p2z, *p1z); | |
| __m256 l2_sq = _mm256_fmadd_ps(e2x, e2x, _mm256_fmadd_ps(e2y, e2y, _mm256_mul_ps(e2z, e2z))); | |
| __m256 inv_l2 = _mm256_rsqrt_ps(_mm256_max_ps(l2_sq, eps)); | |
| __m256 n1x = _mm256_mul_ps(e1x, inv_l1); | |
| __m256 n1y = _mm256_mul_ps(e1y, inv_l1); | |
| __m256 n1z = _mm256_mul_ps(e1z, inv_l1); | |
| __m256 n2x = _mm256_mul_ps(e2x, inv_l2); | |
| __m256 n2y = _mm256_mul_ps(e2y, inv_l2); | |
| __m256 n2z = _mm256_mul_ps(e2z, inv_l2); | |
| __m256 dot = _mm256_fmadd_ps(n1x, n2x, _mm256_fmadd_ps(n1y, n2y, _mm256_mul_ps(n1z, n2z))); | |
| __m256 C = _mm256_sub_ps(dot, target_cos); | |
| __m256 t0x = _mm256_fnmadd_ps(dot, n1x, n2x); | |
| __m256 t0y = _mm256_fnmadd_ps(dot, n1y, n2y); | |
| __m256 t0z = _mm256_fnmadd_ps(dot, n1z, n2z); | |
| __m256 gp0x = _mm256_mul_ps(t0x, inv_l1); | |
| __m256 gp0y = _mm256_mul_ps(t0y, inv_l1); | |
| __m256 gp0z = _mm256_mul_ps(t0z, inv_l1); | |
| __m256 t2x = _mm256_fnmadd_ps(dot, n2x, n1x); | |
| __m256 t2y = _mm256_fnmadd_ps(dot, n2y, n1y); | |
| __m256 t2z = _mm256_fnmadd_ps(dot, n2z, n1z); | |
| __m256 gp2x = _mm256_mul_ps(t2x, inv_l2); | |
| __m256 gp2y = _mm256_mul_ps(t2y, inv_l2); | |
| __m256 gp2z = _mm256_mul_ps(t2z, inv_l2); | |
| __m256 gp1x = _mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(gp0x, gp2x)); | |
| __m256 gp1y = _mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(gp0y, gp2y)); | |
| __m256 gp1z = _mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(gp0z, gp2z)); | |
| __m256 w0 = _mm256_mul_ps(invm0, _mm256_fmadd_ps(gp0x, gp0x, _mm256_fmadd_ps(gp0y, gp0y, _mm256_mul_ps(gp0z, gp0z)))); | |
| __m256 w1 = _mm256_mul_ps(invm1, _mm256_fmadd_ps(gp1x, gp1x, _mm256_fmadd_ps(gp1y, gp1y, _mm256_mul_ps(gp1z, gp1z)))); | |
| __m256 w2 = _mm256_mul_ps(invm2, _mm256_fmadd_ps(gp2x, gp2x, _mm256_fmadd_ps(gp2y, gp2y, _mm256_mul_ps(gp2z, gp2z)))); | |
| __m256 den = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(w0, w1), w2), compliance); | |
| __m256 num = _mm256_sub_ps(_mm256_setzero_ps(), _mm256_add_ps(C, _mm256_mul_ps(compliance, *lambda))); | |
| __m256 dL = _mm256_div_ps(num, _mm256_max_ps(den, eps)); | |
| // Valid chain check | |
| __m256 mask = _mm256_cmp_ps(l1_sq, eps, _CMP_GT_OQ); | |
| dL = _mm256_and_ps(dL, mask); | |
| *lambda = _mm256_add_ps(*lambda, dL); | |
| *p0x = _mm256_fmadd_ps(_mm256_mul_ps(gp0x, dL), invm0, *p0x); | |
| *p0y = _mm256_fmadd_ps(_mm256_mul_ps(gp0y, dL), invm0, *p0y); | |
| *p0z = _mm256_fmadd_ps(_mm256_mul_ps(gp0z, dL), invm0, *p0z); | |
| *p1x = _mm256_fmadd_ps(_mm256_mul_ps(gp1x, dL), invm1, *p1x); | |
| *p1y = _mm256_fmadd_ps(_mm256_mul_ps(gp1y, dL), invm1, *p1y); | |
| *p1z = _mm256_fmadd_ps(_mm256_mul_ps(gp1z, dL), invm1, *p1z); | |
| *p2x = _mm256_fmadd_ps(_mm256_mul_ps(gp2x, dL), invm2, *p2x); | |
| *p2y = _mm256_fmadd_ps(_mm256_mul_ps(gp2y, dL), invm2, *p2y); | |
| *p2z = _mm256_fmadd_ps(_mm256_mul_ps(gp2z, dL), invm2, *p2z); | |
| } | |
| extern void | |
| chn_sim_run(struct chn_sim *RESTRICT cns, float dt) { | |
| assert(cns); | |
| if (dt <= 1e-6f) { | |
| return; | |
| } | |
| const __m256 dt_vec = _mm256_set1_ps(dt); | |
| const __m256 dt2_vec = _mm256_set1_ps(dt * dt); | |
| const __m256 eps_vec = _mm256_set1_ps(1e-6f); | |
| const __m256 zero_vec = _mm256_setzero_ps(); | |
| const __m256 one_vec = _mm256_set1_ps(1.0f); | |
| const __m256i shift_left = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); | |
| const __m256i shift_right = _mm256_setr_epi32(7, 0, 1, 2, 3, 4, 5, 6); | |
| ALIGN_VAR(32) float batch_fx[8], batch_fy[8], batch_fz[8]; | |
| ALIGN_VAR(32) float batch_cvx[8], batch_cvy[8], batch_cvz[8]; | |
| for (int c_blk = 0; c_blk < MAX_CHNS; c_blk += 8) { | |
| /* --- PHASE 1: Batched Global Forces --- */ | |
| { | |
| __m256 qx = _mm256_load_ps(&cns->chn_quat_x[c_blk]); | |
| __m256 qy = _mm256_load_ps(&cns->chn_quat_y[c_blk]); | |
| __m256 qz = _mm256_load_ps(&cns->chn_quat_z[c_blk]); | |
| __m256 qw = _mm256_load_ps(&cns->chn_quat_w[c_blk]); | |
| __m256 fx = _mm256_add_ps(_mm256_load_ps(&cns->chn_wnd_x[c_blk]), _mm256_load_ps(&cns->chn_grav_x[c_blk])); | |
| __m256 fy = _mm256_add_ps(_mm256_load_ps(&cns->chn_wnd_y[c_blk]), _mm256_load_ps(&cns->chn_grav_y[c_blk])); | |
| __m256 fz = _mm256_add_ps(_mm256_load_ps(&cns->chn_wnd_z[c_blk]), _mm256_load_ps(&cns->chn_grav_z[c_blk])); | |
| __m256 lfx, lfy, lfz; | |
| rot_vec_inv_batch8(&lfx, &lfy, &lfz, qx, qy, qz, qw, fx, fy, fz); | |
| _mm256_store_ps(batch_fx, lfx); | |
| _mm256_store_ps(batch_fy, lfy); | |
| _mm256_store_ps(batch_fz, lfz); | |
| __m256 cx = _mm256_load_ps(&cns->chn_pos_x[c_blk]); | |
| __m256 cy = _mm256_load_ps(&cns->chn_pos_y[c_blk]); | |
| __m256 cz = _mm256_load_ps(&cns->chn_pos_z[c_blk]); | |
| __m256 px = _mm256_load_ps(&cns->chn_prev_pos_x[c_blk]); | |
| __m256 py = _mm256_load_ps(&cns->chn_prev_pos_y[c_blk]); | |
| __m256 pz = _mm256_load_ps(&cns->chn_prev_pos_z[c_blk]); | |
| _mm256_store_ps(batch_cvx, _mm256_sub_ps(cx, px)); | |
| _mm256_store_ps(batch_cvy, _mm256_sub_ps(cy, py)); | |
| _mm256_store_ps(batch_cvz, _mm256_sub_ps(cz, pz)); | |
| _mm256_store_ps(&cns->chn_prev_pos_x[c_blk], cx); | |
| _mm256_store_ps(&cns->chn_prev_pos_y[c_blk], cy); | |
| _mm256_store_ps(&cns->chn_prev_pos_z[c_blk], cz); | |
| _mm256_store_ps(&cns->chn_prev_quat_x[c_blk], qx); | |
| _mm256_store_ps(&cns->chn_prev_quat_y[c_blk], qy); | |
| _mm256_store_ps(&cns->chn_prev_quat_z[c_blk], qz); | |
| _mm256_store_ps(&cns->chn_prev_quat_w[c_blk], qw); | |
| } | |
| /* --- PHASE 2: Local Simulation --- */ | |
| for (int i = 0; i < 8; ++i) { | |
| int c = c_blk + i; | |
| struct chn_cfg *cfg = &cns->chn_cfg[c]; | |
| int p_base = c * PTC_PER_CHN; | |
| // Config | |
| __m256 damp = _mm256_set1_ps(cfg->damping); | |
| __m256 fric_coef = _mm256_set1_ps(cfg->friction); | |
| __m256 compliance = _mm256_div_ps(dt2_vec, _mm256_max_ps(_mm256_set1_ps(cfg->stiffness), eps_vec)); | |
| __m256 bend_compl = _mm256_div_ps(dt2_vec, _mm256_max_ps(_mm256_set1_ps(cfg->bend_stiffness), eps_vec)); | |
| __m256 root_blend = _mm256_set1_ps(cfg->root_motion_blend); | |
| // Art control: rest_curvature (0=straight 1.0, 0.5=60deg) | |
| __m256 target_cos = _mm256_set1_ps(1.0f - (cfg->rest_curvature * 0.5f)); | |
| // Forces | |
| __m256 force_x = _mm256_set1_ps(batch_fx[i]); | |
| __m256 force_y = _mm256_set1_ps(batch_fy[i]); | |
| __m256 force_z = _mm256_set1_ps(batch_fz[i]); | |
| __m256 root_vel_x = _mm256_set1_ps(batch_cvx[i]); | |
| __m256 root_vel_y = _mm256_set1_ps(batch_cvy[i]); | |
| __m256 root_vel_z = _mm256_set1_ps(batch_cvz[i]); | |
| __m256 inertia_drag = _mm256_set1_ps(1.0f - cfg->linear_inertia); | |
| __m256 drag_x = _mm256_mul_ps(root_vel_x, inertia_drag); | |
| __m256 drag_y = _mm256_mul_ps(root_vel_y, inertia_drag); | |
| __m256 drag_z = _mm256_mul_ps(root_vel_z, inertia_drag); | |
| __m256 p_pos[3][2], p_prev[3][2], inv_mass[2]; | |
| __m256 rl[2], lm[2], blm[2]; | |
| UNROLL_HINT | |
| for (int k=0; k<2; ++k) { | |
| int off = p_base + (k * 8); | |
| int r_off = (c * CON_PER_CHN) + (k * 8); | |
| p_pos[0][k] = _mm256_load_ps(&cns->ptc_pos_x[off]); | |
| p_pos[1][k] = _mm256_load_ps(&cns->ptc_pos_y[off]); | |
| p_pos[2][k] = _mm256_load_ps(&cns->ptc_pos_z[off]); | |
| p_prev[0][k] = _mm256_load_ps(&cns->ptc_prev_pos_x[off]); | |
| p_prev[1][k] = _mm256_load_ps(&cns->ptc_prev_pos_y[off]); | |
| p_prev[2][k] = _mm256_load_ps(&cns->ptc_prev_pos_z[off]); | |
| inv_mass[k] = _mm256_load_ps(&cns->ptc_inv_mass[off]); | |
| rl[k] = _mm256_load_ps(&cns->ptc_rest_len[r_off]); | |
| lm[k] = _mm256_load_ps(&cns->ptc_lambda[r_off]); | |
| blm[k] = _mm256_load_ps(&cns->ptc_bend_lambda[r_off]); | |
| } | |
| /* --- Verlet + Root Motion --- */ | |
| UNROLL_HINT | |
| for (int k=0; k<2; ++k) { | |
| __m256 vx = _mm256_sub_ps(p_pos[0][k], p_prev[0][k]); | |
| __m256 vy = _mm256_sub_ps(p_pos[1][k], p_prev[1][k]); | |
| __m256 vz = _mm256_sub_ps(p_pos[2][k], p_prev[2][k]); | |
| vx = _mm256_sub_ps(vx, drag_x); | |
| vy = _mm256_sub_ps(vy, drag_y); | |
| vz = _mm256_sub_ps(vz, drag_z); | |
| vx = _mm256_fmadd_ps(root_vel_x, root_blend, vx); | |
| vy = _mm256_fmadd_ps(root_vel_y, root_blend, vy); | |
| vz = _mm256_fmadd_ps(root_vel_z, root_blend, vz); | |
| p_prev[0][k] = p_pos[0][k]; | |
| p_prev[1][k] = p_pos[1][k]; | |
| p_prev[2][k] = p_pos[2][k]; | |
| p_pos[0][k] = _mm256_fmadd_ps(_mm256_mul_ps(force_x, inv_mass[k]), dt2_vec, _mm256_fmadd_ps(vx, damp, p_pos[0][k])); | |
| p_pos[1][k] = _mm256_fmadd_ps(_mm256_mul_ps(force_y, inv_mass[k]), dt2_vec, _mm256_fmadd_ps(vy, damp, p_pos[1][k])); | |
| p_pos[2][k] = _mm256_fmadd_ps(_mm256_mul_ps(force_z, inv_mass[k]), dt2_vec, _mm256_fmadd_ps(vz, damp, p_pos[2][k])); | |
| } | |
| /* --- Constraints --- */ | |
| for (int iter = 0; iter < MAX_ITR; ++iter) { | |
| // Distance | |
| UNROLL_HINT | |
| for (int k=0; k<2; ++k) { | |
| __m256 p2x, p2y, p2z, p2m; | |
| if (k == 0) { | |
| p2x = p_pos[0][1]; | |
| p2y = p_pos[1][1]; | |
| p2z = p_pos[2][1]; | |
| p2m = inv_mass[1]; | |
| } else { | |
| p2x = _mm256_permutevar8x32_ps(p_pos[0][0], shift_left); | |
| p2y = _mm256_permutevar8x32_ps(p_pos[1][0], shift_left); | |
| p2z = _mm256_permutevar8x32_ps(p_pos[2][0], shift_left); | |
| p2m = _mm256_permutevar8x32_ps(inv_mass[0], shift_left); | |
| } | |
| __m256 old_p2x = p2x, old_p2y = p2y, old_p2z = p2z; | |
| solve_dist(&p_pos[0][k], &p_pos[1][k], &p_pos[2][k], inv_mass[k], | |
| &p2x, &p2y, &p2z, p2m, rl[k], compliance, &lm[k], eps_vec); | |
| if (k == 0) { | |
| p_pos[0][1] = p2x; | |
| p_pos[1][1] = p2y; | |
| p_pos[2][1] = p2z; | |
| } else { | |
| __m256 dx = _mm256_sub_ps(p2x, old_p2x); | |
| __m256 dy = _mm256_sub_ps(p2y, old_p2y); | |
| __m256 dz = _mm256_sub_ps(p2z, old_p2z); | |
| p_pos[0][0] = _mm256_add_ps(p_pos[0][0], _mm256_permutevar8x32_ps(dx, shift_right)); | |
| p_pos[1][0] = _mm256_add_ps(p_pos[1][0], _mm256_permutevar8x32_ps(dy, shift_right)); | |
| p_pos[2][0] = _mm256_add_ps(p_pos[2][0], _mm256_permutevar8x32_ps(dz, shift_right)); | |
| } | |
| } | |
| // Bending (Group 1: Odd Center) | |
| { | |
| __m256 r_px = _mm256_permutevar8x32_ps(p_pos[0][0], shift_left); | |
| __m256 r_py = _mm256_permutevar8x32_ps(p_pos[1][0], shift_left); | |
| __m256 r_pz = _mm256_permutevar8x32_ps(p_pos[2][0], shift_left); | |
| __m256 r_pm = _mm256_permutevar8x32_ps(inv_mass[0], shift_left); | |
| __m256 old_r_px = r_px, old_r_py = r_py, old_r_pz = r_pz; | |
| solve_angle(&p_pos[0][0], &p_pos[1][0], &p_pos[2][0], inv_mass[0], | |
| &p_pos[0][1], &p_pos[1][1], &p_pos[2][1], inv_mass[1], | |
| &r_px, &r_py, &r_pz, r_pm, | |
| bend_compl, &blm[1], target_cos, eps_vec); | |
| __m256 dx = _mm256_sub_ps(r_px, old_r_px); | |
| __m256 dy = _mm256_sub_ps(r_py, old_r_py); | |
| __m256 dz = _mm256_sub_ps(r_pz, old_r_pz); | |
| p_pos[0][0] = _mm256_add_ps(p_pos[0][0], _mm256_permutevar8x32_ps(dx, shift_right)); | |
| p_pos[1][0] = _mm256_add_ps(p_pos[1][0], _mm256_permutevar8x32_ps(dy, shift_right)); | |
| p_pos[2][0] = _mm256_add_ps(p_pos[2][0], _mm256_permutevar8x32_ps(dz, shift_right)); | |
| } | |
| // Bending (Group 2: Even Center) | |
| { | |
| __m256 l_px = _mm256_permutevar8x32_ps(p_pos[0][1], shift_right); | |
| __m256 l_py = _mm256_permutevar8x32_ps(p_pos[1][1], shift_right); | |
| __m256 l_pz = _mm256_permutevar8x32_ps(p_pos[2][1], shift_right); | |
| __m256 l_pm = _mm256_permutevar8x32_ps(inv_mass[1], shift_right); | |
| __m256 old_l_px = l_px, old_l_py = l_py, old_l_pz = l_pz; | |
| solve_angle(&l_px, &l_py, &l_pz, l_pm, | |
| &p_pos[0][0], &p_pos[1][0], &p_pos[2][0], inv_mass[0], | |
| &p_pos[0][1], &p_pos[1][1], &p_pos[2][1], inv_mass[1], | |
| bend_compl, &blm[0], target_cos, eps_vec); | |
| __m256 dx = _mm256_sub_ps(l_px, old_l_px); | |
| __m256 dy = _mm256_sub_ps(l_py, old_l_py); | |
| __m256 dz = _mm256_sub_ps(l_pz, old_l_pz); | |
| p_pos[0][1] = _mm256_add_ps(p_pos[0][1], _mm256_permutevar8x32_ps(dx, shift_left)); | |
| p_pos[1][1] = _mm256_add_ps(p_pos[1][1], _mm256_permutevar8x32_ps(dy, shift_left)); | |
| p_pos[2][1] = _mm256_add_ps(p_pos[2][1], _mm256_permutevar8x32_ps(dz, shift_left)); | |
| } | |
| } | |
| /* --- Collisions --- */ | |
| int s_base = c * SPH_PER_CHN; | |
| for (int s = 0; s < SPH_PER_CHN; ++s) { | |
| __m256 sr = _mm256_broadcast_ss(&cns->sph_r[s_base + s]); | |
| if (_mm256_movemask_ps(_mm256_cmp_ps(sr, zero_vec, _CMP_GT_OQ)) == 0) { | |
| continue; | |
| } | |
| __m256 sx = _mm256_broadcast_ss(&cns->sph_x[s_base + s]); | |
| __m256 sy = _mm256_broadcast_ss(&cns->sph_y[s_base + s]); | |
| __m256 sz = _mm256_broadcast_ss(&cns->sph_z[s_base + s]); | |
| UNROLL_HINT | |
| for (int k=0; k<2; ++k) { | |
| __m256 dx = _mm256_sub_ps(p_pos[0][k], sx); | |
| __m256 dy = _mm256_sub_ps(p_pos[1][k], sy); | |
| __m256 dz = _mm256_sub_ps(p_pos[2][k], sz); | |
| __m256 d2 = _mm256_fmadd_ps(dx, dx, _mm256_fmadd_ps(dy, dy, _mm256_mul_ps(dz, dz))); | |
| __m256 id = _mm256_rsqrt_ps(_mm256_max_ps(d2, eps_vec)); | |
| __m256 pen = _mm256_sub_ps(sr, _mm256_mul_ps(d2, id)); | |
| __m256 mask = _mm256_cmp_ps(pen, zero_vec, _CMP_GT_OQ); | |
| __m256 push = _mm256_mul_ps(pen, mask); | |
| __m256 nx = _mm256_mul_ps(dx, id); | |
| __m256 ny = _mm256_mul_ps(dy, id); | |
| __m256 nz = _mm256_mul_ps(dz, id); | |
| p_pos[0][k] = _mm256_fmadd_ps(nx, push, p_pos[0][k]); | |
| p_pos[1][k] = _mm256_fmadd_ps(ny, push, p_pos[1][k]); | |
| p_pos[2][k] = _mm256_fmadd_ps(nz, push, p_pos[2][k]); | |
| // Friction | |
| __m256 vx = _mm256_sub_ps(p_pos[0][k], p_prev[0][k]); | |
| __m256 vy = _mm256_sub_ps(p_pos[1][k], p_prev[1][k]); | |
| __m256 vz = _mm256_sub_ps(p_pos[2][k], p_prev[2][k]); | |
| __m256 vdn = _mm256_fmadd_ps(vx, nx, _mm256_fmadd_ps(vy, ny, _mm256_mul_ps(vz, nz))); | |
| __m256 f_scale = _mm256_mul_ps(push, fric_coef); | |
| p_pos[0][k] = _mm256_fnmadd_ps(_mm256_fnmadd_ps(vdn, nx, vx), f_scale, p_pos[0][k]); | |
| p_pos[1][k] = _mm256_fnmadd_ps(_mm256_fnmadd_ps(vdn, ny, vy), f_scale, p_pos[1][k]); | |
| p_pos[2][k] = _mm256_fnmadd_ps(_mm256_fnmadd_ps(vdn, nz, vz), f_scale, p_pos[2][k]); | |
| } | |
| } | |
| /* --- Store --- */ | |
| UNROLL_HINT | |
| for (int k=0; k<2; ++k) { | |
| int off = p_base + (k * 8); | |
| int r_off = (c * CON_PER_CHN) + (k * 8); | |
| _mm256_store_ps(&cns->ptc_pos_x[off], p_pos[0][k]); | |
| _mm256_store_ps(&cns->ptc_pos_y[off], p_pos[1][k]); | |
| _mm256_store_ps(&cns->ptc_pos_z[off], p_pos[2][k]); | |
| _mm256_store_ps(&cns->ptc_prev_pos_x[off], p_prev[0][k]); | |
| _mm256_store_ps(&cns->ptc_prev_pos_y[off], p_prev[1][k]); | |
| _mm256_store_ps(&cns->ptc_prev_pos_z[off], p_prev[2][k]); | |
| _mm256_store_ps(&cns->ptc_lambda[r_off], lm[k]); | |
| _mm256_store_ps(&cns->ptc_bend_lambda[r_off], blm[k]); | |
| } | |
| } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <SDL2/SDL.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <math.h> | |
| #include "chain.c" | |
| #define WINDOW_WIDTH 800 | |
| #define WINDOW_HEIGHT 600 | |
| #define CHAIN_LENGTH 10 | |
| #define PARTICLE_SPACING 10.0f | |
| static void | |
| project(float x, float y, float z, int *screen_x, int *screen_y) { | |
| *screen_x = (int)(x + WINDOW_WIDTH / 2.0f); | |
| *screen_y = (int)(-y + WINDOW_HEIGHT / 2.0f); | |
| } | |
| // Helper: Rotate vector by inv_quat and translate by -chain_pos (scalar for simplicity) | |
| static void quat_rotate_inv_translate(float wx, float wy, float wz, // world pos | |
| float cx, float cy, float cz, // chain pos | |
| float qx, float qy, float qz, float qw, // chain quat | |
| float *mx, float *my, float *mz) { // out model pos | |
| // Translate: world - chain | |
| float tx = wx - cx, ty = wy - cy, tz = wz - cz; | |
| // Quat conj: (qw, -qx, -qy, -qz) | |
| float cqw = qw, cqx = -qx, cqy = -qy, cqz = -qz; | |
| // t = conj * (0, tx,ty,tz) | |
| float tw = cqx*tx + cqy*ty + cqz*tz; | |
| float t_x = cqw*tx + cqy*tz - cqz*ty; | |
| float t_y = cqw*ty + cqz*tx - cqx*tz; | |
| float t_z = cqw*tz + cqx*ty - cqy*tx; | |
| // model = t * quat (vec part) | |
| *mx = qw*t_x + qy*t_z - qz*t_y + tw*qx; | |
| *my = qw*t_y + qz*t_x - qx*t_z + tw*qy; | |
| *mz = qw*t_z + qx*t_y - qy*t_x + tw*qz; | |
| } | |
| extern int | |
| main(int argc, char *argv[]) { | |
| if (SDL_Init(SDL_INIT_VIDEO) < 0) { | |
| printf("SDL_Init failed: %s\n", SDL_GetError()); | |
| return 1; | |
| } | |
| SDL_Window *window = SDL_CreateWindow("Chain Simulation", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, WINDOW_WIDTH, WINDOW_HEIGHT, 0); | |
| SDL_Renderer *renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED); | |
| if (!window || !renderer) { | |
| printf("SDL_CreateWindow/Renderer failed: %s\n", SDL_GetError()); | |
| SDL_Quit(); | |
| return 1; | |
| } | |
| struct chn_sim sim; | |
| chn_sim_init(&sim); | |
| struct chn_cfg cfg = { | |
| .damping = 0.5f, | |
| .stiffness = 0.9f, | |
| .friction = 0.5f, | |
| .linear_inertia = 0.9f, // Tune 0.5f for more tangential lag | |
| .angular_inertia = 0.9f, | |
| .centrifugal_inertia = 0.9f, // Tune 0.5f for more outward bow | |
| .bend_stiffness = 0.8f, | |
| }; | |
| float rest_pos[CHAIN_LENGTH * 4]; | |
| for (int i = 0; i < CHAIN_LENGTH; i++) { | |
| rest_pos[i*4 + 0] = 0.0f; | |
| rest_pos[i*4 + 1] = -i * PARTICLE_SPACING; | |
| rest_pos[i*4 + 2] = 0.0f; | |
| rest_pos[i*4 + 3] = i == 0 ? 0.0f : 1.0f; | |
| } | |
| int chn = chn_sim_add(&sim, &cfg, rest_pos, CHAIN_LENGTH); | |
| chn_sim_grav(&sim, chn, (float[]){0.0f, -1.0f, 0.0f}); // Reduced for spin test | |
| chn_sim_tel(&sim, chn, (float[]){50.0f, 0.0f, 0.0f}, (float[]){0.0f, 0.0f, 0.0f, 1.0f}); // Start at circle edge | |
| // Fixed world spheres (triangle outside circle) | |
| float world_spheres[12] = { // 3 spheres * 4 floats (x,y,z,r) | |
| 60.0f, 0.0f, 0.0f, 10.0f, // Sphere 0: right | |
| -30.0f, 52.0f, 0.0f, 10.0f, // Sphere 1: top-left | |
| -30.0f,-52.0f, 0.0f, 10.0f // Sphere 2: bottom-left | |
| }; | |
| // Circular motion params | |
| float time = 0.0f; | |
| float circle_radius = 50.0f; | |
| float angular_speed = 2.0f * M_PI / 5.0f; // Full circle every 5s | |
| float dt = 0.016f; | |
| int running = 1; | |
| SDL_Event event; | |
| // Debug: Check gravity and masses | |
| int p_base = chn * PTC_PER_CHN; | |
| while (running) { | |
| while (SDL_PollEvent(&event)) { | |
| if (event.type == SDL_QUIT) { | |
| running = 0; | |
| } | |
| } | |
| time += dt; | |
| // Circular anchor motion | |
| float anchor_x = circle_radius * cosf(angular_speed * time); | |
| float anchor_y = circle_radius * sinf(angular_speed * time); | |
| chn_sim_mov(&sim, chn, (float[]){anchor_x, anchor_y, 0.0f}, (float[]){0.0f, 0.0f, 0.0f, 1.0f}); | |
| // Transform fixed world spheres to current model space | |
| float model_spheres[12]; | |
| for (int s = 0; s < 3; ++s) { | |
| float wx = world_spheres[s*4 + 0], wy = world_spheres[s*4 + 1], wz = world_spheres[s*4 + 2]; | |
| float r = world_spheres[s*4 + 3]; | |
| float cx = sim.chn_pos_x[chn], cy = sim.chn_pos_y[chn], cz = sim.chn_pos_z[chn]; | |
| float qx = sim.chn_quat_x[chn], qy = sim.chn_quat_y[chn], qz = sim.chn_quat_z[chn], qw = sim.chn_quat_w[chn]; | |
| float mx, my, mz; | |
| quat_rotate_inv_translate(wx, wy, wz, cx, cy, cz, qx, qy, qz, qw, &mx, &my, &mz); | |
| model_spheres[s*4 + 0] = mx; | |
| model_spheres[s*4 + 1] = my; | |
| model_spheres[s*4 + 2] = mz; | |
| model_spheres[s*4 + 3] = r; | |
| } | |
| chn_sim_set_sph(&sim, chn, model_spheres, 3); | |
| chn_sim_run(&sim, dt); | |
| SDL_SetRenderDrawColor(renderer, 0, 0, 0, 255); | |
| SDL_RenderClear(renderer); | |
| SDL_SetRenderDrawColor(renderer, 0, 255, 0, 255); // Green for chain & spheres | |
| float root_x = sim.chn_pos_x[chn]; // Current root world x/y | |
| float root_y = sim.chn_pos_y[chn]; | |
| // Draw chain segments | |
| for (int i = 0; i < CHAIN_LENGTH - 1; i++) { | |
| int idx0 = (i % 2 == 0 ? i / 2 : 8 + (i - 1) / 2); | |
| int idx1 = ((i + 1) % 2 == 0 ? (i + 1) / 2 : 8 + i / 2); | |
| float model_x0 = sim.ptc_pos_x[p_base + idx0], model_y0 = sim.ptc_pos_y[p_base + idx0]; | |
| float model_x1 = sim.ptc_pos_x[p_base + idx1], model_y1 = sim.ptc_pos_y[p_base + idx1]; | |
| float world_x0 = model_x0 + root_x; | |
| float world_y0 = model_y0 + root_y; | |
| float world_x1 = model_x1 + root_x; | |
| float world_y1 = model_y1 + root_y; | |
| int sx0, sy0, sx1, sy1; | |
| project(world_x0, world_y0, 0, &sx0, &sy0); | |
| project(world_x1, world_y1, 0, &sx1, &sy1); | |
| SDL_RenderDrawLine(renderer, sx0, sy0, sx1, sy1); | |
| } | |
| // Draw spheres (approx circle outline with 32 points) | |
| int p_base = chn * PTC_PER_CHN; | |
| int s_base = chn * SPH_PER_CHN; | |
| for (int s = 0; s < 3; ++s) { // Only draw active spheres | |
| float sph_model_x = sim.sph_x[s_base + s]; | |
| float sph_model_y = sim.sph_y[s_base + s]; | |
| float sph_r = sim.sph_r[s_base + s]; | |
| if (sph_r <= 0.0f) continue; | |
| float sph_world_x = sph_model_x + root_x; | |
| float sph_world_y = sph_model_y + root_y; | |
| int sph_sx, sph_sy; | |
| project(sph_world_x, sph_world_y, 0, &sph_sx, &sph_sy); | |
| // Parametric circle: 32 points | |
| const int num_points = 32; | |
| for (int pt = 0; pt < num_points; ++pt) { | |
| float angle0 = 2.0f * M_PI * pt / num_points; | |
| float angle1 = 2.0f * M_PI * (pt + 1) / num_points; | |
| float dx0 = sph_r * cosf(angle0), dy0 = sph_r * sinf(angle0); | |
| float dx1 = sph_r * cosf(angle1), dy1 = sph_r * sinf(angle1); | |
| int px0, py0, px1, py1; | |
| project(sph_world_x + dx0, sph_world_y + dy0, 0, &px0, &py0); | |
| project(sph_world_x + dx1, sph_world_y + dy1, 0, &px1, &py1); | |
| SDL_RenderDrawLine(renderer, px0, py0, px1, py1); | |
| } | |
| } | |
| SDL_RenderPresent(renderer); | |
| SDL_Delay(16); | |
| } | |
| SDL_DestroyRenderer(renderer); | |
| SDL_DestroyWindow(window); | |
| SDL_Quit(); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment