parallel particles

2024-01-03 18:29:27 +00:00 · 2024-01-03 18:29:27 +00:00 · 7dfd8e6f9a
parent 4a4711e38f
commit 7dfd8e6f9a
5 changed files with 1206 additions and 39 deletions
--- a/source/engine/HandmadeMath.h
+++ b/source/engine/HandmadeMath.h
@ -819,8 +819,6 @@ static inline float HMM_DotV2(HMM_Vec2 Left, HMM_Vec2 Right) {
  return (Left.X * Right.X) + (Left.Y * Right.Y);
 }
 static inline HMM_Vec2 HMM_ProjV2(HMM_Vec2 a, HMM_Vec2 b)
 {
  return HMM_MulV2F(b, HMM_DotV2(a,b)/HMM_DotV2(b,b));
--- a/source/engine/particle.c
+++ b/source/engine/particle.c
@ -7,6 +7,9 @@
 #include "simplex.h"
 #include "pthread.h"
 #define SCHED_IMPLEMENTATION
 #include "sched.h"
 static emitter **emitters;
 static sg_shader par_shader;
@ -16,6 +19,10 @@ static int draw_count;
 #define MAX_PARTICLES 1000000
 struct scheduler sched;
 void *mem;
 struct par_vert {
  HMM_Vec2 pos;
  float angle;
@ -27,6 +34,11 @@ typedef struct par_vert par_vert;
 void particle_init()
 {
  sched_size needed;
  scheduler_init(&sched, &needed, 1, NULL);
  mem = calloc(needed, 1);
  scheduler_start(&sched,mem);
  par_shader = sg_make_shader(particle_shader_desc(sg_query_backend()));
  par_pipe = sg_make_pipeline(&(sg_pipeline_desc){
@ -117,9 +129,9 @@ int emitter_spawn(emitter *e)
 {
  particle p;
  p.life = e->life;
-  p.pos = (HMM_Vec3){0,0,0};
+  p.pos = (HMM_Vec4){0,0,0,0};
-  p.v = (HMM_Vec3){frand(1)-0.5,frand(1)-0.5,0};
+  p.v = (HMM_Vec4){frand(1)-0.5,frand(1)-0.5,0,0};
-  p.v = HMM_ScaleV3(HMM_NormV3(p.v), e->speed);
+  p.v = HMM_MulV4F(HMM_NormV4(p.v), e->speed);
  p.angle = 0;
  p.av = 1;
  arrput(e->particles,p);
@ -140,21 +152,28 @@ void emitters_step(double dt)
 static struct par_vert pv[MAX_PARTICLES];
 /*
  * Scheduler task body: copies the draw data of emitter `e`'s particles in the
  * partition [t.start, t.end) into the file-level vertex array `pv`, using the
  * same index for source particle and destination vertex.
  * `sched` and `thread_num` are accepted but not used here.
  */
 void parallel_pv(emitter *e, struct scheduler *sched, struct sched_task_partition t, sched_uint thread_num)
 {
  int idx = t.start;
  while (idx < t.end) {
    particle *src = e->particles + idx;
    struct par_vert *dst = pv + idx;
    dst->pos = src->pos.xy;
    dst->angle = src->angle;
    /* Vertex scale comes from the emitter texture's dimensions and the particle's scale. */
    dst->scale = HMM_ScaleV2(tex_get_dimensions(e->texture), src->scale);
    dst->color = vec2rgba(src->color);
    idx++;
  }
 }
 void emitters_draw()
 {
  if (arrlen(emitters) == 0) return;
  int draw_count = 0;
  for (int i = 0; i < arrlen(emitters); i++) {
    emitter *e = emitters[i];
    par_bind.fs.images[0] = e->texture->id;
-    #pragma omp parallel for
+    struct sched_task task;
-    for (int j = 0; j < arrlen(e->particles); j++) {
+    scheduler_add(&sched, &task, parallel_pv, e, arrlen(e->particles), arrlen(e->particles)/SCHED_DEFAULT);
-      particle *p = &e->particles[j];
+    scheduler_join(&sched, &task);
      pv[j].pos = p->pos.xy;
      pv[j].angle = p->angle;
      pv[j].scale = HMM_ScaleV2(tex_get_dimensions(e->texture), p->scale);
      pv[j].color = vec2rgba(p->color);
    }
    sg_append_buffer(par_bind.vertex_buffers[0], &(sg_range){.ptr=&pv, .size=sizeof(struct par_vert)*arrlen(e->particles)});
    draw_count += arrlen(e->particles);
@ -166,30 +185,35 @@ void emitters_draw()
  sg_draw(0, 4, draw_count);
 }
-void emitter_step(emitter *e, double dt) {
+static double dt;
-  #pragma omp parallel for
+static HMM_Vec4 g_accel;
-  for (int i = arrlen(e->particles)-1; i >= 0; i--) {
+
-    particle p = e->particles[i];
+/*
+ * Scheduler task body: advances the particles of emitter `e` that fall inside
+ * partition `t` by the file-level time step `dt`.
+ *
+ * For each particle, walking backwards through the partition:
+ *   - adds `g_accel` to the velocity when e->gravity is set,
+ *   - adds a random velocity kick scaled by 1000*dt,
+ *   - integrates position and angle, decrements life,
+ *   - resamples the color from e->color and copies e->scale,
+ *   - removes the particle with arrdelswap when its life has run out or
+ *     query_point() reports a hit at its position.
+ *
+ * `shed` and `thread_num` are accepted but not used here.
+ *
+ * NOTE(review): arrdelswap modifies e->particles itself. The visible caller
+ * passes a minimum range equal to the whole array, presumably so that only a
+ * single partition runs -- confirm before lowering that value.
+ */
+void parallel_step(emitter *e, struct scheduler *shed, struct sched_task_partition t, sched_uint thread_num)
 {
  /* Stop at this partition's own lower bound rather than at index 0, so a
   * partition never re-steps particles owned by another one. The bounds are
   * cast to int because they are presumably unsigned (TODO confirm in
   * sched.h); an unsigned comparison against a start of 0 would never end. */
  const int first = (int)t.start;
  for (int i = (int)t.end-1; i >= first; i--) {
    if (e->gravity) 
-      p.v = HMM_AddV3(p.v, HMM_MulV3F((HMM_Vec3){cpSpaceGetGravity(space).x, cpSpaceGetGravity(space).y, 0}, dt));
+      e->particles[i].v = HMM_AddV4(e->particles[i].v, g_accel);
    e->particles[i].v = HMM_AddV4(e->particles[i].v, HMM_MulV4F((HMM_Vec4){frand(2)-1, frand(2)-1, 0,0}, 1000*dt));
    e->particles[i].pos = HMM_AddV4(e->particles[i].pos, HMM_MulV4F(e->particles[i].v, dt));
    e->particles[i].angle += e->particles[i].av*dt;
    e->particles[i].life -= dt;
    e->particles[i].color = sample_sampler(&e->color, (e->life-e->particles[i].life)/e->life);
    e->particles[i].scale = e->scale;
-//    float freq = 1;
+   if (e->particles[i].life <= 0)
-//    p.v = HMM_AddV3(p.v, HMM_MulV3F((HMM_Vec3){Noise2D(p.pos.x*freq, p.pos.y*freq), Noise2D(p.pos.x*freq+5,p.pos.y*freq+5), 0}, 1000*dt));
+     arrdelswap(e->particles, i);
-    p.v = HMM_AddV3(p.v, HMM_MulV3F((HMM_Vec3){frand(2)-1, frand(2)-1, 0}, 1000*dt));
+   else if (query_point(e->particles[i].pos.xy))
-
+     arrdelswap(e->particles,i);
    p.pos = HMM_AddV3(p.pos, HMM_MulV3F(p.v, dt));
    p.angle += p.av*dt;
    p.life -= dt;
    p.color = sample_sampler(&e->color, (e->life-p.life)/e->life);
    p.scale = e->scale;
    e->particles[i] = p;    
 //    if (p.life <= 0)
 //      arrdelswap(e->particles,i);
 //    if (query_point(p.pos.xy))
 //      arrdelswap(e->particles,i);
  }
 }
 void emitter_step(emitter *e, double mdt) {
  dt = mdt;
  g_accel = HMM_MulV4F((HMM_Vec4){cpSpaceGetGravity(space).x, cpSpaceGetGravity(space).y, 0, 0}, dt);
  if (arrlen(e->particles) == 0) return;
  struct sched_task task;
  scheduler_add(&sched, &task, parallel_step, e, arrlen(e->particles), arrlen(e->particles));
  scheduler_join(&sched, &task);
  if (!e->on) return;
  e->tte-=dt;
--- a/source/engine/particle.h
+++ b/source/engine/particle.h
@ -7,8 +7,8 @@
 #include "anim.h"
 typedef struct particle {
-  HMM_Vec3 pos;
+  HMM_Vec4 pos;
-  HMM_Vec3 v; /* velocity */
+  HMM_Vec4 v; /* velocity */
  float angle;
  float av; /* angular velocity */
  float scale;
--- a/source/engine/sched.h
+++ b/source/engine/sched.h
--- a/source/engine/yugine.c
+++ b/source/engine/yugine.c
@ -12,9 +12,6 @@
 #include "particle.h"
 #include "simplex.h"
 #define FNL_IMPL
 #include "FastNoiseLite.h"
 #include "datastream.h"
 #include "timer.h"