From cc12740994232e2bf518f56ec203a6f1646f5bd1 Mon Sep 17 00:00:00 2001
From: OusmBlueNinja <89956790+OusmBlueNinja@users.noreply.github.com>
Date: Fri, 18 Apr 2025 22:15:06 -0500
Subject: [PATCH] Added Box2D, Deep Profiling, and better Lua profiling

---
 .gitmodules                             |    1 +
 .vscode/settings.json                   |    3 +-
 remake.yaml                             |    9 +-
 remake/build.log                        |   10 +-
 src/src/Components/Component.h          |    1 +
 src/src/Components/PhysicsComponent.cpp |  102 +
 src/src/Components/PhysicsComponent.h   |   51 +
 src/src/Components/ScriptComponent.cpp  |  107 +-
 src/src/Components/ScriptComponent.h    |   13 +
 src/src/Engine.cpp                      |   50 +-
 src/src/Engine.h                        |    3 +
 src/src/Renderer.cpp                    |   96 +-
 src/src/utils/EngineConfig.cpp          |   33 +-
 src/src/utils/EngineConfig.h            |    1 +
 src/src/utils/PhysicsSystem.h           |   18 +
 src/src/utils/Profiler.cpp              |   11 +
 src/src/utils/Profiler.h                |   72 +-
 src/src/utils/Shader.cpp                |   39 +-
 src/src/utils/Shader.h                  |   38 +-
 src/vendor/box2d/LICENSE                |   21 +
 src/vendor/box2d/aabb.c                 |  132 +
 src/vendor/box2d/aabb.h                 |   56 +
 src/vendor/box2d/arena_allocator.c      |  112 +
 src/vendor/box2d/arena_allocator.h      |   48 +
 src/vendor/box2d/array.c                |    8 +
 src/vendor/box2d/array.h                |  179 ++
 src/vendor/box2d/atomic.h               |   79 +
 src/vendor/box2d/base.h                 |  131 +
 src/vendor/box2d/bitset.c               |   67 +
 src/vendor/box2d/bitset.h               |   65 +
 src/vendor/box2d/body.c                 | 1878 +++++++++++++
 src/vendor/box2d/body.h                 |  194 ++
 src/vendor/box2d/box2d.h                | 1221 +++++++++
 src/vendor/box2d/broad_phase.c          |  524 ++++
 src/vendor/box2d/broad_phase.h          |   83 +
 src/vendor/box2d/collision.h            |  830 ++++++
 src/vendor/box2d/constants.h            |   54 +
 src/vendor/box2d/constraint_graph.c     |  322 +++
 src/vendor/box2d/constraint_graph.h     |   58 +
 src/vendor/box2d/contact.c              |  650 +++++
 src/vendor/box2d/contact.h              |  148 +
 src/vendor/box2d/contact_solver.c       | 2120 +++++++++++++++
 src/vendor/box2d/contact_solver.h       |   54 +
 src/vendor/box2d/core.c                 |  178 ++
 src/vendor/box2d/core.h                 |  143 +
 src/vendor/box2d/ctz.h                  |  112 +
 src/vendor/box2d/distance.c             | 1415 ++++++++++
 src/vendor/box2d/distance_joint.c       |  556 ++++
 src/vendor/box2d/dynamic_tree.c         | 1989 ++++++++++++++
 src/vendor/box2d/geometry.c             | 1028 +++++++
 src/vendor/box2d/hull.c                 |  328 +++
 src/vendor/box2d/id.h                   |  144 +
 src/vendor/box2d/id_pool.c              |   79 +
 src/vendor/box2d/id_pool.h              |   35 +
 src/vendor/box2d/island.c               |  977 +++++++
 src/vendor/box2d/island.h               |   89 +
 src/vendor/box2d/joint.c                | 1268 +++++++++
 src/vendor/box2d/joint.h                |  335 +++
 src/vendor/box2d/manifold.c             | 1726 ++++++++++++
 src/vendor/box2d/math_functions.c       |  159 ++
 src/vendor/box2d/math_functions.h       |  761 ++++++
 src/vendor/box2d/motor_joint.c          |  283 ++
 src/vendor/box2d/mouse_joint.c          |  214 ++
 src/vendor/box2d/mover.c                |   73 +
 src/vendor/box2d/prismatic_joint.c      |  654 +++++
 src/vendor/box2d/revolute_joint.c       |  530 ++++
 src/vendor/box2d/sensor.c               |  389 +++
 src/vendor/box2d/sensor.h               |   36 +
 src/vendor/box2d/shape.c                | 1714 ++++++++++++
 src/vendor/box2d/shape.h                |  123 +
 src/vendor/box2d/solver.c               | 2038 ++++++++++++++
 src/vendor/box2d/solver.h               |  155 ++
 src/vendor/box2d/solver_set.c           |  613 +++++
 src/vendor/box2d/solver_set.h           |   57 +
 src/vendor/box2d/table.c                |  238 ++
 src/vendor/box2d/table.h                |   37 +
 src/vendor/box2d/timer.c                |  185 ++
 src/vendor/box2d/types.c                |  151 ++
 src/vendor/box2d/types.h                | 1457 ++++++++++
 src/vendor/box2d/weld_joint.c           |  310 +++
 src/vendor/box2d/wheel_joint.c          |  549 ++++
 src/vendor/box2d/world.c                | 3303 +++++++++++++++++++++++
 src/vendor/box2d/world.h                |  192 ++
 83 files changed, 34116 insertions(+), 199 deletions(-)
 create mode 100644 src/src/utils/PhysicsSystem.h
 create mode 100644 src/vendor/box2d/LICENSE
 create mode 100644 src/vendor/box2d/aabb.c
 create mode 100644 src/vendor/box2d/aabb.h
 create mode 100644 src/vendor/box2d/arena_allocator.c
 create mode 100644 src/vendor/box2d/arena_allocator.h
 create mode 100644 src/vendor/box2d/array.c
 create mode 100644 src/vendor/box2d/array.h
 create mode 100644 src/vendor/box2d/atomic.h
 create mode 100644 src/vendor/box2d/base.h
 create mode 100644 src/vendor/box2d/bitset.c
 create mode 100644 src/vendor/box2d/bitset.h
 create mode 100644 src/vendor/box2d/body.c
 create mode 100644 src/vendor/box2d/body.h
 create mode 100644 src/vendor/box2d/box2d.h
 create mode 100644 src/vendor/box2d/broad_phase.c
 create mode 100644 src/vendor/box2d/broad_phase.h
 create mode 100644 src/vendor/box2d/collision.h
 create mode 100644 src/vendor/box2d/constants.h
 create mode 100644 src/vendor/box2d/constraint_graph.c
 create mode 100644 src/vendor/box2d/constraint_graph.h
 create mode 100644 src/vendor/box2d/contact.c
 create mode 100644 src/vendor/box2d/contact.h
 create mode 100644 src/vendor/box2d/contact_solver.c
 create mode 100644 src/vendor/box2d/contact_solver.h
 create mode 100644 src/vendor/box2d/core.c
 create mode 100644 src/vendor/box2d/core.h
 create mode 100644 src/vendor/box2d/ctz.h
 create mode 100644 src/vendor/box2d/distance.c
 create mode 100644 src/vendor/box2d/distance_joint.c
 create mode 100644 src/vendor/box2d/dynamic_tree.c
 create mode 100644 src/vendor/box2d/geometry.c
 create mode 100644 src/vendor/box2d/hull.c
 create mode 100644 src/vendor/box2d/id.h
 create mode 100644 src/vendor/box2d/id_pool.c
 create mode 100644 src/vendor/box2d/id_pool.h
 create mode 100644 src/vendor/box2d/island.c
 create mode 100644 src/vendor/box2d/island.h
 create mode 100644 src/vendor/box2d/joint.c
 create mode 100644 src/vendor/box2d/joint.h
 create mode 100644 src/vendor/box2d/manifold.c
 create mode 100644 src/vendor/box2d/math_functions.c
 create mode 100644 src/vendor/box2d/math_functions.h
 create mode 100644 src/vendor/box2d/motor_joint.c
 create mode 100644 src/vendor/box2d/mouse_joint.c
 create mode 100644 src/vendor/box2d/mover.c
 create mode 100644 src/vendor/box2d/prismatic_joint.c
 create mode 100644 src/vendor/box2d/revolute_joint.c
 create mode 100644 src/vendor/box2d/sensor.c
 create mode 100644 src/vendor/box2d/sensor.h
 create mode 100644 src/vendor/box2d/shape.c
 create mode 100644 src/vendor/box2d/shape.h
 create mode 100644 src/vendor/box2d/solver.c
 create mode 100644 src/vendor/box2d/solver.h
 create mode 100644 src/vendor/box2d/solver_set.c
 create mode 100644 src/vendor/box2d/solver_set.h
 create mode 100644 src/vendor/box2d/table.c
 create mode 100644 src/vendor/box2d/table.h
 create mode 100644 src/vendor/box2d/timer.c
 create mode 100644 src/vendor/box2d/types.c
 create mode 100644 src/vendor/box2d/types.h
 create mode 100644 src/vendor/box2d/weld_joint.c
 create mode 100644 src/vendor/box2d/wheel_joint.c
 create mode 100644 src/vendor/box2d/world.c
 create mode 100644 src/vendor/box2d/world.h

diff --git a/.gitmodules b/.gitmodules
index 7384367..6ed3f96 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,4 +1,5 @@
 [submodule "Remake"]
 	path = Remake
 	url = https://dock-it.dev/GigabiteStudios/Remake.git
+	branch = master
 
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 894df0c..d32c991 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -76,6 +76,7 @@
         "variant": "cpp",
         "fstream": "cpp",
         "codecvt": "cpp",
-        "*.inc": "cpp"
+        "*.inc": "cpp",
+        "future": "cpp"
     }
 }
\ No newline at end of file
diff --git a/remake.yaml b/remake.yaml
index 93f25cf..7062129 100644
--- a/remake.yaml
+++ b/remake.yaml
@@ -1,11 +1,10 @@
-# Remake Build Configuration
 
 # Source folders to recursively find .c/.cpp files
 src_dirs:
   - src/src
-  - src/vendor
   - src/include/lua
-  - src/vendor/box2d/src  # <- actual Box2D sources if you're compiling them
+  - src/vendor/imgui
+  - src/vendor/box2d
 
 # Include directories (-I)
 include_dirs:
@@ -13,7 +12,8 @@ include_dirs:
   - src/include/lua
   - src/vendor
   - src/vendor/imgui
-  - src/vendor/box2d/include  # ✅ correct Box2D C++ API include path
+  - src/vendor/box2d
+
 
   - C:/msys64/mingw64/include
 
@@ -42,6 +42,7 @@ cxxflags:
   - -std=c++20
   - -Wall
   - -g
+# - -DDISABLE_DEEP_PROFILING
 
 # Auto-detect libraries and headers
 auto_libs:
diff --git a/remake/build.log b/remake/build.log
index 7dd4292..60dc3d1 100644
--- a/remake/build.log
+++ b/remake/build.log
@@ -1,8 +1,2 @@
-[COMPILE] g++ -std=c++20 -Wall -g -Isrc/include -Isrc/include/lua -Isrc/vendor -Isrc/vendor/imgui -Isrc/vendor/box2d/include -IC:/msys64/mingw64/include -IC:\msys64\mingw64\lib\libyaml-cpp.a -Isrc\vendor\imgui -MMD -MP -c src\src\Components\PhysicsComponent.cpp -o src\build\Components\PhysicsComponent.o
-[ERROR] gcc -std=c99 -Wall -Isrc/include -Isrc/include/lua -Isrc/vendor -Isrc/vendor/imgui -Isrc/vendor/box2d/include -IC:/msys64/mingw64/include -IC:\msys64\mingw64\lib\libyaml-cpp.a -Isrc\vendor\imgui -MMD -MP -c src\vendor\box2d\benchmark\main.c -o src\build\box2d\benchmark\main.o
-cc1.exe: warning: C:\msys64\mingw64\lib\libyaml-cpp.a: not a directory
-src\vendor\box2d\benchmark\main.c:8:10: fatal error: TaskScheduler_c.h: No such file or directory
-    8 | #include "TaskScheduler_c.h"
-      |          ^~~~~~~~~~~~~~~~~~~
-compilation terminated.

-
+[LINK] g++ src\build\Engine.o src\build\main.o src\build\Renderer.o src\build\Components\CameraComponent.o src\build\Components\LightComponent.o src\build\Components\PhysicsComponent.o src\build\Components\ScriptComponent.o src\build\Components\SpriteComponent.o src\build\Components\TextComonent.o src\build\Components\TilemapComponent.o src\build\Entitys\Object.o src\build\utils\EngineConfig.o src\build\utils\ExceptionHandler.o src\build\utils\FileDialog.o src\build\utils\GameObjectsList.o src\build\utils\Logging.o src\build\utils\Profiler.o src\build\utils\Shader.o src\build\utils\UID.o src\build\utils\utils.o src\build\lapi.o src\build\lauxlib.o src\build\lbaselib.o src\build\lcode.o src\build\lcorolib.o src\build\lctype.o src\build\ldblib.o src\build\ldebug.o src\build\ldo.o src\build\ldump.o src\build\lfunc.o src\build\lgc.o src\build\linit.o src\build\liolib.o src\build\llex.o src\build\lmathlib.o src\build\lmem.o src\build\loadlib.o src\build\lobject.o src\build\lopcodes.o src\build\loslib.o src\build\lparser.o src\build\lstate.o src\build\lstring.o src\build\lstrlib.o src\build\ltable.o src\build\ltablib.o src\build\ltm.o src\build\lua.o src\build\luac.o src\build\lundump.o src\build\lutf8lib.o src\build\lvm.o src\build\lzio.o src\build\imgui.o src\build\imgui_demo.o src\build\imgui_draw.o src\build\imgui_impl_glfw.o src\build\imgui_impl_opengl3.o src\build\imgui_tables.o src\build\imgui_widgets.o src\build\aabb.o src\build\arena_allocator.o src\build\array.o src\build\bitset.o src\build\body.o src\build\broad_phase.o src\build\constraint_graph.o src\build\contact.o src\build\contact_solver.o src\build\core.o src\build\distance.o src\build\distance_joint.o src\build\dynamic_tree.o src\build\geometry.o src\build\hull.o src\build\id_pool.o src\build\island.o src\build\joint.o src\build\manifold.o src\build\math_functions.o src\build\motor_joint.o src\build\mouse_joint.o src\build\mover.o src\build\prismatic_joint.o src\build\revolute_joint.o 
src\build\sensor.o src\build\shape.o src\build\solver.o src\build\solver_set.o src\build\table.o src\build\timer.o src\build\types.o src\build\weld_joint.o src\build\wheel_joint.o src\build\world.o -o src\build\app.exe -LC:\msys64\mingw64\lib -lglfw3 -lglew32 -lopengl32 -lgdi32 -lyaml-cpp -lcomdlg32 -lssl -lcrypto
+[RUN] Executed app.exe successfully.
diff --git a/src/src/Components/Component.h b/src/src/Components/Component.h
index c1ccd25..62aae4e 100644
--- a/src/src/Components/Component.h
+++ b/src/src/Components/Component.h
@@ -2,6 +2,7 @@
 
 #include <string>
 #include <memory>
+#include <vector>
 #include <yaml-cpp/yaml.h>
 #include "../utils/ExceptionHandler.h"
 
diff --git a/src/src/Components/PhysicsComponent.cpp b/src/src/Components/PhysicsComponent.cpp
index e69de29..0bc999b 100644
--- a/src/src/Components/PhysicsComponent.cpp
+++ b/src/src/Components/PhysicsComponent.cpp
@@ -0,0 +1,102 @@
+#include "PhysicsComponent.h"
+#include "../Entitys/Object.h"
+#include "imgui.h"
+
+extern "C" {
+    #include "base.h"
+    #include "math_functions.h"
+    #include "types.h"
+    #include "body.h"
+    #include "shape.h"
+    #include "world.h"
+}
+
+PhysicsComponent::PhysicsComponent(Object* owner, BodyType type, float width, float height)
+    : Component(owner), type(type), width(width), height(height) {}  // stores settings only; no body is created here
+
+PhysicsComponent::~PhysicsComponent() {
+    DestroyBody();  // release the Box2D body, if one is held, together with the component
+}
+
+// Creates this component's Box2D body at `position` in `world` and attaches one width x height box.
+// A body the component still owns is destroyed first so repeated calls do not orphan it.
+void PhysicsComponent::CreateBody(b2WorldId world, const glm::vec2& position) {
+    if (b2Body_IsValid(body)) b2DestroyBody(body);
+    this->world = world;
+    b2BodyDef def = b2DefaultBodyDef();
+    def.type = (type == BodyType::Static)   ? b2_staticBody :
+               (type == BodyType::Dynamic)  ? b2_dynamicBody :
+                                              b2_kinematicBody;
+    def.position = { position.x, position.y };
+    def.fixedRotation = fixedRotation;
+    body = b2CreateBody(world, &def);
+
+    // NOTE(review): `friction` is cached on the component but never written into shapeDef here.
+    b2Polygon box = b2MakeBox(width * 0.5f, height * 0.5f);
+    b2ShapeDef shapeDef = b2DefaultShapeDef();
+    shapeDef.density = density;
+    b2CreatePolygonShape(body, &shapeDef, &box);
+}
+
+// Destroys the body only if its id is still live (its world may already be gone), then clears the handle.
+void PhysicsComponent::DestroyBody() {
+    if (b2Body_IsValid(body))
+        b2DestroyBody(body);
+    body = b2_nullBodyId;
+}
+
+void PhysicsComponent::SetVelocity(const glm::vec2& vel) {
+    if (B2_IS_NULL(body)) return;  // no live body to drive
+    b2Body_SetLinearVelocity(body, b2Vec2{ vel.x, vel.y });
+}
+
+// Linear velocity of the live body, or a value-initialized vec2 when no body is held.
+glm::vec2 PhysicsComponent::GetVelocity() const {
+    if (B2_IS_NULL(body))
+        return {};
+    const b2Vec2 linear = b2Body_GetLinearVelocity(body);
+    return { linear.x, linear.y };
+}
+
+void PhysicsComponent::SetFriction(float f) {
+    friction = f;
+    // Cached only: no Box2D call is made. NOTE(review): Box2D v3 has b2Shape_SetFriction, but no b2ShapeId is stored here.
+}
+
+void PhysicsComponent::SetDensity(float d) {
+    density = d;
+    // Cached only: an existing shape is untouched; the value is read the next time CreateBody() runs.
+}
+
+// Caches the flag (CreateBody() reads it) and forwards it to the live body when there is one.
+void PhysicsComponent::SetFixedRotation(bool fixed) {
+    fixedRotation = fixed;
+    if (!B2_IS_NULL(body)) b2Body_SetFixedRotation(body, fixedRotation);
+}
+
+// Copies the body's position onto the owner's local position; rotation is not copied.
+void PhysicsComponent::SyncFromPhysics() {
+    if (B2_IS_NULL(body)) return;
+    const b2Vec2 simulated = b2Body_GetTransform(body).p;
+    owner->SetLocalPosition({ simulated.x, simulated.y });
+}
+
+// Moves the body to the owner's local position while keeping the body's current rotation.
+void PhysicsComponent::SyncToPhysics() {
+    if (B2_IS_NULL(body)) return;
+    const glm::vec2 target = owner->GetLocalPosition();
+    b2Body_SetTransform(body, b2Vec2{ target.x, target.y }, b2Body_GetRotation(body));
+}
+
+float PhysicsComponent::GetFriction() const { return friction; }  // accessors below return the cached settings
+float PhysicsComponent::GetDensity() const { return density; }
+bool PhysicsComponent::IsFixedRotation() const { return fixedRotation; }
+
+void PhysicsComponent::SetBodyType(BodyType t) { type = t; }  // cached only; read by CreateBody()
+PhysicsComponent::BodyType PhysicsComponent::GetBodyType() const { return type; }
+
+void PhysicsComponent::SetSize(float w, float h) { width = w; height = h; }  // cached only; read by CreateBody()
+float PhysicsComponent::GetWidth() const { return width; }
+float PhysicsComponent::GetHeight() const { return height; }
+
+bool PhysicsComponent::HasBody() const { return (!B2_IS_NULL(body)); }  // true while a non-null body id is held
diff --git a/src/src/Components/PhysicsComponent.h b/src/src/Components/PhysicsComponent.h
index e69de29..b33f043 100644
--- a/src/src/Components/PhysicsComponent.h
+++ b/src/src/Components/PhysicsComponent.h
@@ -0,0 +1,51 @@
+#pragma once
+#include "Component.h"
+#include <glm/glm.hpp>
+#include "box2d/box2d.h"
+
+class PhysicsComponent : public Component {  // component holding one Box2D body with a single box shape
+public:
+    enum class BodyType { Static, Dynamic, Kinematic };
+
+    PhysicsComponent(Object* owner, BodyType type = BodyType::Dynamic, float width = 1.0f, float height = 1.0f);
+    ~PhysicsComponent();  // calls DestroyBody()
+
+    void CreateBody(b2WorldId world, const glm::vec2& position);  // builds the body and its box shape from the cached settings
+    void DestroyBody();  // destroys the held body, if any, and nulls the handle
+
+    void SetVelocity(const glm::vec2& vel);  // no-op without a body
+    glm::vec2 GetVelocity() const;           // value-initialized vec2 without a body
+
+    void SetFriction(float friction);  // cached only; not forwarded to Box2D
+    float GetFriction() const;
+
+    void SetDensity(float density);  // cached only; read by CreateBody()
+    float GetDensity() const;
+
+    void SetFixedRotation(bool fixed);  // cached and forwarded to the live body
+    bool IsFixedRotation() const;
+
+    void SetBodyType(BodyType type);  // cached only; read by CreateBody()
+    BodyType GetBodyType() const;
+
+    void SetSize(float width, float height);  // cached only; read by CreateBody()
+    float GetWidth() const;
+    float GetHeight() const;
+
+    bool HasBody() const;
+
+    void SyncFromPhysics();  // body position -> owner local position
+    void SyncToPhysics();    // owner local position -> body position
+
+    b2BodyId GetBodyId() const { return body; }
+    b2WorldId GetWorldId() const { return world; }
+
+private:
+    b2WorldId world = { 0 };  // set by CreateBody()
+    b2BodyId body = { 0 };    // tested with B2_IS_NULL to detect "no body"
+    BodyType type;
+    float width, height;      // full box extents; halved when the shape is built
+    float friction = 0.3f;
+    float density = 1.0f;
+    bool fixedRotation = false;
+};
diff --git a/src/src/Components/ScriptComponent.cpp b/src/src/Components/ScriptComponent.cpp
index d1c1d0e..4f5bbb0 100644
--- a/src/src/Components/ScriptComponent.cpp
+++ b/src/src/Components/ScriptComponent.cpp
@@ -45,39 +45,61 @@ void ScriptComponent::SetScriptPath(const std::string &path)
 }
 const std::string &ScriptComponent::GetScriptPath() const { return scriptPath; }
 
-// Logging bindings
+// Lua debug hook used for deep profiling. On a call event it opens a profiler section named
+// after the callee; on a return event it closes the innermost section this component opened.
+void ScriptComponent::Hook(lua_State* L, lua_Debug* ar) {
+    if (!g_engineConfig.settings.profile_deep) return;
+
+    // ReloadScript() stores the owning component in the state's extra space.
+    ScriptComponent* self = *reinterpret_cast<ScriptComponent**>(lua_getextraspace(L));
+    if (!self) return;
+
+    if (ar->event == LUA_HOOKCALL) {
+        // Resolve the name only here: return events never use it, and the "S" fields were unread.
+        lua_getinfo(L, "n", ar);
+        std::string label = std::string(ar->name ? ar->name : "unknown") + "()";
+        self->luaCallStack.push_back(label);
+        profiler.BeginDeepSection(label);
+    } else if (ar->event == LUA_HOOKRET && !self->luaCallStack.empty()) {
+        // Returns with an empty stack (profiling enabled mid-call) are ignored to stay balanced.
+        profiler.EndDeepSection();
+        self->luaCallStack.pop_back();
+    }
+}
+
+
+
 static int Lua_LogInfo(lua_State *L)
 {
+    PROFILE_DEEP_SCOPE("Lua_LogInfo");
     Logger::LogInfo("[Lua] %s", lua_tostring(L, 1));
     return 0;
 }
 static int Lua_LogError(lua_State *L)
 {
+    PROFILE_DEEP_SCOPE("Lua_LogError");
     Logger::LogError("[Lua] %s", lua_tostring(L, 1));
     return 0;
 }
 static int Lua_LogDebug(lua_State *L)
 {
+    PROFILE_DEEP_SCOPE("Lua_LogDebug");
     Logger::LogDebug("[Lua] %s", lua_tostring(L, 1));
     return 0;
 }
 static int Lua_DebugLua(lua_State *L)
 {
+    PROFILE_DEEP_SCOPE("Lua_DebugLua");
     luaDebugEnabled = lua_toboolean(L, 1);
     if (old_state != luaDebugEnabled)
-    {
         Logger::LogInfo("[Lua] DebugLua(%s)", luaDebugEnabled ? "true" : "false");
-    }
     old_state = luaDebugEnabled;
-
     return 0;
 }
 
-// Component resolver
 static Component *GetComponentByName(Object *obj, const std::string &type)
 {
-    PROFILE_SCOPE("LUA_GetComponentByName");
-
+    PROFILE_DEEP_SCOPE("GetComponentByName");
     if (type == "SpriteComponent")
         return obj->GetComponent<SpriteComponent>().get();
     if (type == "CameraComponent")
@@ -93,30 +115,21 @@ static Component *GetComponentByName(Object *obj, const std::string &type)
     return nullptr;
 }
 
-// Object:GetComponent("Type")
 static int Lua_Object_GetComponent(lua_State *L)
 {
-    PROFILE_SCOPE("Lua_Object_GetComponent");
-
+    PROFILE_DEEP_SCOPE("Object::GetComponent");
     auto *wrapper = (LuaObjectWrapper *)luaL_checkudata(L, 1, LUA_OBJECT_MT);
     const char *type = luaL_checkstring(L, 2);
-
     Component *comp = GetComponentByName(wrapper->obj, type);
-    if (comp)
-        lua_pushlightuserdata(L, comp);
-    else
-        lua_pushnil(L);
+    if (comp) lua_pushlightuserdata(L, comp); else lua_pushnil(L); // a NULL light userdata is truthy in Lua; nil keeps `if comp then` working
     return 1;
 }
 
-// Object:GetPosition()
 static int Lua_Object_GetPosition(lua_State *L)
 {
-    PROFILE_SCOPE("Lua_Object_GetPosition");
-
+    PROFILE_DEEP_SCOPE("Object::GetPosition");
     auto *wrapper = (LuaObjectWrapper *)luaL_checkudata(L, 1, LUA_OBJECT_MT);
     glm::vec2 pos = wrapper->obj->GetLocalPosition();
-
     LuaVector2 *vec = (LuaVector2 *)lua_newuserdata(L, sizeof(LuaVector2));
     vec->x = pos.x;
     vec->y = pos.y;
@@ -125,22 +138,18 @@ static int Lua_Object_GetPosition(lua_State *L)
     return 1;
 }
 
-// Object:SetPosition(Vector2)
 static int Lua_Object_SetPosition(lua_State *L)
 {
-    PROFILE_SCOPE("Lua_Object_SetPosition");
-
+    PROFILE_DEEP_SCOPE("Object::SetPosition");
     auto *wrapper = (LuaObjectWrapper *)luaL_checkudata(L, 1, LUA_OBJECT_MT);
     auto *vec = (LuaVector2 *)luaL_checkudata(L, 2, LUA_VECTOR2_MT);
     wrapper->obj->SetLocalPosition({vec->x, vec->y});
     return 0;
 }
 
-// __index for Object
 static int Lua_Object_Index(lua_State *L)
 {
-    PROFILE_SCOPE("Lua_Object_Index");
-
+    PROFILE_DEEP_SCOPE("Object::__index");
     const char *key = luaL_checkstring(L, 2);
     lua_getfield(L, lua_upvalueindex(1), key);
     return 1;
@@ -148,35 +157,33 @@ static int Lua_Object_Index(lua_State *L)
 
 void RegisterObjectType(lua_State *L)
 {
-    luaL_newmetatable(L, LUA_OBJECT_MT);
+    PROFILE_DEEP_SCOPE("RegisterObjectType");
 
-    lua_newtable(L); // method table
+    luaL_newmetatable(L, LUA_OBJECT_MT);
+    lua_newtable(L);
     lua_pushcfunction(L, Lua_Object_GetComponent);
     lua_setfield(L, -2, "GetComponent");
     lua_pushcfunction(L, Lua_Object_GetPosition);
     lua_setfield(L, -2, "GetPosition");
     lua_pushcfunction(L, Lua_Object_SetPosition);
     lua_setfield(L, -2, "SetPosition");
-
     lua_pushcclosure(L, Lua_Object_Index, 1);
     lua_setfield(L, -2, "__index");
-
     lua_pop(L, 1);
 }
 
 static void PushObject(lua_State *L, Object *obj)
 {
+    PROFILE_DEEP_SCOPE("PushObject");
     auto *wrapper = (LuaObjectWrapper *)lua_newuserdata(L, sizeof(LuaObjectWrapper));
     wrapper->obj = obj;
     luaL_getmetatable(L, LUA_OBJECT_MT);
     lua_setmetatable(L, -2);
 }
 
-// Engine.GetObjectByTag(name)
 static int Lua_GetObjectByTag(lua_State *L)
 {
-    PROFILE_SCOPE("Lua_GetObjectByTag");
-
+    PROFILE_DEEP_SCOPE("GetObjectByTag");
     const char *name = luaL_checkstring(L, 1);
     for (const auto &obj : objects)
     {
@@ -191,25 +198,20 @@ static int Lua_GetObjectByTag(lua_State *L)
     return 1;
 }
 
-// Vector2(x, y)
 static int Lua_Vector2_New(lua_State *L)
 {
-    PROFILE_SCOPE("Lua_Vector2_New");
-
+    PROFILE_DEEP_SCOPE("Vector2()");
     LuaVector2 *vec = static_cast<LuaVector2 *>(lua_newuserdata(L, sizeof(LuaVector2)));
-
     int nargs = lua_gettop(L);
     vec->x = nargs >= 1 ? (float)lua_tonumber(L, 1) : 0.0f;
     vec->y = nargs >= 2 ? (float)lua_tonumber(L, 2) : 0.0f;
-
     luaL_setmetatable(L, LUA_VECTOR2_MT);
     return 1;
 }
 
 static int Lua_Vector2_Index(lua_State *L)
 {
-    PROFILE_SCOPE("Lua_Vector2_Index");
-
+    PROFILE_DEEP_SCOPE("Vector2::index");
     auto *vec = (LuaVector2 *)luaL_checkudata(L, 1, LUA_VECTOR2_MT);
     const char *key = luaL_checkstring(L, 2);
     if (strcmp(key, "x") == 0)
@@ -220,10 +222,10 @@ static int Lua_Vector2_Index(lua_State *L)
         lua_pushnil(L);
     return 1;
 }
+
 static int Lua_Vector2_NewIndex(lua_State *L)
 {
-    PROFILE_SCOPE("Lua_Vector2_NewIndex");
-
+    PROFILE_DEEP_SCOPE("Vector2::newindex");
     auto *vec = (LuaVector2 *)luaL_checkudata(L, 1, LUA_VECTOR2_MT);
     const char *key = luaL_checkstring(L, 2);
     float value = (float)luaL_checknumber(L, 3);
@@ -236,7 +238,7 @@ static int Lua_Vector2_NewIndex(lua_State *L)
 
 void RegisterVector2Type(lua_State *L)
 {
-
+    PROFILE_DEEP_SCOPE("RegisterVector2Type");
     luaL_newmetatable(L, LUA_VECTOR2_MT);
     lua_pushcfunction(L, Lua_Vector2_Index);
     lua_setfield(L, -2, "__index");
@@ -249,8 +251,9 @@ void RegisterVector2Type(lua_State *L)
 
 void ScriptComponent::RegisterEngineBindings()
 {
-    lua_newtable(L);
+    PROFILE_DEEP_SCOPE("RegisterEngineBindings");
 
+    lua_newtable(L);
     lua_pushcfunction(L, Lua_LogInfo);
     lua_setfield(L, -2, "LogInfo");
     lua_pushcfunction(L, Lua_LogError);
@@ -261,26 +264,28 @@ void ScriptComponent::RegisterEngineBindings()
     lua_setfield(L, -2, "GetObjectByTag");
     lua_pushcfunction(L, Lua_DebugLua);
     lua_setfield(L, -2, "DebugLua");
-
     lua_setglobal(L, "Engine");
 }
 
 void ScriptComponent::ReloadScript()
 {
+    PROFILE_DEEP_SCOPE("ScriptComponent::ReloadScript");
     if (scriptPath.empty())
         return;
-
     if (L)
         lua_close(L);
+
     L = luaL_newstate();
+    *(ScriptComponent**)lua_getextraspace(L) = this;
     luaL_openlibs(L);
+    lua_sethook(L, Hook, LUA_MASKCALL | LUA_MASKRET, 0);
+
 
     RegisterObjectType(L);
     RegisterVector2Type(L);
     RegisterEngineBindings();
 
     Logger::LogVerbose("[Lua] Loading Script from file.");
-
     if (luaL_dofile(L, scriptPath.c_str()))
     {
         Logger::LogError("[Lua] %s", lua_tostring(L, -1));
@@ -289,9 +294,7 @@ void ScriptComponent::ReloadScript()
     }
 
     if (luaDebugEnabled)
-    {
         Logger::LogVerbose("[Lua][call] OnInit()");
-    }
     lua_getglobal(L, "OnInit");
     if (lua_isfunction(L, -1))
     {
@@ -302,14 +305,12 @@ void ScriptComponent::ReloadScript()
         }
     }
     else
-    {
         lua_pop(L, 1);
-    }
 }
 
 void ScriptComponent::OnUpdate(float dt)
 {
-    PROFILE_SCOPE("ScriptComponent::OnUpdate");
+    PROFILE_DEEP_SCOPE("ScriptComponent::OnUpdate");
 
     if (!L)
         return;
@@ -324,9 +325,7 @@ void ScriptComponent::OnUpdate(float dt)
         }
     }
     else
-    {
         lua_pop(L, 1);
-    }
 }
 
 void ScriptComponent::Save(YAML::Emitter &out) const
@@ -354,4 +353,4 @@ void ScriptComponent::Load(const YAML::Node &node)
     {
         RecoverableError("YAML error in ScriptComponent::Load: " + std::string(e.what()), Create::Exceptions::ComponentLoad).Handle();
     }
-}
+}
\ No newline at end of file
diff --git a/src/src/Components/ScriptComponent.h b/src/src/Components/ScriptComponent.h
index 333c476..d97383b 100644
--- a/src/src/Components/ScriptComponent.h
+++ b/src/src/Components/ScriptComponent.h
@@ -1,6 +1,8 @@
 #include <string>
 #include "Component.h"
 #include <yaml-cpp/yaml.h>
+#include <chrono>
+
 
 extern "C" {
 #include <lua.h>
@@ -8,6 +10,12 @@ extern "C" {
 #include <lauxlib.h>
 }
 
+struct LuaCallInfo {  // name and start time of one Lua call; NOTE(review): not referenced in the visible hunks, confirm it is used
+    std::string name;
+    std::chrono::high_resolution_clock::time_point start;
+};
+
+
 class ScriptComponent : public Component {
 public:
     ScriptComponent(Object* owner);
@@ -27,5 +35,10 @@ private:
     std::string scriptPath;
     lua_State* L;
 
+    std::vector<std::string> luaCallStack;
+    static void Hook(lua_State* L, lua_Debug* ar);
+
+
+
     void RegisterEngineBindings();
 };
diff --git a/src/src/Engine.cpp b/src/src/Engine.cpp
index 70c76db..eb083f8 100644
--- a/src/src/Engine.cpp
+++ b/src/src/Engine.cpp
@@ -6,6 +6,7 @@
 #include "components/LightComponent.h"
 #include "components/TilemapComponent.h"
 #include "components/ScriptComponent.h"
+#include "components/PhysicsComponent.h"
 
 #include "utils/FileDialog.h"
 #include "utils/Logging.h"
@@ -15,7 +16,6 @@
 #include "utils/Profiler.h"
 #include "utils/utils.h"
 
-
 #include <GL/glew.h>
 #include <GLFW/glfw3.h>
 #include <imgui.h>
@@ -317,12 +317,15 @@ void ShowColorCorrectionWindow()
 void Engine::Init()
 {
 
-    if (DeleteLatestLogFile()) {
+    if (DeleteLatestLogFile())
+    {
         Logger::LogVerbose("Log file deleted");
-    } else {
+    }
+    else
+    {
         Logger::LogVerbose("Failed to delete log file");
     }
-    
+
     glfwInit();
     glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
     glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
@@ -355,6 +358,8 @@ void Engine::Init()
     m_toDraw.reserve(1024);
     m_scriptUpdates.reserve(256);
     m_collectStack.reserve(1024);
+    m_physicsUpdates.reserve(1024);
+
     Logger::LogInfo("Initialized Engine");
 }
 void DrawInspectorUI(std::shared_ptr<Object> selected)
@@ -564,6 +569,25 @@ void DrawInspectorUI(std::shared_ptr<Object> selected)
         if (ImGui::Button("Remove ScriptComponent"))
             selected->RemoveComponent<ScriptComponent>();
     }
+    if (auto phys = selected->GetComponent<PhysicsComponent>())
+    {
+        ImGui::SeparatorText("Physics Component");
+
+        glm::vec2 vel = phys->GetVelocity();
+        if (ImGui::DragFloat2("Velocity", &vel.x))
+            phys->SetVelocity(vel);
+
+        float friction = phys->GetFriction();
+        if (ImGui::DragFloat("Friction", &friction, 0.01f, 0.0f, 1.0f))
+            phys->SetFriction(friction);
+
+        bool fixed = phys->IsFixedRotation();
+        if (ImGui::Checkbox("Fixed Rotation", &fixed))
+            phys->SetFixedRotation(fixed);
+
+        if (ImGui::Button("Remove PhysicsComponent"))
+            selected->RemoveComponent<PhysicsComponent>();
+    }
 
     if (auto tilemap = selected->GetComponent<TilemapComponent>())
     {
@@ -589,6 +613,7 @@ void Engine::collectObjects(bool playing, const glm::vec2 &camPos, float camZoom
     m_scriptUpdates.clear();
     m_collectStack.clear();
     m_activeCamera = nullptr;
+    m_physicsUpdates.clear();
 
     for (auto &root : objects)
         if (!root->GetParent())
@@ -604,7 +629,6 @@ void Engine::collectObjects(bool playing, const glm::vec2 &camPos, float camZoom
 
         m_toDraw.push_back(obj.get());
 
-        // Handle light components
         if (auto light = obj->GetComponent<LightComponent>())
         {
             glm::vec2 world = obj->GetWorldPosition();
@@ -612,7 +636,6 @@ void Engine::collectObjects(bool playing, const glm::vec2 &camPos, float camZoom
             Renderer::AddLight(screen, light->GetColor(), light->GetIntensity(), light->GetRadius() * camZoom);
         }
 
-        // Handle primary camera
         if (!m_activeCamera)
         {
             if (auto camera = obj->GetComponent<CameraComponent>())
@@ -621,19 +644,23 @@ void Engine::collectObjects(bool playing, const glm::vec2 &camPos, float camZoom
                     m_activeCamera = camera;
             }
         }
+        
 
         if (playing)
         {
             if (auto script = obj->GetComponent<ScriptComponent>())
                 m_scriptUpdates.push_back(script.get());
+
+            if (auto physics = obj->GetComponent<PhysicsComponent>())
+                m_physicsUpdates.push_back(physics.get());
         }
 
-        // Add children to stack
         for (auto &child : obj->GetChildren())
             m_collectStack.push_back(child);
     }
 }
 
+
 void Engine::Run()
 {
     while (!glfwWindowShouldClose(window))
@@ -746,6 +773,8 @@ void Engine::Run()
                     if (g_engineConfig.settings.profile_enabled)
                     {
                         ImGui::Checkbox("Profile Engine", &g_engineConfig.settings.profile_editor);
+                        ImGui::Checkbox("Deep Profileing", &g_engineConfig.settings.profile_deep);
+
                     }
                     ImGui::EndMenu();
                 }
@@ -911,10 +940,6 @@ void Engine::Run()
         m_scriptUpdates.clear();
         profiler.EndEngineSection();
 
-
-        
-
-
         profiler.BeginEngineSection("Draw Editor Grid");
         Renderer::DrawEditorGrid(cameraPos, cameraZoom);
         profiler.EndEngineSection();
@@ -1122,8 +1147,7 @@ void Engine::DrawObjectNode(const std::shared_ptr<Object> &obj)
                 obj->AddChild(dragged);
             }
         }
-    ImGui::EndDragDropTarget();
-
+        ImGui::EndDragDropTarget();
     }
 
     // === Children ===
diff --git a/src/src/Engine.h b/src/src/Engine.h
index f490369..0bd9f04 100644
--- a/src/src/Engine.h
+++ b/src/src/Engine.h
@@ -8,6 +8,7 @@
 
 class Object;
 class ScriptComponent;
+class PhysicsComponent;
 
 class Engine
 {
@@ -37,4 +38,6 @@ private:
     int m_OnUpdateCalls;
 
     std::vector<std::shared_ptr<Object>>   m_collectStack;
+    std::vector<PhysicsComponent*> m_physicsUpdates;
+
 };
diff --git a/src/src/Renderer.cpp b/src/src/Renderer.cpp
index 2ef8230..e10adf6 100644
--- a/src/src/Renderer.cpp
+++ b/src/src/Renderer.cpp
@@ -29,7 +29,6 @@ int Renderer::s_DrawCalls = 0;
 int Renderer::s_LightsCount = 0;
 std::unique_ptr<ColorCorrection> Renderer::s_ColorCorrection = nullptr;
 
-
 std::vector<Light> Renderer::s_Lights;
 
 static Shader tilemapShader;
@@ -78,20 +77,18 @@ void Renderer::Init()
     if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
         Logger::LogError("[Renderer::Init] FBO incomplete");
 
-
     glBindFramebuffer(GL_FRAMEBUFFER, 0);
 
     InitQuad();
 
     // Load lit shader
-    
+
     spriteShader.LoadFromFile("src/assets/shaders/sprite.vert", "src/assets/shaders/sprite.frag");
     // Load unlit shader
     unlitShader.LoadFromFile("src/assets/shaders/unlit.vert", "src/assets/shaders/unlit.frag");
 
     SetColorCorrection(std::make_unique<ColorCorrection>());
 
-
     // Create a 1x1 flat normal map (RGB: 128,128,255)
     unsigned char flatNormal[3] = {128, 128, 255};
     glGenTextures(1, &defaultNormalMap);
@@ -124,7 +121,6 @@ void Renderer::Begin()
     {
         PROFILE_ENGINE_SCOPE("glBindFramebuffer");
         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
-
     }
 
     glViewport(0, 0, width, height);
@@ -138,8 +134,6 @@ void Renderer::Begin()
     ClearLights();
 }
 
-
-
 void Renderer::End()
 {
     glBindFramebuffer(GL_FRAMEBUFFER, 0);
@@ -219,8 +213,14 @@ void Renderer::DrawTilemap(TilemapComponent *tilemap, const glm::vec2 &worldPos,
     glBindVertexArray(0);
 }
 
+
+
+
+
 void Renderer::DrawSprite(SpriteComponent *sprite, const glm::vec2 &pos, float zoom, glm::vec2 &CameraPos)
 {
+    PROFILE_DEEP_SCOPE("DrawSprite");
+
     if (!sprite->HasTexture())
     {
         static bool warned = false;
@@ -234,50 +234,94 @@ void Renderer::DrawSprite(SpriteComponent *sprite, const glm::vec2 &pos, float z
 
     Shader *shader = &unlitShader;
     bool useLighting = false;
-    if (g_engineConfig.settings.lighting_enabled && sprite->GetRenderType() == SpriteComponent::RenderType::Lit)
+
     {
-        shader = &spriteShader;
-        useLighting = true;
+        PROFILE_DEEP_SCOPE("ShaderSelect");
+        if (g_engineConfig.settings.lighting_enabled && sprite->GetRenderType() == SpriteComponent::RenderType::Lit)
+        {
+            shader = &spriteShader;
+            useLighting = true;
+        }
     }
 
-    shader->Use();
+    {
+        PROFILE_DEEP_SCOPE("ShaderUse");
+        shader->Use();
+    }
 
     glm::vec2 size = sprite->GetSize();
     glm::vec2 screenPos = (pos - CameraPos) * zoom + glm::vec2(width, height) * 0.5f - (size * zoom * 0.5f);
     float rotationDeg = sprite->GetOwner()->GetWorldRotation();
 
-    shader->SetVec2("uPos", screenPos);
-    shader->SetVec2("uSize", size * zoom);
-    shader->SetVec2("uScreen", glm::vec2(width, height));
-    shader->SetFloat("uRotation", glm::radians(rotationDeg));
-    shader->SetInt("uTex", 0);
+    {
+        PROFILE_DEEP_SCOPE("SetUniforms");
 
+        {
+            PROFILE_DEEP_SCOPE("Transform");
+            shader->SetVec2("uPos", screenPos);
+            shader->SetVec2("uSize", size * zoom);
+            shader->SetVec2("uScreen", glm::vec2(width, height));
+            shader->SetFloat("uRotation", glm::radians(rotationDeg));
+        }
 
-    glActiveTexture(GL_TEXTURE0);
-    glBindTexture(GL_TEXTURE_2D, sprite->GetTextureID());
+        {
+            PROFILE_DEEP_SCOPE("TexSlot");
+            shader->SetInt("uTex", 0);
+        }
+    }
+
+    {
+        PROFILE_DEEP_SCOPE("BindTex");
+        glActiveTexture(GL_TEXTURE0);
+        glBindTexture(GL_TEXTURE_2D, sprite->GetTextureID());
+    }
 
     if (useLighting)
     {
-        s_ColorCorrection->Upload(*shader);
+        PROFILE_DEEP_SCOPE("Lighting");
+
+        {
+            PROFILE_DEEP_SCOPE("ColorCorrection");
+            s_ColorCorrection->Upload(*shader);
+        }
 
         shader->SetInt("uLightCount", static_cast<int>(s_Lights.size()));
+
         for (size_t i = 0; i < s_Lights.size(); ++i)
         {
+            PROFILE_DEEP_SCOPE(("L" + std::to_string(i)).c_str());
+
             shader->SetVec2(("uLightPos[" + std::to_string(i) + "]").c_str(), s_Lights[i].screenPos);
             shader->SetVec3(("uLightColor[" + std::to_string(i) + "]").c_str(), s_Lights[i].color);
             shader->SetFloat(("uLightIntensity[" + std::to_string(i) + "]").c_str(), s_Lights[i].intensity);
             shader->SetFloat(("uLightRadius[" + std::to_string(i) + "]").c_str(), s_Lights[i].radius);
         }
 
-        shader->SetInt("uNormalMap", 1);
-        glActiveTexture(GL_TEXTURE1);
-        glBindTexture(GL_TEXTURE_2D, sprite->GetNormalMapID() ? sprite->GetNormalMapID() : defaultNormalMap);
+        {
+            PROFILE_DEEP_SCOPE("BindNorm");
+            shader->SetInt("uNormalMap", 1);
+            glActiveTexture(GL_TEXTURE1);
+            glBindTexture(GL_TEXTURE_2D, sprite->GetNormalMapID() ? sprite->GetNormalMapID() : defaultNormalMap);
+        }
     }
 
-    glBindVertexArray(quadVAO);
-    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
-    glBindVertexArray(0);
-    s_DrawCalls++;
+    {
+        PROFILE_DEEP_SCOPE("Draw");
+
+        {
+            PROFILE_DEEP_SCOPE("VAO Bind");
+            glBindVertexArray(quadVAO);
+        }
+
+        {
+            PROFILE_DEEP_SCOPE("GL Draw");
+            glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+        }
+
+        glBindVertexArray(0);
+    }
+
+    ++s_DrawCalls;
 }
 
 int Renderer::GetDrawCallCount()
diff --git a/src/src/utils/EngineConfig.cpp b/src/src/utils/EngineConfig.cpp
index 80bacbe..853487e 100644
--- a/src/src/utils/EngineConfig.cpp
+++ b/src/src/utils/EngineConfig.cpp
@@ -16,13 +16,15 @@ EngineConfig g_engineConfig{
         .profile_editor = false,
         .profile_enabled = true,
         .show_color_correction_window = false,
-        .lighting_enabled = true
-    }
-};
+        .lighting_enabled = true,
+        .profile_deep = false,
+    }};
 
-static std::filesystem::path GetUserSettingsPath() {
+static std::filesystem::path GetUserSettingsPath()
+{
     char userPath[MAX_PATH];
-    if (SUCCEEDED(SHGetFolderPathA(NULL, CSIDL_PROFILE, NULL, 0, userPath))) {
+    if (SUCCEEDED(SHGetFolderPathA(NULL, CSIDL_PROFILE, NULL, 0, userPath)))
+    {
         std::filesystem::path path = std::filesystem::path(userPath) / ".CreateEngine" / ".user_settings.yaml";
         std::filesystem::create_directories(path.parent_path());
         Logger::LogVerbose("Settings Path: %s", path.string().c_str());
@@ -32,17 +34,20 @@ static std::filesystem::path GetUserSettingsPath() {
     return {};
 }
 
-void EngineConfig::SaveToFile() {
+void EngineConfig::SaveToFile()
+{
     Logger::LogVerbose("Saving User Settings");
 
     YAML::Emitter out;
     out << YAML::BeginMap;
 
-    out << YAML::Key << "draw_gizmos"                  << YAML::Value << settings.draw_gizmos;
-    out << YAML::Key << "profile_editor"               << YAML::Value << settings.profile_editor;
-    out << YAML::Key << "profile_enabled"              << YAML::Value << settings.profile_enabled;
+    out << YAML::Key << "draw_gizmos" << YAML::Value << settings.draw_gizmos;
+    out << YAML::Key << "profile_editor" << YAML::Value << settings.profile_editor;
+    out << YAML::Key << "profile_enabled" << YAML::Value << settings.profile_enabled;
     out << YAML::Key << "show_color_correction_window" << YAML::Value << settings.show_color_correction_window;
-    out << YAML::Key << "lighting_enabled"             << YAML::Value << settings.lighting_enabled;
+    out << YAML::Key << "lighting_enabled" << YAML::Value << settings.lighting_enabled;
+    out << YAML::Key << "profile_deep" << YAML::Value << settings.profile_deep;
+
 
     out << YAML::EndMap;
 
@@ -50,11 +55,13 @@ void EngineConfig::SaveToFile() {
     fout << out.c_str();
 }
 
-void EngineConfig::LoadFromFile() {
+void EngineConfig::LoadFromFile()
+{
     Logger::LogVerbose("Loading User Settings");
 
     auto path = GetUserSettingsPath();
-    if (!std::filesystem::exists(path)) return;
+    if (!std::filesystem::exists(path))
+        return;
 
     YAML::Node root = YAML::LoadFile(path.string());
 
@@ -68,4 +75,6 @@ void EngineConfig::LoadFromFile() {
         settings.show_color_correction_window = root["show_color_correction_window"].as<bool>();
     if (root["lighting_enabled"])
         settings.lighting_enabled = root["lighting_enabled"].as<bool>();
+    if (root["profile_deep"])
+        settings.profile_deep = root["profile_deep"].as<bool>();
 }
diff --git a/src/src/utils/EngineConfig.h b/src/src/utils/EngineConfig.h
index e220312..4ac9586 100644
--- a/src/src/utils/EngineConfig.h
+++ b/src/src/utils/EngineConfig.h
@@ -7,6 +7,7 @@ struct UserSettings {
     bool profile_enabled;
     bool show_color_correction_window;
     bool lighting_enabled;
+    bool profile_deep;
 };
 
 struct EngineConfig {
diff --git a/src/src/utils/PhysicsSystem.h b/src/src/utils/PhysicsSystem.h
new file mode 100644
index 0000000..5f5608e
--- /dev/null
+++ b/src/src/utils/PhysicsSystem.h
@@ -0,0 +1,18 @@
+// PhysicsSystem.h - declares PhysicsSystem, a small class that stores a Box2D world id (b2WorldId).
+#pragma once
+
+extern "C" { // box2d.h is Box2D's C API header, so it is pulled in with C linkage
+    #include "box2d.h"
+}
+
+class PhysicsSystem {
+public:
+    void Initialize(); // defined out of line; presumably creates the world - TODO confirm in the .cpp
+    void Shutdown(); // defined out of line; presumably destroys the world - TODO confirm in the .cpp
+    void Step(float deltaTime); // defined out of line; presumably advances the simulation - TODO confirm
+
+    b2WorldId GetWorld() const { return world; } // returns the stored id by value
+
+private:
+    b2WorldId world = b2_nullWorldId; // starts as the null id until a member function assigns it
+};
diff --git a/src/src/utils/Profiler.cpp b/src/src/utils/Profiler.cpp
index 8fd973b..c5f1131 100644
--- a/src/src/utils/Profiler.cpp
+++ b/src/src/utils/Profiler.cpp
@@ -79,6 +79,17 @@ void HierarchicalProfiler::EndEngineSection() {
         EndSection();
 }
 
+// Deep-profiling gate: forwards to BeginSection(name) only while profile_deep is set.
+void HierarchicalProfiler::BeginDeepSection(const std::string& name) {
+    if (g_engineConfig.settings.profile_deep) BeginSection(name);
+}
+
+// Deep-profiling gate: forwards to EndSection() only while profile_deep is set.
+void HierarchicalProfiler::EndDeepSection() {
+    if (g_engineConfig.settings.profile_deep) EndSection();
+}
+
+
 void HierarchicalProfiler::EndFrame() {
     if (!g_engineConfig.settings.profile_enabled)
         return;
diff --git a/src/src/utils/Profiler.h b/src/src/utils/Profiler.h
index b003b55..89aeafb 100644
--- a/src/src/utils/Profiler.h
+++ b/src/src/utils/Profiler.h
@@ -4,8 +4,7 @@
 #include <chrono>
 #include "EngineConfig.h"
 
-struct ProfileNode
-{
+struct ProfileNode {
     std::string name;
     double startMs = 0.0;
     double durationMs = 0.0;
@@ -13,38 +12,34 @@ struct ProfileNode
     double visualDurationMs = 0.0;
     std::vector<ProfileNode> children;
 
-
-    
     ProfileNode() = default;
-    
-
-    ProfileNode(const std::string &n, double start)
-        : name(n), startMs(start)
-    {
+    ProfileNode(const std::string& n, double start) : name(n), startMs(start) {
         children.reserve(8);
     }
 };
 
-class HierarchicalProfiler
-{
+class HierarchicalProfiler {
 public:
     void BeginFrame();
     void EndFrame();
 
-    void BeginSection(const std::string &name);
+    void BeginSection(const std::string& name);
     void EndSection();
 
-    void BeginEngineSection(const std::string &name);
+    void BeginEngineSection(const std::string& name);
     void EndEngineSection();
 
-    const std::vector<ProfileNode> &GetFrames() const;
-    const ProfileNode *GetLatestFrame() const;
+    void BeginDeepSection(const std::string& name);
+    void EndDeepSection();
+
+    const std::vector<ProfileNode>& GetFrames() const;
+    const ProfileNode* GetLatestFrame() const;
 
 private:
     using Clock = std::chrono::high_resolution_clock;
     Clock::time_point startTime;
     std::vector<Clock::time_point> sectionStartTimes;
-    std::vector<ProfileNode *> currentStack;
+    std::vector<ProfileNode*> currentStack;
     ProfileNode root{"Frame", 0.0};
 
     std::vector<ProfileNode> frameHistory;
@@ -55,40 +50,49 @@ private:
 
 extern HierarchicalProfiler profiler;
 
-// RAII Scoped Profiling (Zero-overhead when disabled)
-struct ScopedProfile
-{
-    ScopedProfile(const std::string &name)
-    {
+struct ScopedProfile {
+    ScopedProfile(const std::string& name) {
         if (g_engineConfig.settings.profile_enabled)
             profiler.BeginSection(name);
     }
-    ~ScopedProfile()
-    {
+    ~ScopedProfile() {
         if (g_engineConfig.settings.profile_enabled)
             profiler.EndSection();
     }
 };
 
-struct ScopedEngineProfile
-{
-    ScopedEngineProfile(const std::string &name)
-    {
+struct ScopedEngineProfile {
+    ScopedEngineProfile(const std::string& name) {
         if (g_engineConfig.settings.profile_enabled && g_engineConfig.settings.profile_editor)
             profiler.BeginSection(name);
     }
-    ~ScopedEngineProfile()
-    {
+    ~ScopedEngineProfile() {
         if (g_engineConfig.settings.profile_enabled && g_engineConfig.settings.profile_editor)
             profiler.EndSection();
     }
 };
 
+#ifndef DISABLE_DEEP_PROFILING
+// RAII deep-profiling scope. profile_deep is latched at construction so Begin/End stay paired if the
+// setting is toggled mid-scope; Begin/EndSection are called directly as the Deep wrappers re-read it.
+struct ScopedDeepProfile {
+    bool active = g_engineConfig.settings.profile_deep;
+    ScopedDeepProfile(const std::string& name) {
+        if (active) profiler.BeginSection(name);
+    }
+    ~ScopedDeepProfile() { if (active) profiler.EndSection(); }
+};
+#else
+// Compiled-out variant (DISABLE_DEEP_PROFILING defined): same interface, no work.
+struct ScopedDeepProfile {
+    ScopedDeepProfile(const std::string&) {}
+    ~ScopedDeepProfile() {}
+};
+#endif
+
 #define CONCAT_IMPL(a, b) a##b
 #define CONCAT(a, b) CONCAT_IMPL(a, b)
 
-#define PROFILE_SCOPE(label) \
-    ScopedProfile CONCAT(_scopedProfile_, __LINE__)(label)
-
-#define PROFILE_ENGINE_SCOPE(label) \
-    ScopedEngineProfile CONCAT(_scopedEngineProfile_, __LINE__)(label)
+#define PROFILE_SCOPE(name) ScopedProfile CONCAT(_prof_, __LINE__)(name)
+#define PROFILE_ENGINE_SCOPE(name) ScopedEngineProfile CONCAT(_eprof_, __LINE__)(name)
+#define PROFILE_DEEP_SCOPE(name) ScopedDeepProfile CONCAT(_dprof_, __LINE__)(name)
diff --git a/src/src/utils/Shader.cpp b/src/src/utils/Shader.cpp
index ec8815f..d55fe0b 100644
--- a/src/src/utils/Shader.cpp
+++ b/src/src/utils/Shader.cpp
@@ -29,8 +29,6 @@ std::string Shader::ReadFile(const std::string& path) {
     return buffer.str();
 }
 
-
-
 GLuint Shader::Compile(GLenum type, const std::string& source) {
     Logger::LogVerbose("[Shader] Compiling %s shader...",
                        type == GL_VERTEX_SHADER ? "Vertex" : "Fragment");
@@ -55,7 +53,6 @@ GLuint Shader::Compile(GLenum type, const std::string& source) {
     return shader;
 }
 
-
 bool Shader::LoadFromFile(const std::string& vertexPath, const std::string& fragmentPath) {
     Logger::LogVerbose("[Shader] Loading from file: %s + %s", vertexPath.c_str(), fragmentPath.c_str());
     return LoadFromSource(ReadFile(vertexPath), ReadFile(fragmentPath));
@@ -97,40 +94,14 @@ GLuint Shader::GetID() const {
 }
 
 GLint Shader::GetUniformLocation(const std::string& name) {
-    if (uniformCache.contains(name)) return uniformCache[name];
+    auto it = uniformCache.find(name);
+    if (it != uniformCache.end())
+        return it->second;
+
     GLint loc = glGetUniformLocation(id, name.c_str());
     if (loc == -1)
         Logger::LogWarning("[Shader] Uniform not found: %s", name.c_str());
+
     uniformCache[name] = loc;
     return loc;
 }
-
-// === Uniform Setters ===
-
-void Shader::SetBool(const std::string& name, bool value) {
-    glUniform1i(GetUniformLocation(name), (int)value);
-}
-
-void Shader::SetInt(const std::string& name, int value) {
-    glUniform1i(GetUniformLocation(name), value);
-}
-
-void Shader::SetFloat(const std::string& name, float value) {
-    glUniform1f(GetUniformLocation(name), value);
-}
-
-void Shader::SetVec2(const std::string& name, const glm::vec2& value) {
-    glUniform2fv(GetUniformLocation(name), 1, &value[0]);
-}
-
-void Shader::SetVec3(const std::string& name, const glm::vec3& value) {
-    glUniform3fv(GetUniformLocation(name), 1, &value[0]);
-}
-
-void Shader::SetVec4(const std::string& name, const glm::vec4& value) {
-    glUniform4fv(GetUniformLocation(name), 1, &value[0]);
-}
-
-void Shader::SetMat4(const std::string& name, const glm::mat4& mat) {
-    glUniformMatrix4fv(GetUniformLocation(name), 1, GL_FALSE, &mat[0][0]);
-}
diff --git a/src/src/utils/Shader.h b/src/src/utils/Shader.h
index 3b45fe2..84ba135 100644
--- a/src/src/utils/Shader.h
+++ b/src/src/utils/Shader.h
@@ -1,9 +1,8 @@
 #pragma once
-
 #include <string>
 #include <unordered_map>
 #include <glm/glm.hpp>
-#include <gl/glew.h>
+#include <GL/glew.h>
 
 class Shader {
 public:
@@ -12,25 +11,38 @@ public:
 
     bool LoadFromFile(const std::string& vertexPath, const std::string& fragmentPath);
     bool LoadFromSource(const std::string& vertexSrc, const std::string& fragmentSrc);
-
     void Use() const;
     GLuint GetID() const;
+    void Clear();
 
-    // Setters
-    void SetBool(const std::string& name, bool value);
-    void SetInt(const std::string& name, int value);
-    void SetFloat(const std::string& name, float value);
-    void SetVec2(const std::string& name, const glm::vec2& value);
-    void SetVec3(const std::string& name, const glm::vec3& value);
-    void SetVec4(const std::string& name, const glm::vec4& value);
-    void SetMat4(const std::string& name, const glm::mat4& mat);
+    // === Uniform Setters ===
+    inline void SetBool(const std::string& name, bool value) {
+        glUniform1i(GetUniformLocation(name), static_cast<int>(value));
+    }
+    inline void SetInt(const std::string& name, int value) {
+        glUniform1i(GetUniformLocation(name), value);
+    }
+    inline void SetFloat(const std::string& name, float value) {
+        glUniform1f(GetUniformLocation(name), value);
+    }
+    inline void SetVec2(const std::string& name, const glm::vec2& value) {
+        glUniform2fv(GetUniformLocation(name), 1, &value[0]);
+    }
+    inline void SetVec3(const std::string& name, const glm::vec3& value) {
+        glUniform3fv(GetUniformLocation(name), 1, &value[0]);
+    }
+    inline void SetVec4(const std::string& name, const glm::vec4& value) {
+        glUniform4fv(GetUniformLocation(name), 1, &value[0]);
+    }
+    inline void SetMat4(const std::string& name, const glm::mat4& mat) {
+        glUniformMatrix4fv(GetUniformLocation(name), 1, GL_FALSE, &mat[0][0]);
+    }
 
 private:
     GLuint id = 0;
     std::unordered_map<std::string, GLint> uniformCache;
 
+    GLint GetUniformLocation(const std::string& name);
     std::string ReadFile(const std::string& path);
     GLuint Compile(GLenum type, const std::string& source);
-    GLint GetUniformLocation(const std::string& name);
-    void Clear();
 };
diff --git a/src/vendor/box2d/LICENSE b/src/vendor/box2d/LICENSE
new file mode 100644
index 0000000..e32198e
--- /dev/null
+++ b/src/vendor/box2d/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Erin Catto
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/vendor/box2d/aabb.c b/src/vendor/box2d/aabb.c
new file mode 100644
index 0000000..bfa1e70
--- /dev/null
+++ b/src/vendor/box2d/aabb.c
@@ -0,0 +1,132 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "aabb.h"
+
+#include "box2d/math_functions.h"
+
+#include <float.h>
+
+bool b2IsValidAABB( b2AABB a ) // true when the box has non-negative extents and both corners pass b2IsValidVec2
+{
+	b2Vec2 d = b2Sub( a.upperBound, a.lowerBound );
+	bool valid = d.x >= 0.0f && d.y >= 0.0f; // upperBound must not be below lowerBound on either axis
+	valid = valid && b2IsValidVec2( a.lowerBound ) && b2IsValidVec2( a.upperBound );
+	return valid;
+}
+
+// Cast the segment p1->p2 against AABB a by clipping it per axis (from Real-time Collision Detection, p179).
+b2CastOutput b2AABB_RayCast( b2AABB a, b2Vec2 p1, b2Vec2 p2 )
+{
+	// Radius not handled. Every miss path returns this zero-initialized output.
+	b2CastOutput output = { 0 };
+
+	float tmin = -FLT_MAX;
+	float tmax = FLT_MAX;
+
+	b2Vec2 p = p1;
+	b2Vec2 d = b2Sub( p2, p1 );
+	b2Vec2 absD = b2Abs( d );
+
+	b2Vec2 normal = b2Vec2_zero;
+
+	// x-coordinate: clip [tmin, tmax] against the range lowerBound.x..upperBound.x
+	if ( absD.x < FLT_EPSILON )
+	{
+		// parallel to the x range: miss if the start point lies outside it
+		if ( p.x < a.lowerBound.x || a.upperBound.x < p.x )
+		{
+			return output;
+		}
+	}
+	else
+	{
+		float inv_d = 1.0f / d.x;
+		float t1 = ( a.lowerBound.x - p.x ) * inv_d;
+		float t2 = ( a.upperBound.x - p.x ) * inv_d;
+
+		// Sign of the normal vector: -1 when t1 (lower face) is the entry, +1 after the swap below.
+		float s = -1.0f;
+
+		if ( t1 > t2 )
+		{
+			float tmp = t1;
+			t1 = t2;
+			t2 = tmp;
+			s = 1.0f;
+		}
+
+		// Push the min up; the axis that raises tmin last also sets the reported normal
+		if ( t1 > tmin )
+		{
+			normal.y = 0.0f;
+			normal.x = s;
+			tmin = t1;
+		}
+
+		// Pull the max down
+		tmax = b2MinFloat( tmax, t2 );
+
+		if ( tmin > tmax )
+		{
+			return output;
+		}
+	}
+
+	// y-coordinate: same clipping against the range lowerBound.y..upperBound.y
+	if ( absD.y < FLT_EPSILON )
+	{
+		// parallel to the y range: miss if the start point lies outside it
+		if ( p.y < a.lowerBound.y || a.upperBound.y < p.y )
+		{
+			return output;
+		}
+	}
+	else
+	{
+		float inv_d = 1.0f / d.y;
+		float t1 = ( a.lowerBound.y - p.y ) * inv_d;
+		float t2 = ( a.upperBound.y - p.y ) * inv_d;
+
+		// Sign of the normal vector: -1 when t1 (lower face) is the entry, +1 after the swap below.
+		float s = -1.0f;
+
+		if ( t1 > t2 )
+		{
+			float tmp = t1;
+			t1 = t2;
+			t2 = tmp;
+			s = 1.0f;
+		}
+
+		// Push the min up; the axis that raises tmin last also sets the reported normal
+		if ( t1 > tmin )
+		{
+			normal.x = 0.0f;
+			normal.y = s;
+			tmin = t1;
+		}
+
+		// Pull the max down
+		tmax = b2MinFloat( tmax, t2 );
+
+		if ( tmin > tmax )
+		{
+			return output;
+		}
+	}
+
+	// tmin < 0: the entry point is behind p1, which includes a ray that starts inside the box.
+	// tmin > 1: the entry point is beyond p2 (past the max fraction). Both cases report no hit.
+	if ( tmin < 0.0f || 1.0f < tmin )
+	{
+		return output;
+	}
+
+	// Intersection: fraction is measured along p1->p2 and point is b2Lerp( p1, p2, fraction ).
+	output.fraction = tmin;
+	output.normal = normal;
+	output.point = b2Lerp( p1, p2, tmin );
+	output.hit = true;
+	return output;
+}
diff --git a/src/vendor/box2d/aabb.h b/src/vendor/box2d/aabb.h
new file mode 100644
index 0000000..834c8ea
--- /dev/null
+++ b/src/vendor/box2d/aabb.h
@@ -0,0 +1,56 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "box2d/types.h"
+
+// Ray cast an AABB
+b2CastOutput b2AABB_RayCast( b2AABB a, b2Vec2 p1, b2Vec2 p2 );
+
+// Get the perimeter of an AABB, 2 * (width + height); in 2D this plays the role of surface area
+static inline float b2Perimeter( b2AABB a )
+{
+	float wx = a.upperBound.x - a.lowerBound.x;
+	float wy = a.upperBound.y - a.lowerBound.y;
+	return 2.0f * ( wx + wy );
+}
+
+/// Enlarge a in place so that it contains b; each bound of a only ever moves outward
+/// @return true if any bound of a was moved, i.e. the AABB grew
+static inline bool b2EnlargeAABB( b2AABB* a, b2AABB b )
+{
+	bool changed = false;
+	if ( b.lowerBound.x < a->lowerBound.x )
+	{
+		a->lowerBound.x = b.lowerBound.x;
+		changed = true;
+	}
+
+	if ( b.lowerBound.y < a->lowerBound.y )
+	{
+		a->lowerBound.y = b.lowerBound.y;
+		changed = true;
+	}
+
+	if ( a->upperBound.x < b.upperBound.x )
+	{
+		a->upperBound.x = b.upperBound.x;
+		changed = true;
+	}
+
+	if ( a->upperBound.y < b.upperBound.y )
+	{
+		a->upperBound.y = b.upperBound.y;
+		changed = true;
+	}
+
+	return changed;
+}
+
+/// Do a and b overlap? Boxes that only touch count as overlapping because the separation tests are strict.
+static inline bool b2AABB_Overlaps( b2AABB a, b2AABB b )
+{
+	return !( b.lowerBound.x > a.upperBound.x || b.lowerBound.y > a.upperBound.y || a.lowerBound.x > b.upperBound.x ||
+			  a.lowerBound.y > b.upperBound.y );
+}
diff --git a/src/vendor/box2d/arena_allocator.c b/src/vendor/box2d/arena_allocator.c
new file mode 100644
index 0000000..000f204
--- /dev/null
+++ b/src/vendor/box2d/arena_allocator.c
@@ -0,0 +1,112 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "arena_allocator.h"
+
+#include "array.h"
+#include "core.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+
+B2_ARRAY_SOURCE( b2ArenaEntry, b2ArenaEntry )
+
+b2ArenaAllocator b2CreateArenaAllocator( int capacity )
+{
+	B2_ASSERT( capacity >= 0 );
+	b2ArenaAllocator allocator = { 0 };
+	allocator.capacity = capacity;
+	allocator.data = b2Alloc( capacity );
+	allocator.allocation = 0;
+	allocator.maxAllocation = 0;
+	allocator.index = 0;
+	allocator.entries = b2ArenaEntryArray_Create( 32 );
+	return allocator;
+}
+
+void b2DestroyArenaAllocator( b2ArenaAllocator* allocator )
+{
+	b2ArenaEntryArray_Destroy( &allocator->entries );
+	b2Free( allocator->data, allocator->capacity );
+}
+
+void* b2AllocateArenaItem( b2ArenaAllocator* alloc, int size, const char* name )
+{
+	// round size up to a multiple of 32 bytes so items stay 32 byte aligned to support 256-bit SIMD
+	int size32 = ( ( size - 1 ) | 0x1F ) + 1;
+
+	b2ArenaEntry entry;
+	entry.size = size32;
+	entry.name = name;
+	if ( alloc->index + size32 > alloc->capacity )
+	{
+		// arena is full: fall back to the heap (undesirable); usedMalloc tells b2FreeArenaItem to b2Free it
+		entry.data = b2Alloc( size32 );
+		entry.usedMalloc = true;
+
+		B2_ASSERT( ( (uintptr_t)entry.data & 0x1F ) == 0 );
+	}
+	else
+	{
+		entry.data = alloc->data + alloc->index;
+		entry.usedMalloc = false;
+		alloc->index += size32;
+
+		B2_ASSERT( ( (uintptr_t)entry.data & 0x1F ) == 0 );
+	}
+
+	alloc->allocation += size32; // counts heap fallbacks too, so it can exceed capacity
+	if ( alloc->allocation > alloc->maxAllocation )
+	{
+		alloc->maxAllocation = alloc->allocation;
+	}
+
+	b2ArenaEntryArray_Push( &alloc->entries, entry );
+	return entry.data;
+}
+
+void b2FreeArenaItem( b2ArenaAllocator* alloc, void* mem )
+{
+	int entryCount = alloc->entries.count;
+	B2_ASSERT( entryCount > 0 );
+	b2ArenaEntry* entry = alloc->entries.data + ( entryCount - 1 ); // most recent allocation
+	B2_ASSERT( mem == entry->data ); // items must be freed in reverse order of allocation
+	if ( entry->usedMalloc )
+	{
+		b2Free( mem, entry->size );
+	}
+	else
+	{
+		alloc->index -= entry->size; // give the bytes back to the arena
+	}
+	alloc->allocation -= entry->size;
+	b2ArenaEntryArray_Pop( &alloc->entries );
+}
+
+void b2GrowArena( b2ArenaAllocator* alloc )
+{
+	// Stack must not be in use: the old buffer is freed below without copying its contents
+	B2_ASSERT( alloc->allocation == 0 );
+
+	if ( alloc->maxAllocation > alloc->capacity ) // peak usage did not fit in the arena
+	{
+		b2Free( alloc->data, alloc->capacity );
+		alloc->capacity = alloc->maxAllocation + alloc->maxAllocation / 2; // 1.5x the recorded peak
+		alloc->data = b2Alloc( alloc->capacity );
+	}
+}
+
+int b2GetArenaCapacity( b2ArenaAllocator* alloc )
+{
+	return alloc->capacity;
+}
+
+int b2GetArenaAllocation( b2ArenaAllocator* alloc )
+{
+	return alloc->allocation;
+}
+
+int b2GetMaxArenaAllocation( b2ArenaAllocator* alloc )
+{
+	return alloc->maxAllocation;
+}
diff --git a/src/vendor/box2d/arena_allocator.h b/src/vendor/box2d/arena_allocator.h
new file mode 100644
index 0000000..bc67180
--- /dev/null
+++ b/src/vendor/box2d/arena_allocator.h
@@ -0,0 +1,48 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+
+B2_ARRAY_DECLARE( b2ArenaEntry, b2ArenaEntry );
+
+typedef struct b2ArenaEntry
+{
+	char* data;
+	const char* name;
+	int size;
+	bool usedMalloc;
+} b2ArenaEntry;
+
+// This is a stack-like arena allocator used for fast per step allocations.
+// You must nest allocate/free pairs. The code will B2_ASSERT
+// if you try to interleave multiple allocate/free pairs.
+// This allocator uses the heap if space is insufficient.
+// I could remove the need to free entries individually.
+typedef struct b2ArenaAllocator
+{
+	char* data; // arena buffer
+	int capacity; // size of data in bytes
+	int index; // byte offset into data where the next in-arena item starts
+
+	int allocation; // bytes currently allocated, including heap fallbacks
+	int maxAllocation; // high-water mark of allocation; b2GrowArena sizes the arena from it
+
+	b2ArenaEntryArray entries; // one record per live item, pushed and popped as a stack
+} b2ArenaAllocator;
+
+b2ArenaAllocator b2CreateArenaAllocator( int capacity );
+void b2DestroyArenaAllocator( b2ArenaAllocator* allocator );
+
+void* b2AllocateArenaItem( b2ArenaAllocator* alloc, int size, const char* name );
+void b2FreeArenaItem( b2ArenaAllocator* alloc, void* mem );
+
+// Grow the arena based on usage
+void b2GrowArena( b2ArenaAllocator* alloc );
+
+int b2GetArenaCapacity( b2ArenaAllocator* alloc );
+int b2GetArenaAllocation( b2ArenaAllocator* alloc );
+int b2GetMaxArenaAllocation( b2ArenaAllocator* alloc );
+
+B2_ARRAY_INLINE( b2ArenaEntry, b2ArenaEntry )
diff --git a/src/vendor/box2d/array.c b/src/vendor/box2d/array.c
new file mode 100644
index 0000000..b9c0c1a
--- /dev/null
+++ b/src/vendor/box2d/array.c
@@ -0,0 +1,8 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "array.h"
+
+#include <stddef.h>
+
+B2_ARRAY_SOURCE( int, b2Int )
diff --git a/src/vendor/box2d/array.h b/src/vendor/box2d/array.h
new file mode 100644
index 0000000..6744fab
--- /dev/null
+++ b/src/vendor/box2d/array.h
@@ -0,0 +1,179 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "core.h"
+
+// Macro generated functions for dynamic arrays
+// Pros
+// - type safe
+// - array data debuggable (visible count and capacity)
+// - bounds checking
+// - forward declaration
+// - simple implementation
+// - generates functions (like C++ templates)
+// - function calls introduce sequence points: https://en.wikipedia.org/wiki/Sequence_point
+// - avoids stretchy buffer dropped pointer update bugs
+// Cons
+// - cannot step through the macro-generated functions in a debugger
+// - breaks code navigation
+
+// todo_erin consider code-gen: https://github.com/IbrahimHindawi/haikal
+
+// Declares PREFIX##Array and its Create/Reserve/Destroy functions. T is only used as `struct T*`, so it may be incomplete
+#define B2_ARRAY_DECLARE( T, PREFIX )                                                                                            \
+	typedef struct                                                                                                               \
+	{                                                                                                                            \
+		struct T* data;                                                                                                          \
+		int count;                                                                                                               \
+		int capacity;                                                                                                            \
+	} PREFIX##Array;                                                                                                             \
+	PREFIX##Array PREFIX##Array_Create( int capacity );                                                                          \
+	void PREFIX##Array_Reserve( PREFIX##Array* a, int newCapacity );                                                             \
+	void PREFIX##Array_Destroy( PREFIX##Array* a )
+
+#define B2_DECLARE_ARRAY_NATIVE( T, PREFIX )                                                                                     \
+	typedef struct                                                                                                               \
+	{                                                                                                                            \
+		T* data;                                                                                                                 \
+		int count;                                                                                                               \
+		int capacity;                                                                                                            \
+	} PREFIX##Array;                                                                                                             \
+	/* Variant for T that is not a struct tag (e.g. int). Create takes an initial capacity; a zeroed array is also valid */      \
+	PREFIX##Array PREFIX##Array_Create( int capacity );                                                                          \
+	void PREFIX##Array_Reserve( PREFIX##Array* a, int newCapacity );                                                             \
+	void PREFIX##Array_Destroy( PREFIX##Array* a )
+
+// Inline array functions. These access elements by value, so the type T must be fully defined before this macro is used
+#define B2_ARRAY_INLINE( T, PREFIX )                                                                                             \
+	/* Resize: reserve space for count elements, then set count (does not initialize new elements) */                            \
+	static inline void PREFIX##Array_Resize( PREFIX##Array* a, int count )                                                       \
+	{                                                                                                                            \
+		PREFIX##Array_Reserve( a, count );                                                                                       \
+		a->count = count;                                                                                                        \
+	}                                                                                                                            \
+	/* Get: bounds-asserted pointer to element index */                                                                          \
+	static inline T* PREFIX##Array_Get( PREFIX##Array* a, int index )                                                            \
+	{                                                                                                                            \
+		B2_ASSERT( 0 <= index && index < a->count );                                                                             \
+		return a->data + index;                                                                                                  \
+	}                                                                                                                            \
+	/* Add: append one slot (growing 1.5x, minimum 2, when full) and return a pointer to it */                                   \
+	static inline T* PREFIX##Array_Add( PREFIX##Array* a )                                                                       \
+	{                                                                                                                            \
+		if ( a->count == a->capacity )                                                                                           \
+		{                                                                                                                        \
+			int newCapacity = a->capacity < 2 ? 2 : a->capacity + ( a->capacity >> 1 );                                          \
+			PREFIX##Array_Reserve( a, newCapacity );                                                                             \
+		}                                                                                                                        \
+		a->count += 1;                                                                                                           \
+		return a->data + ( a->count - 1 );                                                                                       \
+	}                                                                                                                            \
+	/* Push: append a copy of value, growing the same way as Add */                                                              \
+	static inline void PREFIX##Array_Push( PREFIX##Array* a, T value )                                                           \
+	{                                                                                                                            \
+		if ( a->count == a->capacity )                                                                                           \
+		{                                                                                                                        \
+			int newCapacity = a->capacity < 2 ? 2 : a->capacity + ( a->capacity >> 1 );                                          \
+			PREFIX##Array_Reserve( a, newCapacity );                                                                             \
+		}                                                                                                                        \
+		a->data[a->count] = value;                                                                                               \
+		a->count += 1;                                                                                                           \
+	}                                                                                                                            \
+	/* Set: bounds-asserted overwrite of element index */                                                                        \
+	static inline void PREFIX##Array_Set( PREFIX##Array* a, int index, T value )                                                 \
+	{                                                                                                                            \
+		B2_ASSERT( 0 <= index && index < a->count );                                                                             \
+		a->data[index] = value;                                                                                                  \
+	}                                                                                                                            \
+	/* RemoveSwap: fill the hole with the last element; returns that element's old index or B2_NULL_INDEX */                     \
+	static inline int PREFIX##Array_RemoveSwap( PREFIX##Array* a, int index )                                                    \
+	{                                                                                                                            \
+		B2_ASSERT( 0 <= index && index < a->count );                                                                             \
+		int movedIndex = B2_NULL_INDEX;                                                                                          \
+		if ( index != a->count - 1 )                                                                                             \
+		{                                                                                                                        \
+			movedIndex = a->count - 1;                                                                                           \
+			a->data[index] = a->data[movedIndex];                                                                                \
+		}                                                                                                                        \
+		a->count -= 1;                                                                                                           \
+		return movedIndex;                                                                                                       \
+	}                                                                                                                            \
+	/* Pop: remove and return the last element (asserts the array is not empty) */                                               \
+	static inline T PREFIX##Array_Pop( PREFIX##Array* a )                                                                        \
+	{                                                                                                                            \
+		B2_ASSERT( a->count > 0 );                                                                                               \
+		T value = a->data[a->count - 1];                                                                                         \
+		a->count -= 1;                                                                                                           \
+		return value;                                                                                                            \
+	}                                                                                                                            \
+	/* Clear: drop all elements but keep the allocation */                                                                       \
+	static inline void PREFIX##Array_Clear( PREFIX##Array* a )                                                                   \
+	{                                                                                                                            \
+		a->count = 0;                                                                                                            \
+	}                                                                                                                            \
+	/* ByteCount: bytes reserved (capacity, not count) */                                                                        \
+	static inline int PREFIX##Array_ByteCount( PREFIX##Array* a )                                                                \
+	{                                                                                                                            \
+		return (int)( a->capacity * sizeof( T ) );                                                                               \
+	}
+
+// Out-of-line array functions (Create/Reserve/Destroy); instantiate once per array type in a source file where T is known
+#define B2_ARRAY_SOURCE( T, PREFIX )                                                                                             \
+	/* Create: allocate storage only when capacity > 0 */                                                                        \
+	PREFIX##Array PREFIX##Array_Create( int capacity )                                                                           \
+	{                                                                                                                            \
+		PREFIX##Array a = { 0 };                                                                                                 \
+		if ( capacity > 0 )                                                                                                      \
+		{                                                                                                                        \
+			a.data = b2Alloc( capacity * sizeof( T ) );                                                                          \
+			a.capacity = capacity;                                                                                               \
+		}                                                                                                                        \
+		return a;                                                                                                                \
+	}                                                                                                                            \
+	/* Reserve: grow capacity to newCapacity; never shrinks */                                                                   \
+	void PREFIX##Array_Reserve( PREFIX##Array* a, int newCapacity )                                                              \
+	{                                                                                                                            \
+		if ( newCapacity <= a->capacity )                                                                                        \
+		{                                                                                                                        \
+			return;                                                                                                              \
+		}                                                                                                                        \
+		a->data = b2GrowAlloc( a->data, a->capacity * sizeof( T ), newCapacity * sizeof( T ) );                                  \
+		a->capacity = newCapacity;                                                                                               \
+	}                                                                                                                            \
+	/* Destroy: free the storage and reset to the empty state */                                                                 \
+	void PREFIX##Array_Destroy( PREFIX##Array* a )                                                                               \
+	{                                                                                                                            \
+		b2Free( a->data, a->capacity * sizeof( T ) );                                                                            \
+		a->data = NULL;                                                                                                          \
+		a->count = 0;                                                                                                            \
+		a->capacity = 0;                                                                                                         \
+	}
+
+B2_DECLARE_ARRAY_NATIVE( int, b2Int ); // b2IntArray: dynamic array of plain int
+B2_ARRAY_INLINE( int, b2Int ) // inline b2IntArray_* helpers; the out-of-line functions come from B2_ARRAY_SOURCE( int, b2Int )
+
+// Declare all the arrays. Only struct tags are needed here; each array is instantiated with B2_ARRAY_SOURCE where T is known
+B2_ARRAY_DECLARE( b2Body, b2Body );
+B2_ARRAY_DECLARE( b2BodyMoveEvent, b2BodyMoveEvent );
+B2_ARRAY_DECLARE( b2BodySim, b2BodySim );
+B2_ARRAY_DECLARE( b2BodyState, b2BodyState );
+B2_ARRAY_DECLARE( b2ChainShape, b2ChainShape );
+B2_ARRAY_DECLARE( b2Contact, b2Contact );
+B2_ARRAY_DECLARE( b2ContactBeginTouchEvent, b2ContactBeginTouchEvent );
+B2_ARRAY_DECLARE( b2ContactEndTouchEvent, b2ContactEndTouchEvent );
+B2_ARRAY_DECLARE( b2ContactHitEvent, b2ContactHitEvent );
+B2_ARRAY_DECLARE( b2ContactSim, b2ContactSim );
+B2_ARRAY_DECLARE( b2Island, b2Island );
+B2_ARRAY_DECLARE( b2IslandSim, b2IslandSim );
+B2_ARRAY_DECLARE( b2Joint, b2Joint );
+B2_ARRAY_DECLARE( b2JointSim, b2JointSim );
+B2_ARRAY_DECLARE( b2Sensor, b2Sensor );
+B2_ARRAY_DECLARE( b2SensorBeginTouchEvent, b2SensorBeginTouchEvent );
+B2_ARRAY_DECLARE( b2SensorEndTouchEvent, b2SensorEndTouchEvent );
+B2_ARRAY_DECLARE( b2SensorTaskContext, b2SensorTaskContext );
+B2_ARRAY_DECLARE( b2Shape, b2Shape );
+B2_ARRAY_DECLARE( b2ShapeRef, b2ShapeRef );
+B2_ARRAY_DECLARE( b2SolverSet, b2SolverSet );
+B2_ARRAY_DECLARE( b2TaskContext, b2TaskContext );
diff --git a/src/vendor/box2d/atomic.h b/src/vendor/box2d/atomic.h
new file mode 100644
index 0000000..139a919
--- /dev/null
+++ b/src/vendor/box2d/atomic.h
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "core.h"
+
+#include <stdint.h>
+
+#if defined( _MSC_VER )
+#include <intrin.h>
+#endif
+
+static inline void b2AtomicStoreInt( b2AtomicInt* a, int value ) // atomically store value into a->value
+{
+#if defined( _MSC_VER )
+	(void)_InterlockedExchange( (long*)&a->value, value ); // the previous value returned by the exchange is discarded
+#elif defined( __GNUC__ ) || defined( __clang__ )
+	__atomic_store_n( &a->value, value, __ATOMIC_SEQ_CST );
+#else
+#error "Unsupported platform"
+#endif
+}
+
+static inline int b2AtomicLoadInt( b2AtomicInt* a ) // atomically read a->value
+{
+#if defined( _MSC_VER )
+	return _InterlockedOr( (long*)&a->value, 0 ); // OR with 0 leaves the value unchanged and returns it
+#elif defined( __GNUC__ ) || defined( __clang__ )
+	return __atomic_load_n( &a->value, __ATOMIC_SEQ_CST );
+#else
+#error "Unsupported platform"
+#endif
+}
+
+static inline int b2AtomicFetchAddInt( b2AtomicInt* a, int increment ) // atomically add increment; returns the value before the add
+{
+#if defined( _MSC_VER )
+	return _InterlockedExchangeAdd( (long*)&a->value, (long)increment );
+#elif defined( __GNUC__ ) || defined( __clang__ )
+	return __atomic_fetch_add( &a->value, increment, __ATOMIC_SEQ_CST );
+#else
+#error "Unsupported platform"
+#endif
+}
+
+static inline bool b2AtomicCompareExchangeInt( b2AtomicInt* a, int expected, int desired ) // true if a->value was expected and is now desired
+{
+#if defined( _MSC_VER )
+	return _InterlockedCompareExchange( (long*)&a->value, (long)desired, (long)expected ) == expected; // intrinsic returns the initial value
+#elif defined( __GNUC__ ) || defined( __clang__ )
+	// expected is a by-value parameter, so the value the builtin writes back to it on failure is discarded
+	return __atomic_compare_exchange_n( &a->value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST );
+#else
+#error "Unsupported platform"
+#endif
+}
+
+static inline void b2AtomicStoreU32( b2AtomicU32* a, uint32_t value ) // atomically store value into a->value
+{
+#if defined( _MSC_VER )
+	(void)_InterlockedExchange( (long*)&a->value, value ); // the intrinsic operates on long, hence the pointer cast
+#elif defined( __GNUC__ ) || defined( __clang__ )
+	__atomic_store_n( &a->value, value, __ATOMIC_SEQ_CST );
+#else
+#error "Unsupported platform"
+#endif
+}
+
+static inline uint32_t b2AtomicLoadU32( b2AtomicU32* a ) // atomically read a->value
+{
+#if defined( _MSC_VER )
+	return (uint32_t)_InterlockedOr( (long*)&a->value, 0 ); // OR with 0 leaves the value unchanged and returns it
+#elif defined( __GNUC__ ) || defined( __clang__ )
+	return __atomic_load_n( &a->value, __ATOMIC_SEQ_CST );
+#else
+#error "Unsupported platform"
+#endif
+}
diff --git a/src/vendor/box2d/base.h b/src/vendor/box2d/base.h
new file mode 100644
index 0000000..2af347a
--- /dev/null
+++ b/src/vendor/box2d/base.h
@@ -0,0 +1,131 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <stdint.h>
+
+// clang-format off
+// 
+// Shared library macros: BOX2D_EXPORT decorates the public API for export/import and is empty otherwise
+#if defined( _MSC_VER ) && defined( box2d_EXPORTS )
+	// build the Windows DLL
+	#define BOX2D_EXPORT __declspec( dllexport )
+#elif defined( _MSC_VER ) && defined( BOX2D_DLL )
+	// using the Windows DLL
+	#define BOX2D_EXPORT __declspec( dllimport )
+#elif defined( box2d_EXPORTS )
+	// non-MSVC with box2d_EXPORTS defined: give the symbols default visibility
+	#define BOX2D_EXPORT __attribute__( ( visibility( "default" ) ) )
+#else
+	// static library, or any build where none of the macros above are defined
+	#define BOX2D_EXPORT
+#endif
+
+// Language-dependent macros: C linkage for the API, inline form, literal syntax and zero initializer
+#ifdef __cplusplus // compiled as C++
+	#define B2_API extern "C" BOX2D_EXPORT
+	#define B2_INLINE inline
+	#define B2_LITERAL(T) T
+	#define B2_ZERO_INIT {}
+#else // compiled as C
+	#define B2_API BOX2D_EXPORT
+	#define B2_INLINE static inline
+	/// Used for C literals like (b2Vec2){1.0f, 2.0f} where C++ requires b2Vec2{1.0f, 2.0f}
+	#define B2_LITERAL(T) (T)
+	#define B2_ZERO_INIT {0}
+#endif
+// clang-format on
+
+/**
+ * @defgroup base Base
+ * Base functionality
+ * @{
+ */
+
+/// Prototype for user allocation function, installed with b2SetAllocator
+/// @param size the allocation size in bytes
+/// @param alignment the required alignment, guaranteed to be a power of 2
+typedef void* b2AllocFcn( unsigned int size, int alignment );
+
+/// Prototype for user free function, installed with b2SetAllocator
+/// @param mem the memory previously allocated through `b2AllocFcn`
+typedef void b2FreeFcn( void* mem );
+
+/// Prototype for the user assert callback, installed with b2SetAssertFcn. Return 0 to skip the debugger break.
+typedef int b2AssertFcn( const char* condition, const char* fileName, int lineNumber );
+
+/// This allows the user to override the allocation functions. These should be
+/// set during application startup.
+B2_API void b2SetAllocator( b2AllocFcn* allocFcn, b2FreeFcn* freeFcn );
+
+/// @return the total bytes allocated by Box2D
+B2_API int b2GetByteCount( void );
+
+/// Override the default assert callback
+/// @param assertFcn a non-null assert callback
+B2_API void b2SetAssertFcn( b2AssertFcn* assertFcn );
+
+/// Version numbering scheme (major.minor.revision).
+/// See https://semver.org/
+typedef struct b2Version
+{
+	/// Significant changes
+	int major;
+
+	/// Incremental changes
+	int minor;
+
+	/// Bug fixes
+	int revision;
+} b2Version;
+
+/// Get the current version of Box2D
+B2_API b2Version b2GetVersion( void );
+
+/**@}*/
+
+//! @cond
+
+// B2_BREAKPOINT stops execution at the call site; see https://github.com/scottt/debugbreak
+#if defined( _MSC_VER )
+#define B2_BREAKPOINT __debugbreak()
+#elif defined( __GNUC__ ) || defined( __clang__ )
+#define B2_BREAKPOINT __builtin_trap()
+#else
+// Unknown compiler: fall back to a standard assert that always fails
+#include <assert.h>
+#define B2_BREAKPOINT assert( 0 )
+#endif
+
+#if !defined( NDEBUG ) || defined( B2_ENABLE_ASSERT ) // asserts on: debug builds, or forced with B2_ENABLE_ASSERT
+B2_API int b2InternalAssertFcn( const char* condition, const char* fileName, int lineNumber );
+#define B2_ASSERT( condition )                                                                                                   \
+	do                                                                                                                           \
+	{                                                                                                                            \
+		if ( !( condition ) && b2InternalAssertFcn( #condition, __FILE__, (int)__LINE__ ) )                                          \
+			B2_BREAKPOINT;                                                                                                       \
+	}                                                                                                                            \
+	while ( 0 )
+#else // asserts off: the condition is not evaluated, so it must not carry required side effects
+#define B2_ASSERT( ... ) ( (void)0 )
+#endif
+
+/// Get the absolute number of system ticks. The value is platform specific.
+B2_API uint64_t b2GetTicks( void );
+
+/// Get the milliseconds passed from an initial tick value (as returned by b2GetTicks).
+B2_API float b2GetMilliseconds( uint64_t ticks );
+
+/// Get the milliseconds passed from an initial tick value. Resets the passed in
+/// value to the current tick value.
+B2_API float b2GetMillisecondsAndReset( uint64_t* ticks );
+
+/// Yield to be used in a busy loop.
+B2_API void b2Yield( void );
+
+/// Simple djb2 hash function for determinism testing. NOTE(review): B2_HASH_INIT is presumably the initial `hash` argument — confirm
+#define B2_HASH_INIT 5381
+B2_API uint32_t b2Hash( uint32_t hash, const uint8_t* data, int count );
+
+//! @endcond
diff --git a/src/vendor/box2d/bitset.c b/src/vendor/box2d/bitset.c
new file mode 100644
index 0000000..a30464c
--- /dev/null
+++ b/src/vendor/box2d/bitset.c
@@ -0,0 +1,67 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "bitset.h"
+
+#include <string.h>
+
+// Create a zeroed bit set with room for bitCapacity bits (rounded up to whole 64-bit blocks). blockCount starts at 0.
+b2BitSet b2CreateBitSet( uint32_t bitCapacity )
+{
+	b2BitSet result = { 0 };
+	result.blockCapacity = ( bitCapacity + sizeof( uint64_t ) * 8 - 1 ) / ( sizeof( uint64_t ) * 8 );
+	size_t byteCount = result.blockCapacity * sizeof( uint64_t );
+	result.bits = b2Alloc( byteCount );
+	memset( result.bits, 0, byteCount );
+	return result;
+}
+
+// Free the block storage and reset every field of the bit set to zero/NULL.
+void b2DestroyBitSet( b2BitSet* bitSet )
+{
+	size_t byteCount = bitSet->blockCapacity * sizeof( uint64_t );
+	b2Free( bitSet->bits, byteCount );
+	*bitSet = ( b2BitSet ){ .bits = NULL, .blockCapacity = 0, .blockCount = 0 };
+}
+
+// Size the bit set for bitCount bits and clear them all. When the current capacity is too small the storage
+// is destroyed and re-created with 50% headroom; previous contents are never preserved.
+void b2SetBitCountAndClear( b2BitSet* bitSet, uint32_t bitCount )
+{
+	uint32_t requiredBlocks = ( bitCount + sizeof( uint64_t ) * 8 - 1 ) / ( sizeof( uint64_t ) * 8 );
+	if ( requiredBlocks > bitSet->blockCapacity )
+	{
+		b2DestroyBitSet( bitSet );
+		*bitSet = b2CreateBitSet( bitCount + ( bitCount >> 1 ) );
+	}
+	bitSet->blockCount = requiredBlocks;
+	memset( bitSet->bits, 0, requiredBlocks * sizeof( uint64_t ) );
+}
+
+// Raise the block count to blockCount, keeping existing bits. Reallocates with 50% headroom when capacity is exceeded.
+void b2GrowBitSet( b2BitSet* bitSet, uint32_t blockCount )
+{
+	B2_ASSERT( blockCount > bitSet->blockCount );
+	if ( blockCount > bitSet->blockCapacity )
+	{
+		size_t oldBytes = bitSet->blockCapacity * sizeof( uint64_t );
+		bitSet->blockCapacity = blockCount + blockCount / 2;
+		size_t newBytes = bitSet->blockCapacity * sizeof( uint64_t );
+		uint64_t* grown = memset( b2Alloc( newBytes ), 0, newBytes );
+		B2_ASSERT( bitSet->bits != NULL );
+		memcpy( grown, bitSet->bits, oldBytes );
+		b2Free( bitSet->bits, oldBytes );
+		bitSet->bits = grown;
+	}
+	bitSet->blockCount = blockCount;
+}
+
+// In-place union: setA |= setB, one 64-bit block at a time. Both sets must have the same block count.
+void b2InPlaceUnion( b2BitSet* B2_RESTRICT setA, const b2BitSet* B2_RESTRICT setB )
+{
+	B2_ASSERT( setA->blockCount == setB->blockCount );
+	for ( uint32_t block = 0, end = setA->blockCount; block < end; ++block )
+	{
+		setA->bits[block] |= setB->bits[block];
+	}
+}
diff --git a/src/vendor/box2d/bitset.h b/src/vendor/box2d/bitset.h
new file mode 100644
index 0000000..848b486
--- /dev/null
+++ b/src/vendor/box2d/bitset.h
@@ -0,0 +1,65 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "core.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+// Bit set provides fast operations on large arrays of bits, stored as 64-bit blocks.
+typedef struct b2BitSet
+{
+	uint64_t* bits; // block storage; bit i lives in block i / 64 at position i % 64
+	uint32_t blockCapacity; // number of allocated blocks
+	uint32_t blockCount; // number of blocks in use; b2GetBit reports bits beyond it as unset
+} b2BitSet;
+
+b2BitSet b2CreateBitSet( uint32_t bitCapacity ); // capacity is in bits; the block count starts at zero
+void b2DestroyBitSet( b2BitSet* bitSet );
+void b2SetBitCountAndClear( b2BitSet* bitSet, uint32_t bitCount ); // sizes the set for bitCount bits and clears them all
+void b2InPlaceUnion( b2BitSet* setA, const b2BitSet* setB ); // setA |= setB; block counts must match
+void b2GrowBitSet( b2BitSet* bitSet, uint32_t blockCount ); // note: takes a block count, not a bit count
+
+static inline void b2SetBit( b2BitSet* bitSet, uint32_t bitIndex ) // set one bit; never grows the set
+{
+	const uint32_t blockIndex = bitIndex >> 6;
+	B2_ASSERT( blockIndex < bitSet->blockCount );
+	bitSet->bits[blockIndex] |= (uint64_t)1 << ( bitIndex & 63 );
+}
+
+static inline void b2SetBitGrow( b2BitSet* bitSet, uint32_t bitIndex ) // set one bit, growing the set when needed
+{
+	const uint32_t block = bitIndex >> 6;
+	if ( bitSet->blockCount <= block )
+	{
+		b2GrowBitSet( bitSet, block + 1 ); // grow so that this block is within the block count
+	}
+	bitSet->bits[block] |= (uint64_t)1 << ( bitIndex & 63 );
+}
+
+// Clear one bit. A bit whose block lies beyond the current block count is left alone (nothing to clear).
+static inline void b2ClearBit( b2BitSet* bitSet, uint32_t bitIndex )
+{
+	const uint32_t block = bitIndex >> 6;
+	if ( block < bitSet->blockCount )
+	{
+		bitSet->bits[block] &= ~( (uint64_t)1 << ( bitIndex & 63 ) );
+	}
+}
+
+// Test one bit.
+// A bit whose block lies beyond the current block count reads as false.
+static inline bool b2GetBit( const b2BitSet* bitSet, uint32_t bitIndex )
+{
+	const uint32_t block = bitIndex >> 6;
+	const uint64_t mask = (uint64_t)1 << ( bitIndex & 63 );
+
+	return block < bitSet->blockCount && ( bitSet->bits[block] & mask ) != 0;
+}
+
+static inline int b2GetBitSetBytes( b2BitSet* bitSet ) // bytes of allocated block storage (capacity, not count)
+{
+	return (int)( sizeof( uint64_t ) * bitSet->blockCapacity );
+}
diff --git a/src/vendor/box2d/body.c b/src/vendor/box2d/body.c
new file mode 100644
index 0000000..0703211
--- /dev/null
+++ b/src/vendor/box2d/body.c
@@ -0,0 +1,1878 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "body.h"
+
+#include "aabb.h"
+#include "array.h"
+#include "contact.h"
+#include "core.h"
+#include "id_pool.h"
+#include "island.h"
+#include "joint.h"
+#include "shape.h"
+#include "solver_set.h"
+#include "world.h"
+
+// needed for dll export
+#include "sensor.h"
+
+#include "box2d/box2d.h"
+#include "box2d/id.h"
+
+#include <string.h>
+
+// Instantiate the array functions (b2BodyArray_*, b2BodySimArray_*, b2BodyStateArray_*) used in this file
+B2_ARRAY_SOURCE( b2Body, b2Body )
+B2_ARRAY_SOURCE( b2BodySim, b2BodySim )
+B2_ARRAY_SOURCE( b2BodyState, b2BodyState )
+
+// Resolve a public body id to the internal body. The id is only checked by assertion.
+b2Body* b2GetBodyFullId( b2World* world, b2BodyId bodyId )
+{
+	B2_ASSERT( b2Body_IsValid( bodyId ) );
+	// index1 is one-based so that zero can represent null; convert it to the array slot
+	int slot = bodyId.index1 - 1;
+	return b2BodyArray_Get( &world->bodies, slot );
+}
+
+b2Transform b2GetBodyTransformQuick( b2World* world, b2Body* body )
+{
+	// The transform is stored on the body sim, inside the solver set that currently holds the body.
+	b2SolverSet* owner = b2SolverSetArray_Get( &world->solverSets, body->setIndex );
+	return b2BodySimArray_Get( &owner->bodySims, body->localIndex )->transform;
+}
+
+b2Transform b2GetBodyTransform( b2World* world, int bodyId )
+{
+	// bodyId is the raw index into world->bodies, not a public b2BodyId
+	return b2GetBodyTransformQuick( world, b2BodyArray_Get( &world->bodies, bodyId ) );
+}
+
+// Build the public b2BodyId for the body stored at raw index bodyId.
+b2BodyId b2MakeBodyId( b2World* world, int bodyId )
+{
+	const b2Body* source = b2BodyArray_Get( &world->bodies, bodyId );
+	return (b2BodyId){ bodyId + 1, world->worldId, source->generation }; // the public index is one-based
+}
+
+b2BodySim* b2GetBodySim( b2World* world, b2Body* body )
+{
+	// Find the solver set that holds the body, then the body's slot inside that set.
+	b2SolverSet* owner = b2SolverSetArray_Get( &world->solverSets, body->setIndex );
+	return b2BodySimArray_Get( &owner->bodySims, body->localIndex );
+}
+
+b2BodyState* b2GetBodyState( b2World* world, b2Body* body )
+{
+	// A body state is only returned for bodies in the awake set; all others yield NULL.
+	if ( body->setIndex != b2_awakeSet )
+	{
+		return NULL;
+	}
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	return b2BodyStateArray_Get( &awakeSet->bodyStates, body->localIndex );
+}
+
+static void b2CreateIslandForBody( b2World* world, int setIndex, b2Body* body )
+{
+	B2_ASSERT( body->islandId == B2_NULL_INDEX );
+	B2_ASSERT( body->islandPrev == B2_NULL_INDEX );
+	B2_ASSERT( body->islandNext == B2_NULL_INDEX );
+	B2_ASSERT( setIndex != b2_disabledSet );
+
+	// A fresh island whose body list holds just this body as both head and tail.
+	b2Island* created = b2CreateIsland( world, setIndex );
+	created->headBody = body->id;
+	created->tailBody = body->id;
+	created->bodyCount = 1;
+	body->islandId = created->islandId;
+}
+
+// Unlink a body from its island's doubly linked body list and destroy the island if it becomes empty.
+static void b2RemoveBodyFromIsland( b2World* world, b2Body* body )
+{
+	if ( body->islandId == B2_NULL_INDEX )
+	{
+		B2_ASSERT( body->islandPrev == B2_NULL_INDEX );
+		B2_ASSERT( body->islandNext == B2_NULL_INDEX );
+		return; // the body is not in an island, so there is nothing to unlink
+	}
+
+	int islandId = body->islandId;
+	b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+	// Unlink the body from its neighbors in the island's linked list of bodies
+	if ( body->islandPrev != B2_NULL_INDEX )
+	{
+		b2Body* prevBody = b2BodyArray_Get( &world->bodies, body->islandPrev );
+		prevBody->islandNext = body->islandNext;
+	}
+
+	if ( body->islandNext != B2_NULL_INDEX )
+	{
+		b2Body* nextBody = b2BodyArray_Get( &world->bodies, body->islandNext );
+		nextBody->islandPrev = body->islandPrev;
+	}
+
+	B2_ASSERT( island->bodyCount > 0 );
+	island->bodyCount -= 1;
+	bool islandDestroyed = false; // set when the island is destroyed below, so validation is skipped
+
+	if ( island->headBody == body->id ) // the body was the list head
+	{
+		island->headBody = body->islandNext;
+
+		if ( island->headBody == B2_NULL_INDEX )
+		{
+			// The body was the only member, so the island is now empty and gets destroyed
+			B2_ASSERT( island->tailBody == body->id );
+			B2_ASSERT( island->bodyCount == 0 );
+			B2_ASSERT( island->contactCount == 0 );
+			B2_ASSERT( island->jointCount == 0 );
+
+			// Free the island
+			b2DestroyIsland( world, island->islandId );
+			islandDestroyed = true;
+		}
+	}
+	else if ( island->tailBody == body->id ) // the body was the list tail
+	{
+		island->tailBody = body->islandPrev;
+	}
+
+	if ( islandDestroyed == false )
+	{
+		b2ValidateIsland( world, islandId ); // only a surviving island is validated
+	}
+
+	body->islandId = B2_NULL_INDEX; // the body no longer belongs to any island
+	body->islandPrev = B2_NULL_INDEX;
+	body->islandNext = B2_NULL_INDEX;
+}
+
+static void b2DestroyBodyContacts( b2World* world, b2Body* body, bool wakeBodies )
+{
+	// Walk the body's contact edge list. A key packs the contact id (key >> 1) with the
+	// index of the contact edge that belongs to this body (key & 1).
+	int key = body->headContactKey;
+	while ( key != B2_NULL_INDEX )
+	{
+		b2Contact* contact = b2ContactArray_Get( &world->contacts, key >> 1 );
+
+		// Read the next key before the contact is destroyed.
+		key = contact->edges[key & 1].nextKey;
+		b2DestroyContact( world, contact, wakeBodies );
+	}
+
+	b2ValidateSolverSets( world );
+}
+
+// Create a body from a definition and return its id, or b2_nullBodyId if the world is locked.
+b2BodyId b2CreateBody( b2WorldId worldId, const b2BodyDef* def )
+{
+	B2_CHECK_DEF( def );
+	B2_ASSERT( b2IsValidVec2( def->position ) );
+	B2_ASSERT( b2IsValidRotation( def->rotation ) );
+	B2_ASSERT( b2IsValidVec2( def->linearVelocity ) );
+	B2_ASSERT( b2IsValidFloat( def->angularVelocity ) );
+	B2_ASSERT( b2IsValidFloat( def->linearDamping ) && def->linearDamping >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( def->angularDamping ) && def->angularDamping >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( def->sleepThreshold ) && def->sleepThreshold >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( def->gravityScale ) );
+
+	b2World* world = b2GetWorldFromId( worldId );
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked )
+	{
+		return b2_nullBodyId;
+	}
+
+	bool isAwake = ( def->isAwake || def->enableSleep == false ) && def->isEnabled;
+
+	// determine the solver set
+	int setId;
+	if ( def->isEnabled == false )
+	{
+		// any body type can be disabled
+		setId = b2_disabledSet;
+	}
+	else if ( def->type == b2_staticBody )
+	{
+		setId = b2_staticSet;
+	}
+	else if ( isAwake == true )
+	{
+		setId = b2_awakeSet;
+	}
+	else
+	{
+		// new set for a sleeping body in its own island
+		setId = b2AllocId( &world->solverSetIdPool );
+		if ( setId == world->solverSets.count )
+		{
+			// Create a zero initialized solver set. All sub-arrays are also zero initialized.
+			b2SolverSetArray_Push( &world->solverSets, (b2SolverSet){ 0 } );
+		}
+		else
+		{
+			B2_ASSERT( world->solverSets.data[setId].setIndex == B2_NULL_INDEX );
+		}
+
+		world->solverSets.data[setId].setIndex = setId;
+	}
+
+	B2_ASSERT( 0 <= setId && setId < world->solverSets.count );
+
+	int bodyId = b2AllocId( &world->bodyIdPool );
+
+	b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, setId );
+	b2BodySim* bodySim = b2BodySimArray_Add( &set->bodySims );
+	*bodySim = (b2BodySim){ 0 };
+	bodySim->transform.p = def->position;
+	bodySim->transform.q = def->rotation;
+	bodySim->center = def->position;
+	bodySim->rotation0 = bodySim->transform.q;
+	bodySim->center0 = bodySim->center;
+	bodySim->minExtent = B2_HUGE;
+	bodySim->maxExtent = 0.0f;
+	bodySim->linearDamping = def->linearDamping;
+	bodySim->angularDamping = def->angularDamping;
+	bodySim->gravityScale = def->gravityScale;
+	bodySim->bodyId = bodyId;
+	bodySim->isBullet = def->isBullet;
+	bodySim->allowFastRotation = def->allowFastRotation;
+
+	if ( setId == b2_awakeSet )
+	{
+		b2BodyState* bodyState = b2BodyStateArray_Add( &set->bodyStates );
+		B2_ASSERT( ( (uintptr_t)bodyState & 0x1F ) == 0 );
+
+		*bodyState = (b2BodyState){ 0 };
+		bodyState->linearVelocity = def->linearVelocity;
+		bodyState->angularVelocity = def->angularVelocity;
+		bodyState->deltaRotation = b2Rot_identity;
+	}
+
+	if ( bodyId == world->bodies.count )
+	{
+		b2BodyArray_Push( &world->bodies, (b2Body){ 0 } );
+	}
+	else
+	{
+		B2_ASSERT( world->bodies.data[bodyId].id == B2_NULL_INDEX );
+	}
+
+	b2Body* body = b2BodyArray_Get( &world->bodies, bodyId );
+
+	// Copy at most 31 characters and stop at the terminator, so a short name is never read
+	// past its end. The rest of the 32 byte buffer is zero filled, which also terminates it.
+	int nameLength = 0;
+	if ( def->name )
+	{
+		while ( nameLength < 31 && def->name[nameLength] != 0 )
+		{
+			body->name[nameLength] = def->name[nameLength];
+			nameLength += 1;
+		}
+	}
+
+	memset( body->name + nameLength, 0, ( 32 - nameLength ) * sizeof( char ) );
+
+	body->userData = def->userData;
+	body->setIndex = setId;
+	body->localIndex = set->bodySims.count - 1;
+	body->generation += 1;
+	body->headShapeId = B2_NULL_INDEX;
+	body->shapeCount = 0;
+	body->headChainId = B2_NULL_INDEX;
+	body->headContactKey = B2_NULL_INDEX;
+	body->contactCount = 0;
+	body->headJointKey = B2_NULL_INDEX;
+	body->jointCount = 0;
+	body->islandId = B2_NULL_INDEX;
+	body->islandPrev = B2_NULL_INDEX;
+	body->islandNext = B2_NULL_INDEX;
+	body->bodyMoveIndex = B2_NULL_INDEX;
+	body->id = bodyId;
+	body->mass = 0.0f;
+	body->inertia = 0.0f;
+	body->sleepThreshold = def->sleepThreshold;
+	body->sleepTime = 0.0f;
+	body->type = def->type;
+	body->enableSleep = def->enableSleep;
+	body->fixedRotation = def->fixedRotation;
+	body->isSpeedCapped = false;
+	body->isMarked = false;
+
+	// dynamic and kinematic bodies that are enabled need an island
+	if ( setId >= b2_awakeSet )
+	{
+		b2CreateIslandForBody( world, setId, body );
+	}
+
+	b2ValidateSolverSets( world );
+
+	return (b2BodyId){ bodyId + 1, world->worldId, body->generation }; // the public index is one-based
+}
+
+bool b2WakeBody( b2World* world, b2Body* body )
+{
+	// Only set indices from b2_firstSleepingSet upward are woken; anything lower is left alone.
+	if ( body->setIndex < b2_firstSleepingSet )
+	{
+		return false;
+	}
+	b2WakeSolverSet( world, body->setIndex );
+	return true; // tells the caller that a solver set was woken
+}
+
+// Destroy a body together with its joints, contacts, shapes and chains. No-op if b2GetWorldLocked returns NULL.
+void b2DestroyBody( b2BodyId bodyId )
+{
+	b2World* world = b2GetWorldLocked( bodyId.world0 );
+	if ( world == NULL )
+	{
+		return;
+	}
+
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+	// Wake bodies attached to this body, even if this body is static.
+	bool wakeBodies = true;
+
+	// Destroy the attached joints
+	int edgeKey = body->headJointKey;
+	while ( edgeKey != B2_NULL_INDEX )
+	{
+		int jointId = edgeKey >> 1; // the upper bits of the key hold the joint id
+		int edgeIndex = edgeKey & 1; // the lowest bit selects which of the joint's two edges is this body's
+
+		b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+		edgeKey = joint->edges[edgeIndex].nextKey;
+
+		// Careful because this modifies the list being traversed, so the next key is read first
+		b2DestroyJointInternal( world, joint, wakeBodies );
+	}
+
+	// Destroy all contacts attached to this body.
+	b2DestroyBodyContacts( world, body, wakeBodies );
+
+	// Destroy the attached shapes and their broad-phase proxies.
+	int shapeId = body->headShapeId;
+	while ( shapeId != B2_NULL_INDEX )
+	{
+		b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+
+		if ( shape->sensorIndex != B2_NULL_INDEX )
+		{
+			b2DestroySensor( world, shape );
+		}
+
+		b2DestroyShapeProxy( shape, &world->broadPhase );
+
+		// Return shape to free list.
+		b2FreeId( &world->shapeIdPool, shapeId );
+		shape->id = B2_NULL_INDEX; // a null id marks the array slot as free
+
+		shapeId = shape->nextShapeId;
+	}
+
+	// Destroy the attached chains. The associated shapes have already been destroyed above.
+	int chainId = body->headChainId;
+	while ( chainId != B2_NULL_INDEX )
+	{
+		b2ChainShape* chain = b2ChainShapeArray_Get( &world->chainShapes, chainId );
+
+		b2FreeChainData( chain );
+
+		// Return chain to free list.
+		b2FreeId( &world->chainIdPool, chainId );
+		chain->id = B2_NULL_INDEX;
+
+		chainId = chain->nextChainId;
+	}
+
+	b2RemoveBodyFromIsland( world, body );
+
+	// Remove body sim from solver set that owns it
+	b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, body->setIndex );
+	int movedIndex = b2BodySimArray_RemoveSwap( &set->bodySims, body->localIndex );
+	if ( movedIndex != B2_NULL_INDEX )
+	{
+		// Another sim was swapped into the vacated slot, so its body must point at the new slot
+		b2BodySim* movedSim = set->bodySims.data + body->localIndex;
+		int movedId = movedSim->bodyId;
+		b2Body* movedBody = b2BodyArray_Get( &world->bodies, movedId );
+		B2_ASSERT( movedBody->localIndex == movedIndex );
+		movedBody->localIndex = body->localIndex;
+	}
+
+	// Remove body state from awake set
+	if ( body->setIndex == b2_awakeSet )
+	{
+		int result = b2BodyStateArray_RemoveSwap( &set->bodyStates, body->localIndex );
+		B2_ASSERT( result == movedIndex ); // states and sims are expected to share the same indices
+		B2_UNUSED( result );
+	}
+	else if ( set->setIndex >= b2_firstSleepingSet && set->bodySims.count == 0 )
+	{
+		// Remove solver set if it's now an orphan.
+		b2DestroySolverSet( world, set->setIndex );
+	}
+
+	// Free body and id (preserve body generation)
+	b2FreeId( &world->bodyIdPool, body->id );
+
+	body->setIndex = B2_NULL_INDEX;
+	body->localIndex = B2_NULL_INDEX;
+	body->id = B2_NULL_INDEX; // b2CreateBody asserts a null id when it reuses this slot
+
+	b2ValidateSolverSets( world );
+}
+
+int b2Body_GetContactCapacity( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+	if ( owner == NULL )
+	{
+		// No world could be obtained for this id, so report no contacts.
+		return 0;
+	}
+
+	// The body's total contact count is an upper bound for b2Body_GetContactData, which
+	// only reports the contacts that are touching.
+	return b2GetBodyFullId( owner, bodyId )->contactCount;
+}
+
+int b2Body_GetContactData( b2BodyId bodyId, b2ContactData* contactData, int capacity )
+{
+	b2World* world = b2GetWorldLocked( bodyId.world0 );
+	if ( world == NULL )
+	{
+		// No world could be obtained for this id, so nothing is written.
+		return 0;
+	}
+
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+
+	// Walk the body's contact edges until the list ends or the output buffer is full.
+	int written = 0;
+	int key = body->headContactKey;
+	while ( key != B2_NULL_INDEX && written < capacity )
+	{
+		// A key packs the contact id with the index (0 or 1) of this body's edge.
+		b2Contact* contact = b2ContactArray_Get( &world->contacts, key >> 1 );
+		key = contact->edges[key & 1].nextKey;
+
+		// Contacts that are not touching are skipped.
+		if ( ( contact->flags & b2_contactTouchingFlag ) == 0 )
+		{
+			continue;
+		}
+
+		b2Shape* shapeA = b2ShapeArray_Get( &world->shapes, contact->shapeIdA );
+		b2Shape* shapeB = b2ShapeArray_Get( &world->shapes, contact->shapeIdB );
+		b2ContactData* out = contactData + written;
+
+		// User-facing shape ids are one-based and carry the world and the shape generation.
+		out->shapeIdA = (b2ShapeId){ shapeA->id + 1, bodyId.world0, shapeA->generation };
+		out->shapeIdB = (b2ShapeId){ shapeB->id + 1, bodyId.world0, shapeB->generation };
+		out->manifold = b2GetContactSim( world, contact )->manifold;
+		written += 1;
+	}
+
+	B2_ASSERT( written <= capacity );
+
+	return written;
+}
+
+b2AABB b2Body_ComputeAABB( b2BodyId bodyId )
+{
+	b2World* world = b2GetWorldLocked( bodyId.world0 );
+	if ( world == NULL )
+	{
+		return (b2AABB){ 0 };
+	}
+
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+	if ( body->headShapeId == B2_NULL_INDEX )
+	{
+		b2Vec2 origin = b2GetBodyTransform( world, body->id ).p; // no shapes: degenerate box at the body origin
+		return (b2AABB){ origin, origin };
+	}
+
+	// Start from the first shape's box, then merge in the boxes of the remaining shapes.
+	const b2Shape* shape = b2ShapeArray_Get( &world->shapes, body->headShapeId );
+	b2AABB bounds = shape->aabb;
+	for ( int next = shape->nextShapeId; next != B2_NULL_INDEX; next = shape->nextShapeId )
+	{
+		shape = b2ShapeArray_Get( &world->shapes, next );
+		bounds = b2AABB_Union( bounds, shape->aabb );
+	}
+	return bounds;
+}
+
+// Recompute a body's mass, rotational inertia, center of mass and extents from its attached shapes.
+void b2UpdateBodyMassData( b2World* world, b2Body* body )
+{
+	b2BodySim* bodySim = b2GetBodySim( world, body );
+	// Compute mass data from shapes. Each shape has its own density.
+	body->mass = 0.0f;
+	body->inertia = 0.0f;
+
+	bodySim->invMass = 0.0f;
+	bodySim->invInertia = 0.0f;
+	bodySim->localCenter = b2Vec2_zero;
+	bodySim->minExtent = B2_HUGE;
+	bodySim->maxExtent = 0.0f;
+
+	// Static and kinematic sims have zero mass.
+	if ( body->type != b2_dynamicBody )
+	{
+		bodySim->center = bodySim->transform.p; // the center of mass coincides with the body origin
+
+		// Need extents for kinematic bodies for sleeping to work correctly.
+		if ( body->type == b2_kinematicBody )
+		{
+			int shapeId = body->headShapeId;
+			while ( shapeId != B2_NULL_INDEX )
+			{
+				const b2Shape* s = b2ShapeArray_Get( &world->shapes, shapeId );
+
+				b2ShapeExtent extent = b2ComputeShapeExtent( s, b2Vec2_zero );
+				bodySim->minExtent = b2MinFloat( bodySim->minExtent, extent.minExtent );
+				bodySim->maxExtent = b2MaxFloat( bodySim->maxExtent, extent.maxExtent );
+
+				shapeId = s->nextShapeId;
+			}
+		}
+
+		return;
+	}
+
+	// Accumulate mass over all shapes.
+	b2Vec2 localCenter = b2Vec2_zero;
+	int shapeId = body->headShapeId;
+	while ( shapeId != B2_NULL_INDEX )
+	{
+		const b2Shape* s = b2ShapeArray_Get( &world->shapes, shapeId );
+		shapeId = s->nextShapeId; // advance first because the loop body may 'continue'
+
+		if ( s->density == 0.0f )
+		{
+			continue; // shapes with zero density contribute no mass
+		}
+
+		b2MassData massData = b2ComputeShapeMass( s );
+		body->mass += massData.mass;
+		localCenter = b2MulAdd( localCenter, massData.mass, massData.center ); // mass-weighted sum of centers
+		body->inertia += massData.rotationalInertia;
+	}
+
+	// Compute center of mass.
+	if ( body->mass > 0.0f )
+	{
+		bodySim->invMass = 1.0f / body->mass;
+		localCenter = b2MulSV( bodySim->invMass, localCenter ); // divide the weighted sum by the total mass
+	}
+
+	if ( body->inertia > 0.0f && body->fixedRotation == false )
+	{
+		// Center the inertia about the center of mass.
+		body->inertia -= body->mass * b2Dot( localCenter, localCenter );
+		B2_ASSERT( body->inertia > 0.0f );
+		bodySim->invInertia = 1.0f / body->inertia;
+	}
+	else
+	{
+		body->inertia = 0.0f; // no rotational inertia, or rotation is fixed
+		bodySim->invInertia = 0.0f;
+	}
+
+	// Move center of mass.
+	b2Vec2 oldCenter = bodySim->center;
+	bodySim->localCenter = localCenter;
+	bodySim->center = b2TransformPoint( bodySim->transform, bodySim->localCenter );
+	bodySim->center0 = bodySim->center;
+
+	// Update center of mass velocity
+	b2BodyState* state = b2GetBodyState( world, body ); // NULL unless the body is in the awake set
+	if ( state != NULL )
+	{
+		b2Vec2 deltaLinear = b2CrossSV( state->angularVelocity, b2Sub( bodySim->center, oldCenter ) );
+		state->linearVelocity = b2Add( state->linearVelocity, deltaLinear );
+	}
+
+	// Compute body extents relative to center of mass
+	shapeId = body->headShapeId;
+	while ( shapeId != B2_NULL_INDEX )
+	{
+		const b2Shape* s = b2ShapeArray_Get( &world->shapes, shapeId );
+
+		b2ShapeExtent extent = b2ComputeShapeExtent( s, localCenter );
+		bodySim->minExtent = b2MinFloat( bodySim->minExtent, extent.minExtent );
+		bodySim->maxExtent = b2MaxFloat( bodySim->maxExtent, extent.maxExtent );
+
+		shapeId = s->nextShapeId;
+	}
+}
+
+b2Vec2 b2Body_GetPosition( b2BodyId bodyId )
+{
+	// The position is the p component of the body's current transform.
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	return b2GetBodyTransformQuick( owner, target ).p;
+}
+
+b2Rot b2Body_GetRotation( b2BodyId bodyId )
+{
+	// The rotation is the q component of the body's current transform.
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	return b2GetBodyTransformQuick( owner, target ).q;
+}
+
+b2Transform b2Body_GetTransform( b2BodyId bodyId )
+{
+	// Resolve the id to the internal body, then read the transform from its body sim.
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	return b2GetBodyTransformQuick( owner, b2GetBodyFullId( owner, bodyId ) );
+}
+
+b2Vec2 b2Body_GetLocalPoint( b2BodyId bodyId, b2Vec2 worldPoint )
+{
+	// Apply the inverse of the body transform to the given world point.
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	return b2InvTransformPoint( b2GetBodyTransformQuick( owner, target ), worldPoint );
+}
+
+b2Vec2 b2Body_GetWorldPoint( b2BodyId bodyId, b2Vec2 localPoint )
+{
+	// Apply the body transform to the given local point.
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	return b2TransformPoint( b2GetBodyTransformQuick( owner, target ), localPoint );
+}
+
+b2Vec2 b2Body_GetLocalVector( b2BodyId bodyId, b2Vec2 worldVector )
+{
+	// Vectors are direction-only, so just the inverse body rotation is applied (no translation).
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	return b2InvRotateVector( b2GetBodyTransformQuick( owner, target ).q, worldVector );
+}
+
+b2Vec2 b2Body_GetWorldVector( b2BodyId bodyId, b2Vec2 localVector )
+{
+	// Vectors are direction-only, so just the body rotation is applied (no translation).
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	return b2RotateVector( b2GetBodyTransformQuick( owner, target ).q, localVector );
+}
+
+void b2Body_SetTransform( b2BodyId bodyId, b2Vec2 position, b2Rot rotation )
+{
+	B2_ASSERT( b2IsValidVec2( position ) );
+	B2_ASSERT( b2IsValidRotation( rotation ) );
+	B2_ASSERT( b2Body_IsValid( bodyId ) );
+	b2World* world = b2GetWorld( bodyId.world0 );
+	B2_ASSERT( world->locked == false );
+
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+	b2BodySim* sim = b2GetBodySim( world, body );
+
+	// Install the new pose and also record it as the previous pose (rotation0, center0).
+	sim->transform.p = position;
+	sim->transform.q = rotation;
+	sim->center = b2TransformPoint( sim->transform, sim->localCenter );
+	sim->rotation0 = sim->transform.q;
+	sim->center0 = sim->center;
+
+	const b2Transform xf = sim->transform;
+	const float fatPad = B2_AABB_MARGIN;
+	const float pad = B2_SPECULATIVE_DISTANCE;
+
+	// Refresh the bounding boxes of every shape attached to the body.
+	b2Shape* shape = NULL;
+	for ( int shapeId = body->headShapeId; shapeId != B2_NULL_INDEX; shapeId = shape->nextShapeId )
+	{
+		shape = b2ShapeArray_Get( &world->shapes, shapeId );
+		// Tight box for the new pose, padded by the speculative distance.
+		b2AABB tight = b2ComputeShapeAABB( shape, xf );
+		tight.lowerBound.x -= pad;
+		tight.lowerBound.y -= pad;
+		tight.upperBound.x += pad;
+		tight.upperBound.y += pad;
+		shape->aabb = tight;
+
+		if ( b2AABB_Contains( shape->fatAABB, tight ) )
+		{
+			// The stored fat box still encloses the tight box, so it is left alone.
+			continue;
+		}
+		// Rebuild the fat box with an extra margin on every side.
+		b2AABB fat = tight;
+		fat.lowerBound.x -= fatPad;
+		fat.lowerBound.y -= fatPad;
+		fat.upperBound.x += fatPad;
+		fat.upperBound.y += fatPad;
+		shape->fatAABB = fat;
+		// The body could be disabled, in which case the shape has no proxy to move.
+		if ( shape->proxyKey != B2_NULL_INDEX )
+		{
+			b2BroadPhase_MoveProxy( &world->broadPhase, shape->proxyKey, fat );
+		}
+	}
+}
+
+b2Vec2 b2Body_GetLinearVelocity( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	const b2BodyState* motion = b2GetBodyState( owner, target );
+	if ( motion == NULL )
+	{
+		return b2Vec2_zero; // no body state (the body is not awake): reported as not moving
+	}
+	return motion->linearVelocity;
+}
+
+float b2Body_GetAngularVelocity( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	const b2BodyState* motion = b2GetBodyState( owner, target );
+	if ( motion == NULL )
+	{
+		return 0.0f; // no body state (the body is not awake): reported as not rotating
+	}
+	return motion->angularVelocity;
+}
+
+void b2Body_SetLinearVelocity( b2BodyId bodyId, b2Vec2 linearVelocity )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+
+	if ( target->type == b2_staticBody )
+	{
+		return; // the request is ignored for static bodies
+	}
+
+	// Only a non-zero velocity wakes the body.
+	if ( b2LengthSquared( linearVelocity ) > 0.0f )
+	{
+		b2WakeBody( owner, target );
+	}
+
+	// Without a body state (the body is not awake) there is nowhere to store the velocity.
+	b2BodyState* motion = b2GetBodyState( owner, target );
+	if ( motion != NULL )
+	{
+		motion->linearVelocity = linearVelocity;
+	}
+}
+
+void b2Body_SetAngularVelocity( b2BodyId bodyId, float angularVelocity )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+
+	if ( target->type == b2_staticBody || target->fixedRotation )
+	{
+		return; // the request is ignored for static bodies and bodies with fixed rotation
+	}
+
+	// Only a non-zero velocity wakes the body.
+	if ( angularVelocity != 0.0f )
+	{
+		b2WakeBody( owner, target );
+	}
+
+	// Without a body state (the body is not awake) there is nowhere to store the velocity.
+	b2BodyState* motion = b2GetBodyState( owner, target );
+	if ( motion != NULL )
+	{
+		motion->angularVelocity = angularVelocity;
+	}
+}
+
+void b2Body_SetTargetTransform( b2BodyId bodyId, b2Transform target, float timeStep )
+{
+	b2World* world = b2GetWorld( bodyId.world0 );
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+
+	// Static bodies are not driven, and a non-positive time step yields no usable velocity.
+	if ( body->type == b2_staticBody || timeStep <= 0.0f )
+	{
+		return;
+	}
+
+	b2BodySim* sim = b2GetBodySim( world, body );
+
+	const float rate = 1.0f / timeStep;
+
+	// Linear velocity that carries the center of mass to its target location in one time step.
+	b2Vec2 startCenter = sim->center;
+	b2Vec2 targetCenter = b2TransformPoint( target, sim->localCenter );
+	b2Vec2 linear = b2MulSV( rate, b2Sub( targetCenter, startCenter ) );
+
+	// Angular velocity that covers the relative angle in one time step. It stays zero when
+	// the body has fixed rotation.
+	float angular = 0.0f;
+	if ( body->fixedRotation == false )
+	{
+		float angleToTarget = b2RelativeAngle( target.q, sim->transform.q );
+		angular = rate * angleToTarget;
+	}
+
+	// Nothing to do when both velocities come out as zero.
+	if ( b2LengthSquared( linear ) == 0.0f && b2AbsFloat( angular ) == 0.0f )
+	{
+		return;
+	}
+
+	// Wake the body first: b2GetBodyState only returns a state for awake bodies.
+	b2WakeBody( world, body );
+
+	b2BodyState* state = b2GetBodyState( world, body );
+	if ( state != NULL )
+	{
+		// Overwrite both velocities so the body heads for the target.
+		state->linearVelocity = linear;
+		state->angularVelocity = angular;
+	}
+}
+
+b2Vec2 b2Body_GetLocalPointVelocity( b2BodyId bodyId, b2Vec2 localPoint )
+{
+	b2World* world = b2GetWorld( bodyId.world0 );
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+	const b2BodyState* motion = b2GetBodyState( world, body );
+	if ( motion == NULL )
+	{
+		// A body with no body state (not awake) is reported as not moving.
+		return b2Vec2_zero;
+	}
+
+	const b2BodySim* sim = b2GetBodySim( world, body );
+	// Point velocity = linear velocity + angular velocity x arm. The arm runs from the local
+	// center of mass to the point and is rotated by the body rotation.
+	b2Vec2 arm = b2RotateVector( sim->transform.q, b2Sub( localPoint, sim->localCenter ) );
+	return b2Add( motion->linearVelocity, b2CrossSV( motion->angularVelocity, arm ) );
+}
+
+b2Vec2 b2Body_GetWorldPointVelocity( b2BodyId bodyId, b2Vec2 worldPoint )
+{
+	b2World* world = b2GetWorld( bodyId.world0 );
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+	const b2BodyState* motion = b2GetBodyState( world, body );
+	if ( motion == NULL )
+	{
+		// A body with no body state (not awake) is reported as not moving.
+		return b2Vec2_zero;
+	}
+
+	const b2BodySim* sim = b2GetBodySim( world, body );
+	// Point velocity = linear velocity + angular velocity x arm. The arm runs from the
+	// center of mass to the given world point.
+	b2Vec2 arm = b2Sub( worldPoint, sim->center );
+	return b2Add( motion->linearVelocity, b2CrossSV( motion->angularVelocity, arm ) );
+}
+
+void b2Body_ApplyForce( b2BodyId bodyId, b2Vec2 force, b2Vec2 point, bool wake )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	if ( wake && target->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeBody( owner, target );
+	}
+
+	// The force is dropped unless the body is (now) in the awake set.
+	if ( target->setIndex == b2_awakeSet )
+	{
+		b2BodySim* sim = b2GetBodySim( owner, target );
+		sim->force = b2Add( sim->force, force );
+		sim->torque += b2Cross( b2Sub( point, sim->center ), force ); // torque about the center of mass
+	}
+}
+
+void b2Body_ApplyForceToCenter( b2BodyId bodyId, b2Vec2 force, bool wake )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	if ( wake && target->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeBody( owner, target );
+	}
+
+	// The force is dropped unless the body is (now) in the awake set. No torque is added.
+	if ( target->setIndex == b2_awakeSet )
+	{
+		b2BodySim* sim = b2GetBodySim( owner, target );
+		sim->force = b2Add( sim->force, force );
+	}
+}
+
+void b2Body_ApplyTorque( b2BodyId bodyId, float torque, bool wake )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+
+	if ( wake && target->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeBody( owner, target );
+	}
+
+	// The torque is dropped unless the body is (now) in the awake set.
+	if ( target->setIndex == b2_awakeSet )
+	{
+		b2GetBodySim( owner, target )->torque += torque;
+	}
+}
+
+void b2Body_ApplyLinearImpulse( b2BodyId bodyId, b2Vec2 impulse, b2Vec2 point, bool wake )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	if ( wake && target->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeBody( owner, target );
+	}
+
+	// An impulse changes velocity directly, and only awake bodies have a body state.
+	if ( target->setIndex != b2_awakeSet )
+	{
+		return;
+	}
+
+	b2BodyState* motion = b2GetBodyState( owner, target );
+	const b2BodySim* sim = b2GetBodySim( owner, target );
+	motion->linearVelocity = b2MulAdd( motion->linearVelocity, sim->invMass, impulse );
+	motion->angularVelocity += sim->invInertia * b2Cross( b2Sub( point, sim->center ), impulse );
+}
+
+void b2Body_ApplyLinearImpulseToCenter( b2BodyId bodyId, b2Vec2 impulse, bool wake )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	if ( wake && target->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeBody( owner, target );
+	}
+
+	// An impulse changes velocity directly, and only awake bodies have a body state.
+	if ( target->setIndex != b2_awakeSet )
+	{
+		return;
+	}
+
+	b2BodyState* motion = b2GetBodyState( owner, target );
+	const b2BodySim* sim = b2GetBodySim( owner, target );
+	motion->linearVelocity = b2MulAdd( motion->linearVelocity, sim->invMass, impulse );
+}
+
+void b2Body_ApplyAngularImpulse( b2BodyId bodyId, float impulse, bool wake )
+{
+	B2_ASSERT( b2Body_IsValid( bodyId ) );
+	b2World* world = b2GetWorld( bodyId.world0 );
+
+	// Resolve the body from the one-based index and check that the id generation matches.
+	b2Body* body = b2BodyArray_Get( &world->bodies, bodyId.index1 - 1 );
+	B2_ASSERT( body->generation == bodyId.generation );
+
+	if ( wake && body->setIndex >= b2_firstSleepingSet )
+	{
+		// this will not invalidate body pointer
+		b2WakeBody( world, body );
+	}
+
+	if ( body->setIndex != b2_awakeSet )
+	{
+		return; // only awake bodies have a body state to receive the impulse
+	}
+
+	b2BodyState* motion = b2GetBodyState( world, body );
+	const b2BodySim* sim = b2GetBodySim( world, body );
+	motion->angularVelocity += sim->invInertia * impulse;
+}
+
+b2BodyType b2Body_GetType( b2BodyId bodyId )
+{
+	// The type is stored on the body itself, so no solver set lookup is needed.
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	return b2GetBodyFullId( owner, bodyId )->type;
+}
+
+// Changing the body type is quite complex mainly due to joints.
+// Considerations:
+// - body and joints must be moved to the correct set
+// - islands must be updated
+// - graph coloring must be correct
+// - any body connected to a joint may be disabled
+// - joints between static bodies must go into the static set
+void b2Body_SetType( b2BodyId bodyId, b2BodyType type )
+{
+	b2World* world = b2GetWorld( bodyId.world0 );
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+
+	b2BodyType originalType = body->type;
+	if ( originalType == type )
+	{
+		return;
+	}
+
+	if ( body->setIndex == b2_disabledSet )
+	{
+		// Disabled bodies don't change solver sets or islands when they change type.
+		body->type = type;
+
+		// Body type affects the mass
+		b2UpdateBodyMassData( world, body );
+		return;
+	}
+
+	// Destroy all contacts but don't wake bodies.
+	bool wakeBodies = false;
+	b2DestroyBodyContacts( world, body, wakeBodies );
+
+	// Wake this body because we assume below that it is awake or static.
+	b2WakeBody( world, body );
+
+	// Unlink all joints and wake attached bodies.
+	{
+		int jointKey = body->headJointKey;
+		while ( jointKey != B2_NULL_INDEX )
+		{
+			int jointId = jointKey >> 1;
+			int edgeIndex = jointKey & 1;
+
+			b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+			if ( joint->islandId != B2_NULL_INDEX )
+			{
+				b2UnlinkJoint( world, joint );
+			}
+
+			// A body going from static to dynamic or kinematic goes to the awake set
+			// and other attached bodies must be awake as well. For consistency, this is
+			// done for all cases.
+			b2Body* bodyA = b2BodyArray_Get( &world->bodies, joint->edges[0].bodyId );
+			b2Body* bodyB = b2BodyArray_Get( &world->bodies, joint->edges[1].bodyId );
+			b2WakeBody( world, bodyA );
+			b2WakeBody( world, bodyB );
+
+			jointKey = joint->edges[edgeIndex].nextKey;
+		}
+	}
+
+	body->type = type;
+
+	if ( originalType == b2_staticBody )
+	{
+		// Body is going from static to dynamic or kinematic. It only makes sense to move it to the awake set.
+		B2_ASSERT( body->setIndex == b2_staticSet );
+
+		b2SolverSet* staticSet = b2SolverSetArray_Get( &world->solverSets, b2_staticSet );
+		b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+
+		// Transfer body to awake set
+		b2TransferBody( world, awakeSet, staticSet, body );
+
+		// Create island for body
+		b2CreateIslandForBody( world, b2_awakeSet, body );
+
+		// Transfer static joints to awake set
+		int jointKey = body->headJointKey;
+		while ( jointKey != B2_NULL_INDEX )
+		{
+			int jointId = jointKey >> 1;
+			int edgeIndex = jointKey & 1;
+
+			b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+
+			// Transfer the joint if it is in the static set
+			if ( joint->setIndex == b2_staticSet )
+			{
+				b2TransferJoint( world, awakeSet, staticSet, joint );
+			}
+			else if ( joint->setIndex == b2_awakeSet )
+			{
+				// In this case the joint must be re-inserted into the constraint graph to ensure the correct
+				// graph color.
+
+				// First transfer to the static set.
+				b2TransferJoint( world, staticSet, awakeSet, joint );
+
+				// Now transfer it back to the awake set and into the graph coloring.
+				b2TransferJoint( world, awakeSet, staticSet, joint );
+			}
+			else
+			{
+				// Otherwise the joint must be disabled.
+				B2_ASSERT( joint->setIndex == b2_disabledSet );
+			}
+
+			jointKey = joint->edges[edgeIndex].nextKey;
+		}
+
+		// Recreate shape proxies in movable tree.
+		b2Transform transform = b2GetBodyTransformQuick( world, body );
+		int shapeId = body->headShapeId;
+		while ( shapeId != B2_NULL_INDEX )
+		{
+			b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+			shapeId = shape->nextShapeId;
+			b2DestroyShapeProxy( shape, &world->broadPhase );
+			bool forcePairCreation = true;
+			b2BodyType proxyType = type;
+			b2CreateShapeProxy( shape, &world->broadPhase, proxyType, transform, forcePairCreation );
+		}
+	}
+	else if ( type == b2_staticBody )
+	{
+		// The body is going from dynamic/kinematic to static. It should be awake.
+		B2_ASSERT( body->setIndex == b2_awakeSet );
+
+		b2SolverSet* staticSet = b2SolverSetArray_Get( &world->solverSets, b2_staticSet );
+		b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+
+		// Transfer body to static set
+		b2TransferBody( world, staticSet, awakeSet, body );
+
+		// Remove body from island.
+		b2RemoveBodyFromIsland( world, body );
+
+		b2BodySim* bodySim = b2BodySimArray_Get( &staticSet->bodySims, body->localIndex );
+		bodySim->isFast = false;
+
+		// Maybe transfer joints to static set.
+		int jointKey = body->headJointKey;
+		while ( jointKey != B2_NULL_INDEX )
+		{
+			int jointId = jointKey >> 1;
+			int edgeIndex = jointKey & 1;
+
+			b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+			jointKey = joint->edges[edgeIndex].nextKey;
+
+			int otherEdgeIndex = edgeIndex ^ 1;
+			b2Body* otherBody = b2BodyArray_Get( &world->bodies, joint->edges[otherEdgeIndex].bodyId );
+
+			// Skip disabled joint
+			if ( joint->setIndex == b2_disabledSet )
+			{
+				// Joint is disabled, so it should be connected to a disabled body
+				B2_ASSERT( otherBody->setIndex == b2_disabledSet );
+				continue;
+			}
+
+			// Since the body was not static, the joint must be awake.
+			B2_ASSERT( joint->setIndex == b2_awakeSet );
+
+			// Only transfer joint to static set if both bodies are static.
+			if ( otherBody->setIndex == b2_staticSet )
+			{
+				b2TransferJoint( world, staticSet, awakeSet, joint );
+			}
+			else
+			{
+				// The other body must be awake.
+				B2_ASSERT( otherBody->setIndex == b2_awakeSet );
+
+				// The joint must live in a graph color.
+				B2_ASSERT( 0 <= joint->colorIndex && joint->colorIndex < B2_GRAPH_COLOR_COUNT );
+
+				// In this case the joint must be re-inserted into the constraint graph to ensure the correct
+				// graph color.
+
+				// First transfer to the static set.
+				b2TransferJoint( world, staticSet, awakeSet, joint );
+
+				// Now transfer it back to the awake set and into the graph coloring.
+				b2TransferJoint( world, awakeSet, staticSet, joint );
+			}
+		}
+
+		// Recreate shape proxies in static tree.
+		b2Transform transform = b2GetBodyTransformQuick( world, body );
+		int shapeId = body->headShapeId;
+		while ( shapeId != B2_NULL_INDEX )
+		{
+			b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+			shapeId = shape->nextShapeId;
+			b2DestroyShapeProxy( shape, &world->broadPhase );
+			bool forcePairCreation = true;
+			b2CreateShapeProxy( shape, &world->broadPhase, b2_staticBody, transform, forcePairCreation );
+		}
+	}
+	else
+	{
+		B2_ASSERT( originalType == b2_dynamicBody || originalType == b2_kinematicBody );
+		B2_ASSERT( type == b2_dynamicBody || type == b2_kinematicBody );
+
+		// Recreate shape proxies in the tree for the new (dynamic or kinematic) body type.
+		b2Transform transform = b2GetBodyTransformQuick( world, body );
+		int shapeId = body->headShapeId;
+		while ( shapeId != B2_NULL_INDEX )
+		{
+			b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+			shapeId = shape->nextShapeId;
+			b2DestroyShapeProxy( shape, &world->broadPhase );
+			b2BodyType proxyType = type;
+			bool forcePairCreation = true;
+			b2CreateShapeProxy( shape, &world->broadPhase, proxyType, transform, forcePairCreation );
+		}
+	}
+
+	// Relink all joints
+	{
+		int jointKey = body->headJointKey;
+		while ( jointKey != B2_NULL_INDEX )
+		{
+			int jointId = jointKey >> 1;
+			int edgeIndex = jointKey & 1;
+
+			b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+			jointKey = joint->edges[edgeIndex].nextKey;
+
+			int otherEdgeIndex = edgeIndex ^ 1;
+			int otherBodyId = joint->edges[otherEdgeIndex].bodyId;
+			b2Body* otherBody = b2BodyArray_Get( &world->bodies, otherBodyId );
+
+			if ( otherBody->setIndex == b2_disabledSet )
+			{
+				continue;
+			}
+
+			if ( body->type == b2_staticBody && otherBody->type == b2_staticBody )
+			{
+				continue;
+			}
+
+			b2LinkJoint( world, joint, false );
+		}
+
+		b2MergeAwakeIslands( world );
+	}
+
+	// Body type affects the mass
+	b2UpdateBodyMassData( world, body );
+
+	b2ValidateSolverSets( world );
+}
+
+// Copy at most 31 characters of name into the body's 32 byte name buffer. A NULL name clears it.
+void b2Body_SetName( b2BodyId bodyId, const char* name )
+{
+	b2World* world = b2GetWorld( bodyId.world0 );
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+
+	// Zero the whole buffer first so the result is always terminated and no stale characters remain.
+	memset( body->name, 0, 32 * sizeof( char ) );
+	if ( name == NULL )
+	{
+		return;
+	}
+
+	// Stop at the terminator: copying a fixed 31 bytes would read past the end of a shorter string.
+	for ( int i = 0; i < 31 && name[i] != 0; ++i )
+	{
+		body->name[i] = name[i];
+	}
+}
+
+// Get the name stored on a body. The returned pointer refers to the body's own name buffer.
+const char* b2Body_GetName( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->name;
+}
+
+// Store a user pointer on the body. The pointer is stored as-is.
+void b2Body_SetUserData( b2BodyId bodyId, void* userData )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	target->userData = userData;
+}
+
+// Fetch the user pointer currently stored on the body.
+void* b2Body_GetUserData( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->userData;
+}
+
+// Mass of the body as stored on the body record.
+float b2Body_GetMass( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->mass;
+}
+
+// Rotational inertia of the body as stored on the body record.
+float b2Body_GetRotationalInertia( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->inertia;
+}
+
+// Local center of mass, read from the body sim.
+b2Vec2 b2Body_GetLocalCenterOfMass( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2BodySim* sim = b2GetBodySim( owner, b2GetBodyFullId( owner, bodyId ) );
+	return sim->localCenter;
+}
+
+// Center of mass as stored in the body sim's center field.
+b2Vec2 b2Body_GetWorldCenterOfMass( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2BodySim* sim = b2GetBodySim( owner, b2GetBodyFullId( owner, bodyId ) );
+	return sim->center;
+}
+
+// Override the mass, rotational inertia and local center of mass. No-op when b2GetWorldLocked returns NULL.
+void b2Body_SetMassData( b2BodyId bodyId, b2MassData massData )
+{
+	B2_ASSERT( b2IsValidFloat( massData.mass ) && massData.mass >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( massData.rotationalInertia ) && massData.rotationalInertia >= 0.0f );
+	B2_ASSERT( b2IsValidVec2( massData.center ) );
+
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+	if ( owner == NULL )
+	{
+		return;
+	}
+
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	b2BodySim* sim = b2GetBodySim( owner, target );
+
+	target->mass = massData.mass;
+	target->inertia = massData.rotationalInertia;
+	sim->localCenter = massData.center;
+
+	sim->center = b2TransformPoint( sim->transform, massData.center );
+	sim->center0 = sim->center;
+
+	sim->invMass = ( target->mass > 0.0f ) ? 1.0f / target->mass : 0.0f;
+	sim->invInertia = ( target->inertia > 0.0f ) ? 1.0f / target->inertia : 0.0f;
+}
+
+// Snapshot of the body's mass, local center of mass and rotational inertia.
+b2MassData b2Body_GetMassData( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	b2BodySim* sim = b2GetBodySim( owner, target );
+	return (b2MassData){ target->mass, sim->localCenter, target->inertia };
+}
+
+// Recompute the body's mass data via b2UpdateBodyMassData. No-op when b2GetWorldLocked returns NULL.
+void b2Body_ApplyMassFromShapes( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+
+	if ( owner != NULL )
+	{
+		b2Body* target = b2GetBodyFullId( owner, bodyId );
+		b2UpdateBodyMassData( owner, target );
+	}
+}
+
+// Set the linear damping stored on the body sim.
+// The value is asserted to be a valid, non-negative float. No-op when b2GetWorldLocked returns NULL.
+void b2Body_SetLinearDamping( b2BodyId bodyId, float linearDamping )
+{
+	B2_ASSERT( b2IsValidFloat( linearDamping ) && linearDamping >= 0.0f );
+
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+	if ( owner != NULL )
+	{
+		b2Body* target = b2GetBodyFullId( owner, bodyId );
+		b2BodySim* sim = b2GetBodySim( owner, target );
+		sim->linearDamping = linearDamping;
+	}
+}
+
+// Linear damping currently stored on the body sim.
+float b2Body_GetLinearDamping( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2BodySim* sim = b2GetBodySim( owner, b2GetBodyFullId( owner, bodyId ) );
+	return sim->linearDamping;
+}
+
+// Set the angular damping stored on the body sim.
+// The value is asserted to be a valid, non-negative float. No-op when b2GetWorldLocked returns NULL.
+void b2Body_SetAngularDamping( b2BodyId bodyId, float angularDamping )
+{
+	B2_ASSERT( b2IsValidFloat( angularDamping ) && angularDamping >= 0.0f );
+
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+	if ( owner != NULL )
+	{
+		b2Body* target = b2GetBodyFullId( owner, bodyId );
+		b2BodySim* sim = b2GetBodySim( owner, target );
+		sim->angularDamping = angularDamping;
+	}
+}
+
+// Angular damping currently stored on the body sim.
+float b2Body_GetAngularDamping( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2BodySim* sim = b2GetBodySim( owner, b2GetBodyFullId( owner, bodyId ) );
+	return sim->angularDamping;
+}
+
+// Set the gravity scale stored on the body sim.
+// The id and the value are asserted to be valid. No-op when b2GetWorldLocked returns NULL.
+void b2Body_SetGravityScale( b2BodyId bodyId, float gravityScale )
+{
+	B2_ASSERT( b2Body_IsValid( bodyId ) );
+	B2_ASSERT( b2IsValidFloat( gravityScale ) );
+
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+	if ( owner != NULL )
+	{
+		b2Body* target = b2GetBodyFullId( owner, bodyId );
+		b2BodySim* sim = b2GetBodySim( owner, target );
+		sim->gravityScale = gravityScale;
+	}
+}
+
+// Gravity scale currently stored on the body sim. The id is asserted to be valid.
+float b2Body_GetGravityScale( b2BodyId bodyId )
+{
+	B2_ASSERT( b2Body_IsValid( bodyId ) );
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2BodySim* sim = b2GetBodySim( owner, b2GetBodyFullId( owner, bodyId ) );
+	return sim->gravityScale;
+}
+
+// A body is awake exactly when it lives in the awake solver set.
+bool b2Body_IsAwake( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->setIndex == b2_awakeSet;
+}
+
+// Wake a body that is in a sleeping set, or try to put the island of an awake body to sleep.
+void b2Body_SetAwake( b2BodyId bodyId, bool awake )
+{
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+	if ( owner == NULL )
+	{
+		return;
+	}
+
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	if ( awake == false && target->setIndex == b2_awakeSet )
+	{
+		if ( b2IslandArray_Get( &owner->islands, target->islandId )->constraintRemoveCount > 0 )
+		{
+			// Must split the island before sleeping. This is expensive.
+			b2SplitIsland( owner, target->islandId );
+		}
+
+		// islandId is read again here instead of being cached across the split.
+		b2TrySleepIsland( owner, target->islandId );
+	}
+	else if ( awake && target->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeBody( owner, target );
+	}
+}
+
+// A body counts as enabled whenever it is not in the disabled solver set.
+bool b2Body_IsEnabled( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->setIndex != b2_disabledSet;
+}
+
+// Report the body's enableSleep flag.
+bool b2Body_IsSleepEnabled( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->enableSleep;
+}
+
+// Store the sleep threshold on the body. The value is not validated here.
+void b2Body_SetSleepThreshold( b2BodyId bodyId, float sleepThreshold )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	target->sleepThreshold = sleepThreshold;
+}
+
+// Sleep threshold currently stored on the body.
+float b2Body_GetSleepThreshold( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->sleepThreshold;
+}
+
+// Set the body's enableSleep flag. Turning sleep off also wakes the body via b2WakeBody.
+void b2Body_EnableSleep( b2BodyId bodyId, bool enableSleep )
+{
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+	if ( owner == NULL )
+	{
+		return;
+	}
+
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	target->enableSleep = enableSleep;
+	if ( !enableSleep )
+	{
+		b2WakeBody( owner, target );
+	}
+}
+
+// Disable a body: destroy its contacts, remove it from its island and the broad-phase, then move it and its joints to the disabled set. Detailed bookkeeping, but a valuable feature.
+// The most challenging aspect is that joints may connect to bodies that are not disabled.
+void b2Body_Disable( b2BodyId bodyId )
+{
+	b2World* world = b2GetWorldLocked( bodyId.world0 );
+	if ( world == NULL )
+	{
+		return;
+	}
+
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+	if ( body->setIndex == b2_disabledSet )
+	{
+		return; // already disabled, nothing to do
+	}
+
+	// Destroy contacts and wake bodies touching this body. This avoids floating bodies.
+	// This is necessary even for static bodies.
+	bool wakeBodies = true;
+	b2DestroyBodyContacts( world, body, wakeBodies );
+
+	// Disabled bodies are not in an island.
+	b2RemoveBodyFromIsland( world, body );
+
+	// Remove shapes from broad-phase
+	int shapeId = body->headShapeId;
+	while ( shapeId != B2_NULL_INDEX )
+	{
+		b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+		shapeId = shape->nextShapeId;
+		b2DestroyShapeProxy( shape, &world->broadPhase );
+	}
+
+	// Transfer simulation data to disabled set. `set` is the set the body currently lives in.
+	b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, body->setIndex );
+	b2SolverSet* disabledSet = b2SolverSetArray_Get( &world->solverSets, b2_disabledSet );
+
+	// Transfer body sim
+	b2TransferBody( world, disabledSet, set, body );
+
+	// Unlink joints and transfer
+	int jointKey = body->headJointKey;
+	while ( jointKey != B2_NULL_INDEX )
+	{
+		int jointId = jointKey >> 1;
+		int edgeIndex = jointKey & 1;
+
+		b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+		jointKey = joint->edges[edgeIndex].nextKey; // advance first so the `continue` below still makes progress
+
+		// joint may already be disabled by other body
+		if ( joint->setIndex == b2_disabledSet )
+		{
+			continue;
+		}
+
+		B2_ASSERT( joint->setIndex == set->setIndex || set->setIndex == b2_staticSet );
+
+		// Remove joint from island
+		if ( joint->islandId != B2_NULL_INDEX )
+		{
+			b2UnlinkJoint( world, joint );
+		}
+
+		// Transfer joint to disabled set
+		b2SolverSet* jointSet = b2SolverSetArray_Get( &world->solverSets, joint->setIndex );
+		b2TransferJoint( world, disabledSet, jointSet, joint );
+	}
+
+	b2ValidateConnectivity( world );
+	b2ValidateSolverSets( world );
+}
+
+// Re-enable a disabled body: move it out of the disabled set, recreate its shape proxies, and restore joints whose two bodies are both enabled.
+void b2Body_Enable( b2BodyId bodyId )
+{
+	b2World* world = b2GetWorldLocked( bodyId.world0 );
+	if ( world == NULL )
+	{
+		return;
+	}
+
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+	if ( body->setIndex != b2_disabledSet )
+	{
+		return; // not disabled, nothing to do
+	}
+
+	b2SolverSet* disabledSet = b2SolverSetArray_Get( &world->solverSets, b2_disabledSet );
+	int setId = body->type == b2_staticBody ? b2_staticSet : b2_awakeSet;
+	b2SolverSet* targetSet = b2SolverSetArray_Get( &world->solverSets, setId );
+	// Static bodies go to the static set, all other bodies go to the awake set.
+	b2TransferBody( world, targetSet, disabledSet, body );
+	b2Transform transform = b2GetBodyTransformQuick( world, body );
+
+	// Add shapes to broad-phase
+	b2BodyType proxyType = body->type;
+	bool forcePairCreation = true;
+	int shapeId = body->headShapeId;
+	while ( shapeId != B2_NULL_INDEX )
+	{
+		b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+		shapeId = shape->nextShapeId;
+
+		b2CreateShapeProxy( shape, &world->broadPhase, proxyType, transform, forcePairCreation );
+	}
+
+	if ( setId != b2_staticSet )
+	{
+		b2CreateIslandForBody( world, setId, body );
+	}
+
+	// Transfer joints. If the other body is disabled, don't transfer.
+	// If the other body is sleeping, wake it.
+	bool mergeIslands = false;
+	int jointKey = body->headJointKey;
+	while ( jointKey != B2_NULL_INDEX )
+	{
+		int jointId = jointKey >> 1;
+		int edgeIndex = jointKey & 1;
+
+		b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+		B2_ASSERT( joint->setIndex == b2_disabledSet );
+		B2_ASSERT( joint->islandId == B2_NULL_INDEX );
+
+		jointKey = joint->edges[edgeIndex].nextKey; // advance first so the `continue` below still makes progress
+
+		b2Body* bodyA = b2BodyArray_Get( &world->bodies, joint->edges[0].bodyId );
+		b2Body* bodyB = b2BodyArray_Get( &world->bodies, joint->edges[1].bodyId );
+
+		if ( bodyA->setIndex == b2_disabledSet || bodyB->setIndex == b2_disabledSet )
+		{
+			// one body is still disabled
+			continue;
+		}
+
+		// Transfer joint first. The joint takes the set of its non-static body, or the static set if both are static.
+		int jointSetId;
+		if ( bodyA->setIndex == b2_staticSet && bodyB->setIndex == b2_staticSet )
+		{
+			jointSetId = b2_staticSet;
+		}
+		else if ( bodyA->setIndex == b2_staticSet )
+		{
+			jointSetId = bodyB->setIndex;
+		}
+		else
+		{
+			jointSetId = bodyA->setIndex;
+		}
+
+		b2SolverSet* jointSet = b2SolverSetArray_Get( &world->solverSets, jointSetId );
+		b2TransferJoint( world, jointSet, disabledSet, joint );
+
+		// Now that the joint is in the correct set, I can link the joint in the island.
+		if ( jointSetId != b2_staticSet )
+		{
+			b2LinkJoint( world, joint, mergeIslands );
+		}
+	}
+
+	// Now merge islands (linking above was done with mergeIslands == false)
+	b2MergeAwakeIslands( world );
+
+	b2ValidateSolverSets( world );
+}
+
+// Set the fixedRotation flag. On a change, angular velocity is zeroed (when a body state exists) and mass data is recomputed.
+void b2Body_SetFixedRotation( b2BodyId bodyId, bool flag )
+{
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+	if ( owner == NULL )
+	{
+		return;
+	}
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	if ( target->fixedRotation == flag )
+	{
+		return;
+	}
+	target->fixedRotation = flag;
+	b2BodyState* state = b2GetBodyState( owner, target );
+	if ( state != NULL )
+	{
+		state->angularVelocity = 0.0f;
+	}
+	b2UpdateBodyMassData( owner, target );
+}
+
+// Report the body's fixedRotation flag.
+bool b2Body_IsFixedRotation( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->fixedRotation;
+}
+
+// Set the isBullet flag on the body sim. No-op when b2GetWorldLocked returns NULL.
+void b2Body_SetBullet( b2BodyId bodyId, bool flag )
+{
+	b2World* owner = b2GetWorldLocked( bodyId.world0 );
+	if ( owner == NULL )
+	{
+		return;
+	}
+
+	b2BodySim* sim = b2GetBodySim( owner, b2GetBodyFullId( owner, bodyId ) );
+	sim->isBullet = flag;
+}
+
+// Report the isBullet flag stored on the body sim.
+bool b2Body_IsBullet( b2BodyId bodyId )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2BodySim* sim = b2GetBodySim( owner, b2GetBodyFullId( owner, bodyId ) );
+	return sim->isBullet;
+}
+
+// Apply the contact-event flag to every shape in the body's shape list.
+void b2Body_EnableContactEvents( b2BodyId bodyId, bool flag )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	for ( int shapeIndex = target->headShapeId; shapeIndex != B2_NULL_INDEX; )
+	{
+		b2Shape* current = b2ShapeArray_Get( &owner->shapes, shapeIndex );
+		current->enableContactEvents = flag;
+		shapeIndex = current->nextShapeId;
+	}
+}
+
+// Apply the hit-event flag to every shape in the body's shape list.
+void b2Body_EnableHitEvents( b2BodyId bodyId, bool flag )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+	for ( int shapeIndex = target->headShapeId; shapeIndex != B2_NULL_INDEX; )
+	{
+		b2Shape* current = b2ShapeArray_Get( &owner->shapes, shapeIndex );
+		current->enableHitEvents = flag;
+		shapeIndex = current->nextShapeId;
+	}
+}
+
+b2WorldId b2Body_GetWorld( b2BodyId bodyId )
+{
+	// The world id is built from world0 + 1 and the world's current generation.
+	return (b2WorldId){ bodyId.world0 + 1, b2GetWorld( bodyId.world0 )->generation };
+}
+
+// Number of shapes recorded on the body.
+int b2Body_GetShapeCount( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->shapeCount;
+}
+
+// Write up to capacity shape ids for the body into shapeArray and return how many were written.
+int b2Body_GetShapes( b2BodyId bodyId, b2ShapeId* shapeArray, int capacity )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+
+	int written = 0;
+	for ( int shapeIndex = target->headShapeId; shapeIndex != B2_NULL_INDEX && written < capacity; ++written )
+	{
+		b2Shape* current = b2ShapeArray_Get( &owner->shapes, shapeIndex );
+		// The id carries shape->id + 1 together with the world index and the shape generation.
+		b2ShapeId id = { current->id + 1, bodyId.world0, current->generation };
+		shapeArray[written] = id;
+		shapeIndex = current->nextShapeId;
+	}
+
+	return written;
+}
+
+// Number of joints recorded on the body.
+int b2Body_GetJointCount( b2BodyId bodyId )
+{
+	b2Body* target = b2GetBodyFullId( b2GetWorld( bodyId.world0 ), bodyId );
+	return target->jointCount;
+}
+
+// Write up to capacity joint ids for the body into jointArray and return how many were written.
+int b2Body_GetJoints( b2BodyId bodyId, b2JointId* jointArray, int capacity )
+{
+	b2World* owner = b2GetWorld( bodyId.world0 );
+	b2Body* target = b2GetBodyFullId( owner, bodyId );
+
+	int written = 0;
+	int key = target->headJointKey;
+	while ( written < capacity && key != B2_NULL_INDEX )
+	{
+		// The key packs the joint index in the upper bits and this body's edge index in bit 0.
+		int jointIndex = key >> 1;
+		int edge = key & 1;
+		b2Joint* current = b2JointArray_Get( &owner->joints, jointIndex );
+
+		jointArray[written] = (b2JointId){ jointIndex + 1, bodyId.world0, current->generation };
+		written += 1;
+
+		key = current->edges[edge].nextKey;
+	}
+
+	return written;
+}
+
+// Decide whether two bodies are allowed to collide.
+// Returns false when neither body is dynamic, or when a joint with collideConnected == false
+// connects the two bodies. Returns true otherwise.
+// world: supplies the joint array used to resolve joint keys.
+// bodyA, bodyB: the candidate pair.
+bool b2ShouldBodiesCollide( b2World* world, b2Body* bodyA, b2Body* bodyB )
+{
+	if ( bodyA->type != b2_dynamicBody && bodyB->type != b2_dynamicBody )
+	{
+		return false;
+	}
+
+	// Walk the joint list of the body with fewer joints and look for the other body's id.
+	bool walkA = bodyA->jointCount < bodyB->jointCount;
+	int key = walkA ? bodyA->headJointKey : bodyB->headJointKey;
+	int soughtBodyId = walkA ? bodyB->id : bodyA->id;
+
+	while ( key != B2_NULL_INDEX )
+	{
+		// The key packs the joint index in the upper bits and the walked body's edge index in bit 0.
+		int jointIndex = key >> 1;
+		int edge = key & 1;
+		int oppositeEdge = edge ^ 1;
+
+		b2Joint* current = b2JointArray_Get( &world->joints, jointIndex );
+		bool connectsPair = current->edges[oppositeEdge].bodyId == soughtBodyId;
+		if ( connectsPair && current->collideConnected == false )
+		{
+			return false;
+		}
+
+		key = current->edges[edge].nextKey;
+	}
+
+	// No joint forbids the contact.
+	return true;
+}
diff --git a/src/vendor/box2d/body.h b/src/vendor/box2d/body.h
new file mode 100644
index 0000000..7f27866
--- /dev/null
+++ b/src/vendor/box2d/body.h
@@ -0,0 +1,194 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+
+#include "box2d/math_functions.h"
+#include "box2d/types.h"
+
+typedef struct b2World b2World;
+
+// Body organizational details that are not used in the solver.
+typedef struct b2Body
+{
+	char name[32]; // written by b2Body_SetName, which always leaves a NUL terminator in the buffer
+
+	void* userData; // stored and returned as-is by b2Body_SetUserData / b2Body_GetUserData
+
+	// index of solver set stored in b2World
+	// may be B2_NULL_INDEX
+	int setIndex;
+
+	// body sim and state index within set
+	// may be B2_NULL_INDEX
+	int localIndex;
+
+	// [31 : contactId | 1 : edgeIndex]
+	int headContactKey;
+	int contactCount;
+
+	// todo maybe move this to the body sim
+	// head of the body's shape list, linked through b2Shape::nextShapeId
+	int headShapeId;
+	int shapeCount;
+
+	int headChainId;
+
+	// [31 : jointId | 1 : edgeIndex], decoded as (key >> 1) and (key & 1)
+	int headJointKey;
+	int jointCount;
+
+	// All enabled dynamic and kinematic bodies are in an island.
+	int islandId;
+
+	// doubly-linked island list
+	int islandPrev;
+	int islandNext;
+
+	float mass;
+
+	// Rotational inertia about the center of mass.
+	float inertia;
+
+	float sleepThreshold;
+	float sleepTime;
+
+	// this is used to adjust the fellAsleep flag in the body move array
+	int bodyMoveIndex;
+
+	int id; // compared against the bodyId stored in joint edges (see b2ShouldBodiesCollide)
+
+	b2BodyType type;
+
+	// This is monotonically advanced when a body is allocated in this slot
+	// Used to check for invalid b2BodyId
+	uint16_t generation;
+
+	bool enableSleep;
+	bool fixedRotation;
+	bool isSpeedCapped;
+	bool isMarked;
+} b2Body;
+
+// Body State
+// The body state is designed for fast conversion to and from SIMD via scatter-gather.
+// Only awake dynamic and kinematic bodies have a body state.
+// This is used in the performance critical constraint solver
+//
+// The solver operates on the body state. The body state array does not hold static bodies. Static bodies are shared
+// across worker threads. It would be okay to read their states, but writing to them would cause cache thrashing across
+// workers, even if the values don't change.
+// This causes some trouble when computing anchors. I rotate joint anchors using the body rotation every sub-step. For static
+// bodies the anchor doesn't rotate. Body A or B could be static and this can lead to lots of branching. This branching
+// should be minimized.
+//
+// Solution 1:
+// Use delta rotations. This means anchors need to be prepared in world space. The delta rotation for static bodies will be
+// identity using a dummy state. Base separation and angles need to be computed. Manifolds will be behind a frame, but that
+// is probably best if bodies move fast.
+//
+// Solution 2:
+// Use full rotation. The anchors for static bodies will be in world space while the anchors for dynamic bodies will be in local
+// space. Potentially confusing and bug prone.
+//
+// Note:
+// I rotate joint anchors each sub-step but not contact anchors. Joint stability improves a lot by rotating joint anchors
+// according to substep progress. Contacts have reduced stability when anchors are rotated during substeps, especially for
+// round shapes.
+
+// 32 bytes in total: 8 + 4 + 4 + 8 + 8, see the per-field sizes below
+typedef struct b2BodyState
+{
+	b2Vec2 linearVelocity; // 8
+	float angularVelocity; // 4
+	int flags;			   // 4
+
+	// Using delta position reduces round-off error far from the origin
+	b2Vec2 deltaPosition; // 8
+
+	// Using delta rotation because I cannot access the full rotation on static bodies in
+	// the solver and must use zero delta rotation for static bodies (c,s) = (1,0)
+	b2Rot deltaRotation; // 8
+} b2BodyState;
+
+// Identity body state: zero velocities, flags and deltaPosition; notice the deltaRotation is {1, 0}
+static const b2BodyState b2_identityBodyState = { .linearVelocity = { 0.0f, 0.0f }, .angularVelocity = 0.0f, .flags = 0, .deltaPosition = { 0.0f, 0.0f }, .deltaRotation = { 1.0f, 0.0f } };
+
+// Body simulation data used for integration of position and velocity
+// Transform data used for collision and solver preparation.
+typedef struct b2BodySim
+{
+	// todo better to have transform in sim or in base body? Try both!
+	// transform for body origin
+	b2Transform transform;
+
+	// center of mass position in world space
+	b2Vec2 center;
+
+	// previous rotation and COM for TOI
+	b2Rot rotation0;
+	b2Vec2 center0;
+
+	// location of center of mass relative to the body origin
+	b2Vec2 localCenter;
+
+	b2Vec2 force;
+	float torque;
+
+	// inverse mass and inverse rotational inertia (b2Body_SetMassData stores 0 when the mass or inertia is 0)
+	float invMass;
+	float invInertia;
+
+	float minExtent;
+	float maxExtent;
+	float linearDamping;
+	float angularDamping;
+	float gravityScale;
+
+	// body data can be moved around, the id is stable (used in b2BodyId)
+	int bodyId;
+
+	// This flag is used for debug draw
+	bool isFast;
+
+	bool isBullet;
+	bool isSpeedCapped;
+	bool allowFastRotation;
+	bool enlargeAABB;
+} b2BodySim;
+
+// Get a validated body from a world using an id.
+b2Body* b2GetBodyFullId( b2World* world, b2BodyId bodyId );
+
+b2Transform b2GetBodyTransformQuick( b2World* world, b2Body* body );
+b2Transform b2GetBodyTransform( b2World* world, int bodyId );
+
+// Create a b2BodyId from a raw id.
+b2BodyId b2MakeBodyId( b2World* world, int bodyId );
+
+bool b2ShouldBodiesCollide( b2World* world, b2Body* bodyA, b2Body* bodyB );
+
+b2BodySim* b2GetBodySim( b2World* world, b2Body* body );
+b2BodyState* b2GetBodyState( b2World* world, b2Body* body );
+
+// careful calling this because it can invalidate body, state, joint, and contact pointers
+bool b2WakeBody( b2World* world, b2Body* body );
+
+void b2UpdateBodyMassData( b2World* world, b2Body* body );
+
+static inline b2Sweep b2MakeSweep( const b2BodySim* bodySim )
+{
+	b2Sweep sweep; // c1/q1 take the previous center/rotation, c2/q2 the current ones
+	sweep.localCenter = bodySim->localCenter;
+	sweep.c1 = bodySim->center0;
+	sweep.q1 = bodySim->rotation0;
+	sweep.c2 = bodySim->center;
+	sweep.q2 = bodySim->transform.q;
+	return sweep;
+}
+
+// Define inline functions for arrays
+B2_ARRAY_INLINE( b2Body, b2Body )
+B2_ARRAY_INLINE( b2BodySim, b2BodySim )
+B2_ARRAY_INLINE( b2BodyState, b2BodyState )
diff --git a/src/vendor/box2d/box2d.h b/src/vendor/box2d/box2d.h
new file mode 100644
index 0000000..d854f49
--- /dev/null
+++ b/src/vendor/box2d/box2d.h
@@ -0,0 +1,1221 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "base.h"
+#include "collision.h"
+#include "id.h"
+#include "types.h"
+
+#include <stdbool.h>
+
+/**
+ * @defgroup world World
+ * These functions allow you to create a simulation world.
+ *
+ * You can add rigid bodies and joint constraints to the world and run the simulation. You can get contact
+ * information to get contact points and normals as well as events. You can query the world, checking for overlaps and casting rays
+ * or shapes. There is also debugging information such as debug draw, timing information, and counters. You can find documentation
+ * here: https://box2d.org/
+ * @{
+ */
+
+/// Create a world for rigid body simulation. A world contains bodies, shapes, and constraints. You may create
+/// up to 128 worlds. Each world is completely independent and may be simulated in parallel.
+/// @return the world id.
+B2_API b2WorldId b2CreateWorld( const b2WorldDef* def );
+
+/// Destroy a world
+B2_API void b2DestroyWorld( b2WorldId worldId );
+
+/// World id validation. Provides validation for up to 64K allocations.
+B2_API bool b2World_IsValid( b2WorldId id );
+
+/// Simulate a world for one time step. This performs collision detection, integration, and constraint solution.
+/// @param worldId The world to simulate
+/// @param timeStep The amount of time to simulate, this should be a fixed number. Usually 1/60.
+/// @param subStepCount The number of sub-steps, increasing the sub-step count can increase accuracy. Usually 4.
+B2_API void b2World_Step( b2WorldId worldId, float timeStep, int subStepCount );
+
+/// Call this to draw shapes and other debug draw data
+B2_API void b2World_Draw( b2WorldId worldId, b2DebugDraw* draw );
+
+/// Get the body events for the current time step. The event data is transient. Do not store a reference to this data.
+B2_API b2BodyEvents b2World_GetBodyEvents( b2WorldId worldId );
+
+/// Get sensor events for the current time step. The event data is transient. Do not store a reference to this data.
+B2_API b2SensorEvents b2World_GetSensorEvents( b2WorldId worldId );
+
+/// Get contact events for this current time step. The event data is transient. Do not store a reference to this data.
+B2_API b2ContactEvents b2World_GetContactEvents( b2WorldId worldId );
+
+/// Overlap test for all shapes that *potentially* overlap the provided AABB
+B2_API b2TreeStats b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn,
+										void* context );
+
+/// Overlap test for all shapes that overlap the provided shape proxy.
+B2_API b2TreeStats b2World_OverlapShape( b2WorldId worldId, const b2ShapeProxy* proxy, b2QueryFilter filter,
+										 b2OverlapResultFcn* fcn, void* context );
+
+/// Cast a ray into the world to collect shapes in the path of the ray.
+/// Your callback function controls whether you get the closest point, any point, or n-points.
+/// The ray-cast ignores shapes that contain the starting point.
+/// @note The callback function may receive shapes in any order
+/// @param worldId The world to cast the ray against
+/// @param origin The start point of the ray
+/// @param translation The translation of the ray from the start point to the end point
+/// @param filter Contains bit flags to filter unwanted shapes from the results
+/// @param fcn A user implemented callback function
+/// @param context A user context that is passed along to the callback function
+///	@return traversal performance counters
+B2_API b2TreeStats b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter,
+									b2CastResultFcn* fcn, void* context );
+
+/// Cast a ray into the world to collect the closest hit. This is a convenience function.
+/// This is less general than b2World_CastRay() and does not allow for custom filtering.
+B2_API b2RayResult b2World_CastRayClosest( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter );
+
+/// Cast a shape through the world. Similar to a cast ray except that a shape is cast instead of a point.
+///	@see b2World_CastRay
+B2_API b2TreeStats b2World_CastShape( b2WorldId worldId, const b2ShapeProxy* proxy, b2Vec2 translation, b2QueryFilter filter,
+									  b2CastResultFcn* fcn, void* context );
+
+/// Cast a capsule mover through the world. This is a special shape cast that handles sliding along other shapes while reducing
+/// clipping.
+B2_API float b2World_CastMover( b2WorldId worldId, const b2Capsule* mover, b2Vec2 translation, b2QueryFilter filter );
+
+/// Collide a capsule mover with the world, gathering collision planes that can be fed to b2SolvePlanes. Useful for
+/// kinematic character movement.
+B2_API void b2World_CollideMover( b2WorldId worldId, const b2Capsule* mover, b2QueryFilter filter, b2PlaneResultFcn* fcn,
+								  void* context );
+
+/// Enable/disable sleep. If your application does not need sleeping, you can gain some performance
+/// by disabling sleep completely at the world level.
+/// @see b2WorldDef
+B2_API void b2World_EnableSleeping( b2WorldId worldId, bool flag );
+
+/// Is body sleeping enabled?
+B2_API bool b2World_IsSleepingEnabled( b2WorldId worldId );
+
+/// Enable/disable continuous collision between dynamic and static bodies. Generally you should keep continuous
+/// collision enabled to prevent fast moving objects from going through static objects. The performance gain from
+/// disabling continuous collision is minor.
+/// @see b2WorldDef
+B2_API void b2World_EnableContinuous( b2WorldId worldId, bool flag );
+
+/// Is continuous collision enabled?
+B2_API bool b2World_IsContinuousEnabled( b2WorldId worldId );
+
+/// Adjust the restitution threshold. It is recommended not to make this value very small
+/// because it will prevent bodies from sleeping. Usually in meters per second.
+/// @see b2WorldDef
+B2_API void b2World_SetRestitutionThreshold( b2WorldId worldId, float value );
+
+/// Get the restitution speed threshold. Usually in meters per second.
+B2_API float b2World_GetRestitutionThreshold( b2WorldId worldId );
+
+/// Adjust the hit event threshold. This controls the collision speed needed to generate a b2ContactHitEvent.
+/// Usually in meters per second.
+/// @see b2WorldDef::hitEventThreshold
+B2_API void b2World_SetHitEventThreshold( b2WorldId worldId, float value );
+
+/// Get the hit event speed threshold. Usually in meters per second.
+B2_API float b2World_GetHitEventThreshold( b2WorldId worldId );
+
+/// Register the custom filter callback. This is optional.
+B2_API void b2World_SetCustomFilterCallback( b2WorldId worldId, b2CustomFilterFcn* fcn, void* context );
+
+/// Register the pre-solve callback. This is optional.
+B2_API void b2World_SetPreSolveCallback( b2WorldId worldId, b2PreSolveFcn* fcn, void* context );
+
+/// Set the gravity vector for the entire world. Box2D has no concept of an up direction and this
+/// is left as a decision for the application. Usually in m/s^2.
+/// @see b2WorldDef
+B2_API void b2World_SetGravity( b2WorldId worldId, b2Vec2 gravity );
+
+/// Get the gravity vector
+B2_API b2Vec2 b2World_GetGravity( b2WorldId worldId );
+
+/// Apply a radial explosion
+/// @param worldId The world id
+/// @param explosionDef The explosion definition
+B2_API void b2World_Explode( b2WorldId worldId, const b2ExplosionDef* explosionDef );
+
+/// Adjust contact tuning parameters
+/// @param worldId The world id
+/// @param hertz The contact stiffness (cycles per second)
+/// @param dampingRatio The contact bounciness with 1 being critical damping (non-dimensional)
+/// @param pushSpeed The maximum contact constraint push out speed (meters per second)
+/// @note Advanced feature
+B2_API void b2World_SetContactTuning( b2WorldId worldId, float hertz, float dampingRatio, float pushSpeed );
+
+/// Adjust joint tuning parameters
+/// @param worldId The world id
+/// @param hertz The joint stiffness (cycles per second)
+/// @param dampingRatio The joint bounciness with 1 being critical damping (non-dimensional)
+/// @note Advanced feature
+B2_API void b2World_SetJointTuning( b2WorldId worldId, float hertz, float dampingRatio );
+
+/// Set the maximum linear speed. Usually in m/s.
+B2_API void b2World_SetMaximumLinearSpeed( b2WorldId worldId, float maximumLinearSpeed );
+
+/// Get the maximum linear speed. Usually in m/s.
+B2_API float b2World_GetMaximumLinearSpeed( b2WorldId worldId );
+
+/// Enable/disable constraint warm starting. Advanced feature for testing. Disabling
+/// warm starting greatly reduces stability and provides no performance gain.
+B2_API void b2World_EnableWarmStarting( b2WorldId worldId, bool flag );
+
+/// Is constraint warm starting enabled?
+B2_API bool b2World_IsWarmStartingEnabled( b2WorldId worldId );
+
+/// Get the number of awake bodies.
+B2_API int b2World_GetAwakeBodyCount( b2WorldId worldId );
+
+/// Get the current world performance profile
+B2_API b2Profile b2World_GetProfile( b2WorldId worldId );
+
+/// Get world counters and sizes
+B2_API b2Counters b2World_GetCounters( b2WorldId worldId );
+
+/// Set the user data pointer.
+B2_API void b2World_SetUserData( b2WorldId worldId, void* userData );
+
+/// Get the user data pointer.
+B2_API void* b2World_GetUserData( b2WorldId worldId );
+
+/// Set the friction callback. Passing NULL resets to default.
+B2_API void b2World_SetFrictionCallback( b2WorldId worldId, b2FrictionCallback* callback );
+
+/// Set the restitution callback. Passing NULL resets to default.
+B2_API void b2World_SetRestitutionCallback( b2WorldId worldId, b2RestitutionCallback* callback );
+
+/// Dump memory stats to box2d_memory.txt
+B2_API void b2World_DumpMemoryStats( b2WorldId worldId );
+
+/// This is for internal testing
+B2_API void b2World_RebuildStaticTree( b2WorldId worldId );
+
+/// This is for internal testing
+B2_API void b2World_EnableSpeculative( b2WorldId worldId, bool flag );
+
+/** @} */
+
+/**
+ * @defgroup body Body
+ * This is the body API.
+ * @{
+ */
+
+/// Create a rigid body given a definition. No reference to the definition is retained. So you can create the definition
+/// on the stack and pass it as a pointer.
+/// @code{.c}
+/// b2BodyDef bodyDef = b2DefaultBodyDef();
+/// b2BodyId myBodyId = b2CreateBody(myWorldId, &bodyDef);
+/// @endcode
+/// @warning This function is locked during callbacks.
+B2_API b2BodyId b2CreateBody( b2WorldId worldId, const b2BodyDef* def );
+
+/// Destroy a rigid body given an id. This destroys all shapes and joints attached to the body.
+/// Do not keep references to the associated shapes and joints.
+B2_API void b2DestroyBody( b2BodyId bodyId );
+
+/// Body identifier validation. Can be used to detect orphaned ids. Provides validation for up to 64K allocations.
+B2_API bool b2Body_IsValid( b2BodyId id );
+
+/// Get the body type: static, kinematic, or dynamic
+B2_API b2BodyType b2Body_GetType( b2BodyId bodyId );
+
+/// Change the body type. This is an expensive operation. This automatically updates the mass
+/// properties regardless of the automatic mass setting.
+B2_API void b2Body_SetType( b2BodyId bodyId, b2BodyType type );
+
+/// Set the body name. Up to 31 characters excluding 0 termination.
+B2_API void b2Body_SetName( b2BodyId bodyId, const char* name );
+
+/// Get the body name. May be null.
+B2_API const char* b2Body_GetName( b2BodyId bodyId );
+
+/// Set the user data for a body
+B2_API void b2Body_SetUserData( b2BodyId bodyId, void* userData );
+
+/// Get the user data stored in a body
+B2_API void* b2Body_GetUserData( b2BodyId bodyId );
+
+/// Get the world position of a body. This is the location of the body origin.
+B2_API b2Vec2 b2Body_GetPosition( b2BodyId bodyId );
+
+/// Get the world rotation of a body as a cosine/sine pair (complex number)
+B2_API b2Rot b2Body_GetRotation( b2BodyId bodyId );
+
+/// Get the world transform of a body.
+B2_API b2Transform b2Body_GetTransform( b2BodyId bodyId );
+
+/// Set the world transform of a body. This acts as a teleport and is fairly expensive.
+/// @note Generally you should create a body with the intended transform.
+/// @see b2BodyDef::position and b2BodyDef::angle
+B2_API void b2Body_SetTransform( b2BodyId bodyId, b2Vec2 position, b2Rot rotation );
+
+/// Get a local point on a body given a world point
+B2_API b2Vec2 b2Body_GetLocalPoint( b2BodyId bodyId, b2Vec2 worldPoint );
+
+/// Get a world point on a body given a local point
+B2_API b2Vec2 b2Body_GetWorldPoint( b2BodyId bodyId, b2Vec2 localPoint );
+
+/// Get a local vector on a body given a world vector
+B2_API b2Vec2 b2Body_GetLocalVector( b2BodyId bodyId, b2Vec2 worldVector );
+
+/// Get a world vector on a body given a local vector
+B2_API b2Vec2 b2Body_GetWorldVector( b2BodyId bodyId, b2Vec2 localVector );
+
+/// Get the linear velocity of a body's center of mass. Usually in meters per second.
+B2_API b2Vec2 b2Body_GetLinearVelocity( b2BodyId bodyId );
+
+/// Get the angular velocity of a body in radians per second
+B2_API float b2Body_GetAngularVelocity( b2BodyId bodyId );
+
+/// Set the linear velocity of a body. Usually in meters per second.
+B2_API void b2Body_SetLinearVelocity( b2BodyId bodyId, b2Vec2 linearVelocity );
+
+/// Set the angular velocity of a body in radians per second
+B2_API void b2Body_SetAngularVelocity( b2BodyId bodyId, float angularVelocity );
+
+/// Set the velocity to reach the given transform after a given time step.
+/// The result will be close but maybe not exact. This is meant for kinematic bodies.
+/// This will automatically wake the body if asleep.
+B2_API void b2Body_SetTargetTransform( b2BodyId bodyId, b2Transform target, float timeStep );
+
+/// Get the linear velocity of a local point attached to a body. Usually in meters per second.
+B2_API b2Vec2 b2Body_GetLocalPointVelocity( b2BodyId bodyId, b2Vec2 localPoint );
+
+/// Get the linear velocity of a world point attached to a body. Usually in meters per second.
+B2_API b2Vec2 b2Body_GetWorldPointVelocity( b2BodyId bodyId, b2Vec2 worldPoint );
+
+/// Apply a force at a world point. If the force is not applied at the center of mass,
+/// it will generate a torque and affect the angular velocity. This optionally wakes up the body.
+/// The force is ignored if the body is not awake.
+/// @param bodyId The body id
+/// @param force The world force vector, usually in newtons (N)
+/// @param point The world position of the point of application
+/// @param wake Option to wake up the body
+B2_API void b2Body_ApplyForce( b2BodyId bodyId, b2Vec2 force, b2Vec2 point, bool wake );
+
+/// Apply a force to the center of mass. This optionally wakes up the body.
+/// The force is ignored if the body is not awake.
+/// @param bodyId The body id
+/// @param force the world force vector, usually in newtons (N).
+/// @param wake also wake up the body
+B2_API void b2Body_ApplyForceToCenter( b2BodyId bodyId, b2Vec2 force, bool wake );
+
+/// Apply a torque. This affects the angular velocity without affecting the linear velocity.
+/// This optionally wakes the body. The torque is ignored if the body is not awake.
+/// @param bodyId The body id
+/// @param torque about the z-axis (out of the screen), usually in N*m.
+/// @param wake also wake up the body
+B2_API void b2Body_ApplyTorque( b2BodyId bodyId, float torque, bool wake );
+
+/// Apply an impulse at a point. This immediately modifies the velocity.
+/// It also modifies the angular velocity if the point of application
+/// is not at the center of mass. This optionally wakes the body.
+/// The impulse is ignored if the body is not awake.
+/// @param bodyId The body id
+/// @param impulse the world impulse vector, usually in N*s or kg*m/s.
+/// @param point the world position of the point of application.
+/// @param wake also wake up the body
+/// @warning This should be used for one-shot impulses. If you need a steady force,
+/// use a force instead, which will work better with the sub-stepping solver.
+B2_API void b2Body_ApplyLinearImpulse( b2BodyId bodyId, b2Vec2 impulse, b2Vec2 point, bool wake );
+
+/// Apply an impulse to the center of mass. This immediately modifies the velocity.
+/// The impulse is ignored if the body is not awake. This optionally wakes the body.
+/// @param bodyId The body id
+/// @param impulse the world impulse vector, usually in N*s or kg*m/s.
+/// @param wake also wake up the body
+/// @warning This should be used for one-shot impulses. If you need a steady force,
+/// use a force instead, which will work better with the sub-stepping solver.
+B2_API void b2Body_ApplyLinearImpulseToCenter( b2BodyId bodyId, b2Vec2 impulse, bool wake );
+
+/// Apply an angular impulse. The impulse is ignored if the body is not awake.
+/// This optionally wakes the body.
+/// @param bodyId The body id
+/// @param impulse the angular impulse, usually in units of kg*m*m/s
+/// @param wake also wake up the body
+/// @warning This should be used for one-shot impulses. If you need a steady force,
+/// use a force instead, which will work better with the sub-stepping solver.
+B2_API void b2Body_ApplyAngularImpulse( b2BodyId bodyId, float impulse, bool wake );
+
+/// Get the mass of the body, usually in kilograms
+B2_API float b2Body_GetMass( b2BodyId bodyId );
+
+/// Get the rotational inertia of the body, usually in kg*m^2
+B2_API float b2Body_GetRotationalInertia( b2BodyId bodyId );
+
+/// Get the center of mass position of the body in local space
+B2_API b2Vec2 b2Body_GetLocalCenterOfMass( b2BodyId bodyId );
+
+/// Get the center of mass position of the body in world space
+B2_API b2Vec2 b2Body_GetWorldCenterOfMass( b2BodyId bodyId );
+
+/// Override the body's mass properties. Normally this is computed automatically using the
+/// shape geometry and density. This information is lost if a shape is added or removed or if the
+/// body type changes.
+B2_API void b2Body_SetMassData( b2BodyId bodyId, b2MassData massData );
+
+/// Get the mass data for a body
+B2_API b2MassData b2Body_GetMassData( b2BodyId bodyId );
+
+/// This updates the mass properties to the sum of the mass properties of the shapes.
+/// This normally does not need to be called unless you called SetMassData to override
+/// the mass and you later want to reset the mass.
+/// You may also use this when automatic mass computation has been disabled.
+/// You should call this regardless of body type.
+/// Note that sensor shapes may have mass.
+B2_API void b2Body_ApplyMassFromShapes( b2BodyId bodyId );
+
+/// Adjust the linear damping. Normally this is set in b2BodyDef before creation.
+B2_API void b2Body_SetLinearDamping( b2BodyId bodyId, float linearDamping );
+
+/// Get the current linear damping.
+B2_API float b2Body_GetLinearDamping( b2BodyId bodyId );
+
+/// Adjust the angular damping. Normally this is set in b2BodyDef before creation.
+B2_API void b2Body_SetAngularDamping( b2BodyId bodyId, float angularDamping );
+
+/// Get the current angular damping.
+B2_API float b2Body_GetAngularDamping( b2BodyId bodyId );
+
+/// Adjust the gravity scale. Normally this is set in b2BodyDef before creation.
+/// @see b2BodyDef::gravityScale
+B2_API void b2Body_SetGravityScale( b2BodyId bodyId, float gravityScale );
+
+/// Get the current gravity scale
+B2_API float b2Body_GetGravityScale( b2BodyId bodyId );
+
+/// @return true if this body is awake
+B2_API bool b2Body_IsAwake( b2BodyId bodyId );
+
+/// Wake a body from sleep. This wakes the entire island the body is touching.
+/// @warning Putting a body to sleep will put the entire island of bodies touching this body to sleep,
+/// which can be expensive and possibly unintuitive.
+B2_API void b2Body_SetAwake( b2BodyId bodyId, bool awake );
+
+/// Enable or disable sleeping for this body. If sleeping is disabled the body will wake.
+B2_API void b2Body_EnableSleep( b2BodyId bodyId, bool enableSleep );
+
+/// Returns true if sleeping is enabled for this body
+B2_API bool b2Body_IsSleepEnabled( b2BodyId bodyId );
+
+/// Set the sleep threshold, usually in meters per second
+B2_API void b2Body_SetSleepThreshold( b2BodyId bodyId, float sleepThreshold );
+
+/// Get the sleep threshold, usually in meters per second.
+B2_API float b2Body_GetSleepThreshold( b2BodyId bodyId );
+
+/// Returns true if this body is enabled
+B2_API bool b2Body_IsEnabled( b2BodyId bodyId );
+
+/// Disable a body by removing it completely from the simulation. This is expensive.
+B2_API void b2Body_Disable( b2BodyId bodyId );
+
+/// Enable a body by adding it to the simulation. This is expensive.
+B2_API void b2Body_Enable( b2BodyId bodyId );
+
+/// Set this body to have fixed rotation. This causes the mass to be reset in all cases.
+B2_API void b2Body_SetFixedRotation( b2BodyId bodyId, bool flag );
+
+/// Does this body have fixed rotation?
+B2_API bool b2Body_IsFixedRotation( b2BodyId bodyId );
+
+/// Set this body to be a bullet. A bullet does continuous collision detection
+/// against dynamic bodies (but not other bullets).
+B2_API void b2Body_SetBullet( b2BodyId bodyId, bool flag );
+
+/// Is this body a bullet?
+B2_API bool b2Body_IsBullet( b2BodyId bodyId );
+
+/// Enable/disable contact events on all shapes.
+/// @see b2ShapeDef::enableContactEvents
+/// @warning changing this at runtime may cause mismatched begin/end touch events
+B2_API void b2Body_EnableContactEvents( b2BodyId bodyId, bool flag );
+
+/// Enable/disable hit events on all shapes
+/// @see b2ShapeDef::enableHitEvents
+B2_API void b2Body_EnableHitEvents( b2BodyId bodyId, bool flag );
+
+/// Get the world that owns this body
+B2_API b2WorldId b2Body_GetWorld( b2BodyId bodyId );
+
+/// Get the number of shapes on this body
+B2_API int b2Body_GetShapeCount( b2BodyId bodyId );
+
+/// Get the shape ids for all shapes on this body, up to the provided capacity.
+/// @returns the number of shape ids stored in the user array
+B2_API int b2Body_GetShapes( b2BodyId bodyId, b2ShapeId* shapeArray, int capacity );
+
+/// Get the number of joints on this body
+B2_API int b2Body_GetJointCount( b2BodyId bodyId );
+
+/// Get the joint ids for all joints on this body, up to the provided capacity
+/// @returns the number of joint ids stored in the user array
+B2_API int b2Body_GetJoints( b2BodyId bodyId, b2JointId* jointArray, int capacity );
+
+/// Get the maximum capacity required for retrieving all the touching contacts on a body
+B2_API int b2Body_GetContactCapacity( b2BodyId bodyId );
+
+/// Get the touching contact data for a body.
+/// @note Box2D uses speculative collision so some contact points may be separated.
+/// @returns the number of elements filled in the provided array
+/// @warning do not ignore the return value, it specifies the valid number of elements
+B2_API int b2Body_GetContactData( b2BodyId bodyId, b2ContactData* contactData, int capacity );
+
+/// Get the current world AABB that contains all the attached shapes. Note that this may not encompass the body origin.
+/// If there are no shapes attached then the returned AABB is empty and centered on the body origin.
+B2_API b2AABB b2Body_ComputeAABB( b2BodyId bodyId );
+
+/** @} */
+
+/**
+ * @defgroup shape Shape
+ * Functions to create, destroy, and access shapes.
+ * Shapes bind raw geometry to bodies and hold material properties including friction and restitution.
+ * @{
+ */
+
+/// Create a circle shape and attach it to a body. The shape definition and geometry are fully cloned.
+/// Contacts are not created until the next time step.
+/// @return the shape id for accessing the shape
+B2_API b2ShapeId b2CreateCircleShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Circle* circle );
+
+/// Create a line segment shape and attach it to a body. The shape definition and geometry are fully cloned.
+/// Contacts are not created until the next time step.
+/// @return the shape id for accessing the shape
+B2_API b2ShapeId b2CreateSegmentShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Segment* segment );
+
+/// Create a capsule shape and attach it to a body. The shape definition and geometry are fully cloned.
+/// Contacts are not created until the next time step.
+/// @return the shape id for accessing the shape
+B2_API b2ShapeId b2CreateCapsuleShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Capsule* capsule );
+
+/// Create a polygon shape and attach it to a body. The shape definition and geometry are fully cloned.
+/// Contacts are not created until the next time step.
+/// @return the shape id for accessing the shape
+B2_API b2ShapeId b2CreatePolygonShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Polygon* polygon );
+
+/// Destroy a shape. You may defer the body mass update which can improve performance if several shapes on a
+///	body are destroyed at once.
+///	@see b2Body_ApplyMassFromShapes
+B2_API void b2DestroyShape( b2ShapeId shapeId, bool updateBodyMass );
+
+/// Shape identifier validation. Provides validation for up to 64K allocations.
+B2_API bool b2Shape_IsValid( b2ShapeId id );
+
+/// Get the type of a shape
+B2_API b2ShapeType b2Shape_GetType( b2ShapeId shapeId );
+
+/// Get the id of the body that a shape is attached to
+B2_API b2BodyId b2Shape_GetBody( b2ShapeId shapeId );
+
+/// Get the world that owns this shape
+B2_API b2WorldId b2Shape_GetWorld( b2ShapeId shapeId );
+
+/// Returns true if the shape is a sensor. It is not possible to change a shape
+/// from sensor to solid dynamically because this breaks the contract for
+/// sensor events.
+B2_API bool b2Shape_IsSensor( b2ShapeId shapeId );
+
+/// Set the user data for a shape
+B2_API void b2Shape_SetUserData( b2ShapeId shapeId, void* userData );
+
+/// Get the user data for a shape. This is useful when you get a shape id
+/// from an event or query.
+B2_API void* b2Shape_GetUserData( b2ShapeId shapeId );
+
+/// Set the mass density of a shape, usually in kg/m^2.
+/// This will optionally update the mass properties on the parent body.
+/// @see b2ShapeDef::density, b2Body_ApplyMassFromShapes
+B2_API void b2Shape_SetDensity( b2ShapeId shapeId, float density, bool updateBodyMass );
+
+/// Get the density of a shape, usually in kg/m^2
+B2_API float b2Shape_GetDensity( b2ShapeId shapeId );
+
+/// Set the friction on a shape
+/// @see b2ShapeDef::friction
+B2_API void b2Shape_SetFriction( b2ShapeId shapeId, float friction );
+
+/// Get the friction of a shape
+B2_API float b2Shape_GetFriction( b2ShapeId shapeId );
+
+/// Set the shape restitution (bounciness)
+/// @see b2ShapeDef::restitution
+B2_API void b2Shape_SetRestitution( b2ShapeId shapeId, float restitution );
+
+/// Get the shape restitution
+B2_API float b2Shape_GetRestitution( b2ShapeId shapeId );
+
+/// Set the shape material identifier
+/// @see b2ShapeDef::material
+B2_API void b2Shape_SetMaterial( b2ShapeId shapeId, int material );
+
+/// Get the shape material identifier
+B2_API int b2Shape_GetMaterial( b2ShapeId shapeId );
+
+/// Get the shape filter
+B2_API b2Filter b2Shape_GetFilter( b2ShapeId shapeId );
+
+/// Set the current filter. This is almost as expensive as recreating the shape. This may cause
+/// contacts to be immediately destroyed. However contacts are not created until the next world step.
+/// Sensor overlap state is also not updated until the next world step.
+/// @see b2ShapeDef::filter
+B2_API void b2Shape_SetFilter( b2ShapeId shapeId, b2Filter filter );
+
+/// Enable sensor events for this shape.
+/// @see b2ShapeDef::enableSensorEvents
+B2_API void b2Shape_EnableSensorEvents( b2ShapeId shapeId, bool flag );
+
+/// Returns true if sensor events are enabled.
+B2_API bool b2Shape_AreSensorEventsEnabled( b2ShapeId shapeId );
+
+/// Enable contact events for this shape. Only applies to kinematic and dynamic bodies. Ignored for sensors.
+/// @see b2ShapeDef::enableContactEvents
+/// @warning changing this at run-time may lead to lost begin/end events
+B2_API void b2Shape_EnableContactEvents( b2ShapeId shapeId, bool flag );
+
+/// Returns true if contact events are enabled
+B2_API bool b2Shape_AreContactEventsEnabled( b2ShapeId shapeId );
+
+/// Enable pre-solve contact events for this shape. Only applies to dynamic bodies. These are expensive
+/// and must be carefully handled due to multithreading. Ignored for sensors.
+/// @see b2PreSolveFcn
+B2_API void b2Shape_EnablePreSolveEvents( b2ShapeId shapeId, bool flag );
+
+/// Returns true if pre-solve events are enabled
+B2_API bool b2Shape_ArePreSolveEventsEnabled( b2ShapeId shapeId );
+
+/// Enable contact hit events for this shape. Ignored for sensors.
+/// @see b2WorldDef::hitEventThreshold
+B2_API void b2Shape_EnableHitEvents( b2ShapeId shapeId, bool flag );
+
+/// Returns true if hit events are enabled
+B2_API bool b2Shape_AreHitEventsEnabled( b2ShapeId shapeId );
+
+/// Test a point for overlap with a shape
+B2_API bool b2Shape_TestPoint( b2ShapeId shapeId, b2Vec2 point );
+
+/// Ray cast a shape directly
+B2_API b2CastOutput b2Shape_RayCast( b2ShapeId shapeId, const b2RayCastInput* input );
+
+/// Get a copy of the shape's circle. Asserts the type is correct.
+B2_API b2Circle b2Shape_GetCircle( b2ShapeId shapeId );
+
+/// Get a copy of the shape's line segment. Asserts the type is correct.
+B2_API b2Segment b2Shape_GetSegment( b2ShapeId shapeId );
+
+/// Get a copy of the shape's chain segment. These come from chain shapes.
+/// Asserts the type is correct.
+B2_API b2ChainSegment b2Shape_GetChainSegment( b2ShapeId shapeId );
+
+/// Get a copy of the shape's capsule. Asserts the type is correct.
+B2_API b2Capsule b2Shape_GetCapsule( b2ShapeId shapeId );
+
+/// Get a copy of the shape's convex polygon. Asserts the type is correct.
+B2_API b2Polygon b2Shape_GetPolygon( b2ShapeId shapeId );
+
+/// Allows you to change a shape to be a circle or update the current circle.
+/// This does not modify the mass properties.
+/// @see b2Body_ApplyMassFromShapes
+B2_API void b2Shape_SetCircle( b2ShapeId shapeId, const b2Circle* circle );
+
+/// Allows you to change a shape to be a capsule or update the current capsule.
+/// This does not modify the mass properties.
+/// @see b2Body_ApplyMassFromShapes
+B2_API void b2Shape_SetCapsule( b2ShapeId shapeId, const b2Capsule* capsule );
+
+/// Allows you to change a shape to be a segment or update the current segment.
+B2_API void b2Shape_SetSegment( b2ShapeId shapeId, const b2Segment* segment );
+
+/// Allows you to change a shape to be a polygon or update the current polygon.
+/// This does not modify the mass properties.
+/// @see b2Body_ApplyMassFromShapes
+B2_API void b2Shape_SetPolygon( b2ShapeId shapeId, const b2Polygon* polygon );
+
+/// Get the parent chain id if the shape type is a chain segment, otherwise
+/// returns b2_nullChainId.
+B2_API b2ChainId b2Shape_GetParentChain( b2ShapeId shapeId );
+
+/// Get the maximum capacity required for retrieving all the touching contacts on a shape
+B2_API int b2Shape_GetContactCapacity( b2ShapeId shapeId );
+
+/// Get the touching contact data for a shape. The provided shapeId will be either shapeIdA or shapeIdB on the contact data.
+/// @note Box2D uses speculative collision so some contact points may be separated.
+/// @returns the number of elements filled in the provided array
+/// @warning do not ignore the return value, it specifies the valid number of elements
+B2_API int b2Shape_GetContactData( b2ShapeId shapeId, b2ContactData* contactData, int capacity );
+
+/// Get the maximum capacity required for retrieving all the overlapped shapes on a sensor shape.
+/// This returns 0 if the provided shape is not a sensor.
+/// @param shapeId the id of a sensor shape
+/// @returns the required capacity to get all the overlaps in b2Shape_GetSensorOverlaps
+B2_API int b2Shape_GetSensorCapacity( b2ShapeId shapeId );
+
+/// Get the overlapped shapes for a sensor shape.
+/// @param shapeId the id of a sensor shape
+/// @param overlaps a user allocated array that is filled with the overlapping shapes
+/// @param capacity the capacity of overlaps
+/// @returns the number of elements filled in the provided array
+/// @warning do not ignore the return value, it specifies the valid number of elements
+/// @warning overlaps may contain destroyed shapes so use b2Shape_IsValid to confirm each overlap
+B2_API int b2Shape_GetSensorOverlaps( b2ShapeId shapeId, b2ShapeId* overlaps, int capacity );
+
+/// Get the current world AABB
+B2_API b2AABB b2Shape_GetAABB( b2ShapeId shapeId );
+
+/// Get the mass data for a shape
+B2_API b2MassData b2Shape_GetMassData( b2ShapeId shapeId );
+
+/// Get the closest point on a shape to a target point. Target and result are in world space.
+/// todo need sample
+B2_API b2Vec2 b2Shape_GetClosestPoint( b2ShapeId shapeId, b2Vec2 target );
+
+/// Chain Shape
+
+/// Create a chain shape
+/// @see b2ChainDef for details
+B2_API b2ChainId b2CreateChain( b2BodyId bodyId, const b2ChainDef* def );
+
+/// Destroy a chain shape
+B2_API void b2DestroyChain( b2ChainId chainId );
+
+/// Get the world that owns this chain shape
+B2_API b2WorldId b2Chain_GetWorld( b2ChainId chainId );
+
+/// Get the number of segments on this chain
+B2_API int b2Chain_GetSegmentCount( b2ChainId chainId );
+
+/// Fill a user array with chain segment shape ids up to the specified capacity. Returns
+/// the actual number of segments returned.
+B2_API int b2Chain_GetSegments( b2ChainId chainId, b2ShapeId* segmentArray, int capacity );
+
+/// Set the chain friction
+/// @see b2ChainDef::friction
+B2_API void b2Chain_SetFriction( b2ChainId chainId, float friction );
+
+/// Get the chain friction
+B2_API float b2Chain_GetFriction( b2ChainId chainId );
+
+/// Set the chain restitution (bounciness)
+/// @see b2ChainDef::restitution
+B2_API void b2Chain_SetRestitution( b2ChainId chainId, float restitution );
+
+/// Get the chain restitution
+B2_API float b2Chain_GetRestitution( b2ChainId chainId );
+
+/// Set the chain material
+/// @see b2ChainDef::material
+B2_API void b2Chain_SetMaterial( b2ChainId chainId, int material );
+
+/// Get the chain material
+B2_API int b2Chain_GetMaterial( b2ChainId chainId );
+
+/// Chain identifier validation. Provides validation for up to 64K allocations.
+B2_API bool b2Chain_IsValid( b2ChainId id );
+
+/** @} */
+
+/**
+ * @defgroup joint Joint
+ * @brief Joints allow you to connect rigid bodies together while allowing various forms of relative motions.
+ * @{
+ */
+
+/// Destroy a joint
+B2_API void b2DestroyJoint( b2JointId jointId );
+
+/// Joint identifier validation. Provides validation for up to 64K allocations.
+B2_API bool b2Joint_IsValid( b2JointId id );
+
+/// Get the joint type
+B2_API b2JointType b2Joint_GetType( b2JointId jointId );
+
+/// Get body A id on a joint
+B2_API b2BodyId b2Joint_GetBodyA( b2JointId jointId );
+
+/// Get body B id on a joint
+B2_API b2BodyId b2Joint_GetBodyB( b2JointId jointId );
+
+/// Get the world that owns this joint
+B2_API b2WorldId b2Joint_GetWorld( b2JointId jointId );
+
+/// Get the local anchor on bodyA
+B2_API b2Vec2 b2Joint_GetLocalAnchorA( b2JointId jointId );
+
+/// Get the local anchor on bodyB
+B2_API b2Vec2 b2Joint_GetLocalAnchorB( b2JointId jointId );
+
+/// Toggle collision between connected bodies
+B2_API void b2Joint_SetCollideConnected( b2JointId jointId, bool shouldCollide );
+
+/// Is collision allowed between connected bodies?
+B2_API bool b2Joint_GetCollideConnected( b2JointId jointId );
+
+/// Set the user data on a joint
+B2_API void b2Joint_SetUserData( b2JointId jointId, void* userData );
+
+/// Get the user data on a joint
+B2_API void* b2Joint_GetUserData( b2JointId jointId );
+
+/// Wake the bodies connected to this joint
+B2_API void b2Joint_WakeBodies( b2JointId jointId );
+
+/// Get the current constraint force for this joint. Usually in Newtons.
+B2_API b2Vec2 b2Joint_GetConstraintForce( b2JointId jointId );
+
+/// Get the current constraint torque for this joint. Usually in Newton * meters.
+B2_API float b2Joint_GetConstraintTorque( b2JointId jointId );
+
+/**
+ * @defgroup distance_joint Distance Joint
+ * @brief Functions for the distance joint.
+ * @{
+ */
+
+/// Create a distance joint
+/// @see b2DistanceJointDef for details
+B2_API b2JointId b2CreateDistanceJoint( b2WorldId worldId, const b2DistanceJointDef* def );
+
+/// Set the rest length of a distance joint
+/// @param jointId The id for a distance joint
+/// @param length The new distance joint length
+B2_API void b2DistanceJoint_SetLength( b2JointId jointId, float length );
+
+/// Get the rest length of a distance joint
+B2_API float b2DistanceJoint_GetLength( b2JointId jointId );
+
+/// Enable/disable the distance joint spring. When disabled the distance joint is rigid.
+B2_API void b2DistanceJoint_EnableSpring( b2JointId jointId, bool enableSpring );
+
+/// Is the distance joint spring enabled?
+B2_API bool b2DistanceJoint_IsSpringEnabled( b2JointId jointId );
+
+/// Set the spring stiffness in Hertz
+B2_API void b2DistanceJoint_SetSpringHertz( b2JointId jointId, float hertz );
+
+/// Set the spring damping ratio, non-dimensional
+B2_API void b2DistanceJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio );
+
+/// Get the spring Hertz
+B2_API float b2DistanceJoint_GetSpringHertz( b2JointId jointId );
+
+/// Get the spring damping ratio
+B2_API float b2DistanceJoint_GetSpringDampingRatio( b2JointId jointId );
+
+/// Enable joint limit. The limit only works if the joint spring is enabled. Otherwise the joint is rigid
+/// and the limit has no effect.
+B2_API void b2DistanceJoint_EnableLimit( b2JointId jointId, bool enableLimit );
+
+/// Is the distance joint limit enabled?
+B2_API bool b2DistanceJoint_IsLimitEnabled( b2JointId jointId );
+
+/// Set the minimum and maximum length parameters of a distance joint
+B2_API void b2DistanceJoint_SetLengthRange( b2JointId jointId, float minLength, float maxLength );
+
+/// Get the distance joint minimum length
+B2_API float b2DistanceJoint_GetMinLength( b2JointId jointId );
+
+/// Get the distance joint maximum length
+B2_API float b2DistanceJoint_GetMaxLength( b2JointId jointId );
+
+/// Get the current length of a distance joint
+B2_API float b2DistanceJoint_GetCurrentLength( b2JointId jointId );
+
+/// Enable/disable the distance joint motor
+B2_API void b2DistanceJoint_EnableMotor( b2JointId jointId, bool enableMotor );
+
+/// Is the distance joint motor enabled?
+B2_API bool b2DistanceJoint_IsMotorEnabled( b2JointId jointId );
+
+/// Set the distance joint motor speed, usually in meters per second
+B2_API void b2DistanceJoint_SetMotorSpeed( b2JointId jointId, float motorSpeed );
+
+/// Get the distance joint motor speed, usually in meters per second
+B2_API float b2DistanceJoint_GetMotorSpeed( b2JointId jointId );
+
+/// Set the distance joint maximum motor force, usually in newtons
+B2_API void b2DistanceJoint_SetMaxMotorForce( b2JointId jointId, float force );
+
+/// Get the distance joint maximum motor force, usually in newtons
+B2_API float b2DistanceJoint_GetMaxMotorForce( b2JointId jointId );
+
+/// Get the distance joint current motor force, usually in newtons
+B2_API float b2DistanceJoint_GetMotorForce( b2JointId jointId );
+
+/** @} */
+
+/**
+ * @defgroup motor_joint Motor Joint
+ * @brief Functions for the motor joint.
+ *
+ * The motor joint is used to drive the relative transform between two bodies. It takes
+ * a relative position and rotation and applies the forces and torques needed to achieve
+ * that relative transform over time.
+ * @{
+ */
+
+/// Create a motor joint
+/// @see b2MotorJointDef for details
+B2_API b2JointId b2CreateMotorJoint( b2WorldId worldId, const b2MotorJointDef* def );
+
+/// Set the motor joint linear offset target
+B2_API void b2MotorJoint_SetLinearOffset( b2JointId jointId, b2Vec2 linearOffset );
+
+/// Get the motor joint linear offset target
+B2_API b2Vec2 b2MotorJoint_GetLinearOffset( b2JointId jointId );
+
+/// Set the motor joint angular offset target in radians
+B2_API void b2MotorJoint_SetAngularOffset( b2JointId jointId, float angularOffset );
+
+/// Get the motor joint angular offset target in radians
+B2_API float b2MotorJoint_GetAngularOffset( b2JointId jointId );
+
+/// Set the motor joint maximum force, usually in newtons
+B2_API void b2MotorJoint_SetMaxForce( b2JointId jointId, float maxForce );
+
+/// Get the motor joint maximum force, usually in newtons
+B2_API float b2MotorJoint_GetMaxForce( b2JointId jointId );
+
+/// Set the motor joint maximum torque, usually in newton-meters
+B2_API void b2MotorJoint_SetMaxTorque( b2JointId jointId, float maxTorque );
+
+/// Get the motor joint maximum torque, usually in newton-meters
+B2_API float b2MotorJoint_GetMaxTorque( b2JointId jointId );
+
+/// Set the motor joint correction factor, usually in [0, 1]
+B2_API void b2MotorJoint_SetCorrectionFactor( b2JointId jointId, float correctionFactor );
+
+/// Get the motor joint correction factor, usually in [0, 1]
+B2_API float b2MotorJoint_GetCorrectionFactor( b2JointId jointId );
+
+/**@}*/
+
+/**
+ * @defgroup mouse_joint Mouse Joint
+ * @brief Functions for the mouse joint.
+ *
+ * The mouse joint is designed for use in the samples application, but you may find it useful in applications where
+ * the user moves a rigid body with a cursor.
+ * @{
+ */
+
+/// Create a mouse joint
+/// @see b2MouseJointDef for details
+B2_API b2JointId b2CreateMouseJoint( b2WorldId worldId, const b2MouseJointDef* def );
+
+/// Set the mouse joint target
+B2_API void b2MouseJoint_SetTarget( b2JointId jointId, b2Vec2 target );
+
+/// Get the mouse joint target
+B2_API b2Vec2 b2MouseJoint_GetTarget( b2JointId jointId );
+
+/// Set the mouse joint spring stiffness in Hertz
+B2_API void b2MouseJoint_SetSpringHertz( b2JointId jointId, float hertz );
+
+/// Get the mouse joint spring stiffness in Hertz
+B2_API float b2MouseJoint_GetSpringHertz( b2JointId jointId );
+
+/// Set the mouse joint spring damping ratio, non-dimensional
+B2_API void b2MouseJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio );
+
+/// Get the mouse joint damping ratio, non-dimensional
+B2_API float b2MouseJoint_GetSpringDampingRatio( b2JointId jointId );
+
+/// Set the mouse joint maximum force, usually in newtons
+B2_API void b2MouseJoint_SetMaxForce( b2JointId jointId, float maxForce );
+
+/// Get the mouse joint maximum force, usually in newtons
+B2_API float b2MouseJoint_GetMaxForce( b2JointId jointId );
+
+/**@}*/
+
+/**
+ * @defgroup filter_joint Filter Joint
+ * @brief Functions for the filter joint.
+ *
+ * The filter joint is used to disable collision between two bodies. As a side effect of being a joint, it also
+ * keeps the two bodies in the same simulation island.
+ * @{
+ */
+
+/// Create a filter joint.
+/// @see b2FilterJointDef for details
+B2_API b2JointId b2CreateFilterJoint( b2WorldId worldId, const b2FilterJointDef* def );
+
+/**@}*/
+
+/**
+ * @defgroup prismatic_joint Prismatic Joint
+ * @brief A prismatic joint allows for translation along a single axis with no rotation.
+ *
+ * The prismatic joint is useful for things like pistons and moving platforms, where you want a body to translate
+ * along an axis and have no rotation. Also called a *slider* joint.
+ * @{
+ */
+
+/// Create a prismatic (slider) joint.
+/// @see b2PrismaticJointDef for details
+B2_API b2JointId b2CreatePrismaticJoint( b2WorldId worldId, const b2PrismaticJointDef* def );
+
+/// Enable/disable the joint spring.
+B2_API void b2PrismaticJoint_EnableSpring( b2JointId jointId, bool enableSpring );
+
+/// Is the prismatic joint spring enabled or not?
+B2_API bool b2PrismaticJoint_IsSpringEnabled( b2JointId jointId );
+
+/// Set the prismatic joint stiffness in Hertz.
+/// This should usually be less than a quarter of the simulation rate. For example, if the simulation
+/// runs at 60Hz then the joint stiffness should be 15Hz or less.
+B2_API void b2PrismaticJoint_SetSpringHertz( b2JointId jointId, float hertz );
+
+/// Get the prismatic joint stiffness in Hertz
+B2_API float b2PrismaticJoint_GetSpringHertz( b2JointId jointId );
+
+/// Set the prismatic joint damping ratio (non-dimensional)
+B2_API void b2PrismaticJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio );
+
+/// Get the prismatic spring damping ratio (non-dimensional)
+B2_API float b2PrismaticJoint_GetSpringDampingRatio( b2JointId jointId );
+
+/// Enable/disable a prismatic joint limit
+B2_API void b2PrismaticJoint_EnableLimit( b2JointId jointId, bool enableLimit );
+
+/// Is the prismatic joint limit enabled?
+B2_API bool b2PrismaticJoint_IsLimitEnabled( b2JointId jointId );
+
+/// Get the prismatic joint lower limit
+B2_API float b2PrismaticJoint_GetLowerLimit( b2JointId jointId );
+
+/// Get the prismatic joint upper limit
+B2_API float b2PrismaticJoint_GetUpperLimit( b2JointId jointId );
+
+/// Set the prismatic joint limits
+B2_API void b2PrismaticJoint_SetLimits( b2JointId jointId, float lower, float upper );
+
+/// Enable/disable a prismatic joint motor
+B2_API void b2PrismaticJoint_EnableMotor( b2JointId jointId, bool enableMotor );
+
+/// Is the prismatic joint motor enabled?
+B2_API bool b2PrismaticJoint_IsMotorEnabled( b2JointId jointId );
+
+/// Set the prismatic joint motor speed, usually in meters per second
+B2_API void b2PrismaticJoint_SetMotorSpeed( b2JointId jointId, float motorSpeed );
+
+/// Get the prismatic joint motor speed, usually in meters per second
+B2_API float b2PrismaticJoint_GetMotorSpeed( b2JointId jointId );
+
+/// Set the prismatic joint maximum motor force, usually in newtons
+B2_API void b2PrismaticJoint_SetMaxMotorForce( b2JointId jointId, float force );
+
+/// Get the prismatic joint maximum motor force, usually in newtons
+B2_API float b2PrismaticJoint_GetMaxMotorForce( b2JointId jointId );
+
+/// Get the prismatic joint current motor force, usually in newtons
+B2_API float b2PrismaticJoint_GetMotorForce( b2JointId jointId );
+
+/// Get the current joint translation, usually in meters.
+B2_API float b2PrismaticJoint_GetTranslation( b2JointId jointId );
+
+/// Get the current joint translation speed, usually in meters per second.
+B2_API float b2PrismaticJoint_GetSpeed( b2JointId jointId );
+
+/** @} */
+
+/**
+ * @defgroup revolute_joint Revolute Joint
+ * @brief A revolute joint allows for relative rotation in the 2D plane with no relative translation.
+ *
+ * The revolute joint is probably the most common joint. It can be used for ragdolls and chains.
+ * Also called a *hinge* or *pin* joint.
+ * @{
+ */
+
+/// Create a revolute joint
+/// @see b2RevoluteJointDef for details
+B2_API b2JointId b2CreateRevoluteJoint( b2WorldId worldId, const b2RevoluteJointDef* def );
+
+/// Enable/disable the revolute joint spring
+B2_API void b2RevoluteJoint_EnableSpring( b2JointId jointId, bool enableSpring );
+
+/// Is the revolute angular spring enabled?
+B2_API bool b2RevoluteJoint_IsSpringEnabled( b2JointId jointId );
+
+/// Set the revolute joint spring stiffness in Hertz
+B2_API void b2RevoluteJoint_SetSpringHertz( b2JointId jointId, float hertz );
+
+/// Get the revolute joint spring stiffness in Hertz
+B2_API float b2RevoluteJoint_GetSpringHertz( b2JointId jointId );
+
+/// Set the revolute joint spring damping ratio, non-dimensional
+B2_API void b2RevoluteJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio );
+
+/// Get the revolute joint spring damping ratio, non-dimensional
+B2_API float b2RevoluteJoint_GetSpringDampingRatio( b2JointId jointId );
+
+/// Get the revolute joint current angle in radians relative to the reference angle
+/// @see b2RevoluteJointDef::referenceAngle
+B2_API float b2RevoluteJoint_GetAngle( b2JointId jointId );
+
+/// Enable/disable the revolute joint limit
+B2_API void b2RevoluteJoint_EnableLimit( b2JointId jointId, bool enableLimit );
+
+/// Is the revolute joint limit enabled?
+B2_API bool b2RevoluteJoint_IsLimitEnabled( b2JointId jointId );
+
+/// Get the revolute joint lower limit in radians
+B2_API float b2RevoluteJoint_GetLowerLimit( b2JointId jointId );
+
+/// Get the revolute joint upper limit in radians
+B2_API float b2RevoluteJoint_GetUpperLimit( b2JointId jointId );
+
+/// Set the revolute joint limits in radians
+B2_API void b2RevoluteJoint_SetLimits( b2JointId jointId, float lower, float upper );
+
+/// Enable/disable a revolute joint motor
+B2_API void b2RevoluteJoint_EnableMotor( b2JointId jointId, bool enableMotor );
+
+/// Is the revolute joint motor enabled?
+B2_API bool b2RevoluteJoint_IsMotorEnabled( b2JointId jointId );
+
+/// Set the revolute joint motor speed in radians per second
+B2_API void b2RevoluteJoint_SetMotorSpeed( b2JointId jointId, float motorSpeed );
+
+/// Get the revolute joint motor speed in radians per second
+B2_API float b2RevoluteJoint_GetMotorSpeed( b2JointId jointId );
+
+/// Get the revolute joint current motor torque, usually in newton-meters
+B2_API float b2RevoluteJoint_GetMotorTorque( b2JointId jointId );
+
+/// Set the revolute joint maximum motor torque, usually in newton-meters
+B2_API void b2RevoluteJoint_SetMaxMotorTorque( b2JointId jointId, float torque );
+
+/// Get the revolute joint maximum motor torque, usually in newton-meters
+B2_API float b2RevoluteJoint_GetMaxMotorTorque( b2JointId jointId );
+
+/**@}*/
+
+/**
+ * @defgroup weld_joint Weld Joint
+ * @brief A weld joint fully constrains the relative transform between two bodies while allowing for springiness
+ *
+ * A weld joint constrains the relative rotation and translation between two bodies. Both rotation and translation
+ * can have damped springs.
+ *
+ * @note The accuracy of weld joint is limited by the accuracy of the solver. Long chains of weld joints may flex.
+ * @{
+ */
+
+/// Create a weld joint
+/// @see b2WeldJointDef for details
+B2_API b2JointId b2CreateWeldJoint( b2WorldId worldId, const b2WeldJointDef* def );
+
+/// Get the weld joint reference angle in radians
+B2_API float b2WeldJoint_GetReferenceAngle( b2JointId jointId );
+
+/// Set the weld joint reference angle in radians, must be in [-pi,pi].
+B2_API void b2WeldJoint_SetReferenceAngle( b2JointId jointId, float angleInRadians );
+
+/// Set the weld joint linear stiffness in Hertz. 0 is rigid.
+B2_API void b2WeldJoint_SetLinearHertz( b2JointId jointId, float hertz );
+
+/// Get the weld joint linear stiffness in Hertz
+B2_API float b2WeldJoint_GetLinearHertz( b2JointId jointId );
+
+/// Set the weld joint linear damping ratio (non-dimensional)
+B2_API void b2WeldJoint_SetLinearDampingRatio( b2JointId jointId, float dampingRatio );
+
+/// Get the weld joint linear damping ratio (non-dimensional)
+B2_API float b2WeldJoint_GetLinearDampingRatio( b2JointId jointId );
+
+/// Set the weld joint angular stiffness in Hertz. 0 is rigid.
+B2_API void b2WeldJoint_SetAngularHertz( b2JointId jointId, float hertz );
+
+/// Get the weld joint angular stiffness in Hertz
+B2_API float b2WeldJoint_GetAngularHertz( b2JointId jointId );
+
+/// Set weld joint angular damping ratio, non-dimensional
+B2_API void b2WeldJoint_SetAngularDampingRatio( b2JointId jointId, float dampingRatio );
+
+/// Get the weld joint angular damping ratio, non-dimensional
+B2_API float b2WeldJoint_GetAngularDampingRatio( b2JointId jointId );
+
+/** @} */
+
+/**
+ * @defgroup wheel_joint Wheel Joint
+ * @brief The wheel joint can be used to simulate wheels on vehicles.
+ *
+ * The wheel joint restricts body B to move along a local axis in body A. Body B is free to
+ * rotate. Supports a linear spring, linear limits, and a rotational motor.
+ *
+ * @{
+ */
+
+/// Create a wheel joint
+/// @see b2WheelJointDef for details
+B2_API b2JointId b2CreateWheelJoint( b2WorldId worldId, const b2WheelJointDef* def );
+
+/// Enable/disable the wheel joint spring
+B2_API void b2WheelJoint_EnableSpring( b2JointId jointId, bool enableSpring );
+
+/// Is the wheel joint spring enabled?
+B2_API bool b2WheelJoint_IsSpringEnabled( b2JointId jointId );
+
+/// Set the wheel joint stiffness in Hertz
+B2_API void b2WheelJoint_SetSpringHertz( b2JointId jointId, float hertz );
+
+/// Get the wheel joint stiffness in Hertz
+B2_API float b2WheelJoint_GetSpringHertz( b2JointId jointId );
+
+/// Set the wheel joint damping ratio, non-dimensional
+B2_API void b2WheelJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio );
+
+/// Get the wheel joint damping ratio, non-dimensional
+B2_API float b2WheelJoint_GetSpringDampingRatio( b2JointId jointId );
+
+/// Enable/disable the wheel joint limit
+B2_API void b2WheelJoint_EnableLimit( b2JointId jointId, bool enableLimit );
+
+/// Is the wheel joint limit enabled?
+B2_API bool b2WheelJoint_IsLimitEnabled( b2JointId jointId );
+
+/// Get the wheel joint lower limit
+B2_API float b2WheelJoint_GetLowerLimit( b2JointId jointId );
+
+/// Get the wheel joint upper limit
+B2_API float b2WheelJoint_GetUpperLimit( b2JointId jointId );
+
+/// Set the wheel joint limits
+B2_API void b2WheelJoint_SetLimits( b2JointId jointId, float lower, float upper );
+
+/// Enable/disable the wheel joint motor
+B2_API void b2WheelJoint_EnableMotor( b2JointId jointId, bool enableMotor );
+
+/// Is the wheel joint motor enabled?
+B2_API bool b2WheelJoint_IsMotorEnabled( b2JointId jointId );
+
+/// Set the wheel joint motor speed in radians per second
+B2_API void b2WheelJoint_SetMotorSpeed( b2JointId jointId, float motorSpeed );
+
+/// Get the wheel joint motor speed in radians per second
+B2_API float b2WheelJoint_GetMotorSpeed( b2JointId jointId );
+
+/// Set the wheel joint maximum motor torque, usually in newton-meters
+B2_API void b2WheelJoint_SetMaxMotorTorque( b2JointId jointId, float torque );
+
+/// Get the wheel joint maximum motor torque, usually in newton-meters
+B2_API float b2WheelJoint_GetMaxMotorTorque( b2JointId jointId );
+
+/// Get the wheel joint current motor torque, usually in newton-meters
+B2_API float b2WheelJoint_GetMotorTorque( b2JointId jointId );
+
+/**@}*/
+
+/**@}*/
diff --git a/src/vendor/box2d/broad_phase.c b/src/vendor/box2d/broad_phase.c
new file mode 100644
index 0000000..c83729b
--- /dev/null
+++ b/src/vendor/box2d/broad_phase.c
@@ -0,0 +1,524 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#if defined( _MSC_VER ) && !defined( _CRT_SECURE_NO_WARNINGS )
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include "broad_phase.h"
+
+#include "aabb.h"
+#include "array.h"
+#include "atomic.h"
+#include "body.h"
+#include "contact.h"
+#include "core.h"
+#include "shape.h"
+#include "arena_allocator.h"
+#include "world.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+// #include <stdio.h>
+
+// static FILE* s_file = NULL;
+
+void b2CreateBroadPhase( b2BroadPhase* bp )
+{
+	// Puts the broad-phase into its empty starting state.
+	_Static_assert( b2_bodyTypeCount == 3, "must be three body types" );
+	// if (s_file == NULL)
+	//{
+	//	s_file = fopen("pairs01.txt", "a");
+	//	fprintf(s_file, "============\n\n");
+	// }
+
+	bp->proxyCount = 0;
+	bp->movePairCapacity = 0;
+	bp->moveResults = NULL;
+	bp->movePairs = NULL;
+	b2AtomicStoreInt( &bp->movePairIndex, 0 );
+	// Containers: move buffer (set and array), existing-pair set, and one tree per body type
+	bp->moveSet = b2CreateSet( 16 );
+	bp->moveArray = b2IntArray_Create( 16 );
+	bp->pairSet = b2CreateSet( 32 );
+	for ( int type = 0; type < b2_bodyTypeCount; ++type )
+	{
+		bp->trees[type] = b2DynamicTree_Create();
+	}
+}
+
+void b2DestroyBroadPhase( b2BroadPhase* bp )
+{
+	// Destroy the per-body-type trees, the move buffer and the pair set, then zero the struct.
+	for ( int type = 0; type < b2_bodyTypeCount; ++type )
+	{
+		b2DynamicTree_Destroy( &bp->trees[type] );
+	}
+
+	b2DestroySet( &bp->moveSet );
+	b2IntArray_Destroy( &bp->moveArray );
+	b2DestroySet( &bp->pairSet );
+	memset( bp, 0, sizeof( *bp ) );
+
+	// if (s_file != NULL)
+	//{
+	//	fclose(s_file);
+	//	s_file = NULL;
+	// }
+}
+
+static inline void b2UnBufferMove( b2BroadPhase* bp, int proxyKey )
+{
+	// Take the proxy out of the move set (stored as proxyKey + 1); nothing more to do if it was not there.
+	if ( b2RemoveKey( &bp->moveSet, proxyKey + 1 ) == false )
+	{
+		return;
+	}
+	// Purge from move buffer. Linear search.
+	// todo if I can iterate the move set then I don't need the moveArray
+	int moveCount = bp->moveArray.count;
+	for ( int index = 0; index < moveCount; ++index )
+	{
+		if ( bp->moveArray.data[index] == proxyKey )
+		{
+			b2IntArray_RemoveSwap( &bp->moveArray, index );
+			break;
+		}
+	}
+}
+
+int b2BroadPhase_CreateProxy( b2BroadPhase* bp, b2BodyType proxyType, b2AABB aabb, uint64_t categoryBits, int shapeIndex,
+							  bool forcePairCreation )
+{
+	B2_ASSERT( 0 <= proxyType && proxyType < b2_bodyTypeCount );
+	int treeProxyId = b2DynamicTree_CreateProxy( &bp->trees[proxyType], aabb, categoryBits, shapeIndex );
+	int key = B2_PROXY_KEY( treeProxyId, proxyType ); // the returned key combines the tree proxy id with the body type
+	if ( forcePairCreation || proxyType != b2_staticBody ) // static proxies are only buffered on request
+	{
+		b2BufferMove( bp, key );
+	}
+	return key;
+}
+
+void b2BroadPhase_DestroyProxy( b2BroadPhase* bp, int proxyKey )
+{
+	// Removes a proxy: drop it from the move buffer, then delete it from the tree for its body type.
+	B2_ASSERT( bp->moveArray.count == (int)bp->moveSet.count );
+	b2UnBufferMove( bp, proxyKey );
+	--bp->proxyCount; // NOTE(review): no matching increment is visible in b2BroadPhase_CreateProxy - confirm
+
+	b2BodyType proxyType = B2_PROXY_TYPE( proxyKey );
+	int proxyId = B2_PROXY_ID( proxyKey );
+	// proxyType indexes bp->trees (b2_bodyTypeCount entries), so it must be strictly less than the count
+	B2_ASSERT( 0 <= proxyType && proxyType < b2_bodyTypeCount );
+	b2DynamicTree_DestroyProxy( bp->trees + proxyType, proxyId );
+}
+
+void b2BroadPhase_MoveProxy( b2BroadPhase* bp, int proxyKey, b2AABB aabb )
+{
+	// Move the proxy inside the tree selected by its key, then buffer the move via b2BufferMove.
+	int treeProxyId = B2_PROXY_ID( proxyKey );
+	b2BodyType bodyType = B2_PROXY_TYPE( proxyKey );
+	b2DynamicTree_MoveProxy( &bp->trees[bodyType], treeProxyId, aabb );
+	b2BufferMove( bp, proxyKey );
+}
+
+void b2BroadPhase_EnlargeProxy( b2BroadPhase* bp, int proxyKey, b2AABB aabb )
+{
+	// Enlarge the proxy's AABB in its tree, then buffer the move via b2BufferMove.
+	B2_ASSERT( proxyKey != B2_NULL_INDEX );
+	int treeIndex = B2_PROXY_TYPE( proxyKey );
+	int treeProxyId = B2_PROXY_ID( proxyKey );
+	// Proxies that live in the static tree must not be passed here
+	B2_ASSERT( treeIndex != b2_staticBody );
+	b2DynamicTree_EnlargeProxy( &bp->trees[treeIndex], treeProxyId, aabb );
+	b2BufferMove( bp, proxyKey );
+}
+
+typedef struct b2MovePair // candidate shape pair produced by b2PairQueryCallback
+{
+	int shapeIndexA; // index into world->shapes
+	int shapeIndexB; // index into world->shapes
+	b2MovePair* next; // next pair in the same b2MoveResult list
+	bool heap; // true when allocated with b2Alloc instead of taken from the movePairs array
+} b2MovePair;
+
+typedef struct b2MoveResult // one per entry of the move array
+{
+	b2MovePair* pairList; // head of the singly linked list of pairs found for that entry
+} b2MoveResult;
+
+typedef struct b2QueryPairContext // state handed to b2PairQueryCallback through the query's context pointer
+{
+	b2World* world;
+	b2MoveResult* moveResult; // list that receives newly found pairs
+	b2BodyType queryTreeType; // body type of the tree currently being queried
+	int queryProxyKey; // key of the moved proxy that drives the query
+	int queryShapeIndex; // shape index stored as user data on the moved proxy
+} b2QueryPairContext;
+
+// Called from b2DynamicTree_Query (see b2FindPairsTask) when we are gathering pairs. Every path returns true to continue the query.
+static bool b2PairQueryCallback( int proxyId, uint64_t userData, void* context )
+{
+	int shapeId = (int)userData;
+
+	b2QueryPairContext* queryContext = context;
+	b2BroadPhase* broadPhase = &queryContext->world->broadPhase;
+
+	int proxyKey = B2_PROXY_KEY( proxyId, queryContext->queryTreeType );
+	int queryProxyKey = queryContext->queryProxyKey;
+
+	// A proxy cannot form a pair with itself.
+	if ( proxyKey == queryContext->queryProxyKey )
+	{
+		return true;
+	}
+
+	b2BodyType treeType = queryContext->queryTreeType;
+	b2BodyType queryProxyType = B2_PROXY_TYPE( queryProxyKey );
+
+	// De-duplication
+	// It is important to prevent duplicate contacts from being created. Ideally I can prevent duplicates
+	// early and in the worker. Most of the time the moveSet contains dynamic and kinematic proxies, but
+	// sometimes it has static proxies.
+
+	// I had an optimization here to skip checking the move set if this is a query into
+	// the static tree. The assumption is that the static proxies are never in the move set
+	// so there is no risk of duplication. However, this is not true with
+	// b2ShapeDef::forceContactCreation, b2ShapeDef::isSensor, or when a static shape is modified.
+	// There can easily be scenarios where the static proxy is in the moveSet but the dynamic proxy is not.
+	// I could have some flag to indicate that there are any static bodies in the moveSet.
+	
+	// Is this proxy also moving?
+	if ( queryProxyType == b2_dynamicBody)
+	{
+		if ( treeType == b2_dynamicBody && proxyKey < queryProxyKey)
+		{
+			bool moved = b2ContainsKey( &broadPhase->moveSet, proxyKey + 1 );
+			if ( moved )
+			{
+				// Both proxies are moving. Avoid duplicate pairs.
+				return true;
+			}
+		}
+	}
+	else
+	{
+		B2_ASSERT( treeType == b2_dynamicBody ); // non-dynamic proxies only query the dynamic tree (see b2FindPairsTask)
+		bool moved = b2ContainsKey( &broadPhase->moveSet, proxyKey + 1 );
+		if ( moved )
+		{
+			// Both proxies are moving. Avoid duplicate pairs.
+			return true;
+		}
+	}
+
+	uint64_t pairKey = B2_SHAPE_PAIR_KEY( shapeId, queryContext->queryShapeIndex );
+	if ( b2ContainsKey( &broadPhase->pairSet, pairKey ) )
+	{
+		// contact exists
+		return true;
+	}
+	// The shape whose proxy key is smaller becomes shape A
+	int shapeIdA, shapeIdB;
+	if ( proxyKey < queryProxyKey )
+	{
+		shapeIdA = shapeId;
+		shapeIdB = queryContext->queryShapeIndex;
+	}
+	else
+	{
+		shapeIdA = queryContext->queryShapeIndex;
+		shapeIdB = shapeId;
+	}
+
+	b2World* world = queryContext->world;
+
+	b2Shape* shapeA = b2ShapeArray_Get( &world->shapes, shapeIdA );
+	b2Shape* shapeB = b2ShapeArray_Get( &world->shapes, shapeIdB );
+
+	int bodyIdA = shapeA->bodyId;
+	int bodyIdB = shapeB->bodyId;
+
+	// Are the shapes on the same body?
+	if ( bodyIdA == bodyIdB )
+	{
+		return true;
+	}
+
+	// Sensors are handled elsewhere
+	if ( shapeA->sensorIndex != B2_NULL_INDEX || shapeB->sensorIndex != B2_NULL_INDEX )
+	{
+		return true;
+	}
+
+	if ( b2ShouldShapesCollide( shapeA->filter, shapeB->filter ) == false )
+	{
+		return true;
+	}
+
+	// Does a joint override collision?
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, bodyIdA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, bodyIdB );
+	if ( b2ShouldBodiesCollide( world, bodyA, bodyB ) == false )
+	{
+		return true;
+	}
+
+	// Custom user filter
+	b2CustomFilterFcn* customFilterFcn = queryContext->world->customFilterFcn;
+	if ( customFilterFcn != NULL )
+	{
+		b2ShapeId idA = { shapeIdA + 1, world->worldId, shapeA->generation };
+		b2ShapeId idB = { shapeIdB + 1, world->worldId, shapeB->generation };
+		bool shouldCollide = customFilterFcn( idA, idB, queryContext->world->customFilterContext );
+		if ( shouldCollide == false )
+		{
+			return true;
+		}
+	}
+
+	// todo per thread to eliminate atomic?
+	int pairIndex = b2AtomicFetchAddInt( &broadPhase->movePairIndex, 1 );
+	// Use a slot from movePairs while the reserved index is within capacity; otherwise b2Alloc one and mark it as heap
+	b2MovePair* pair;
+	if ( pairIndex < broadPhase->movePairCapacity )
+	{
+		pair = broadPhase->movePairs + pairIndex;
+		pair->heap = false;
+	}
+	else
+	{
+		pair = b2Alloc( sizeof( b2MovePair ) );
+		pair->heap = true;
+	}
+
+	pair->shapeIndexA = shapeIdA;
+	pair->shapeIndexB = shapeIdB;
+	pair->next = queryContext->moveResult->pairList;
+	queryContext->moveResult->pairList = pair;
+
+	// continue the query
+	return true;
+}
+
+// Warning: writing to these globals significantly slows multithreading performance
+#if B2_SNOOP_PAIR_COUNTERS
+b2TreeStats b2_dynamicStats;
+b2TreeStats b2_kinematicStats;
+b2TreeStats b2_staticStats;
+#endif
+
+static void b2FindPairsTask( int startIndex, int endIndex, uint32_t threadIndex, void* context )
+{
+	b2TracyCZoneNC( pair_task, "Pair", b2_colorMediumSlateBlue, true );
+
+	B2_UNUSED( threadIndex );
+	// context is the b2World. Each index in [startIndex, endIndex) selects one entry of the broad-phase move array.
+	b2World* world = context;
+	b2BroadPhase* bp = &world->broadPhase;
+
+	b2QueryPairContext queryContext;
+	queryContext.world = world;
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		// Initialize move result for this moved proxy
+		queryContext.moveResult = bp->moveResults + i;
+		queryContext.moveResult->pairList = NULL;
+
+		int proxyKey = bp->moveArray.data[i];
+		if ( proxyKey == B2_NULL_INDEX )
+		{
+			// proxy was destroyed after it moved
+			continue;
+		}
+
+		b2BodyType proxyType = B2_PROXY_TYPE( proxyKey );
+
+		int proxyId = B2_PROXY_ID( proxyKey );
+		queryContext.queryProxyKey = proxyKey;
+
+		const b2DynamicTree* baseTree = bp->trees + proxyType;
+
+		// We have to query the tree with the fat AABB so that
+		// we don't fail to create a contact that may touch later.
+		b2AABB fatAABB = b2DynamicTree_GetAABB( baseTree, proxyId );
+		queryContext.queryShapeIndex = (int)b2DynamicTree_GetUserData( baseTree, proxyId );
+
+		// Query trees. Only dynamic proxies collide with kinematic and static proxies.
+		// Using B2_DEFAULT_MASK_BITS so that b2Filter::groupIndex works.
+		b2TreeStats stats = { 0 }; // accumulated below; not read anywhere in this function
+		if ( proxyType == b2_dynamicBody )
+		{
+			// consider using bits = groupIndex > 0 ? B2_DEFAULT_MASK_BITS : maskBits
+			queryContext.queryTreeType = b2_kinematicBody;
+			b2TreeStats statsKinematic = b2DynamicTree_Query( bp->trees + b2_kinematicBody, fatAABB, B2_DEFAULT_MASK_BITS, b2PairQueryCallback, &queryContext );
+			stats.nodeVisits += statsKinematic.nodeVisits;
+			stats.leafVisits += statsKinematic.leafVisits;
+
+			queryContext.queryTreeType = b2_staticBody;
+			b2TreeStats statsStatic = b2DynamicTree_Query( bp->trees + b2_staticBody, fatAABB, B2_DEFAULT_MASK_BITS, b2PairQueryCallback, &queryContext );
+			stats.nodeVisits += statsStatic.nodeVisits;
+			stats.leafVisits += statsStatic.leafVisits;
+		}
+
+		// All proxies collide with dynamic proxies
+		// Using B2_DEFAULT_MASK_BITS so that b2Filter::groupIndex works.
+		queryContext.queryTreeType = b2_dynamicBody;
+		b2TreeStats statsDynamic = b2DynamicTree_Query( bp->trees + b2_dynamicBody, fatAABB, B2_DEFAULT_MASK_BITS, b2PairQueryCallback, &queryContext );
+		stats.nodeVisits += statsDynamic.nodeVisits;
+		stats.leafVisits += statsDynamic.leafVisits;
+	}
+
+	b2TracyCZoneEnd( pair_task );
+}
+
+// Find new shape pairs for every proxy in the move array and create a contact for each pair.
+// The pair search is handed to the world's task callbacks (enqueueTaskFcn and finishTaskFcn)
+// with b2FindPairsTask as the task. Contacts are then created in a single-threaded loop in
+// move array order. Finally the move array and the move set are cleared.
+void b2UpdateBroadPhasePairs( b2World* world )
+{
+	b2BroadPhase* bp = &world->broadPhase;
+
+	// The move array and the move set are expected to hold the same number of entries
+	int moveCount = bp->moveArray.count;
+	B2_ASSERT( moveCount == (int)bp->moveSet.count );
+
+	// Nothing moved, so there are no new pairs to look for
+	if ( moveCount == 0 )
+	{
+		return;
+	}
+
+	b2TracyCZoneNC( update_pairs, "Find Pairs", b2_colorMediumSlateBlue, true );
+
+	b2ArenaAllocator* alloc = &world->arena;
+
+	// One move result per moved proxy and room for 16 move pairs per moved proxy
+	// todo these could be in the step context
+	bp->moveResults = b2AllocateArenaItem( alloc, moveCount * sizeof( b2MoveResult ), "move results" );
+	bp->movePairCapacity = 16 * moveCount;
+	bp->movePairs = b2AllocateArenaItem( alloc, bp->movePairCapacity * sizeof( b2MovePair ), "move pairs" );
+	b2AtomicStoreInt(&bp->movePairIndex, 0);
+
+	// When B2_SNOOP_TABLE_COUNTERS is enabled, zero the probe counter declared extern below
+#if B2_SNOOP_TABLE_COUNTERS
+	extern b2AtomicInt b2_probeCount;
+	b2AtomicStoreInt(&b2_probeCount, 0);
+#endif
+
+	// Enqueue b2FindPairsTask for moveCount items through the user task callbacks.
+	// The task is only finished and counted when enqueueTaskFcn returns a non-NULL handle.
+	int minRange = 64;
+	void* userPairTask = world->enqueueTaskFcn( &b2FindPairsTask, moveCount, minRange, world, world->userTaskContext );
+	if (userPairTask != NULL)
+	{
+		world->finishTaskFcn( userPairTask, world->userTaskContext );
+		world->taskCount += 1;
+	}
+
+	// todo_erin could start tree rebuild here
+
+	b2TracyCZoneNC( create_contacts, "Create Contacts", b2_colorCoral, true );
+
+	// Single-threaded work
+	// - Clear move flags
+	// - Create contacts in deterministic order
+	for ( int i = 0; i < moveCount; ++i )
+	{
+		b2MoveResult* result = bp->moveResults + i;
+		b2MovePair* pair = result->pairList;
+		while ( pair != NULL )
+		{
+			int shapeIdA = pair->shapeIndexA;
+			int shapeIdB = pair->shapeIndexB;
+
+			// Look up both shapes of the pair and create their contact
+			b2Shape* shapeA = b2ShapeArray_Get( &world->shapes, shapeIdA );
+			b2Shape* shapeB = b2ShapeArray_Get( &world->shapes, shapeIdB );
+
+			b2CreateContact( world, shapeA, shapeB );
+
+			// Pairs flagged as heap are released one at a time with b2Free. The other pairs are
+			// not freed here. Presumably they live in bp->movePairs (confirm in b2PairQueryCallback).
+			if ( pair->heap )
+			{
+				// Advance first so that next is read before the pair is freed
+				b2MovePair* temp = pair;
+				pair = pair->next;
+				b2Free( temp, sizeof( b2MovePair ) );
+			}
+			else
+			{
+				pair = pair->next;
+			}
+		}
+	}
+
+	// Reset move buffer
+	b2IntArray_Clear( &bp->moveArray );
+	b2ClearSet( &bp->moveSet );
+
+	// Release the arena items in the reverse order of their allocation above
+	b2FreeArenaItem( alloc, bp->movePairs );
+	bp->movePairs = NULL;
+	b2FreeArenaItem( alloc, bp->moveResults );
+	bp->moveResults = NULL;
+
+	b2ValidateSolverSets( world );
+
+	b2TracyCZoneEnd( create_contacts );
+
+	b2TracyCZoneEnd( update_pairs );
+}
+
+// Returns true when the tree AABBs stored for the two proxy keys overlap
+bool b2BroadPhase_TestOverlap( const b2BroadPhase* bp, int proxyKeyA, int proxyKeyB )
+{
+	const b2DynamicTree* treeA = bp->trees + B2_PROXY_TYPE( proxyKeyA );
+	const b2DynamicTree* treeB = bp->trees + B2_PROXY_TYPE( proxyKeyB );
+
+	b2AABB boxA = b2DynamicTree_GetAABB( treeA, B2_PROXY_ID( proxyKeyA ) );
+	b2AABB boxB = b2DynamicTree_GetAABB( treeB, B2_PROXY_ID( proxyKeyB ) );
+
+	return b2AABB_Overlaps( boxA, boxB );
+}
+
+void b2BroadPhase_RebuildTrees( b2BroadPhase* bp )
+{
+	b2DynamicTree_Rebuild( bp->trees + b2_dynamicBody, false ); // NOTE(review): the meaning of the false argument is not visible here, confirm
+	b2DynamicTree_Rebuild( bp->trees + b2_kinematicBody, false ); // the static tree is not rebuilt by this function
+}
+
+// Returns the user data stored in the tree for this proxy, cast to int (used as the shape index)
+int b2BroadPhase_GetShapeIndex( b2BroadPhase* bp, int proxyKey )
+{
+	b2DynamicTree* tree = bp->trees + B2_PROXY_TYPE( proxyKey );
+
+	return (int)b2DynamicTree_GetUserData( tree, B2_PROXY_ID( proxyKey ) );
+}
+
+// Validates the dynamic tree and the kinematic tree. The static tree is not validated here.
+void b2ValidateBroadphase( const b2BroadPhase* bp )
+{
+	b2DynamicTree_Validate( bp->trees + b2_dynamicBody );
+	b2DynamicTree_Validate( bp->trees + b2_kinematicBody );
+	// TODO_ERIN validate every shape AABB is contained in tree AABB
+}
+
+// With B2_VALIDATE == 1 this runs b2DynamicTree_ValidateNoEnlarged on every tree. Otherwise it does nothing.
+void b2ValidateNoEnlarged( const b2BroadPhase* bp )
+{
+#if B2_VALIDATE == 1
+	for ( int typeIndex = 0; typeIndex < b2_bodyTypeCount; ++typeIndex )
+	{
+		b2DynamicTree_ValidateNoEnlarged( bp->trees + typeIndex );
+	}
+#else
+	B2_UNUSED( bp );
+#endif
+}
diff --git a/src/vendor/box2d/broad_phase.h b/src/vendor/box2d/broad_phase.h
new file mode 100644
index 0000000..43389ff
--- /dev/null
+++ b/src/vendor/box2d/broad_phase.h
@@ -0,0 +1,83 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+#include "table.h"
+
+#include "box2d/collision.h"
+#include "box2d/types.h"
+
+typedef struct b2Shape b2Shape;
+typedef struct b2MovePair b2MovePair;
+typedef struct b2MoveResult b2MoveResult;
+typedef struct b2ArenaAllocator b2ArenaAllocator;
+typedef struct b2World b2World;
+
+// Store the proxy type in the lower 2 bits of the proxy key. This leaves 30 bits for the id.
+#define B2_PROXY_TYPE( KEY ) ( (b2BodyType)( ( KEY ) & 3 ) )
+#define B2_PROXY_ID( KEY ) ( ( KEY ) >> 2 )
+#define B2_PROXY_KEY( ID, TYPE ) ( ( ( ID ) << 2 ) | ( TYPE ) )
+
+/// The broad-phase is used for computing pairs and performing volume queries and ray casts.
+/// This broad-phase does not persist pairs. Instead, this reports potentially new pairs.
+/// It is up to the client to consume the new pairs and to track subsequent overlap.
+typedef struct b2BroadPhase
+{
+	b2DynamicTree trees[b2_bodyTypeCount]; // one tree per body type, indexed with B2_PROXY_TYPE of a proxy key
+	int proxyCount;
+
+	// The move set and array are used to track shapes that have moved significantly
+	// and need a pair query for new contacts. The array has a deterministic order.
+	// todo perhaps just a move set?
+	// todo implement a 32bit hash set for faster lookup
+	// todo moveSet can grow quite large on the first time step and remain large
+	b2HashSet moveSet; // b2BufferMove stores proxy key + 1 here because 0 is the sentinel
+	b2IntArray moveArray; // proxy keys in the order they were buffered
+
+	// These are the results from the pair query and are used to create new contacts
+	// in deterministic order.
+	// todo these could be in the step context
+	b2MoveResult* moveResults; // one entry per move array entry, allocated in b2UpdateBroadPhasePairs
+	b2MovePair* movePairs; // movePairCapacity entries, allocated in b2UpdateBroadPhasePairs
+	int movePairCapacity; // set to 16 times the move count
+	b2AtomicInt movePairIndex; // stored as zero before the pair tasks are enqueued
+
+	// Tracks shape pairs that have a b2Contact
+	// todo pairSet can grow quite large on the first time step and remain large
+	b2HashSet pairSet;
+
+} b2BroadPhase;
+
+void b2CreateBroadPhase( b2BroadPhase* bp );
+void b2DestroyBroadPhase( b2BroadPhase* bp );
+
+int b2BroadPhase_CreateProxy( b2BroadPhase* bp, b2BodyType proxyType, b2AABB aabb, uint64_t categoryBits, int shapeIndex,
+							  bool forcePairCreation );
+void b2BroadPhase_DestroyProxy( b2BroadPhase* bp, int proxyKey );
+
+void b2BroadPhase_MoveProxy( b2BroadPhase* bp, int proxyKey, b2AABB aabb );
+void b2BroadPhase_EnlargeProxy( b2BroadPhase* bp, int proxyKey, b2AABB aabb );
+
+void b2BroadPhase_RebuildTrees( b2BroadPhase* bp ); // rebuilds the dynamic and kinematic trees
+
+int b2BroadPhase_GetShapeIndex( b2BroadPhase* bp, int proxyKey ); // tree user data of the proxy, cast to int
+
+void b2UpdateBroadPhasePairs( b2World* world ); // finds pairs for buffered moves, creates contacts, clears the move buffer
+bool b2BroadPhase_TestOverlap( const b2BroadPhase* bp, int proxyKeyA, int proxyKeyB ); // true when both tree AABBs overlap
+
+void b2ValidateBroadphase( const b2BroadPhase* bp ); // validates the dynamic and kinematic trees
+void b2ValidateNoEnlarged( const b2BroadPhase* bp ); // checks every tree, only when B2_VALIDATE == 1
+
+// Buffers a proxy key so that the next pair update queries it for new contact pairs.
+// Warning: this must be called in deterministic order
+static inline void b2BufferMove( b2BroadPhase* bp, int queryProxy )
+{
+	// Keys go into the set offset by 1 because 0 is the sentinel
+	if ( b2AddKey( &bp->moveSet, queryProxy + 1 ) )
+	{
+		return; // this proxy key is already buffered
+	}
+	b2IntArray_Push( &bp->moveArray, queryProxy );
+}
diff --git a/src/vendor/box2d/collision.h b/src/vendor/box2d/collision.h
new file mode 100644
index 0000000..7ef93b0
--- /dev/null
+++ b/src/vendor/box2d/collision.h
@@ -0,0 +1,830 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "base.h"
+#include "math_functions.h"
+
+#include <stdbool.h>
+
+typedef struct b2SimplexCache b2SimplexCache;
+typedef struct b2Hull b2Hull;
+
+/**
+ * @defgroup geometry Geometry
+ * @brief Geometry types and algorithms
+ *
+ * Definitions of circles, capsules, segments, and polygons. Various algorithms to compute hulls, mass properties, and so on.
+ * @{
+ */
+
+/// The maximum number of vertices on a convex polygon. Changing this affects performance even if you
+/// don't use more vertices.
+#define B2_MAX_POLYGON_VERTICES 8
+
+/// Low level ray cast input data
+typedef struct b2RayCastInput
+{
+	/// Start point of the ray cast
+	b2Vec2 origin;
+
+	/// Translation of the ray cast
+	b2Vec2 translation;
+
+	/// The maximum fraction of the translation to consider, typically 1
+	float maxFraction;
+} b2RayCastInput;
+
+/// A distance proxy is used by the GJK algorithm. It encapsulates any shape.
+/// You can provide between 1 and B2_MAX_POLYGON_VERTICES points and a radius.
+typedef struct b2ShapeProxy
+{
+	/// The point cloud
+	b2Vec2 points[B2_MAX_POLYGON_VERTICES];
+
+	/// The number of points. Must be greater than 0.
+	int count;
+
+	/// The external radius of the point cloud. May be zero.
+	float radius;
+} b2ShapeProxy;
+
+/// Low level shape cast input in generic form. This allows casting an arbitrary point
+/// cloud wrapped with a radius. For example, a circle is a single point with a non-zero radius.
+/// A capsule is two points with a non-zero radius. A box is four points with a zero radius.
+typedef struct b2ShapeCastInput
+{
+	/// A generic shape
+	b2ShapeProxy proxy;
+
+	/// The translation of the shape cast
+	b2Vec2 translation;
+
+	/// The maximum fraction of the translation to consider, typically 1
+	float maxFraction;
+
+	/// Allow shape cast to encroach when initially touching. This only works if the radius is greater than zero.
+	bool canEncroach;
+} b2ShapeCastInput;
+
+/// Low level ray cast or shape-cast output data
+typedef struct b2CastOutput
+{
+	/// The surface normal at the hit point
+	b2Vec2 normal;
+
+	/// The surface hit point
+	b2Vec2 point;
+
+	/// The fraction of the input translation at collision
+	float fraction;
+
+	/// The number of iterations used
+	int iterations;
+
+	/// Did the cast hit?
+	bool hit;
+} b2CastOutput;
+
+/// This holds the mass data computed for a shape.
+typedef struct b2MassData
+{
+	/// The mass of the shape, usually in kilograms.
+	float mass;
+
+	/// The position of the shape's centroid relative to the shape's origin.
+	b2Vec2 center;
+
+	/// The rotational inertia of the shape about the local origin.
+	float rotationalInertia;
+} b2MassData;
+
+/// A solid circle
+typedef struct b2Circle
+{
+	/// The local center
+	b2Vec2 center;
+
+	/// The radius
+	float radius;
+} b2Circle;
+
+/// A solid capsule can be viewed as two semicircles connected
+/// by a rectangle.
+typedef struct b2Capsule
+{
+	/// Local center of the first semicircle
+	b2Vec2 center1;
+
+	/// Local center of the second semicircle
+	b2Vec2 center2;
+
+	/// The radius of the semicircles
+	float radius;
+} b2Capsule;
+
+/// A solid convex polygon. It is assumed that the interior of the polygon is to
+/// the left of each edge.
+/// Polygons have a maximum number of vertices equal to B2_MAX_POLYGON_VERTICES.
+/// In most cases you should not need many vertices for a convex polygon.
+/// @warning DO NOT fill this out manually, instead use a helper function like
+/// b2MakePolygon or b2MakeBox.
+typedef struct b2Polygon
+{
+	/// The polygon vertices
+	b2Vec2 vertices[B2_MAX_POLYGON_VERTICES];
+
+	/// The outward normal vectors of the polygon sides
+	b2Vec2 normals[B2_MAX_POLYGON_VERTICES];
+
+	/// The centroid of the polygon
+	b2Vec2 centroid;
+
+	/// The external radius for rounded polygons
+	float radius;
+
+	/// The number of polygon vertices
+	int count;
+} b2Polygon;
+
+/// A line segment with two-sided collision.
+typedef struct b2Segment
+{
+	/// The first point
+	b2Vec2 point1;
+
+	/// The second point
+	b2Vec2 point2;
+} b2Segment;
+
+/// A line segment with one-sided collision. Only collides on the right side.
+/// Several of these are generated for a chain shape.
+/// ghost1 -> point1 -> point2 -> ghost2
+typedef struct b2ChainSegment
+{
+	/// The tail ghost vertex
+	b2Vec2 ghost1;
+
+	/// The line segment
+	b2Segment segment;
+
+	/// The head ghost vertex
+	b2Vec2 ghost2;
+
+	/// The owning chain shape index (internal usage only)
+	int chainId;
+} b2ChainSegment;
+
+/// Validate ray cast input data (NaN, etc)
+B2_API bool b2IsValidRay( const b2RayCastInput* input );
+
+/// Make a convex polygon from a convex hull. This will assert if the hull is not valid.
+/// @warning Do not manually fill in the hull data, it must come directly from b2ComputeHull
+B2_API b2Polygon b2MakePolygon( const b2Hull* hull, float radius );
+
+/// Make an offset convex polygon from a convex hull. This will assert if the hull is not valid.
+/// @warning Do not manually fill in the hull data, it must come directly from b2ComputeHull
+B2_API b2Polygon b2MakeOffsetPolygon( const b2Hull* hull, b2Vec2 position, b2Rot rotation );
+
+/// Make an offset convex polygon from a convex hull. This will assert if the hull is not valid.
+/// @warning Do not manually fill in the hull data, it must come directly from b2ComputeHull
+B2_API b2Polygon b2MakeOffsetRoundedPolygon( const b2Hull* hull, b2Vec2 position, b2Rot rotation, float radius );
+
+/// Make a square polygon, bypassing the need for a convex hull.
+/// @param halfWidth the half-width
+B2_API b2Polygon b2MakeSquare( float halfWidth );
+
+/// Make a box (rectangle) polygon, bypassing the need for a convex hull.
+/// @param halfWidth the half-width (x-axis)
+/// @param halfHeight the half-height (y-axis)
+B2_API b2Polygon b2MakeBox( float halfWidth, float halfHeight );
+
+/// Make a rounded box, bypassing the need for a convex hull.
+/// @param halfWidth the half-width (x-axis)
+/// @param halfHeight the half-height (y-axis)
+/// @param radius the radius of the rounded extension
+B2_API b2Polygon b2MakeRoundedBox( float halfWidth, float halfHeight, float radius );
+
+/// Make an offset box, bypassing the need for a convex hull.
+/// @param halfWidth the half-width (x-axis)
+/// @param halfHeight the half-height (y-axis)
+/// @param center the local center of the box
+/// @param rotation the local rotation of the box
+B2_API b2Polygon b2MakeOffsetBox( float halfWidth, float halfHeight, b2Vec2 center, b2Rot rotation );
+
+/// Make an offset rounded box, bypassing the need for a convex hull.
+/// @param halfWidth the half-width (x-axis)
+/// @param halfHeight the half-height (y-axis)
+/// @param center the local center of the box
+/// @param rotation the local rotation of the box
+/// @param radius the radius of the rounded extension
+B2_API b2Polygon b2MakeOffsetRoundedBox( float halfWidth, float halfHeight, b2Vec2 center, b2Rot rotation, float radius );
+
+/// Transform a polygon. This is useful for transferring a shape from one body to another.
+B2_API b2Polygon b2TransformPolygon( b2Transform transform, const b2Polygon* polygon );
+
+/// Compute mass properties of a circle
+B2_API b2MassData b2ComputeCircleMass( const b2Circle* shape, float density );
+
+/// Compute mass properties of a capsule
+B2_API b2MassData b2ComputeCapsuleMass( const b2Capsule* shape, float density );
+
+/// Compute mass properties of a polygon
+B2_API b2MassData b2ComputePolygonMass( const b2Polygon* shape, float density );
+
+/// Compute the bounding box of a transformed circle
+B2_API b2AABB b2ComputeCircleAABB( const b2Circle* shape, b2Transform transform );
+
+/// Compute the bounding box of a transformed capsule
+B2_API b2AABB b2ComputeCapsuleAABB( const b2Capsule* shape, b2Transform transform );
+
+/// Compute the bounding box of a transformed polygon
+B2_API b2AABB b2ComputePolygonAABB( const b2Polygon* shape, b2Transform transform );
+
+/// Compute the bounding box of a transformed line segment
+B2_API b2AABB b2ComputeSegmentAABB( const b2Segment* shape, b2Transform transform );
+
+/// Test a point for overlap with a circle in local space
+B2_API bool b2PointInCircle( b2Vec2 point, const b2Circle* shape );
+
+/// Test a point for overlap with a capsule in local space
+B2_API bool b2PointInCapsule( b2Vec2 point, const b2Capsule* shape );
+
+/// Test a point for overlap with a convex polygon in local space
+B2_API bool b2PointInPolygon( b2Vec2 point, const b2Polygon* shape );
+
+/// Ray cast versus circle shape in local space. Initial overlap is treated as a miss.
+B2_API b2CastOutput b2RayCastCircle( const b2RayCastInput* input, const b2Circle* shape );
+
+/// Ray cast versus capsule shape in local space. Initial overlap is treated as a miss.
+B2_API b2CastOutput b2RayCastCapsule( const b2RayCastInput* input, const b2Capsule* shape );
+
+/// Ray cast versus segment shape in local space. Optionally treat the segment as one-sided with hits from
+/// the left side being treated as a miss.
+B2_API b2CastOutput b2RayCastSegment( const b2RayCastInput* input, const b2Segment* shape, bool oneSided );
+
+/// Ray cast versus polygon shape in local space. Initial overlap is treated as a miss.
+B2_API b2CastOutput b2RayCastPolygon( const b2RayCastInput* input, const b2Polygon* shape );
+
+/// Shape cast versus a circle. Initial overlap is treated as a miss.
+B2_API b2CastOutput b2ShapeCastCircle( const b2ShapeCastInput* input, const b2Circle* shape );
+
+/// Shape cast versus a capsule. Initial overlap is treated as a miss.
+B2_API b2CastOutput b2ShapeCastCapsule( const b2ShapeCastInput* input, const b2Capsule* shape );
+
+/// Shape cast versus a line segment. Initial overlap is treated as a miss.
+B2_API b2CastOutput b2ShapeCastSegment( const b2ShapeCastInput* input, const b2Segment* shape );
+
+/// Shape cast versus a convex polygon. Initial overlap is treated as a miss.
+B2_API b2CastOutput b2ShapeCastPolygon( const b2ShapeCastInput* input, const b2Polygon* shape );
+
+/// A convex hull. Used to create convex polygons.
+/// @warning Do not modify these values directly, instead use b2ComputeHull()
+typedef struct b2Hull
+{
+	/// The final points of the hull
+	b2Vec2 points[B2_MAX_POLYGON_VERTICES];
+
+	/// The number of points
+	int count;
+} b2Hull;
+
+/// Compute the convex hull of a set of points. Returns an empty hull if it fails.
+/// Some failure cases:
+/// - all points very close together
+/// - all points on a line
+/// - fewer than 3 points
+/// - more than B2_MAX_POLYGON_VERTICES points
+/// This welds close points and removes collinear points.
+/// @warning Do not modify a hull once it has been computed
+B2_API b2Hull b2ComputeHull( const b2Vec2* points, int count );
+
+/// This determines if a hull is valid. Checks for:
+/// - convexity
+/// - collinear points
+/// This is expensive and should not be called at runtime.
+B2_API bool b2ValidateHull( const b2Hull* hull );
+
+/**@}*/
+
+/**
+ * @defgroup distance Distance
+ * Functions for computing the distance between shapes.
+ *
+ * These are advanced functions you can use to perform distance calculations. There
+ * are functions for computing the closest points between shapes, doing linear shape casts,
+ * and doing rotational shape casts. The latter is called time of impact (TOI).
+ * @{
+ */
+
+/// Result of computing the distance between two line segments
+typedef struct b2SegmentDistanceResult
+{
+	/// The closest point on the first segment
+	b2Vec2 closest1;
+
+	/// The closest point on the second segment
+	b2Vec2 closest2;
+
+	/// The barycentric coordinate on the first segment
+	float fraction1;
+
+	/// The barycentric coordinate on the second segment
+	float fraction2;
+
+	/// The squared distance between the closest points
+	float distanceSquared;
+} b2SegmentDistanceResult;
+
+/// Compute the distance between two line segments, clamping at the end points if needed.
+B2_API b2SegmentDistanceResult b2SegmentDistance( b2Vec2 p1, b2Vec2 q1, b2Vec2 p2, b2Vec2 q2 );
+
+/// Used to warm start the GJK simplex. If you call this function multiple times with nearby
+/// transforms this might improve performance. Otherwise you can zero initialize this.
+/// The simplex cache must be initialized to zero on the first call.
+/// Users should generally just zero initialize this structure for each call.
+typedef struct b2SimplexCache
+{
+	/// The number of stored simplex points
+	uint16_t count;
+
+	/// The cached simplex indices on shape A
+	uint8_t indexA[3];
+
+	/// The cached simplex indices on shape B
+	uint8_t indexB[3];
+} b2SimplexCache;
+
+static const b2SimplexCache b2_emptySimplexCache = B2_ZERO_INIT;
+
+/// Input for b2ShapeDistance
+typedef struct b2DistanceInput
+{
+	/// The proxy for shape A
+	b2ShapeProxy proxyA;
+
+	/// The proxy for shape B
+	b2ShapeProxy proxyB;
+
+	/// The world transform for shape A
+	b2Transform transformA;
+
+	/// The world transform for shape B
+	b2Transform transformB;
+
+	/// Should the proxy radius be considered?
+	bool useRadii;
+} b2DistanceInput;
+
+/// Output for b2ShapeDistance
+typedef struct b2DistanceOutput
+{
+	b2Vec2 pointA;	  ///< Closest point on shapeA
+	b2Vec2 pointB;	  ///< Closest point on shapeB
+	b2Vec2 normal;	  ///< Normal vector that points from A to B
+	float distance;	  ///< The final distance, zero if overlapped
+	int iterations;	  ///< Number of GJK iterations used
+	int simplexCount; ///< The number of simplexes stored in the simplex array
+} b2DistanceOutput;
+
+/// Simplex vertex for debugging the GJK algorithm
+typedef struct b2SimplexVertex
+{
+	b2Vec2 wA;	///< support point in proxyA
+	b2Vec2 wB;	///< support point in proxyB
+	b2Vec2 w;	///< wB - wA
+	float a;	///< barycentric coordinate for closest point
+	int indexA; ///< wA index
+	int indexB; ///< wB index
+} b2SimplexVertex;
+
+/// Simplex from the GJK algorithm
+typedef struct b2Simplex
+{
+	b2SimplexVertex v1, v2, v3; ///< vertices
+	int count;					///< number of valid vertices
+} b2Simplex;
+
+/// Compute the closest points between two shapes represented as point clouds.
+/// b2SimplexCache cache is input/output. On the first call set b2SimplexCache.count to zero.
+/// The underlying GJK algorithm may be debugged by passing in debug simplexes and capacity. You may pass in NULL and 0 for these.
+B2_API b2DistanceOutput b2ShapeDistance( const b2DistanceInput* input, b2SimplexCache* cache, b2Simplex* simplexes,
+										 int simplexCapacity );
+
+/// Input parameters for b2ShapeCast
+typedef struct b2ShapeCastPairInput
+{
+	b2ShapeProxy proxyA;	///< The proxy for shape A
+	b2ShapeProxy proxyB;	///< The proxy for shape B
+	b2Transform transformA; ///< The world transform for shape A
+	b2Transform transformB; ///< The world transform for shape B
+	b2Vec2 translationB;	///< The translation of shape B
+	float maxFraction;		///< The fraction of the translation to consider, typically 1
+	bool canEncroach;		///< Allows shapes with a radius to move slightly closer if already touching
+} b2ShapeCastPairInput;
+
+/// Perform a linear shape cast of shape B moving and shape A fixed. Determines the hit point, normal, and translation fraction.
+/// The result is returned by value as a b2CastOutput.
+B2_API b2CastOutput b2ShapeCast( const b2ShapeCastPairInput* input);
+
+/// Make a proxy for use in overlap, shape cast, and related functions. This is a deep copy of the points.
+B2_API b2ShapeProxy b2MakeProxy( const b2Vec2* points, int count, float radius );
+
+/// Make a proxy with a transform. This is a deep copy of the points.
+B2_API b2ShapeProxy b2MakeOffsetProxy( const b2Vec2* points, int count, float radius, b2Vec2 position, b2Rot rotation );
+
+/// This describes the motion of a body/shape for TOI computation. Shapes are defined with respect to the body origin,
+/// which may not coincide with the center of mass. However, to support dynamics we must interpolate the center of mass
+/// position.
+typedef struct b2Sweep
+{
+	b2Vec2 localCenter; ///< Local center of mass position
+	b2Vec2 c1;			///< Starting center of mass world position
+	b2Vec2 c2;			///< Ending center of mass world position
+	b2Rot q1;			///< Starting world rotation
+	b2Rot q2;			///< Ending world rotation
+} b2Sweep;
+
+/// Evaluate the transform sweep at a specific time.
+B2_API b2Transform b2GetSweepTransform( const b2Sweep* sweep, float time );
+
+/// Input parameters for b2TimeOfImpact
+typedef struct b2TOIInput
+{
+	b2ShapeProxy proxyA; ///< The proxy for shape A
+	b2ShapeProxy proxyB; ///< The proxy for shape B
+	b2Sweep sweepA;		 ///< The movement of shape A
+	b2Sweep sweepB;		 ///< The movement of shape B
+	float maxFraction;	 ///< Defines the sweep interval [0, maxFraction]
+} b2TOIInput;
+
+/// Describes the TOI output
+typedef enum b2TOIState
+{
+	b2_toiStateUnknown,
+	b2_toiStateFailed,
+	b2_toiStateOverlapped,
+	b2_toiStateHit,
+	b2_toiStateSeparated
+} b2TOIState;
+
+/// Output parameters for b2TimeOfImpact.
+typedef struct b2TOIOutput
+{
+	b2TOIState state; ///< The type of result
+	float fraction;	  ///< The sweep time of the collision
+} b2TOIOutput;
+
+/// Compute the upper bound on time before two shapes penetrate. Time is represented as
+/// a fraction between [0,maxFraction]. This uses a swept separating axis and may miss some intermediate,
+/// non-tunneling collisions. If you change the time interval, you should call this function
+/// again.
+B2_API b2TOIOutput b2TimeOfImpact( const b2TOIInput* input );
+
+/**@}*/
+
+/**
+ * @defgroup collision Collision
+ * @brief Functions for colliding pairs of shapes
+ * @{
+ */
+
+/// A manifold point is a contact point belonging to a contact manifold.
+/// It holds details related to the geometry and dynamics of the contact points.
+/// Box2D uses speculative collision so some contact points may be separated.
+/// You may use the totalNormalImpulse to determine if there was an interaction during
+/// the time step.
+typedef struct b2ManifoldPoint
+{
+	/// Location of the contact point in world space. Subject to precision loss at large coordinates.
+	/// @note Should only be used for debugging.
+	b2Vec2 point;
+
+	/// Location of the contact point relative to shapeA's origin in world space
+	/// @note When used internally to the Box2D solver, this is relative to the body center of mass.
+	b2Vec2 anchorA;
+
+	/// Location of the contact point relative to shapeB's origin in world space
+	/// @note When used internally to the Box2D solver, this is relative to the body center of mass.
+	b2Vec2 anchorB;
+
+	/// The separation of the contact point, negative if penetrating
+	float separation;
+
+	/// The impulse along the manifold normal vector.
+	float normalImpulse;
+
+	/// The friction impulse
+	float tangentImpulse;
+
+	/// The total normal impulse applied across sub-stepping and restitution. This is important
+	/// to identify speculative contact points that had an interaction in the time step.
+	float totalNormalImpulse;
+
+	/// Relative normal velocity pre-solve. Used for hit events. If the normal impulse is
+	/// zero then there was no hit. Negative means shapes are approaching.
+	float normalVelocity;
+
+	/// Uniquely identifies a contact point between two shapes
+	uint16_t id;
+
+	/// Did this contact point exist the previous step?
+	bool persisted;
+} b2ManifoldPoint;
+
+/// A contact manifold describes the contact points between colliding shapes.
+/// @note Box2D uses speculative collision so some contact points may be separated.
+typedef struct b2Manifold
+{
+	/// The unit normal vector in world space, points from shape A to shape B
+	b2Vec2 normal;
+
+	/// Angular impulse applied for rolling resistance. N * m * s = kg * m^2 / s
+	float rollingImpulse;
+
+	/// The manifold points, up to two are possible in 2D
+	b2ManifoldPoint points[2];
+
+	/// The number of contact points, will be 0, 1, or 2
+	int pointCount;
+
+} b2Manifold;
+
+/// Compute the contact manifold between two circles
+B2_API b2Manifold b2CollideCircles( const b2Circle* circleA, b2Transform xfA, const b2Circle* circleB, b2Transform xfB );
+
+/// Compute the contact manifold between a capsule and circle
+B2_API b2Manifold b2CollideCapsuleAndCircle( const b2Capsule* capsuleA, b2Transform xfA, const b2Circle* circleB,
+											 b2Transform xfB );
+
+/// Compute the contact manifold between a segment and a circle
+B2_API b2Manifold b2CollideSegmentAndCircle( const b2Segment* segmentA, b2Transform xfA, const b2Circle* circleB,
+											 b2Transform xfB );
+
+/// Compute the contact manifold between a polygon and a circle
+B2_API b2Manifold b2CollidePolygonAndCircle( const b2Polygon* polygonA, b2Transform xfA, const b2Circle* circleB,
+											 b2Transform xfB );
+
+/// Compute the contact manifold between two capsules
+B2_API b2Manifold b2CollideCapsules( const b2Capsule* capsuleA, b2Transform xfA, const b2Capsule* capsuleB, b2Transform xfB );
+
+/// Compute the contact manifold between a segment and a capsule
+B2_API b2Manifold b2CollideSegmentAndCapsule( const b2Segment* segmentA, b2Transform xfA, const b2Capsule* capsuleB,
+											  b2Transform xfB );
+
+/// Compute the contact manifold between a polygon and capsule
+B2_API b2Manifold b2CollidePolygonAndCapsule( const b2Polygon* polygonA, b2Transform xfA, const b2Capsule* capsuleB,
+											  b2Transform xfB );
+
+/// Compute the contact manifold between two polygons
+B2_API b2Manifold b2CollidePolygons( const b2Polygon* polygonA, b2Transform xfA, const b2Polygon* polygonB, b2Transform xfB );
+
+/// Compute the contact manifold between a segment and a polygon
+B2_API b2Manifold b2CollideSegmentAndPolygon( const b2Segment* segmentA, b2Transform xfA, const b2Polygon* polygonB,
+											  b2Transform xfB );
+
+/// Compute the contact manifold between a chain segment and a circle
+B2_API b2Manifold b2CollideChainSegmentAndCircle( const b2ChainSegment* segmentA, b2Transform xfA, const b2Circle* circleB,
+												  b2Transform xfB );
+
+/// Compute the contact manifold between a chain segment and a capsule
+B2_API b2Manifold b2CollideChainSegmentAndCapsule( const b2ChainSegment* segmentA, b2Transform xfA, const b2Capsule* capsuleB,
+												   b2Transform xfB, b2SimplexCache* cache );
+
+/// Compute the contact manifold between a chain segment and a rounded polygon
+B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segmentA, b2Transform xfA, const b2Polygon* polygonB,
+												   b2Transform xfB, b2SimplexCache* cache );
+
+/**@}*/
+
+/**
+ * @defgroup tree Dynamic Tree
+ * The dynamic tree is a binary AABB tree to organize and query large numbers of geometric objects
+ *
+ * Box2D uses the dynamic tree internally to sort collision shapes into a binary bounding volume hierarchy.
+ * This data structure may have uses in games for organizing other geometry data and may be used independently
+ * of Box2D rigid body simulation.
+ *
+ * A dynamic AABB tree broad-phase, inspired by Nathanael Presson's btDbvt.
+ * A dynamic tree arranges data in a binary tree to accelerate
+ * queries such as AABB queries and ray casts. Leaf nodes are proxies
+ * with an AABB. These are used to hold a user collision object.
+ * Nodes are pooled and relocatable, so I use node indices rather than pointers.
+ * The dynamic tree is made available for advanced users that would like to use it to organize
+ * spatial game data besides rigid bodies.
+ * @{
+ */
+
+/// The dynamic tree structure. This should be considered private data.
+/// It is placed here for performance reasons.
+typedef struct b2DynamicTree
+{
+	/// The tree nodes
+	struct b2TreeNode* nodes;
+
+	/// The root index
+	int root;
+
+	/// The number of nodes
+	int nodeCount;
+
+	/// The allocated node space
+	int nodeCapacity;
+
+	/// Node free list
+	int freeList;
+
+	/// Number of proxies created
+	int proxyCount;
+
+	/// Leaf indices for rebuild
+	int* leafIndices;
+
+	/// Leaf bounding boxes for rebuild
+	b2AABB* leafBoxes;
+
+	/// Leaf bounding box centers for rebuild
+	b2Vec2* leafCenters;
+
+	/// Bins for sorting during rebuild
+	int* binIndices;
+
+	/// Allocated space for rebuilding
+	int rebuildCapacity;
+} b2DynamicTree;
+
+/// These are performance results returned by dynamic tree queries.
+typedef struct b2TreeStats
+{
+	/// Number of internal nodes visited during the query
+	int nodeVisits;
+
+	/// Number of leaf nodes visited during the query
+	int leafVisits;
+} b2TreeStats;
+
+/// Constructing the tree initializes the node pool.
+B2_API b2DynamicTree b2DynamicTree_Create( void );
+
+/// Destroy the tree, freeing the node pool.
+B2_API void b2DynamicTree_Destroy( b2DynamicTree* tree );
+
+/// Create a proxy. Provide an AABB and a userData value.
+B2_API int b2DynamicTree_CreateProxy( b2DynamicTree* tree, b2AABB aabb, uint64_t categoryBits, uint64_t userData );
+
+/// Destroy a proxy. This asserts if the id is invalid.
+B2_API void b2DynamicTree_DestroyProxy( b2DynamicTree* tree, int proxyId );
+
+/// Move a proxy to a new AABB by removing and reinserting into the tree.
+B2_API void b2DynamicTree_MoveProxy( b2DynamicTree* tree, int proxyId, b2AABB aabb );
+
+/// Enlarge a proxy and enlarge ancestors as necessary.
+B2_API void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int proxyId, b2AABB aabb );
+
+/// Modify the category bits on a proxy. This is an expensive operation.
+B2_API void b2DynamicTree_SetCategoryBits( b2DynamicTree* tree, int proxyId, uint64_t categoryBits );
+
+/// Get the category bits on a proxy.
+B2_API uint64_t b2DynamicTree_GetCategoryBits( b2DynamicTree* tree, int proxyId );
+
+/// This function receives proxies found in the AABB query.
+/// @return true if the query should continue
+typedef bool b2TreeQueryCallbackFcn( int proxyId, uint64_t userData, void* context );
+
+/// Query an AABB for overlapping proxies. The callback function is called for each proxy that overlaps the supplied AABB.
+///	@return performance data
+B2_API b2TreeStats b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits,
+										b2TreeQueryCallbackFcn* callback, void* context );
+
+/// This function receives clipped ray cast input for a proxy. The function
+/// returns the new ray fraction.
+/// - return a value of 0 to terminate the ray cast
+/// - return a value less than input->maxFraction to clip the ray
+/// - return a value of input->maxFraction to continue the ray cast without clipping
+typedef float b2TreeRayCastCallbackFcn( const b2RayCastInput* input, int proxyId, uint64_t userData, void* context );
+
+/// Ray cast against the proxies in the tree. This relies on the callback
+/// to perform an exact ray cast in the case where the proxy contains a shape.
+/// The callback also performs any collision filtering. This has performance
+/// roughly equal to k * log(n), where k is the number of collisions and n is the
+/// number of proxies in the tree.
+/// Bit-wise filtering using mask bits can greatly improve performance in some scenarios.
+///	However, this filtering may be approximate, so the user should still apply filtering to results.
+/// @param tree the dynamic tree to ray cast
+/// @param input the ray cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1)
+/// @param maskBits mask bit hint: `bool accept = (maskBits & node->categoryBits) != 0;`
+/// @param callback a callback function that is called for each proxy that is hit by the ray
+/// @param context user context that is passed to the callback
+///	@return performance data
+B2_API b2TreeStats b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits,
+										  b2TreeRayCastCallbackFcn* callback, void* context );
+
+/// This function receives clipped shape cast input for a proxy. The function
+/// returns the new cast fraction.
+/// - return a value of 0 to terminate the shape cast
+/// - return a value less than input->maxFraction to clip the shape cast
+/// - return a value of input->maxFraction to continue the shape cast without clipping
+typedef float b2TreeShapeCastCallbackFcn( const b2ShapeCastInput* input, int proxyId, uint64_t userData, void* context );
+
+/// Shape cast against the proxies in the tree. This relies on the callback
+/// to perform an exact shape cast in the case where the proxy contains a shape.
+/// The callback also performs any collision filtering. This has performance
+/// roughly equal to k * log(n), where k is the number of collisions and n is the
+/// number of proxies in the tree.
+/// @param tree the dynamic tree to shape cast
+/// @param input the shape cast input data. The cast is limited to input->maxFraction.
+/// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;`
+/// @param callback a callback function that is called for each proxy that is hit by the shape
+/// @param context user context that is passed to the callback
+///	@return performance data
+B2_API b2TreeStats b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits,
+											b2TreeShapeCastCallbackFcn* callback, void* context );
+
+/// Get the height of the binary tree.
+B2_API int b2DynamicTree_GetHeight( const b2DynamicTree* tree );
+
+/// Get the ratio of the sum of the node areas to the root area.
+B2_API float b2DynamicTree_GetAreaRatio( const b2DynamicTree* tree );
+
+/// Get the bounding box that contains the entire tree
+B2_API b2AABB b2DynamicTree_GetRootBounds( const b2DynamicTree* tree );
+
+/// Get the number of proxies created
+B2_API int b2DynamicTree_GetProxyCount( const b2DynamicTree* tree );
+
+/// Rebuild the tree while retaining subtrees that haven't changed. Returns the number of boxes sorted.
+B2_API int b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild );
+
+/// Get the number of bytes used by this tree
+B2_API int b2DynamicTree_GetByteCount( const b2DynamicTree* tree );
+
+/// Get proxy user data
+B2_API uint64_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int proxyId );
+
+/// Get the AABB of a proxy
+B2_API b2AABB b2DynamicTree_GetAABB( const b2DynamicTree* tree, int proxyId );
+
+/// Validate this tree. For testing.
+B2_API void b2DynamicTree_Validate( const b2DynamicTree* tree );
+
+/// Validate this tree has no enlarged AABBs. For testing.
+B2_API void b2DynamicTree_ValidateNoEnlarged( const b2DynamicTree* tree );
+
+/**@}*/
+
+/**
+ * @defgroup character Character Mover
+ * Character movement solver
+ * @{
+ */
+
+/// These are the collision planes returned from b2World_CollideMover
+typedef struct b2PlaneResult
+{
+	/// The collision plane between the mover and a convex shape
+	b2Plane plane;
+
+	/// Did the collision register a hit? If not this plane should be ignored.
+	bool hit;
+} b2PlaneResult;
+
+/// These are collision planes that can be fed to b2SolvePlanes. Normally
+/// this is assembled by the user from plane results in b2PlaneResult
+typedef struct b2CollisionPlane
+{
+	/// The collision plane between the mover and some shape
+	b2Plane plane;
+
+	/// Setting this to FLT_MAX makes the plane as rigid as possible. Lower values can
+	/// make the plane collision soft. Usually in meters.
+	float pushLimit;
+
+	/// The push on the mover determined by b2SolvePlanes. Usually in meters.
+	float push;
+
+	/// Indicates if b2ClipVector should clip against this plane. Should be false for soft collision.
+	bool clipVelocity;
+} b2CollisionPlane;
+
+/// Result returned by b2SolvePlanes
+typedef struct b2PlaneSolverResult
+{
+	/// The final position of the mover
+	b2Vec2 position;
+
+	/// The number of iterations used by the plane solver. For diagnostics.
+	int iterationCount;
+} b2PlaneSolverResult;
+
+/// Solves the position of a mover that satisfies the given collision planes.
+/// @param position this must be the position used to generate the collision planes
+/// @param planes the collision planes
+/// @param count the number of collision planes
+B2_API b2PlaneSolverResult b2SolvePlanes( b2Vec2 position, b2CollisionPlane* planes, int count );
+
+/// Clips the velocity against the given collision planes. Planes with clipVelocity set to
+/// false are skipped.
+B2_API b2Vec2 b2ClipVector( b2Vec2 vector, const b2CollisionPlane* planes, int count );
+
+/**@}*/
diff --git a/src/vendor/box2d/constants.h b/src/vendor/box2d/constants.h
new file mode 100644
index 0000000..f3dd71c
--- /dev/null
+++ b/src/vendor/box2d/constants.h
@@ -0,0 +1,54 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+extern float b2_lengthUnitsPerMeter;
+
+// Used to detect bad values. Positions greater than about 16km will have precision
+// problems, so 100km as a limit should be fine in all cases.
+#define B2_HUGE ( 100000.0f * b2_lengthUnitsPerMeter )
+
+// Maximum parallel workers. Used to size some static arrays.
+#define B2_MAX_WORKERS 64
+
+// Maximum number of colors in the constraint graph. Constraints that cannot
+// find a color are added to the overflow set which are solved single-threaded.
+#define B2_GRAPH_COLOR_COUNT 12
+
+// A small length used as a collision and constraint tolerance. Usually it is
+// chosen to be numerically significant, but visually insignificant. In meters.
+// Normally this is 0.5cm.
+// @warning modifying this can have a significant impact on stability
+#define B2_LINEAR_SLOP ( 0.005f * b2_lengthUnitsPerMeter )
+
+// Maximum number of simultaneous worlds that can be allocated
+#ifndef B2_MAX_WORLDS
+#define B2_MAX_WORLDS 128
+#endif
+
+// The maximum rotation of a body per time step. This limit is very large and is used
+// to prevent numerical problems. You shouldn't need to adjust this.
+// @warning increasing this to 0.5f * B2_PI or greater will break continuous collision.
+#define B2_MAX_ROTATION ( 0.25f * B2_PI )
+
+// Box2D uses limited speculative collision. This reduces jitter.
+// Normally this is 2cm.
+// @warning modifying this can have a significant impact on performance and stability
+#define B2_SPECULATIVE_DISTANCE ( 4.0f * B2_LINEAR_SLOP )
+
+// This is used to fatten AABBs in the dynamic tree. This allows proxies
+// to move by a small amount without triggering a tree adjustment. This is in meters.
+// Normally this is 5cm.
+// @warning modifying this can have a significant impact on performance
+#define B2_AABB_MARGIN ( 0.05f * b2_lengthUnitsPerMeter )
+
+// The time that a body must be still before it will go to sleep. In seconds.
+#define B2_TIME_TO_SLEEP 0.5f
+
+enum b2TreeNodeFlags
+{
+	b2_allocatedNode = 0x0001,
+	b2_enlargedNode = 0x0002,
+	b2_leafNode = 0x0004,
+};
diff --git a/src/vendor/box2d/constraint_graph.c b/src/vendor/box2d/constraint_graph.c
new file mode 100644
index 0000000..1737aea
--- /dev/null
+++ b/src/vendor/box2d/constraint_graph.c
@@ -0,0 +1,322 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "constraint_graph.h"
+
+#include "array.h"
+#include "bitset.h"
+#include "body.h"
+#include "contact.h"
+#include "joint.h"
+#include "solver_set.h"
+#include "world.h"
+
+#include <string.h>
+
+// Solver using graph coloring. Islands are only used for sleep.
+// High-Performance Physical Simulations on Next-Generation Architecture with Many Cores
+// http://web.eecs.umich.edu/~msmelyan/papers/physsim_onmanycore_itj.pdf
+
+// Kinematic bodies have to be treated like dynamic bodies in graph coloring. Unlike static bodies, we cannot use a dummy solver
+// body for kinematic bodies. We cannot access a kinematic body from multiple threads efficiently because the SIMD solver body
+// scatter would write to the same kinematic body from multiple threads. Even if these writes don't modify the body, they will
+// cause horrible cache stalls. To make this feasible I would need a way to block these writes.
+
+// This is used for debugging by making all constraints be assigned to overflow.
+#define B2_FORCE_OVERFLOW 0
+
+_Static_assert( B2_GRAPH_COLOR_COUNT == 12, "graph color count assumed to be 12" );
+
+void b2CreateGraph( b2ConstraintGraph* graph, int bodyCapacity )
+{
+	_Static_assert( B2_GRAPH_COLOR_COUNT >= 2, "must have at least two constraint graph colors" );
+	_Static_assert( B2_OVERFLOW_INDEX == B2_GRAPH_COLOR_COUNT - 1, "bad over flow index" );
+
+	*graph = ( b2ConstraintGraph ){ 0 };
+
+	bodyCapacity = b2MaxInt( bodyCapacity, 8 );
+
+	// Initialize graph color bit set.
+	// No bitset for overflow color.
+	for ( int i = 0; i < B2_OVERFLOW_INDEX; ++i )
+	{
+		b2GraphColor* color = graph->colors + i;
+		color->bodySet = b2CreateBitSet( bodyCapacity );
+		b2SetBitCountAndClear( &color->bodySet, bodyCapacity );
+	}
+}
+
+void b2DestroyGraph( b2ConstraintGraph* graph )
+{
+	for ( int i = 0; i < B2_GRAPH_COLOR_COUNT; ++i )
+	{
+		b2GraphColor* color = graph->colors + i;
+
+		// The bit set should never be used on the overflow color
+		B2_ASSERT( i != B2_OVERFLOW_INDEX || color->bodySet.bits == NULL );
+
+		b2DestroyBitSet( &color->bodySet );
+
+		b2ContactSimArray_Destroy( &color->contactSims );
+		b2JointSimArray_Destroy( &color->jointSims );
+	}
+}
+
+// Contacts are always created as non-touching. They get cloned into the constraint
+// graph once they are found to be touching.
+// todo maybe kinematic bodies should not go into graph
+void b2AddContactToGraph( b2World* world, b2ContactSim* contactSim, b2Contact* contact )
+{
+	B2_ASSERT( contactSim->manifold.pointCount > 0 );
+	B2_ASSERT( contactSim->simFlags & b2_simTouchingFlag );
+	B2_ASSERT( contact->flags & b2_contactTouchingFlag );
+
+	b2ConstraintGraph* graph = &world->constraintGraph;
+	int colorIndex = B2_OVERFLOW_INDEX;
+
+	int bodyIdA = contact->edges[0].bodyId;
+	int bodyIdB = contact->edges[1].bodyId;
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, bodyIdA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, bodyIdB );
+	bool staticA = bodyA->setIndex == b2_staticSet;
+	bool staticB = bodyB->setIndex == b2_staticSet;
+	B2_ASSERT( staticA == false || staticB == false );
+
+#if B2_FORCE_OVERFLOW == 0
+	if ( staticA == false && staticB == false )
+	{
+		for ( int i = 0; i < B2_OVERFLOW_INDEX; ++i )
+		{
+			b2GraphColor* color = graph->colors + i;
+			if ( b2GetBit( &color->bodySet, bodyIdA ) || b2GetBit( &color->bodySet, bodyIdB ) )
+			{
+				continue;
+			}
+
+			b2SetBitGrow( &color->bodySet, bodyIdA );
+			b2SetBitGrow( &color->bodySet, bodyIdB );
+			colorIndex = i;
+			break;
+		}
+	}
+	else if ( staticA == false )
+	{
+		// No static contacts in color 0
+		for ( int i = 1; i < B2_OVERFLOW_INDEX; ++i )
+		{
+			b2GraphColor* color = graph->colors + i;
+			if ( b2GetBit( &color->bodySet, bodyIdA ) )
+			{
+				continue;
+			}
+
+			b2SetBitGrow( &color->bodySet, bodyIdA );
+			colorIndex = i;
+			break;
+		}
+	}
+	else if ( staticB == false )
+	{
+		// No static contacts in color 0
+		for ( int i = 1; i < B2_OVERFLOW_INDEX; ++i )
+		{
+			b2GraphColor* color = graph->colors + i;
+			if ( b2GetBit( &color->bodySet, bodyIdB ) )
+			{
+				continue;
+			}
+
+			b2SetBitGrow( &color->bodySet, bodyIdB );
+			colorIndex = i;
+			break;
+		}
+	}
+#endif
+
+	b2GraphColor* color = graph->colors + colorIndex;
+	contact->colorIndex = colorIndex;
+	contact->localIndex = color->contactSims.count;
+
+	b2ContactSim* newContact = b2ContactSimArray_Add( &color->contactSims );
+	memcpy( newContact, contactSim, sizeof( b2ContactSim ) );
+
+	// todo perhaps skip this if the contact is already awake
+
+	if ( staticA )
+	{
+		newContact->bodySimIndexA = B2_NULL_INDEX;
+		newContact->invMassA = 0.0f;
+		newContact->invIA = 0.0f;
+	}
+	else
+	{
+		B2_ASSERT( bodyA->setIndex == b2_awakeSet );
+		b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+
+		int localIndex = bodyA->localIndex;
+		newContact->bodySimIndexA = localIndex;
+
+		b2BodySim* bodySimA = b2BodySimArray_Get( &awakeSet->bodySims, localIndex );
+		newContact->invMassA = bodySimA->invMass;
+		newContact->invIA = bodySimA->invInertia;
+	}
+
+	if ( staticB )
+	{
+		newContact->bodySimIndexB = B2_NULL_INDEX;
+		newContact->invMassB = 0.0f;
+		newContact->invIB = 0.0f;
+	}
+	else
+	{
+		B2_ASSERT( bodyB->setIndex == b2_awakeSet );
+		b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+
+		int localIndex = bodyB->localIndex;
+		newContact->bodySimIndexB = localIndex;
+
+		b2BodySim* bodySimB = b2BodySimArray_Get( &awakeSet->bodySims, localIndex );
+		newContact->invMassB = bodySimB->invMass;
+		newContact->invIB = bodySimB->invInertia;
+	}
+}
+
+void b2RemoveContactFromGraph( b2World* world, int bodyIdA, int bodyIdB, int colorIndex, int localIndex )
+{
+	b2ConstraintGraph* graph = &world->constraintGraph;
+
+	B2_ASSERT( 0 <= colorIndex && colorIndex < B2_GRAPH_COLOR_COUNT );
+	b2GraphColor* color = graph->colors + colorIndex;
+
+	if ( colorIndex != B2_OVERFLOW_INDEX ) // the overflow color does not use its body bit set
+	{
+		// Clear both bodies' bits in this color. This might clear a bit for a static body, but this has no effect.
+		b2ClearBit( &color->bodySet, bodyIdA );
+		b2ClearBit( &color->bodySet, bodyIdB );
+	}
+
+	int movedIndex = b2ContactSimArray_RemoveSwap( &color->contactSims, localIndex );
+	if ( movedIndex != B2_NULL_INDEX ) // B2_NULL_INDEX: no other element changed position
+	{
+		// A different contact sim now sits in the vacated slot at localIndex
+		b2ContactSim* movedContactSim = color->contactSims.data + localIndex;
+
+		// Its owning b2Contact is expected to still record movedIndex; repoint it at localIndex
+		int movedId = movedContactSim->contactId;
+		b2Contact* movedContact = b2ContactArray_Get( &world->contacts, movedId );
+		B2_ASSERT( movedContact->setIndex == b2_awakeSet );
+		B2_ASSERT( movedContact->colorIndex == colorIndex );
+		B2_ASSERT( movedContact->localIndex == movedIndex );
+		movedContact->localIndex = localIndex;
+	}
+}
+
+static int b2AssignJointColor( b2ConstraintGraph* graph, int bodyIdA, int bodyIdB, bool staticA, bool staticB )
+{
+	B2_ASSERT( staticA == false || staticB == false );
+
+#if B2_FORCE_OVERFLOW == 0
+	if ( staticA == false && staticB == false )
+	{
+		for ( int i = 0; i < B2_OVERFLOW_INDEX; ++i )
+		{
+			b2GraphColor* color = graph->colors + i;
+			if ( b2GetBit( &color->bodySet, bodyIdA ) || b2GetBit( &color->bodySet, bodyIdB ) )
+			{
+				continue;
+			}
+
+			b2SetBitGrow( &color->bodySet, bodyIdA );
+			b2SetBitGrow( &color->bodySet, bodyIdB );
+			return i;
+		}
+	}
+	else if ( staticA == false )
+	{
+		for ( int i = 0; i < B2_OVERFLOW_INDEX; ++i )
+		{
+			b2GraphColor* color = graph->colors + i;
+			if ( b2GetBit( &color->bodySet, bodyIdA ) )
+			{
+				continue;
+			}
+
+			b2SetBitGrow( &color->bodySet, bodyIdA );
+			return i;
+		}
+	}
+	else if ( staticB == false )
+	{
+		for ( int i = 0; i < B2_OVERFLOW_INDEX; ++i )
+		{
+			b2GraphColor* color = graph->colors + i;
+			if ( b2GetBit( &color->bodySet, bodyIdB ) )
+			{
+				continue;
+			}
+
+			b2SetBitGrow( &color->bodySet, bodyIdB );
+			return i;
+		}
+	}
+#else
+	B2_UNUSED( graph, bodyIdA, bodyIdB, staticA, staticB );
+#endif
+
+	return B2_OVERFLOW_INDEX;
+}
+
+b2JointSim* b2CreateJointInGraph( b2World* world, b2Joint* joint )
+{
+	b2ConstraintGraph* graph = &world->constraintGraph;
+
+	int bodyIdA = joint->edges[0].bodyId;
+	int bodyIdB = joint->edges[1].bodyId;
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, bodyIdA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, bodyIdB );
+	bool staticA = bodyA->setIndex == b2_staticSet;
+	bool staticB = bodyB->setIndex == b2_staticSet;
+
+	int colorIndex = b2AssignJointColor( graph, bodyIdA, bodyIdB, staticA, staticB );
+
+	b2JointSim* jointSim = b2JointSimArray_Add( &graph->colors[colorIndex].jointSims );
+	memset( jointSim, 0, sizeof( b2JointSim ) );
+
+	joint->colorIndex = colorIndex;
+	joint->localIndex = graph->colors[colorIndex].jointSims.count - 1;
+	return jointSim;
+}
+
+void b2AddJointToGraph( b2World* world, b2JointSim* jointSim, b2Joint* joint )
+{
+	b2JointSim* jointDst = b2CreateJointInGraph( world, joint );
+	memcpy( jointDst, jointSim, sizeof( b2JointSim ) );
+}
+
+void b2RemoveJointFromGraph( b2World* world, int bodyIdA, int bodyIdB, int colorIndex, int localIndex )
+{
+	b2ConstraintGraph* graph = &world->constraintGraph;
+
+	B2_ASSERT( 0 <= colorIndex && colorIndex < B2_GRAPH_COLOR_COUNT );
+	b2GraphColor* color = graph->colors + colorIndex;
+
+	if ( colorIndex != B2_OVERFLOW_INDEX ) // the overflow color does not use its body bit set
+	{
+		// Clear both bodies' bits in this color. This may clear bits for static bodies, which has no effect.
+		b2ClearBit( &color->bodySet, bodyIdA );
+		b2ClearBit( &color->bodySet, bodyIdB );
+	}
+
+	int movedIndex = b2JointSimArray_RemoveSwap( &color->jointSims, localIndex );
+	if ( movedIndex != B2_NULL_INDEX ) // B2_NULL_INDEX: no other element changed position
+	{
+		// A different joint sim now sits at localIndex; its b2Joint is expected to still record movedIndex
+		b2JointSim* movedJointSim = color->jointSims.data + localIndex;
+		int movedId = movedJointSim->jointId;
+		b2Joint* movedJoint = b2JointArray_Get( &world->joints, movedId );
+		B2_ASSERT( movedJoint->setIndex == b2_awakeSet );
+		B2_ASSERT( movedJoint->colorIndex == colorIndex );
+		B2_ASSERT( movedJoint->localIndex == movedIndex );
+		movedJoint->localIndex = localIndex; // repoint the joint at the slot its sim now occupies
+	}
+}
diff --git a/src/vendor/box2d/constraint_graph.h b/src/vendor/box2d/constraint_graph.h
new file mode 100644
index 0000000..ecee324
--- /dev/null
+++ b/src/vendor/box2d/constraint_graph.h
@@ -0,0 +1,58 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+#include "bitset.h"
+#include "constants.h"
+
+typedef struct b2Body b2Body;
+typedef struct b2ContactSim b2ContactSim;
+typedef struct b2Contact b2Contact;
+typedef struct b2ContactConstraint b2ContactConstraint;
+typedef struct b2ContactConstraintSIMD b2ContactConstraintSIMD;
+typedef struct b2JointSim b2JointSim;
+typedef struct b2Joint b2Joint;
+typedef struct b2StepContext b2StepContext;
+typedef struct b2World b2World;
+
+// This holds constraints that cannot fit the graph color limit. This happens when a single dynamic body
+// is touching many other bodies.
+#define B2_OVERFLOW_INDEX (B2_GRAPH_COLOR_COUNT - 1)
+
+typedef struct b2GraphColor
+{
+	// This bitset is indexed by bodyId so this is over-sized to encompass static bodies
+	// however I never traverse these bits or use the bit count for anything
+	// This bitset is unused on the overflow color.
+	// todo consider having a uint16_t per body that tracks the graph color membership
+	b2BitSet bodySet;
+
+	// cache friendly arrays
+	b2ContactSimArray contactSims;
+	b2JointSimArray jointSims;
+
+	// transient
+	union
+	{
+		b2ContactConstraintSIMD* simdConstraints;
+		b2ContactConstraint* overflowConstraints;
+	};
+} b2GraphColor;
+
+typedef struct b2ConstraintGraph
+{
+	// including overflow at the end
+	b2GraphColor colors[B2_GRAPH_COLOR_COUNT];
+} b2ConstraintGraph;
+
+void b2CreateGraph( b2ConstraintGraph* graph, int bodyCapacity );
+void b2DestroyGraph( b2ConstraintGraph* graph );
+
+void b2AddContactToGraph( b2World* world, b2ContactSim* contactSim, b2Contact* contact );
+void b2RemoveContactFromGraph( b2World* world, int bodyIdA, int bodyIdB, int colorIndex, int localIndex );
+
+b2JointSim* b2CreateJointInGraph( b2World* world, b2Joint* joint );
+void b2AddJointToGraph( b2World* world, b2JointSim* jointSim, b2Joint* joint );
+void b2RemoveJointFromGraph( b2World* world, int bodyIdA, int bodyIdB, int colorIndex, int localIndex );
diff --git a/src/vendor/box2d/contact.c b/src/vendor/box2d/contact.c
new file mode 100644
index 0000000..d2edf21
--- /dev/null
+++ b/src/vendor/box2d/contact.c
@@ -0,0 +1,650 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "contact.h"
+
+#include "array.h"
+#include "body.h"
+#include "core.h"
+#include "island.h"
+#include "shape.h"
+#include "solver_set.h"
+#include "table.h"
+#include "world.h"
+
+#include "box2d/collision.h"
+
+#include <float.h>
+#include <math.h>
+#include <stddef.h>
+
+B2_ARRAY_SOURCE( b2Contact, b2Contact )
+B2_ARRAY_SOURCE( b2ContactSim, b2ContactSim )
+
+// Contacts and determinism
+// A deterministic simulation requires contacts to exist in the same order in b2Island no matter the thread count.
+// The order must reproduce from run to run. This is necessary because the Gauss-Seidel constraint solver is order dependent.
+//
+// Creation:
+// - Contacts are created using results from b2UpdateBroadPhasePairs
+// - These results are ordered according to the order of the broad-phase move array
+// - The move array is ordered according to the shape creation order using a bitset.
+// - The island/shape/body order is determined by creation order
+// - Logically contacts are only created for awake bodies, so they are immediately added to the awake contact array (serially)
+//
+// Island linking:
+// - The awake contact array is built from the body-contact graph for all awake bodies in awake islands.
+// - Awake contacts are solved in parallel and they generate contact state changes.
+// - These state changes may link islands together using union find.
+// - The state changes are ordered using a bit array that encompasses all contacts
+// - As long as contacts are created in deterministic order, island link order is deterministic.
+// - This keeps the order of contacts in islands deterministic
+
+// Manifold functions should compute important results in local space to improve precision. However, this
+// interface function takes two world transforms instead of a relative transform for these reasons:
+//
+// First:
+// The anchors need to be computed relative to the shape origin in world space. This is necessary so the
+// solver does not need to access static body transforms. Not even in constraint preparation. This approach
+// has world space vectors yet retains precision.
+//
+// Second:
+// b2ManifoldPoint::point is very useful for debugging and it is in world space.
+//
+// Third:
+// The user may call the manifold functions directly and they should be easy to use and have easy to use
+// results.
+typedef b2Manifold b2ManifoldFcn( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+								  b2SimplexCache* cache );
+
+struct b2ContactRegister
+{
+	b2ManifoldFcn* fcn;
+	bool primary;
+};
+
+static struct b2ContactRegister s_registers[b2_shapeTypeCount][b2_shapeTypeCount];
+static bool s_initialized = false;
+
+static b2Manifold b2CircleManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+									b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollideCircles( &shapeA->circle, xfA, &shapeB->circle, xfB );
+}
+
+static b2Manifold b2CapsuleAndCircleManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+											  b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollideCapsuleAndCircle( &shapeA->capsule, xfA, &shapeB->circle, xfB );
+}
+
+static b2Manifold b2CapsuleManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+									 b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollideCapsules( &shapeA->capsule, xfA, &shapeB->capsule, xfB );
+}
+
+static b2Manifold b2PolygonAndCircleManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+											  b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollidePolygonAndCircle( &shapeA->polygon, xfA, &shapeB->circle, xfB );
+}
+
+static b2Manifold b2PolygonAndCapsuleManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+											   b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollidePolygonAndCapsule( &shapeA->polygon, xfA, &shapeB->capsule, xfB );
+}
+
+static b2Manifold b2PolygonManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+									 b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollidePolygons( &shapeA->polygon, xfA, &shapeB->polygon, xfB );
+}
+
+static b2Manifold b2SegmentAndCircleManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+											  b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollideSegmentAndCircle( &shapeA->segment, xfA, &shapeB->circle, xfB );
+}
+
+static b2Manifold b2SegmentAndCapsuleManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+											   b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollideSegmentAndCapsule( &shapeA->segment, xfA, &shapeB->capsule, xfB );
+}
+
+static b2Manifold b2SegmentAndPolygonManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+											   b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollideSegmentAndPolygon( &shapeA->segment, xfA, &shapeB->polygon, xfB );
+}
+
+static b2Manifold b2ChainSegmentAndCircleManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB, b2Transform xfB,
+												   b2SimplexCache* cache )
+{
+	B2_UNUSED( cache );
+	return b2CollideChainSegmentAndCircle( &shapeA->chainSegment, xfA, &shapeB->circle, xfB );
+}
+
+static b2Manifold b2ChainSegmentAndCapsuleManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB,
+													b2Transform xfB, b2SimplexCache* cache )
+{
+	return b2CollideChainSegmentAndCapsule( &shapeA->chainSegment, xfA, &shapeB->capsule, xfB, cache );
+}
+
+static b2Manifold b2ChainSegmentAndPolygonManifold( const b2Shape* shapeA, b2Transform xfA, const b2Shape* shapeB,
+													b2Transform xfB, b2SimplexCache* cache )
+{
+	return b2CollideChainSegmentAndPolygon( &shapeA->chainSegment, xfA, &shapeB->polygon, xfB, cache );
+}
+
+static void b2AddType( b2ManifoldFcn* fcn, b2ShapeType type1, b2ShapeType type2 )
+{
+	B2_ASSERT( 0 <= type1 && type1 < b2_shapeTypeCount );
+	B2_ASSERT( 0 <= type2 && type2 < b2_shapeTypeCount );
+
+	s_registers[type1][type2].fcn = fcn; // manifold function for the pair as registered
+	s_registers[type1][type2].primary = true; // (type1, type2) is the order b2CreateContact keeps as-is
+
+	if ( type1 != type2 ) // a same-type pair has a single table cell, nothing to mirror
+	{
+		s_registers[type2][type1].fcn = fcn; // mirrored cell shares the same function
+		s_registers[type2][type1].primary = false; // b2CreateContact swaps the shapes when it sees a non-primary cell
+	}
+}
+
+void b2InitializeContactRegisters( void )
+{
+	if ( s_initialized == false )
+	{
+		b2AddType( b2CircleManifold, b2_circleShape, b2_circleShape );
+		b2AddType( b2CapsuleAndCircleManifold, b2_capsuleShape, b2_circleShape );
+		b2AddType( b2CapsuleManifold, b2_capsuleShape, b2_capsuleShape );
+		b2AddType( b2PolygonAndCircleManifold, b2_polygonShape, b2_circleShape );
+		b2AddType( b2PolygonAndCapsuleManifold, b2_polygonShape, b2_capsuleShape );
+		b2AddType( b2PolygonManifold, b2_polygonShape, b2_polygonShape );
+		b2AddType( b2SegmentAndCircleManifold, b2_segmentShape, b2_circleShape );
+		b2AddType( b2SegmentAndCapsuleManifold, b2_segmentShape, b2_capsuleShape );
+		b2AddType( b2SegmentAndPolygonManifold, b2_segmentShape, b2_polygonShape );
+		b2AddType( b2ChainSegmentAndCircleManifold, b2_chainSegmentShape, b2_circleShape );
+		b2AddType( b2ChainSegmentAndCapsuleManifold, b2_chainSegmentShape, b2_capsuleShape );
+		b2AddType( b2ChainSegmentAndPolygonManifold, b2_chainSegmentShape, b2_polygonShape );
+		s_initialized = true;
+	}
+}
+
+void b2CreateContact( b2World* world, b2Shape* shapeA, b2Shape* shapeB ) // creates a non-touching contact for a shape pair and links it to both bodies
+{
+	b2ShapeType type1 = shapeA->type;
+	b2ShapeType type2 = shapeB->type;
+
+	B2_ASSERT( 0 <= type1 && type1 < b2_shapeTypeCount );
+	B2_ASSERT( 0 <= type2 && type2 < b2_shapeTypeCount );
+
+	if ( s_registers[type1][type2].fcn == NULL )
+	{
+		// No manifold function is registered for this pair, so no contact is created. For example, no segment vs segment collision
+		return;
+	}
+
+	if ( s_registers[type1][type2].primary == false )
+	{
+		// flip order: this ordering of the pair is not the primary one, so retry with the shapes swapped
+		b2CreateContact( world, shapeB, shapeA );
+		return;
+	}
+
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, shapeA->bodyId );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, shapeB->bodyId );
+
+	B2_ASSERT( bodyA->setIndex != b2_disabledSet && bodyB->setIndex != b2_disabledSet );
+	B2_ASSERT( bodyA->setIndex != b2_staticSet || bodyB->setIndex != b2_staticSet ); // the two bodies must not both be static
+
+	int setIndex;
+	if ( bodyA->setIndex == b2_awakeSet || bodyB->setIndex == b2_awakeSet )
+	{
+		setIndex = b2_awakeSet;
+	}
+	else
+	{
+		// sleeping and non-touching contacts live in the disabled set
+		// later if this set is found to be touching then the sleeping
+		// islands will be linked and the contact moved to the merged island
+		setIndex = b2_disabledSet;
+	}
+
+	b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, setIndex );
+
+	// Create contact key and contact. The contact array only grows when the id is one past its current end.
+	int contactId = b2AllocId( &world->contactIdPool );
+	if ( contactId == world->contacts.count )
+	{
+		b2ContactArray_Push( &world->contacts, ( b2Contact ){ 0 } );
+	}
+
+	int shapeIdA = shapeA->id;
+	int shapeIdB = shapeB->id;
+
+	b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+	contact->contactId = contactId;
+	contact->setIndex = setIndex;
+	contact->colorIndex = B2_NULL_INDEX;
+	contact->localIndex = set->contactSims.count; // current sim count; presumably the slot b2ContactSimArray_Add hands out below
+	contact->islandId = B2_NULL_INDEX;
+	contact->islandPrev = B2_NULL_INDEX;
+	contact->islandNext = B2_NULL_INDEX;
+	contact->shapeIdA = shapeIdA;
+	contact->shapeIdB = shapeIdB;
+	contact->isMarked = false;
+	contact->flags = 0;
+
+	B2_ASSERT( shapeA->sensorIndex == B2_NULL_INDEX && shapeB->sensorIndex == B2_NULL_INDEX ); // shapes with a sensor index must not reach this point
+
+	if ( shapeA->enableContactEvents || shapeB->enableContactEvents )
+	{
+		contact->flags |= b2_contactEnableContactEvents;
+	}
+
+	// Connect to body A: edge 0 becomes the new head of the body's contact list
+	{
+		contact->edges[0].bodyId = shapeA->bodyId;
+		contact->edges[0].prevKey = B2_NULL_INDEX;
+		contact->edges[0].nextKey = bodyA->headContactKey;
+
+		int keyA = ( contactId << 1 ) | 0; // edge key: contact id in the upper bits, edge index (0 = body A) in the low bit
+		int headContactKey = bodyA->headContactKey;
+		if ( headContactKey != B2_NULL_INDEX )
+		{
+			b2Contact* headContact = b2ContactArray_Get( &world->contacts, headContactKey >> 1 );
+			headContact->edges[headContactKey & 1].prevKey = keyA; // old head now follows the new edge
+		}
+		bodyA->headContactKey = keyA;
+		bodyA->contactCount += 1;
+	}
+
+	// Connect to body B: edge 1 becomes the new head of the body's contact list
+	{
+		contact->edges[1].bodyId = shapeB->bodyId;
+		contact->edges[1].prevKey = B2_NULL_INDEX;
+		contact->edges[1].nextKey = bodyB->headContactKey;
+
+		int keyB = ( contactId << 1 ) | 1; // same encoding, low bit 1 = body B
+		int headContactKey = bodyB->headContactKey;
+		if ( bodyB->headContactKey != B2_NULL_INDEX )
+		{
+			b2Contact* headContact = b2ContactArray_Get( &world->contacts, headContactKey >> 1 );
+			headContact->edges[headContactKey & 1].prevKey = keyB;
+		}
+		bodyB->headContactKey = keyB;
+		bodyB->contactCount += 1;
+	}
+
+	// Add to pair set for fast lookup
+	uint64_t pairKey = B2_SHAPE_PAIR_KEY( shapeIdA, shapeIdB );
+	b2AddKey( &world->broadPhase.pairSet, pairKey );
+
+	// Contacts are created as non-touching. Later if they are found to be touching
+	// they will link islands and be moved into the constraint graph.
+	b2ContactSim* contactSim = b2ContactSimArray_Add( &set->contactSims );
+	contactSim->contactId = contactId;
+
+#if B2_VALIDATE
+	contactSim->bodyIdA = shapeA->bodyId;
+	contactSim->bodyIdB = shapeB->bodyId;
+#endif
+
+	contactSim->bodySimIndexA = B2_NULL_INDEX;
+	contactSim->bodySimIndexB = B2_NULL_INDEX;
+	contactSim->invMassA = 0.0f;
+	contactSim->invIA = 0.0f;
+	contactSim->invMassB = 0.0f;
+	contactSim->invIB = 0.0f;
+	contactSim->shapeIdA = shapeIdA;
+	contactSim->shapeIdB = shapeIdB;
+	contactSim->cache = b2_emptySimplexCache;
+	contactSim->manifold = ( b2Manifold ){ 0 };
+
+	// These also get updated in the narrow phase
+	contactSim->friction = world->frictionCallback(shapeA->friction, shapeA->userMaterialId, shapeB->friction, shapeB->userMaterialId);
+	contactSim->restitution = world->restitutionCallback(shapeA->restitution, shapeA->userMaterialId, shapeB->restitution, shapeB->userMaterialId);
+
+	contactSim->tangentSpeed = 0.0f;
+	contactSim->simFlags = 0;
+
+	if ( shapeA->enablePreSolveEvents || shapeB->enablePreSolveEvents )
+	{
+		contactSim->simFlags |= b2_simEnablePreSolveEvents;
+	}
+}
+
+// Tears down a contact and returns its id to the pool. If wakeBodies is set and it was touching, both bodies are woken.
+// A contact is destroyed when:
+// - broad-phase proxies stop overlapping
+// - a body is destroyed or disabled
+// - a body changes type from dynamic to kinematic or static
+// - a shape is destroyed
+// - contact filtering is modified
+void b2DestroyContact( b2World* world, b2Contact* contact, bool wakeBodies )
+{
+	// Remove pair from set
+	uint64_t pairKey = B2_SHAPE_PAIR_KEY( contact->shapeIdA, contact->shapeIdB );
+	b2RemoveKey( &world->broadPhase.pairSet, pairKey );
+
+	b2ContactEdge* edgeA = contact->edges + 0;
+	b2ContactEdge* edgeB = contact->edges + 1;
+
+	int bodyIdA = edgeA->bodyId;
+	int bodyIdB = edgeB->bodyId;
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, bodyIdA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, bodyIdB );
+
+	uint32_t flags = contact->flags;
+	bool touching = ( flags & b2_contactTouchingFlag ) != 0;
+
+	// End touch event: only queued when the contact was touching and has contact events enabled
+	if ( touching && ( flags & b2_contactEnableContactEvents ) != 0 )
+	{
+		uint16_t worldId = world->worldId;
+		const b2Shape* shapeA = b2ShapeArray_Get( &world->shapes, contact->shapeIdA );
+		const b2Shape* shapeB = b2ShapeArray_Get( &world->shapes, contact->shapeIdB );
+		b2ShapeId shapeIdA = { shapeA->id + 1, worldId, shapeA->generation }; // the handle stores the internal id plus one
+		b2ShapeId shapeIdB = { shapeB->id + 1, worldId, shapeB->generation };
+
+		b2ContactEndTouchEvent event = { shapeIdA, shapeIdB };
+		b2ContactEndTouchEventArray_Push( world->contactEndEvents + world->endEventArrayIndex, event );
+	}
+
+	// Remove from body A: splice edge 0 out of the body's doubly linked contact list
+	if ( edgeA->prevKey != B2_NULL_INDEX )
+	{
+		b2Contact* prevContact = b2ContactArray_Get( &world->contacts, edgeA->prevKey >> 1 );
+		b2ContactEdge* prevEdge = prevContact->edges + ( edgeA->prevKey & 1 );
+		prevEdge->nextKey = edgeA->nextKey;
+	}
+
+	if ( edgeA->nextKey != B2_NULL_INDEX )
+	{
+		b2Contact* nextContact = b2ContactArray_Get( &world->contacts, edgeA->nextKey >> 1 );
+		b2ContactEdge* nextEdge = nextContact->edges + ( edgeA->nextKey & 1 );
+		nextEdge->prevKey = edgeA->prevKey;
+	}
+
+	int contactId = contact->contactId;
+
+	int edgeKeyA = ( contactId << 1 ) | 0;
+	if ( bodyA->headContactKey == edgeKeyA )
+	{
+		bodyA->headContactKey = edgeA->nextKey; // this edge was the list head, so the next edge takes over
+	}
+
+	bodyA->contactCount -= 1;
+
+	// Remove from body B: splice edge 1 out of the body's doubly linked contact list
+	if ( edgeB->prevKey != B2_NULL_INDEX )
+	{
+		b2Contact* prevContact = b2ContactArray_Get( &world->contacts, edgeB->prevKey >> 1 );
+		b2ContactEdge* prevEdge = prevContact->edges + ( edgeB->prevKey & 1 );
+		prevEdge->nextKey = edgeB->nextKey;
+	}
+
+	if ( edgeB->nextKey != B2_NULL_INDEX )
+	{
+		b2Contact* nextContact = b2ContactArray_Get( &world->contacts, edgeB->nextKey >> 1 );
+		b2ContactEdge* nextEdge = nextContact->edges + ( edgeB->nextKey & 1 );
+		nextEdge->prevKey = edgeB->prevKey;
+	}
+
+	int edgeKeyB = ( contactId << 1 ) | 1;
+	if ( bodyB->headContactKey == edgeKeyB )
+	{
+		bodyB->headContactKey = edgeB->nextKey;
+	}
+
+	bodyB->contactCount -= 1;
+
+	// Remove contact from the array that owns it
+	if ( contact->islandId != B2_NULL_INDEX )
+	{
+		b2UnlinkContact( world, contact );
+	}
+
+	if ( contact->colorIndex != B2_NULL_INDEX )
+	{
+		// contact is an active constraint
+		B2_ASSERT( contact->setIndex == b2_awakeSet );
+		b2RemoveContactFromGraph( world, bodyIdA, bodyIdB, contact->colorIndex, contact->localIndex );
+	}
+	else
+	{
+		// contact is non-touching or is sleeping or is a sensor: its sim is stored in the solver set's contactSims array
+		B2_ASSERT( contact->setIndex != b2_awakeSet || ( contact->flags & b2_contactTouchingFlag ) == 0 );
+		b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, contact->setIndex );
+		int movedIndex = b2ContactSimArray_RemoveSwap( &set->contactSims, contact->localIndex );
+		if ( movedIndex != B2_NULL_INDEX )
+		{
+			b2ContactSim* movedContactSim = set->contactSims.data + contact->localIndex; // the removal moved another sim into the vacated slot
+			b2Contact* movedContact = b2ContactArray_Get( &world->contacts, movedContactSim->contactId );
+			movedContact->localIndex = contact->localIndex; // repoint that sim's contact at its new index
+		}
+	}
+
+	contact->contactId = B2_NULL_INDEX; // mark the slot as free
+	contact->setIndex = B2_NULL_INDEX;
+	contact->colorIndex = B2_NULL_INDEX;
+	contact->localIndex = B2_NULL_INDEX;
+
+	b2FreeId( &world->contactIdPool, contactId );
+
+	if ( wakeBodies && touching )
+	{
+		b2WakeBody( world, bodyA );
+		b2WakeBody( world, bodyB );
+	}
+}
+
+b2ContactSim* b2GetContactSim( b2World* world, b2Contact* contact )
+{
+	// Only an awake contact with a graph color keeps its sim in the constraint graph; all others use their solver set.
+	if ( contact->setIndex != b2_awakeSet || contact->colorIndex == B2_NULL_INDEX )
+	{
+		b2SolverSet* owner = b2SolverSetArray_Get( &world->solverSets, contact->setIndex );
+		return b2ContactSimArray_Get( &owner->contactSims, contact->localIndex );
+	}
+
+	B2_ASSERT( 0 <= contact->colorIndex && contact->colorIndex < B2_GRAPH_COLOR_COUNT );
+	b2GraphColor* graphColor = &world->constraintGraph.colors[contact->colorIndex];
+	return b2ContactSimArray_Get( &graphColor->contactSims, contact->localIndex );
+}
+
+bool b2ShouldShapesCollide( b2Filter filterA, b2Filter filterB )
+{
+	// A shared non-zero group overrides the bit masks: a positive group collides, a negative one does not.
+	if ( filterA.groupIndex != 0 && filterB.groupIndex == filterA.groupIndex )
+	{
+		return 0 < filterA.groupIndex;
+	}
+
+	return ( filterB.maskBits & filterA.categoryBits ) != 0 && ( filterB.categoryBits & filterA.maskBits ) != 0;
+}
+
+// Recompute the manifold, mixed material values and touching state of a contact. Returns true when touching.
+// Note: do not assume the shape AABBs are overlapping or are valid.
+bool b2UpdateContact( b2World* world, b2ContactSim* contactSim, b2Shape* shapeA, b2Transform transformA, b2Vec2 centerOffsetA,
+					  b2Shape* shapeB, b2Transform transformB, b2Vec2 centerOffsetB )
+{
+	// Save old manifold so its impulses can be carried over to matching points below
+	b2Manifold oldManifold = contactSim->manifold;
+
+	// Compute new manifold using the function registered for this shape type pair
+	b2ManifoldFcn* fcn = s_registers[shapeA->type][shapeB->type].fcn;
+	contactSim->manifold = fcn( shapeA, transformA, shapeB, transformB, &contactSim->cache );
+
+	// Keep these updated in case the values on the shapes are modified
+	contactSim->friction = world->frictionCallback( shapeA->friction, shapeA->userMaterialId, shapeB->friction, shapeB->userMaterialId );
+	contactSim->restitution = world->restitutionCallback( shapeA->restitution, shapeA->userMaterialId, shapeB->restitution, shapeB->userMaterialId );
+
+	// todo branch improves perf?
+	if (shapeA->rollingResistance > 0.0f || shapeB->rollingResistance > 0.0f)
+	{
+		float radiusA = b2GetShapeRadius( shapeA );
+		float radiusB = b2GetShapeRadius( shapeB );
+		float maxRadius = b2MaxFloat( radiusA, radiusB );
+		contactSim->rollingResistance = b2MaxFloat( shapeA->rollingResistance, shapeB->rollingResistance ) * maxRadius;
+	}
+	else
+	{
+		contactSim->rollingResistance = 0.0f;
+	}
+
+	contactSim->tangentSpeed = shapeA->tangentSpeed + shapeB->tangentSpeed;
+
+	int pointCount = contactSim->manifold.pointCount;
+	bool touching = pointCount > 0;
+
+	if ( touching && world->preSolveFcn && ( contactSim->simFlags & b2_simEnablePreSolveEvents ) != 0 )
+	{
+		b2ShapeId shapeIdA = { shapeA->id + 1, world->worldId, shapeA->generation };
+		b2ShapeId shapeIdB = { shapeB->id + 1, world->worldId, shapeB->generation };
+
+		// this call assumes thread safety
+		touching = world->preSolveFcn( shapeIdA, shapeIdB, &contactSim->manifold, world->preSolveContext );
+		if ( touching == false )
+		{
+			// disable contact
+			pointCount = 0;
+			contactSim->manifold.pointCount = 0;
+		}
+	}
+
+	// This flag is for testing: with speculative contacts disabled, drop one point whose separation exceeds the margin
+	if ( world->enableSpeculative == false && pointCount == 2 )
+	{
+		if ( contactSim->manifold.points[0].separation > 1.5f * B2_LINEAR_SLOP )
+		{
+			contactSim->manifold.points[0] = contactSim->manifold.points[1]; // drop point 0 by moving point 1 into its slot
+			contactSim->manifold.pointCount = 1;
+		}
+		else if ( contactSim->manifold.points[1].separation > 1.5f * B2_LINEAR_SLOP )
+		{
+			contactSim->manifold.pointCount = 1; // drop point 1 (this branch previously re-tested point 0 and never ran)
+		}
+
+		pointCount = contactSim->manifold.pointCount;
+	}
+
+	if ( touching && ( shapeA->enableHitEvents || shapeB->enableHitEvents ) )
+	{
+		contactSim->simFlags |= b2_simEnableHitEvent;
+	}
+	else
+	{
+		contactSim->simFlags &= ~b2_simEnableHitEvent;
+	}
+
+	if (pointCount > 0)
+	{
+		contactSim->manifold.rollingImpulse = oldManifold.rollingImpulse; // carry the rolling impulse over while points remain
+	}
+
+	// Match old contact ids to new contact ids and copy the
+	// stored impulses to warm start the solver.
+	int unmatchedCount = 0;
+	for ( int i = 0; i < pointCount; ++i )
+	{
+		b2ManifoldPoint* mp2 = contactSim->manifold.points + i;
+
+		// shift anchors to be center of mass relative
+		mp2->anchorA = b2Sub( mp2->anchorA, centerOffsetA );
+		mp2->anchorB = b2Sub( mp2->anchorB, centerOffsetB );
+
+		mp2->normalImpulse = 0.0f;
+		mp2->tangentImpulse = 0.0f;
+		mp2->totalNormalImpulse = 0.0f;
+		mp2->normalVelocity = 0.0f;
+		mp2->persisted = false;
+
+		uint16_t id2 = mp2->id;
+
+		for ( int j = 0; j < oldManifold.pointCount; ++j )
+		{
+			b2ManifoldPoint* mp1 = oldManifold.points + j;
+
+			if ( mp1->id == id2 )
+			{
+				mp2->normalImpulse = mp1->normalImpulse;
+				mp2->tangentImpulse = mp1->tangentImpulse;
+				mp2->persisted = true;
+
+				// clear old impulse
+				mp1->normalImpulse = 0.0f;
+				mp1->tangentImpulse = 0.0f;
+				break;
+			}
+		}
+
+		unmatchedCount += mp2->persisted ? 0 : 1;
+	}
+
+	B2_UNUSED( unmatchedCount );
+
+#if 0
+		// todo I haven't found an improvement from this yet
+		// If there are unmatched new contact points, apply any left over old impulse.
+		if (unmatchedCount > 0)
+		{
+			float unmatchedNormalImpulse = 0.0f;
+			float unmatchedTangentImpulse = 0.0f;
+			for (int i = 0; i < oldManifold.pointCount; ++i)
+			{
+				b2ManifoldPoint* mp = oldManifold.points + i;
+				unmatchedNormalImpulse += mp->normalImpulse;
+				unmatchedTangentImpulse += mp->tangentImpulse;
+			}
+
+			float inverse = 1.0f / unmatchedCount;
+			unmatchedNormalImpulse *= inverse;
+			unmatchedTangentImpulse *= inverse;
+
+			for ( int i = 0; i < pointCount; ++i )
+			{
+				b2ManifoldPoint* mp2 = contactSim->manifold.points + i;
+
+				if (mp2->persisted)
+				{
+					continue;
+				}
+
+				mp2->normalImpulse = unmatchedNormalImpulse;
+				mp2->tangentImpulse = unmatchedTangentImpulse;
+			}
+		}
+#endif
+
+	if ( touching )
+	{
+		contactSim->simFlags |= b2_simTouchingFlag;
+	}
+	else
+	{
+		contactSim->simFlags &= ~b2_simTouchingFlag;
+	}
+
+	return touching;
+}
+
+b2Manifold b2ComputeManifold( b2Shape* shapeA, b2Transform transformA, b2Shape* shapeB, b2Transform transformB )
+{
+	// One-off manifold query: uses a zeroed simplex cache that is thrown away afterwards.
+	b2SimplexCache scratch = { 0 };
+	return s_registers[shapeA->type][shapeB->type].fcn( shapeA, transformA, shapeB, transformB, &scratch );
+}
diff --git a/src/vendor/box2d/contact.h b/src/vendor/box2d/contact.h
new file mode 100644
index 0000000..c23ed56
--- /dev/null
+++ b/src/vendor/box2d/contact.h
@@ -0,0 +1,148 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+#include "core.h"
+
+#include "box2d/collision.h"
+#include "box2d/types.h"
+
+typedef struct b2Shape b2Shape;
+typedef struct b2World b2World;
+
+enum b2ContactFlags
+{
+	// Set when the solid shapes are touching.
+	b2_contactTouchingFlag = 0x00000001,
+
+	// Contact has a hit event
+	b2_contactHitEventFlag = 0x00000002,
+
+	// This contact wants contact events (set at creation when either shape has enableContactEvents)
+	b2_contactEnableContactEvents = 0x00000004,
+};
+
+// A contact edge is used to connect bodies and contacts together
+// in a contact graph where each body is a node and each contact
+// is an edge. A contact edge belongs to a doubly linked list
+// maintained in each attached body. Each contact has two contact
+// edges, one for each attached body.
+typedef struct b2ContactEdge
+{
+	int bodyId; // id of the body this edge is attached to
+	int prevKey; // key of the previous edge in that body's list or B2_NULL_INDEX; a key is ( contactId << 1 ) | edge index
+	int nextKey; // key of the next edge in that body's list or B2_NULL_INDEX, same encoding as prevKey
+} b2ContactEdge;
+
+// Cold contact data. Used as a persistent handle and for persistent island
+// connectivity.
+typedef struct b2Contact
+{
+	// index of simulation set stored in b2World
+	// B2_NULL_INDEX when slot is free
+	int setIndex;
+
+	// index into the constraint graph color array
+	// B2_NULL_INDEX for non-touching or sleeping contacts
+	// B2_NULL_INDEX when slot is free
+	int colorIndex;
+
+	// contact index within set or graph color
+	// B2_NULL_INDEX when slot is free
+	int localIndex;
+
+	b2ContactEdge edges[2]; // edges[0] is linked into body A's contact list, edges[1] into body B's
+	int shapeIdA; // internal id of shape A (b2Shape::id)
+	int shapeIdB; // internal id of shape B (b2Shape::id)
+
+	// A contact only belongs to an island if touching, otherwise B2_NULL_INDEX.
+	int islandPrev;
+	int islandNext;
+	int islandId;
+
+	int contactId; // index of this contact in the world contact array, B2_NULL_INDEX when slot is free
+
+	// b2ContactFlags
+	uint32_t flags;
+
+	bool isMarked;
+} b2Contact;
+
+// Shifted to be distinct from b2ContactFlags
+enum b2ContactSimFlags
+{
+	// Set when the shapes are touching
+	b2_simTouchingFlag = 0x00010000,
+
+	// This contact no longer has overlapping AABBs
+	b2_simDisjoint = 0x00020000,
+
+	// This contact started touching
+	b2_simStartedTouching = 0x00040000,
+
+	// This contact stopped touching
+	b2_simStoppedTouching = 0x00080000,
+
+	// This contact wants hit events (set while touching and either shape has enableHitEvents)
+	b2_simEnableHitEvent = 0x00100000,
+
+	// This contact wants pre-solve events
+	b2_simEnablePreSolveEvents = 0x00200000,
+};
+
+/// The class manages contact between two shapes. A contact exists for each overlapping
+/// AABB in the broad-phase (except if filtered). Therefore a contact object may exist
+/// that has no contact points.
+typedef struct b2ContactSim
+{
+	int contactId; // id of the b2Contact that owns this simulation data
+
+#if B2_VALIDATE
+	int bodyIdA; // validation builds only: body ids used to cross-check the body sim indices
+	int bodyIdB;
+#endif
+
+	int bodySimIndexA; // index into the awake body states, B2_NULL_INDEX when body A has none
+	int bodySimIndexB; // index into the awake body states, B2_NULL_INDEX when body B has none
+
+	int shapeIdA;
+	int shapeIdB;
+
+	float invMassA;
+	float invIA;
+
+	float invMassB;
+	float invIB;
+
+	b2Manifold manifold; // contact points recomputed by b2UpdateContact
+
+	// Mixed friction and restitution (results of the world friction and restitution callbacks)
+	float friction;
+	float restitution;
+	float rollingResistance; // larger shape rolling resistance times the larger shape radius, or zero
+	float tangentSpeed; // sum of both shapes' tangent speeds
+
+	// b2ContactSimFlags
+	uint32_t simFlags;
+
+	b2SimplexCache cache; // simplex cache handed to the manifold function on every update
+} b2ContactSim;
+
+void b2InitializeContactRegisters( void );
+
+void b2CreateContact( b2World* world, b2Shape* shapeA, b2Shape* shapeB );
+void b2DestroyContact( b2World* world, b2Contact* contact, bool wakeBodies );
+
+b2ContactSim* b2GetContactSim( b2World* world, b2Contact* contact );
+
+bool b2ShouldShapesCollide( b2Filter filterA, b2Filter filterB );
+
+bool b2UpdateContact( b2World* world, b2ContactSim* contactSim, b2Shape* shapeA, b2Transform transformA, b2Vec2 centerOffsetA,
+					  b2Shape* shapeB, b2Transform transformB, b2Vec2 centerOffsetB );
+
+b2Manifold b2ComputeManifold( b2Shape* shapeA, b2Transform transformA, b2Shape* shapeB, b2Transform transformB );
+
+B2_ARRAY_INLINE( b2Contact, b2Contact )
+B2_ARRAY_INLINE( b2ContactSim, b2ContactSim )
diff --git a/src/vendor/box2d/contact_solver.c b/src/vendor/box2d/contact_solver.c
new file mode 100644
index 0000000..332e8fb
--- /dev/null
+++ b/src/vendor/box2d/contact_solver.c
@@ -0,0 +1,2120 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "contact_solver.h"
+
+#include "body.h"
+#include "constraint_graph.h"
+#include "contact.h"
+#include "core.h"
+#include "solver_set.h"
+#include "world.h"
+
+#include <stddef.h>
+
+// contact separation for sub-stepping
+// s = s0 + dot(cB + rB - cA - rA, normal)
+// normal is held constant
+// body positions c can translate and anchors r can rotate
+// s(t) = s0 + dot(cB(t) + rB(t) - cA(t) - rA(t), normal)
+// s(t) = s0 + dot(cB0 + dpB + rot(dqB, rB0) - cA0 - dpA - rot(dqA, rA0), normal)
+// s(t) = s0 + dot(cB0 - cA0, normal) + dot(dpB - dpA + rot(dqB, rB0) - rot(dqA, rA0), normal)
+// s_base = s0 + dot(cB0 - cA0, normal)
+
+void b2PrepareOverflowContacts( b2StepContext* context ) // builds one solver constraint per contact sim in the overflow graph color
+{
+	b2TracyCZoneNC( prepare_overflow_contact, "Prepare Overflow Contact", b2_colorYellow, true );
+
+	b2World* world = context->world;
+	b2ConstraintGraph* graph = context->graph;
+	b2GraphColor* color = graph->colors + B2_OVERFLOW_INDEX;
+	b2ContactConstraint* constraints = color->overflowConstraints;
+	int contactCount = color->contactSims.count;
+	b2ContactSim* contacts = color->contactSims.data;
+	b2BodyState* awakeStates = context->states;
+
+#if B2_VALIDATE
+	b2Body* bodies = world->bodies.data;
+#endif
+
+	// Stiffer for static contacts to avoid bodies getting pushed through the ground
+	b2Softness contactSoftness = context->contactSoftness;
+	b2Softness staticSoftness = context->staticSoftness;
+
+	float warmStartScale = world->enableWarmStarting ? 1.0f : 0.0f; // zero discards the stored impulses when warm starting is off
+
+	for ( int i = 0; i < contactCount; ++i )
+	{
+		b2ContactSim* contactSim = contacts + i;
+
+		const b2Manifold* manifold = &contactSim->manifold;
+		int pointCount = manifold->pointCount;
+
+		B2_ASSERT( 0 < pointCount && pointCount <= 2 );
+
+		int indexA = contactSim->bodySimIndexA;
+		int indexB = contactSim->bodySimIndexB;
+
+#if B2_VALIDATE
+		b2Body* bodyA = bodies + contactSim->bodyIdA;
+		int validIndexA = bodyA->setIndex == b2_awakeSet ? bodyA->localIndex : B2_NULL_INDEX;
+		B2_ASSERT( indexA == validIndexA );
+
+		b2Body* bodyB = bodies + contactSim->bodyIdB;
+		int validIndexB = bodyB->setIndex == b2_awakeSet ? bodyB->localIndex : B2_NULL_INDEX;
+		B2_ASSERT( indexB == validIndexB );
+#endif
+
+		b2ContactConstraint* constraint = constraints + i; // constraint i corresponds to contact sim i
+		constraint->indexA = indexA;
+		constraint->indexB = indexB;
+		constraint->normal = manifold->normal;
+		constraint->friction = contactSim->friction;
+		constraint->restitution = contactSim->restitution;
+		constraint->rollingResistance = contactSim->rollingResistance;
+		constraint->rollingImpulse = warmStartScale * manifold->rollingImpulse;
+		constraint->tangentSpeed = contactSim->tangentSpeed;
+		constraint->pointCount = pointCount;
+
+		b2Vec2 vA = b2Vec2_zero;
+		float wA = 0.0f;
+		float mA = contactSim->invMassA;
+		float iA = contactSim->invIA;
+		if ( indexA != B2_NULL_INDEX ) // a null index leaves the velocity of body A at zero
+		{
+			b2BodyState* stateA = awakeStates + indexA;
+			vA = stateA->linearVelocity;
+			wA = stateA->angularVelocity;
+		}
+
+		b2Vec2 vB = b2Vec2_zero;
+		float wB = 0.0f;
+		float mB = contactSim->invMassB;
+		float iB = contactSim->invIB;
+		if ( indexB != B2_NULL_INDEX ) // a null index leaves the velocity of body B at zero
+		{
+			b2BodyState* stateB = awakeStates + indexB;
+			vB = stateB->linearVelocity;
+			wB = stateB->angularVelocity;
+		}
+
+		if ( indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX )
+		{
+			constraint->softness = staticSoftness;
+		}
+		else
+		{
+			constraint->softness = contactSoftness;
+		}
+
+		// copy mass into constraint to avoid cache misses during sub-stepping
+		constraint->invMassA = mA;
+		constraint->invIA = iA;
+		constraint->invMassB = mB;
+		constraint->invIB = iB;
+
+		{
+			float k = iA + iB;
+			constraint->rollingMass = k > 0.0f ? 1.0f / k : 0.0f; // zero when iA + iB is not positive
+		}
+
+		b2Vec2 normal = constraint->normal;
+		b2Vec2 tangent = b2RightPerp( constraint->normal );
+
+		for ( int j = 0; j < pointCount; ++j )
+		{
+			const b2ManifoldPoint* mp = manifold->points + j;
+			b2ContactConstraintPoint* cp = constraint->points + j;
+
+			cp->normalImpulse = warmStartScale * mp->normalImpulse;
+			cp->tangentImpulse = warmStartScale * mp->tangentImpulse;
+			cp->totalNormalImpulse = 0.0f;
+
+			b2Vec2 rA = mp->anchorA;
+			b2Vec2 rB = mp->anchorB;
+
+			cp->anchorA = rA;
+			cp->anchorB = rB;
+			cp->baseSeparation = mp->separation - b2Dot( b2Sub( rB, rA ), normal ); // anchor offset removed here; the solve step adds the current offset back
+
+			float rnA = b2Cross( rA, normal );
+			float rnB = b2Cross( rB, normal );
+			float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB;
+			cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; // zero when kNormal is not positive
+
+			float rtA = b2Cross( rA, tangent );
+			float rtB = b2Cross( rB, tangent );
+			float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB;
+			cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f;
+
+			// Save relative velocity for restitution
+			b2Vec2 vrA = b2Add( vA, b2CrossSV( wA, rA ) );
+			b2Vec2 vrB = b2Add( vB, b2CrossSV( wB, rB ) );
+			cp->relativeVelocity = b2Dot( normal, b2Sub( vrB, vrA ) );
+		}
+	}
+
+	b2TracyCZoneEnd( prepare_overflow_contact );
+}
+
+void b2WarmStartOverflowContacts( b2StepContext* context ) // applies the stored impulses of the overflow constraints to the body velocities
+{
+	b2TracyCZoneNC( warmstart_overflow_contact, "WarmStart Overflow Contact", b2_colorDarkOrange, true );
+
+	b2ConstraintGraph* graph = context->graph;
+	b2GraphColor* color = graph->colors + B2_OVERFLOW_INDEX;
+	b2ContactConstraint* constraints = color->overflowConstraints;
+	int contactCount = color->contactSims.count;
+	b2World* world = context->world;
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	b2BodyState* states = awakeSet->bodyStates.data;
+
+	// This is a dummy state to represent a static body because static bodies don't have a solver body.
+	b2BodyState dummyState = b2_identityBodyState;
+
+	for ( int i = 0; i < contactCount; ++i )
+	{
+		const b2ContactConstraint* constraint = constraints + i;
+
+		int indexA = constraint->indexA;
+		int indexB = constraint->indexB;
+
+		b2BodyState* stateA = indexA == B2_NULL_INDEX ? &dummyState : states + indexA;
+		b2BodyState* stateB = indexB == B2_NULL_INDEX ? &dummyState : states + indexB;
+
+		b2Vec2 vA = stateA->linearVelocity;
+		float wA = stateA->angularVelocity;
+		b2Vec2 vB = stateB->linearVelocity;
+		float wB = stateB->angularVelocity;
+
+		float mA = constraint->invMassA;
+		float iA = constraint->invIA;
+		float mB = constraint->invMassB;
+		float iB = constraint->invIB;
+
+		// Apply each point's stored normal and tangent impulse, then the stored rolling impulse
+		b2Vec2 normal = constraint->normal;
+		b2Vec2 tangent = b2RightPerp( constraint->normal );
+		int pointCount = constraint->pointCount;
+
+		for ( int j = 0; j < pointCount; ++j )
+		{
+			const b2ContactConstraintPoint* cp = constraint->points + j;
+
+			// fixed anchors
+			b2Vec2 rA = cp->anchorA;
+			b2Vec2 rB = cp->anchorB;
+
+			b2Vec2 P = b2Add( b2MulSV( cp->normalImpulse, normal ), b2MulSV( cp->tangentImpulse, tangent ) );
+			wA -= iA * b2Cross( rA, P ); // body A receives -P, body B receives +P
+			vA = b2MulAdd( vA, -mA, P );
+			wB += iB * b2Cross( rB, P );
+			vB = b2MulAdd( vB, mB, P );
+		}
+
+		wA -= iA * constraint->rollingImpulse;
+		wB += iB * constraint->rollingImpulse;
+
+		stateA->linearVelocity = vA; // for a null index this writes to the local dummy state
+		stateA->angularVelocity = wA;
+		stateB->linearVelocity = vB;
+		stateB->angularVelocity = wB;
+	}
+
+	b2TracyCZoneEnd( warmstart_overflow_contact );
+}
+
+void b2SolveOverflowContacts( b2StepContext* context, bool useBias ) // one velocity pass over the overflow constraints: normal, friction, rolling
+{
+	b2TracyCZoneNC( solve_contact, "Solve Contact", b2_colorAliceBlue, true );
+
+	b2ConstraintGraph* graph = context->graph;
+	b2GraphColor* color = graph->colors + B2_OVERFLOW_INDEX;
+	b2ContactConstraint* constraints = color->overflowConstraints;
+	int contactCount = color->contactSims.count;
+	b2World* world = context->world;
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	b2BodyState* states = awakeSet->bodyStates.data;
+
+	float inv_h = context->inv_h;
+	const float pushout = context->world->maxContactPushSpeed;
+
+	// This is a dummy body to represent a static body since static bodies don't have a solver body.
+	b2BodyState dummyState = b2_identityBodyState;
+
+	for ( int i = 0; i < contactCount; ++i )
+	{
+		b2ContactConstraint* constraint = constraints + i;
+		float mA = constraint->invMassA;
+		float iA = constraint->invIA;
+		float mB = constraint->invMassB;
+		float iB = constraint->invIB;
+
+		b2BodyState* stateA = constraint->indexA == B2_NULL_INDEX ? &dummyState : states + constraint->indexA;
+		b2Vec2 vA = stateA->linearVelocity;
+		float wA = stateA->angularVelocity;
+		b2Rot dqA = stateA->deltaRotation;
+
+		b2BodyState* stateB = constraint->indexB == B2_NULL_INDEX ? &dummyState : states + constraint->indexB;
+		b2Vec2 vB = stateB->linearVelocity;
+		float wB = stateB->angularVelocity;
+		b2Rot dqB = stateB->deltaRotation;
+
+		b2Vec2 dp = b2Sub( stateB->deltaPosition, stateA->deltaPosition );
+
+		b2Vec2 normal = constraint->normal;
+		b2Vec2 tangent = b2RightPerp( normal );
+		float friction = constraint->friction;
+		b2Softness softness = constraint->softness;
+
+		int pointCount = constraint->pointCount;
+		float totalNormalImpulse = 0.0f; // sum over the points, used below as the rolling resistance limit
+
+		// Non-penetration
+		for ( int j = 0; j < pointCount; ++j )
+		{
+			b2ContactConstraintPoint* cp = constraint->points + j;
+
+			// fixed anchor points
+			b2Vec2 rA = cp->anchorA;
+			b2Vec2 rB = cp->anchorB;
+
+			// compute current separation
+			// this is subject to round-off error if the anchor is far from the body center of mass
+			b2Vec2 ds = b2Add( dp, b2Sub( b2RotateVector( dqB, rB ), b2RotateVector( dqA, rA ) ) );
+			float s = cp->baseSeparation + b2Dot( ds, normal );
+
+			float velocityBias = 0.0f;
+			float massScale = 1.0f;
+			float impulseScale = 0.0f;
+			if ( s > 0.0f )
+			{
+				// speculative bias
+				velocityBias = s * inv_h;
+			}
+			else if ( useBias )
+			{
+				velocityBias = b2MaxFloat( softness.biasRate * s, -pushout ); // bias is clamped to at least -pushout
+				massScale = softness.massScale;
+				impulseScale = softness.impulseScale;
+			}
+
+			// relative normal velocity at contact
+			b2Vec2 vrA = b2Add( vA, b2CrossSV( wA, rA ) );
+			b2Vec2 vrB = b2Add( vB, b2CrossSV( wB, rB ) );
+			float vn = b2Dot( b2Sub( vrB, vrA ), normal );
+
+			// incremental normal impulse
+			float impulse = -cp->normalMass * massScale * ( vn + velocityBias ) - impulseScale * cp->normalImpulse;
+
+			// clamp the accumulated impulse
+			float newImpulse = b2MaxFloat( cp->normalImpulse + impulse, 0.0f );
+			impulse = newImpulse - cp->normalImpulse;
+			cp->normalImpulse = newImpulse;
+			cp->totalNormalImpulse += newImpulse;
+			totalNormalImpulse += newImpulse;
+
+			// apply normal impulse
+			b2Vec2 P = b2MulSV( impulse, normal );
+			vA = b2MulSub( vA, mA, P );
+			wA -= iA * b2Cross( rA, P );
+
+			vB = b2MulAdd( vB, mB, P );
+			wB += iB * b2Cross( rB, P );
+		}
+
+		// Friction
+		for ( int j = 0; j < pointCount; ++j )
+		{
+			b2ContactConstraintPoint* cp = constraint->points + j;
+
+			// fixed anchor points
+			b2Vec2 rA = cp->anchorA;
+			b2Vec2 rB = cp->anchorB;
+
+			// relative tangent velocity at contact
+			b2Vec2 vrB = b2Add( vB, b2CrossSV( wB, rB ) );
+			b2Vec2 vrA = b2Add( vA, b2CrossSV( wA, rA ) );
+
+			// vt = dot(vrB - sB * tangent - (vrA + sA * tangent), tangent)
+			//    = dot(vrB - vrA, tangent) - (sA + sB)
+
+			float vt = b2Dot( b2Sub( vrB, vrA ), tangent ) - constraint->tangentSpeed;
+
+			// incremental tangent impulse
+			float impulse = cp->tangentMass * ( -vt );
+
+			// clamp the accumulated force
+			float maxFriction = friction * cp->normalImpulse; // limit is friction times this point's current normal impulse
+			float newImpulse = b2ClampFloat( cp->tangentImpulse + impulse, -maxFriction, maxFriction );
+			impulse = newImpulse - cp->tangentImpulse;
+			cp->tangentImpulse = newImpulse;
+
+			// apply tangent impulse
+			b2Vec2 P = b2MulSV( impulse, tangent );
+			vA = b2MulSub( vA, mA, P );
+			wA -= iA * b2Cross( rA, P );
+			vB = b2MulAdd( vB, mB, P );
+			wB += iB * b2Cross( rB, P );
+		}
+
+		// Rolling resistance
+		{
+			float deltaLambda = -constraint->rollingMass * ( wB - wA );
+			float lambda = constraint->rollingImpulse;
+			float maxLambda = constraint->rollingResistance * totalNormalImpulse;
+			constraint->rollingImpulse = b2ClampFloat( lambda + deltaLambda, -maxLambda, maxLambda );
+			deltaLambda = constraint->rollingImpulse - lambda; // only the clamped change is applied
+
+			wA -= iA * deltaLambda;
+			wB += iB * deltaLambda;
+		}
+
+		stateA->linearVelocity = vA;
+		stateA->angularVelocity = wA;
+		stateB->linearVelocity = vB;
+		stateB->angularVelocity = wB;
+	}
+
+	b2TracyCZoneEnd( solve_contact );
+}
+
+void b2ApplyOverflowRestitution( b2StepContext* context ) // restitution pass for the contact constraints of the overflow color
+{
+	b2TracyCZoneNC( overflow_resitution, "Overflow Restitution", b2_colorViolet, true );
+
+	b2ConstraintGraph* graph = context->graph;
+	b2GraphColor* color = graph->colors + B2_OVERFLOW_INDEX;
+	b2ContactConstraint* constraints = color->overflowConstraints;
+	int contactCount = color->contactSims.count; // the constraint array is walked using the contact sim count
+	b2World* world = context->world;
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	b2BodyState* states = awakeSet->bodyStates.data;
+
+	float threshold = context->world->restitutionThreshold; // compared against -relativeVelocity per point below
+
+	// dummy state to represent a static body
+	b2BodyState dummyState = b2_identityBodyState; // local stand-in used when indexA / indexB is B2_NULL_INDEX
+
+	for ( int i = 0; i < contactCount; ++i )
+	{
+		b2ContactConstraint* constraint = constraints + i;
+
+		float restitution = constraint->restitution;
+		if ( restitution == 0.0f )
+		{
+			continue; // no restitution on this contact, nothing to apply
+		}
+
+		float mA = constraint->invMassA;
+		float iA = constraint->invIA;
+		float mB = constraint->invMassB;
+		float iB = constraint->invIB;
+
+		b2BodyState* stateA = constraint->indexA == B2_NULL_INDEX ? &dummyState : states + constraint->indexA;
+		b2Vec2 vA = stateA->linearVelocity;
+		float wA = stateA->angularVelocity;
+
+		b2BodyState* stateB = constraint->indexB == B2_NULL_INDEX ? &dummyState : states + constraint->indexB;
+		b2Vec2 vB = stateB->linearVelocity;
+		float wB = stateB->angularVelocity;
+
+		b2Vec2 normal = constraint->normal;
+		int pointCount = constraint->pointCount;
+
+		// it is possible to get more accurate restitution by iterating
+		// this only makes a difference if there are two contact points
+		// for (int iter = 0; iter < 10; ++iter)
+		{
+			for ( int j = 0; j < pointCount; ++j )
+			{
+				b2ContactConstraintPoint* cp = constraint->points + j;
+
+				// if the normal impulse is zero then there was no collision
+				// this skips speculative contact points that didn't generate an impulse
+				// totalNormalImpulse is tested (not normalImpulse) in case there was a collision that moved away within the sub-step process
+				if ( cp->relativeVelocity > -threshold || cp->totalNormalImpulse == 0.0f )
+				{
+					continue; // also skipped when the stored relative velocity is above -threshold
+				}
+
+				// fixed anchor points
+				b2Vec2 rA = cp->anchorA;
+				b2Vec2 rB = cp->anchorB;
+
+				// relative normal velocity at contact
+				b2Vec2 vrB = b2Add( vB, b2CrossSV( wB, rB ) );
+				b2Vec2 vrA = b2Add( vA, b2CrossSV( wA, rA ) );
+				float vn = b2Dot( b2Sub( vrB, vrA ), normal );
+
+				// compute normal impulse
+				float impulse = -cp->normalMass * ( vn + restitution * cp->relativeVelocity ); // targets vn == -restitution * relativeVelocity
+
+				// clamp the accumulated impulse
+				// todo should this be stored?
+				float newImpulse = b2MaxFloat( cp->normalImpulse + impulse, 0.0f ); // accumulated impulse never goes negative
+				impulse = newImpulse - cp->normalImpulse; // the increment that is actually applied
+				cp->normalImpulse = newImpulse;
+
+				// Add the incremental impulse rather than the full impulse because this is not a sub-step
+				cp->totalNormalImpulse += impulse;
+
+				// apply contact impulse
+				b2Vec2 P = b2MulSV( impulse, normal );
+				vA = b2MulSub( vA, mA, P );
+				wA -= iA * b2Cross( rA, P );
+				vB = b2MulAdd( vB, mB, P );
+				wB += iB * b2Cross( rB, P );
+			}
+		}
+
+		stateA->linearVelocity = vA; // write back the locally updated velocities of both bodies
+		stateA->angularVelocity = wA;
+		stateB->linearVelocity = vB;
+		stateB->angularVelocity = wB;
+	}
+
+	b2TracyCZoneEnd( overflow_resitution );
+}
+
+void b2StoreOverflowImpulses( b2StepContext* context )
+{
+	// Copies each overflow constraint's solved impulses back into the manifold of the matching contact sim
+	b2TracyCZoneNC( store_impulses, "Store", b2_colorFireBrick, true );
+
+	b2GraphColor* overflow = &context->graph->colors[B2_OVERFLOW_INDEX];
+	const b2ContactConstraint* solved = overflow->overflowConstraints;
+	b2ContactSim* sims = overflow->contactSims.data;
+	const int simCount = overflow->contactSims.count;
+
+	// constraint i is stored into contact sim i, so both arrays advance together
+	for ( int simIndex = 0; simIndex < simCount; ++simIndex, ++solved )
+	{
+		b2Manifold* target = &sims[simIndex].manifold;
+		const int targetPointCount = target->pointCount;
+
+		for ( int k = 0; k < targetPointCount; ++k )
+		{
+			const b2ContactConstraintPoint* from = &solved->points[k];
+			target->points[k].normalImpulse = from->normalImpulse;
+			target->points[k].tangentImpulse = from->tangentImpulse;
+			target->points[k].totalNormalImpulse = from->totalNormalImpulse;
+			target->points[k].normalVelocity = from->relativeVelocity; // the manifold names this field normalVelocity
+		}
+
+		target->rollingImpulse = solved->rollingImpulse;
+	}
+
+	b2TracyCZoneEnd( store_impulses );
+}
+
+#if defined( B2_SIMD_AVX2 )
+
+#include <immintrin.h>
+
+// wide float holds 8 numbers (one 256-bit __m256 register)
+typedef __m256 b2FloatW;
+
+#elif defined( B2_SIMD_NEON )
+
+#include <arm_neon.h>
+
+// wide float holds 4 numbers (one float32x4_t register)
+typedef float32x4_t b2FloatW;
+
+#elif defined( B2_SIMD_SSE2 )
+
+#include <emmintrin.h>
+
+// wide float holds 4 numbers (one 128-bit __m128 register)
+typedef __m128 b2FloatW;
+
+#else
+
+// scalar math: no SIMD backend selected, the four lanes are plain struct members
+typedef struct b2FloatW
+{
+	float x, y, z, w; // lanes 0..3 in this order (see the scalar b2GatherBodies)
+} b2FloatW;
+
+#endif
+
+// Wide vec2: one 2D vector per lane, split into a wide x component and a wide y component
+typedef struct b2Vec2W
+{
+	b2FloatW X, Y; // X = x of every lane, Y = y of every lane
+} b2Vec2W;
+
+// Wide rotation: one rotation per lane, consumed by b2RotateVectorW
+typedef struct b2RotW
+{
+	b2FloatW C, S; // gathered from b2BodyState deltaRotation.c and deltaRotation.s
+} b2RotW;
+
+#if defined( B2_SIMD_AVX2 )
+
+static inline b2FloatW b2ZeroW( void ) // AVX2: all 8 lanes 0.0f
+{
+	return _mm256_setzero_ps();
+}
+
+static inline b2FloatW b2SplatW( float scalar ) // broadcast scalar to all 8 lanes
+{
+	return _mm256_set1_ps( scalar );
+}
+
+static inline b2FloatW b2AddW( b2FloatW a, b2FloatW b ) // per lane: a + b
+{
+	return _mm256_add_ps( a, b );
+}
+
+static inline b2FloatW b2SubW( b2FloatW a, b2FloatW b ) // per lane: a - b
+{
+	return _mm256_sub_ps( a, b );
+}
+
+static inline b2FloatW b2MulW( b2FloatW a, b2FloatW b ) // per lane: a * b
+{
+	return _mm256_mul_ps( a, b );
+}
+
+static inline b2FloatW b2MulAddW( b2FloatW a, b2FloatW b, b2FloatW c ) // per lane: a + b * c
+{
+	// FMA can be emulated: https://github.com/lattera/glibc/blob/master/sysdeps/ieee754/dbl-64/s_fmaf.c#L34
+	// return _mm256_fmadd_ps( b, c, a );
+	return _mm256_add_ps( _mm256_mul_ps( b, c ), a ); // written as a separate multiply and add; the fused form is left disabled above
+}
+
+static inline b2FloatW b2MulSubW( b2FloatW a, b2FloatW b, b2FloatW c ) // per lane: a - b * c
+{
+	// return _mm256_fnmadd_ps(b, c, a);
+	return _mm256_sub_ps( a, _mm256_mul_ps( b, c ) ); // separate multiply and subtract, as in b2MulAddW
+}
+
+static inline b2FloatW b2MinW( b2FloatW a, b2FloatW b ) // per lane: minimum of a and b
+{
+	return _mm256_min_ps( a, b );
+}
+
+static inline b2FloatW b2MaxW( b2FloatW a, b2FloatW b ) // per lane: maximum of a and b
+{
+	return _mm256_max_ps( a, b );
+}
+
+// a = clamp(a, -b, b), computed per lane as max(-b, min(a, b))
+static inline b2FloatW b2SymClampW( b2FloatW a, b2FloatW b )
+{
+	b2FloatW nb = _mm256_sub_ps( _mm256_setzero_ps(), b ); // nb = 0 - b
+	return _mm256_max_ps( nb, _mm256_min_ps( a, b ) );
+}
+
+static inline b2FloatW b2OrW( b2FloatW a, b2FloatW b ) // bitwise OR of the lane bits, used to combine comparison masks
+{
+	return _mm256_or_ps( a, b );
+}
+
+static inline b2FloatW b2GreaterThanW( b2FloatW a, b2FloatW b ) // lane mask: all bits set where a > b, zero otherwise
+{
+	return _mm256_cmp_ps( a, b, _CMP_GT_OQ ); // ordered, quiet predicate: a NaN operand compares false
+}
+
+static inline b2FloatW b2EqualsW( b2FloatW a, b2FloatW b ) // lane mask: all bits set where a == b, zero otherwise
+{
+	return _mm256_cmp_ps( a, b, _CMP_EQ_OQ ); // ordered, quiet predicate: a NaN operand compares false
+}
+
+static inline bool b2AllZeroW( b2FloatW a ) // true only when all 8 lanes compare equal to 0.0f
+{
+	// Compare each element with zero
+	b2FloatW zero = _mm256_setzero_ps();
+	b2FloatW cmp = _mm256_cmp_ps( a, zero, _CMP_EQ_OQ );
+
+	// Create a mask from the comparison results (one bit per lane, taken from the lane's top bit)
+	int mask = _mm256_movemask_ps( cmp );
+
+	// If all elements are zero, the mask will be 0xFF (11111111 in binary)
+	return mask == 0xFF;
+}
+
+// component-wise returns mask ? b : a
+static inline b2FloatW b2BlendW( b2FloatW a, b2FloatW b, b2FloatW mask )
+{
+	return _mm256_blendv_ps( a, b, mask ); // blendv selects on the top bit of each mask lane
+}
+
+#elif defined( B2_SIMD_NEON )
+
+static inline b2FloatW b2ZeroW( void ) // NEON: all 4 lanes 0.0f
+{
+	return vdupq_n_f32( 0.0f );
+}
+
+static inline b2FloatW b2SplatW( float scalar ) // broadcast scalar to all 4 lanes
+{
+	return vdupq_n_f32( scalar );
+}
+
+static inline b2FloatW b2SetW( float a, float b, float c, float d ) // lanes 0..3 = a, b, c, d
+{
+	float32_t array[4] = { a, b, c, d }; // staged in memory so a single vector load can build the value
+	return vld1q_f32( array );
+}
+
+static inline b2FloatW b2AddW( b2FloatW a, b2FloatW b ) // per lane: a + b
+{
+	return vaddq_f32( a, b );
+}
+
+static inline b2FloatW b2SubW( b2FloatW a, b2FloatW b ) // per lane: a - b
+{
+	return vsubq_f32( a, b );
+}
+
+static inline b2FloatW b2MulW( b2FloatW a, b2FloatW b ) // per lane: a * b
+{
+	return vmulq_f32( a, b );
+}
+
+static inline b2FloatW b2MulAddW( b2FloatW a, b2FloatW b, b2FloatW c ) // per lane: a + b * c
+{
+	return vmlaq_f32( a, b, c ); // multiply-accumulate intrinsic: the accumulator is the first argument
+}
+
+static inline b2FloatW b2MulSubW( b2FloatW a, b2FloatW b, b2FloatW c ) // per lane: a - b * c
+{
+	return vmlsq_f32( a, b, c ); // multiply-subtract intrinsic: the accumulator is the first argument
+}
+
+static inline b2FloatW b2MinW( b2FloatW a, b2FloatW b ) // per lane: minimum of a and b
+{
+	return vminq_f32( a, b );
+}
+
+static inline b2FloatW b2MaxW( b2FloatW a, b2FloatW b ) // per lane: maximum of a and b
+{
+	return vmaxq_f32( a, b );
+}
+
+// a = clamp(a, -b, b), computed per lane as max(-b, min(a, b))
+static inline b2FloatW b2SymClampW( b2FloatW a, b2FloatW b )
+{
+	b2FloatW nb = vnegq_f32( b ); // nb = -b
+	return vmaxq_f32( nb, vminq_f32( a, b ) );
+}
+
+static inline b2FloatW b2OrW( b2FloatW a, b2FloatW b ) // bitwise OR of the lane bits, used to combine comparison masks
+{
+	return vreinterpretq_f32_u32( vorrq_u32( vreinterpretq_u32_f32( a ), vreinterpretq_u32_f32( b ) ) ); // OR is done on the integer view
+}
+
+static inline b2FloatW b2GreaterThanW( b2FloatW a, b2FloatW b ) // lane mask: all bits set where a > b, zero otherwise
+{
+	return vreinterpretq_f32_u32( vcgtq_f32( a, b ) ); // the integer mask is carried in a float vector
+}
+
+static inline b2FloatW b2EqualsW( b2FloatW a, b2FloatW b ) // lane mask: all bits set where a == b, zero otherwise
+{
+	return vreinterpretq_f32_u32( vceqq_f32( a, b ) ); // the integer mask is carried in a float vector
+}
+
+// Returns true only when every lane of a compares equal to 0.0f (a NaN lane makes the result false).
+static inline bool b2AllZeroW( b2FloatW a )
+{
+	// vceqq_f32 yields an all-ones lane where a == 0 and a zero lane otherwise
+	b2FloatW zero = vdupq_n_f32( 0.0f );
+	uint32x4_t cmp_result = vceqq_f32( a, zero );
+
+// vminvq_u32 is an A64 Advanced SIMD across-lanes intrinsic and does not require SVE, so key on
+// __aarch64__ like b2UnpackLoW / b2UnpackHiW do. The SVE test is kept so no existing fast build regresses.
+#if defined( __aarch64__ ) || defined( __ARM_FEATURE_SVE )
+	// the horizontal minimum is non-zero only if all four lanes matched
+	return vminvq_u32( cmp_result ) != 0;
+#else
+	// 32-bit ARM has no across-lanes minimum, so the lanes are checked one by one
+	return vgetq_lane_u32( cmp_result, 0 ) != 0 && vgetq_lane_u32( cmp_result, 1 ) != 0 && vgetq_lane_u32( cmp_result, 2 ) != 0 &&
+		   vgetq_lane_u32( cmp_result, 3 ) != 0;
+#endif
+}
+
+// component-wise returns mask ? b : a (bitwise select: bits set in mask take b, clear bits take a)
+static inline b2FloatW b2BlendW( b2FloatW a, b2FloatW b, b2FloatW mask )
+{
+	uint32x4_t mask32 = vreinterpretq_u32_f32( mask ); // vbslq_f32 expects the mask as an integer vector
+	return vbslq_f32( mask32, b, a );
+}
+
+static inline b2FloatW b2LoadW( const float32_t* data ) // loads 4 consecutive floats starting at data
+{
+	return vld1q_f32( data );
+}
+
+static inline void b2StoreW( float32_t* data, b2FloatW a ) // stores the 4 lanes of a to data[0..3]
+{
+	vst1q_f32( data, a );
+}
+
+static inline b2FloatW b2UnpackLoW( b2FloatW a, b2FloatW b ) // interleave the low halves: [a0 b0 a1 b1]
+{
+#if defined( __aarch64__ )
+	return vzip1q_f32( a, b );
+#else
+	float32x2_t a1 = vget_low_f32( a ); // 32-bit ARM path: zip the two 64-bit low halves and recombine
+	float32x2_t b1 = vget_low_f32( b );
+	float32x2x2_t result = vzip_f32( a1, b1 );
+	return vcombine_f32( result.val[0], result.val[1] );
+#endif
+}
+
+static inline b2FloatW b2UnpackHiW( b2FloatW a, b2FloatW b ) // interleave the high halves: [a2 b2 a3 b3]
+{
+#if defined( __aarch64__ )
+	return vzip2q_f32( a, b );
+#else
+	float32x2_t a1 = vget_high_f32( a ); // 32-bit ARM path: zip the two 64-bit high halves and recombine
+	float32x2_t b1 = vget_high_f32( b );
+	float32x2x2_t result = vzip_f32( a1, b1 );
+	return vcombine_f32( result.val[0], result.val[1] );
+#endif
+}
+
+#elif defined( B2_SIMD_SSE2 )
+
+static inline b2FloatW b2ZeroW( void ) // SSE2: all 4 lanes 0.0f
+{
+	return _mm_setzero_ps();
+}
+
+static inline b2FloatW b2SplatW( float scalar ) // broadcast scalar to all 4 lanes
+{
+	return _mm_set1_ps( scalar );
+}
+
+static inline b2FloatW b2SetW( float a, float b, float c, float d ) // lanes 0..3 = a, b, c, d
+{
+	return _mm_setr_ps( a, b, c, d ); // setr takes the arguments in lane order (lowest lane first)
+}
+
+static inline b2FloatW b2AddW( b2FloatW a, b2FloatW b ) // per lane: a + b
+{
+	return _mm_add_ps( a, b );
+}
+
+static inline b2FloatW b2SubW( b2FloatW a, b2FloatW b ) // per lane: a - b
+{
+	return _mm_sub_ps( a, b );
+}
+
+static inline b2FloatW b2MulW( b2FloatW a, b2FloatW b ) // per lane: a * b
+{
+	return _mm_mul_ps( a, b );
+}
+
+static inline b2FloatW b2MulAddW( b2FloatW a, b2FloatW b, b2FloatW c ) // per lane: a + b * c
+{
+	return _mm_add_ps( a, _mm_mul_ps( b, c ) ); // separate multiply and add
+}
+
+static inline b2FloatW b2MulSubW( b2FloatW a, b2FloatW b, b2FloatW c ) // per lane: a - b * c
+{
+	return _mm_sub_ps( a, _mm_mul_ps( b, c ) ); // separate multiply and subtract
+}
+
+static inline b2FloatW b2MinW( b2FloatW a, b2FloatW b ) // per lane: minimum of a and b
+{
+	return _mm_min_ps( a, b );
+}
+
+static inline b2FloatW b2MaxW( b2FloatW a, b2FloatW b ) // per lane: maximum of a and b
+{
+	return _mm_max_ps( a, b );
+}
+
+// a = clamp(a, -b, b), computed per lane as max(-b, min(a, b))
+static inline b2FloatW b2SymClampW( b2FloatW a, b2FloatW b )
+{
+	// Create a mask with the sign bit set for each element
+	__m128 mask = _mm_set1_ps( -0.0f );
+
+	// XOR the input with the mask to negate each element
+	__m128 nb = _mm_xor_ps( b, mask );
+
+	return _mm_max_ps( nb, _mm_min_ps( a, b ) );
+}
+
+static inline b2FloatW b2OrW( b2FloatW a, b2FloatW b ) // bitwise OR of the lane bits, used to combine comparison masks
+{
+	return _mm_or_ps( a, b );
+}
+
+static inline b2FloatW b2GreaterThanW( b2FloatW a, b2FloatW b ) // lane mask: all bits set where a > b, zero otherwise
+{
+	return _mm_cmpgt_ps( a, b );
+}
+
+static inline b2FloatW b2EqualsW( b2FloatW a, b2FloatW b ) // lane mask: all bits set where a == b, zero otherwise
+{
+	return _mm_cmpeq_ps( a, b );
+}
+
+static inline bool b2AllZeroW( b2FloatW a ) // true only when all 4 lanes compare equal to 0.0f
+{
+	// Compare each element with zero
+	b2FloatW zero = _mm_setzero_ps();
+	b2FloatW cmp = _mm_cmpeq_ps( a, zero );
+
+	// Create a mask from the comparison results (one bit per lane, taken from the lane's top bit)
+	int mask = _mm_movemask_ps( cmp );
+
+	// If all elements are zero, the mask will be 0xF (1111 in binary)
+	return mask == 0xF;
+}
+
+// component-wise returns mask ? b : a (bitwise select, so mask lanes are expected to be all ones or all zeros)
+static inline b2FloatW b2BlendW( b2FloatW a, b2FloatW b, b2FloatW mask )
+{
+	return _mm_or_ps( _mm_and_ps( mask, b ), _mm_andnot_ps( mask, a ) ); // ( mask & b ) | ( ~mask & a )
+}
+
+static inline b2FloatW b2LoadW( const float* data ) // loads 4 consecutive floats starting at data
+{
+	return _mm_load_ps( data ); // aligned load: data must be 16-byte aligned
+}
+
+static inline void b2StoreW( float* data, b2FloatW a ) // stores the 4 lanes of a to data[0..3]
+{
+	_mm_store_ps( data, a ); // aligned store: data must be 16-byte aligned
+}
+
+static inline b2FloatW b2UnpackLoW( b2FloatW a, b2FloatW b ) // interleave the low halves: [a0 b0 a1 b1]
+{
+	return _mm_unpacklo_ps( a, b );
+}
+
+static inline b2FloatW b2UnpackHiW( b2FloatW a, b2FloatW b ) // interleave the high halves: [a2 b2 a3 b3]
+{
+	return _mm_unpackhi_ps( a, b );
+}
+
+#else
+
+static inline b2FloatW b2ZeroW( void ) // scalar fallback: every lane 0.0f
+{
+	return (b2FloatW){ .x = 0.0f, .y = 0.0f, .z = 0.0f, .w = 0.0f };
+}
+
+static inline b2FloatW b2SplatW( float scalar ) // every lane = scalar
+{
+	return (b2FloatW){ .x = scalar, .y = scalar, .z = scalar, .w = scalar };
+}
+
+static inline b2FloatW b2AddW( b2FloatW a, b2FloatW b ) // per lane: a + b
+{
+	return (b2FloatW){ .x = a.x + b.x, .y = a.y + b.y, .z = a.z + b.z, .w = a.w + b.w };
+}
+
+static inline b2FloatW b2SubW( b2FloatW a, b2FloatW b ) // per lane: a - b
+{
+	return (b2FloatW){ .x = a.x - b.x, .y = a.y - b.y, .z = a.z - b.z, .w = a.w - b.w };
+}
+
+static inline b2FloatW b2MulW( b2FloatW a, b2FloatW b ) // per lane: a * b
+{
+	return (b2FloatW){ .x = a.x * b.x, .y = a.y * b.y, .z = a.z * b.z, .w = a.w * b.w };
+}
+
+static inline b2FloatW b2MulAddW( b2FloatW a, b2FloatW b, b2FloatW c ) // per lane: a + b * c
+{
+	return (b2FloatW){ .x = a.x + b.x * c.x, .y = a.y + b.y * c.y, .z = a.z + b.z * c.z, .w = a.w + b.w * c.w };
+}
+
+static inline b2FloatW b2MulSubW( b2FloatW a, b2FloatW b, b2FloatW c ) // per lane: a - b * c
+{
+	return (b2FloatW){ .x = a.x - b.x * c.x, .y = a.y - b.y * c.y, .z = a.z - b.z * c.z, .w = a.w - b.w * c.w };
+}
+
+static inline b2FloatW b2MinW( b2FloatW a, b2FloatW b ) // per lane: a when a <= b, otherwise b
+{
+	return (b2FloatW){
+		.x = a.x <= b.x ? a.x : b.x,
+		.y = a.y <= b.y ? a.y : b.y,
+		.z = a.z <= b.z ? a.z : b.z,
+		.w = a.w <= b.w ? a.w : b.w,
+	};
+}
+
+static inline b2FloatW b2MaxW( b2FloatW a, b2FloatW b ) // per lane: a when a >= b, otherwise b
+{
+	return (b2FloatW){
+		.x = a.x >= b.x ? a.x : b.x,
+		.y = a.y >= b.y ? a.y : b.y,
+		.z = a.z >= b.z ? a.z : b.z,
+		.w = a.w >= b.w ? a.w : b.w,
+	};
+}
+
+// per lane: a limited to the symmetric range [-b, b]
+static inline b2FloatW b2SymClampW( b2FloatW a, b2FloatW b )
+{
+	return (b2FloatW){
+		.x = b2ClampFloat( a.x, -b.x, b.x ),
+		.y = b2ClampFloat( a.y, -b.y, b.y ),
+		.z = b2ClampFloat( a.z, -b.z, b.z ),
+		.w = b2ClampFloat( a.w, -b.w, b.w ),
+	};
+}
+
+static inline b2FloatW b2OrW( b2FloatW a, b2FloatW b ) // scalar masks are 1.0f / 0.0f: 1.0f where either input lane is non-zero
+{
+	return (b2FloatW){
+		.x = ( a.x != 0.0f || b.x != 0.0f ) ? 1.0f : 0.0f,
+		.y = ( a.y != 0.0f || b.y != 0.0f ) ? 1.0f : 0.0f,
+		.z = ( a.z != 0.0f || b.z != 0.0f ) ? 1.0f : 0.0f,
+		.w = ( a.w != 0.0f || b.w != 0.0f ) ? 1.0f : 0.0f,
+	};
+}
+
+static inline b2FloatW b2GreaterThanW( b2FloatW a, b2FloatW b ) // lane mask: 1.0f where a > b, 0.0f otherwise
+{
+	return (b2FloatW){
+		.x = a.x > b.x ? 1.0f : 0.0f,
+		.y = a.y > b.y ? 1.0f : 0.0f,
+		.z = a.z > b.z ? 1.0f : 0.0f,
+		.w = a.w > b.w ? 1.0f : 0.0f,
+	};
+}
+
+static inline b2FloatW b2EqualsW( b2FloatW a, b2FloatW b ) // lane mask: 1.0f where a == b, 0.0f otherwise
+{
+	return (b2FloatW){
+		.x = a.x == b.x ? 1.0f : 0.0f,
+		.y = a.y == b.y ? 1.0f : 0.0f,
+		.z = a.z == b.z ? 1.0f : 0.0f,
+		.w = a.w == b.w ? 1.0f : 0.0f,
+	};
+}
+
+static inline bool b2AllZeroW( b2FloatW a ) // true only when all four lanes compare equal to 0.0f
+{
+	return !( a.x != 0.0f || a.y != 0.0f || a.z != 0.0f || a.w != 0.0f );
+}
+
+// per lane: picks b where the mask lane is non-zero, otherwise a
+static inline b2FloatW b2BlendW( b2FloatW a, b2FloatW b, b2FloatW mask )
+{
+	return (b2FloatW){
+		.x = mask.x != 0.0f ? b.x : a.x,
+		.y = mask.y != 0.0f ? b.y : a.y,
+		.z = mask.z != 0.0f ? b.z : a.z,
+		.w = mask.w != 0.0f ? b.w : a.w,
+	};
+}
+
+#endif
+
+static inline b2FloatW b2DotW( b2Vec2W a, b2Vec2W b ) // per lane 2D dot product: a.X * b.X + a.Y * b.Y
+{
+	return b2AddW( b2MulW( a.X, b.X ), b2MulW( a.Y, b.Y ) ); // two multiplies and an add, not b2MulAddW
+}
+
+static inline b2FloatW b2CrossW( b2Vec2W a, b2Vec2W b ) // per lane 2D cross product: a.X * b.Y - a.Y * b.X
+{
+	return b2SubW( b2MulW( a.X, b.Y ), b2MulW( a.Y, b.X ) ); // two multiplies and a subtract, not b2MulSubW
+}
+
+static inline b2Vec2W b2RotateVectorW( b2RotW q, b2Vec2W v ) // per lane: ( q.C * v.X - q.S * v.Y, q.S * v.X + q.C * v.Y )
+{
+	return (b2Vec2W){ .X = b2SubW( b2MulW( q.C, v.X ), b2MulW( q.S, v.Y ) ), .Y = b2AddW( b2MulW( q.S, v.X ), b2MulW( q.C, v.Y ) ) };
+}
+
+// Soft contact constraints with sub-stepping support
+// Uses fixed anchors for Jacobians for better behavior on rolling shapes (circles & capsules)
+// http://mmacklin.com/smallsteps.pdf
+// https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf
+
+typedef struct b2ContactConstraintSIMD // B2_SIMD_WIDTH contact constraints stored side by side, one per lane
+{
+	int indexA[B2_SIMD_WIDTH]; // per-lane index for body A; NOTE(review): presumably the body state index handed to b2GatherBodies - confirm
+	int indexB[B2_SIMD_WIDTH]; // per-lane index for body B, same convention as indexA
+
+	b2FloatW invMassA, invMassB;
+	b2FloatW invIA, invIB;
+	b2Vec2W normal;
+	b2FloatW friction;
+	b2FloatW tangentSpeed;
+	b2FloatW rollingResistance;
+	b2FloatW rollingMass;
+	b2FloatW rollingImpulse;
+	b2FloatW biasRate;
+	b2FloatW massScale;
+	b2FloatW impulseScale;
+	b2Vec2W anchorA1, anchorB1; // NOTE(review): the 1 / 2 suffixes look like the first and second manifold point - confirm
+	b2FloatW normalMass1, tangentMass1;
+	b2FloatW baseSeparation1;
+	b2FloatW normalImpulse1;
+	b2FloatW totalNormalImpulse1;
+	b2FloatW tangentImpulse1;
+	b2Vec2W anchorA2, anchorB2;
+	b2FloatW baseSeparation2;
+	b2FloatW normalImpulse2;
+	b2FloatW totalNormalImpulse2;
+	b2FloatW tangentImpulse2;
+	b2FloatW normalMass2, tangentMass2; // note: declared after the impulses here, unlike normalMass1 / tangentMass1
+	b2FloatW restitution;
+	b2FloatW relativeVelocity1, relativeVelocity2;
+} b2ContactConstraintSIMD;
+
+int b2GetContactConstraintSIMDByteCount( void ) // size in bytes of one wide contact constraint
+{
+	return (int)sizeof( struct b2ContactConstraintSIMD );
+}
+
+// wide version of b2BodyState: each member holds one b2BodyState field for every lane (filled by b2GatherBodies)
+typedef struct b2BodyStateW
+{
+	b2Vec2W v; // linearVelocity
+	b2FloatW w; // angularVelocity
+	b2FloatW flags; // flags, carried in a float lane
+	b2Vec2W dp; // deltaPosition
+	b2RotW dq; // deltaRotation
+} b2BodyStateW;
+
+// Custom gather/scatter for each SIMD type
+#if defined( B2_SIMD_AVX2 )
+
+// This is a load and 8x8 transpose: 8 body states (8 floats each) become 8 wide values (one field each)
+static b2BodyStateW b2GatherBodies( const b2BodyState* B2_RESTRICT states, int* B2_RESTRICT indices )
+{
+	_Static_assert( sizeof( b2BodyState ) == 32, "b2BodyState not 32 bytes" );
+	B2_ASSERT( ( (uintptr_t)states & 0x1F ) == 0 ); // the aligned 256-bit loads below need a 32-byte aligned array
+	// b2BodyState b2_identityBodyState = {{0.0f, 0.0f}, 0.0f, 0, {0.0f, 0.0f}, {1.0f, 0.0f}};
+	b2FloatW identity = _mm256_setr_ps( 0.0f, 0.0f, 0.0f, 0, 0.0f, 0.0f, 1.0f, 0.0f ); // used for lanes whose index is B2_NULL_INDEX
+	b2FloatW b0 = indices[0] == B2_NULL_INDEX ? identity : _mm256_load_ps( (float*)( states + indices[0] ) ); // bN = [vx vy w f | dpx dpy dqc dqs]
+	b2FloatW b1 = indices[1] == B2_NULL_INDEX ? identity : _mm256_load_ps( (float*)( states + indices[1] ) );
+	b2FloatW b2 = indices[2] == B2_NULL_INDEX ? identity : _mm256_load_ps( (float*)( states + indices[2] ) );
+	b2FloatW b3 = indices[3] == B2_NULL_INDEX ? identity : _mm256_load_ps( (float*)( states + indices[3] ) );
+	b2FloatW b4 = indices[4] == B2_NULL_INDEX ? identity : _mm256_load_ps( (float*)( states + indices[4] ) );
+	b2FloatW b5 = indices[5] == B2_NULL_INDEX ? identity : _mm256_load_ps( (float*)( states + indices[5] ) );
+	b2FloatW b6 = indices[6] == B2_NULL_INDEX ? identity : _mm256_load_ps( (float*)( states + indices[6] ) );
+	b2FloatW b7 = indices[7] == B2_NULL_INDEX ? identity : _mm256_load_ps( (float*)( states + indices[7] ) );
+
+	b2FloatW t0 = _mm256_unpacklo_ps( b0, b1 ); // [vx0 vx1 vy0 vy1 | dpx0 dpx1 dpy0 dpy1]
+	b2FloatW t1 = _mm256_unpackhi_ps( b0, b1 ); // [w0 w1 f0 f1 | dqc0 dqc1 dqs0 dqs1]
+	b2FloatW t2 = _mm256_unpacklo_ps( b2, b3 ); // as t0 for bodies 2, 3
+	b2FloatW t3 = _mm256_unpackhi_ps( b2, b3 ); // as t1 for bodies 2, 3
+	b2FloatW t4 = _mm256_unpacklo_ps( b4, b5 ); // as t0 for bodies 4, 5
+	b2FloatW t5 = _mm256_unpackhi_ps( b4, b5 ); // as t1 for bodies 4, 5
+	b2FloatW t6 = _mm256_unpacklo_ps( b6, b7 ); // as t0 for bodies 6, 7
+	b2FloatW t7 = _mm256_unpackhi_ps( b6, b7 ); // as t1 for bodies 6, 7
+	b2FloatW tt0 = _mm256_shuffle_ps( t0, t2, _MM_SHUFFLE( 1, 0, 1, 0 ) ); // [vx0 vx1 vx2 vx3 | dpx0 dpx1 dpx2 dpx3]
+	b2FloatW tt1 = _mm256_shuffle_ps( t0, t2, _MM_SHUFFLE( 3, 2, 3, 2 ) ); // [vy0..vy3 | dpy0..dpy3]
+	b2FloatW tt2 = _mm256_shuffle_ps( t1, t3, _MM_SHUFFLE( 1, 0, 1, 0 ) ); // [w0..w3 | dqc0..dqc3]
+	b2FloatW tt3 = _mm256_shuffle_ps( t1, t3, _MM_SHUFFLE( 3, 2, 3, 2 ) ); // [f0..f3 | dqs0..dqs3]
+	b2FloatW tt4 = _mm256_shuffle_ps( t4, t6, _MM_SHUFFLE( 1, 0, 1, 0 ) ); // [vx4..vx7 | dpx4..dpx7]
+	b2FloatW tt5 = _mm256_shuffle_ps( t4, t6, _MM_SHUFFLE( 3, 2, 3, 2 ) ); // [vy4..vy7 | dpy4..dpy7]
+	b2FloatW tt6 = _mm256_shuffle_ps( t5, t7, _MM_SHUFFLE( 1, 0, 1, 0 ) ); // [w4..w7 | dqc4..dqc7]
+	b2FloatW tt7 = _mm256_shuffle_ps( t5, t7, _MM_SHUFFLE( 3, 2, 3, 2 ) ); // [f4..f7 | dqs4..dqs7]
+
+	b2BodyStateW simdBody;
+	simdBody.v.X = _mm256_permute2f128_ps( tt0, tt4, 0x20 ); // 0x20 joins the two low 128-bit halves: vx0..vx7
+	simdBody.v.Y = _mm256_permute2f128_ps( tt1, tt5, 0x20 );
+	simdBody.w = _mm256_permute2f128_ps( tt2, tt6, 0x20 );
+	simdBody.flags = _mm256_permute2f128_ps( tt3, tt7, 0x20 );
+	simdBody.dp.X = _mm256_permute2f128_ps( tt0, tt4, 0x31 ); // 0x31 joins the two high 128-bit halves: dpx0..dpx7
+	simdBody.dp.Y = _mm256_permute2f128_ps( tt1, tt5, 0x31 );
+	simdBody.dq.C = _mm256_permute2f128_ps( tt2, tt6, 0x31 );
+	simdBody.dq.S = _mm256_permute2f128_ps( tt3, tt7, 0x31 );
+	return simdBody;
+}
+
+// This writes everything back to the solver bodies but only the velocities change (inverse transpose of b2GatherBodies)
+static void b2ScatterBodies( b2BodyState* B2_RESTRICT states, int* B2_RESTRICT indices, const b2BodyStateW* B2_RESTRICT simdBody )
+{
+	_Static_assert( sizeof( b2BodyState ) == 32, "b2BodyState not 32 bytes" );
+	B2_ASSERT( ( (uintptr_t)states & 0x1F ) == 0 ); // the aligned 256-bit stores below need a 32-byte aligned array
+	b2FloatW t0 = _mm256_unpacklo_ps( simdBody->v.X, simdBody->v.Y ); // [vx0 vy0 vx1 vy1 | vx4 vy4 vx5 vy5]
+	b2FloatW t1 = _mm256_unpackhi_ps( simdBody->v.X, simdBody->v.Y ); // [vx2 vy2 vx3 vy3 | vx6 vy6 vx7 vy7]
+	b2FloatW t2 = _mm256_unpacklo_ps( simdBody->w, simdBody->flags ); // [w0 f0 w1 f1 | w4 f4 w5 f5]
+	b2FloatW t3 = _mm256_unpackhi_ps( simdBody->w, simdBody->flags ); // [w2 f2 w3 f3 | w6 f6 w7 f7]
+	b2FloatW t4 = _mm256_unpacklo_ps( simdBody->dp.X, simdBody->dp.Y ); // [dpx0 dpy0 dpx1 dpy1 | dpx4 dpy4 dpx5 dpy5]
+	b2FloatW t5 = _mm256_unpackhi_ps( simdBody->dp.X, simdBody->dp.Y ); // [dpx2 dpy2 dpx3 dpy3 | dpx6 dpy6 dpx7 dpy7]
+	b2FloatW t6 = _mm256_unpacklo_ps( simdBody->dq.C, simdBody->dq.S ); // [dqc0 dqs0 dqc1 dqs1 | dqc4 dqs4 dqc5 dqs5]
+	b2FloatW t7 = _mm256_unpackhi_ps( simdBody->dq.C, simdBody->dq.S ); // [dqc2 dqs2 dqc3 dqs3 | dqc6 dqs6 dqc7 dqs7]
+	b2FloatW tt0 = _mm256_shuffle_ps( t0, t2, _MM_SHUFFLE( 1, 0, 1, 0 ) ); // [vx vy w f] of body 0 | body 4
+	b2FloatW tt1 = _mm256_shuffle_ps( t0, t2, _MM_SHUFFLE( 3, 2, 3, 2 ) ); // [vx vy w f] of body 1 | body 5
+	b2FloatW tt2 = _mm256_shuffle_ps( t1, t3, _MM_SHUFFLE( 1, 0, 1, 0 ) ); // [vx vy w f] of body 2 | body 6
+	b2FloatW tt3 = _mm256_shuffle_ps( t1, t3, _MM_SHUFFLE( 3, 2, 3, 2 ) ); // [vx vy w f] of body 3 | body 7
+	b2FloatW tt4 = _mm256_shuffle_ps( t4, t6, _MM_SHUFFLE( 1, 0, 1, 0 ) ); // [dpx dpy dqc dqs] of body 0 | body 4
+	b2FloatW tt5 = _mm256_shuffle_ps( t4, t6, _MM_SHUFFLE( 3, 2, 3, 2 ) ); // [dpx dpy dqc dqs] of body 1 | body 5
+	b2FloatW tt6 = _mm256_shuffle_ps( t5, t7, _MM_SHUFFLE( 1, 0, 1, 0 ) ); // [dpx dpy dqc dqs] of body 2 | body 6
+	b2FloatW tt7 = _mm256_shuffle_ps( t5, t7, _MM_SHUFFLE( 3, 2, 3, 2 ) ); // [dpx dpy dqc dqs] of body 3 | body 7
+
+	// I don't use any dummy body in the body array because this will lead to multithreaded sharing and the
+	// associated cache flushing. Lanes with a null index are therefore skipped instead of stored.
+	if ( indices[0] != B2_NULL_INDEX )
+		_mm256_store_ps( (float*)( states + indices[0] ), _mm256_permute2f128_ps( tt0, tt4, 0x20 ) ); // 0x20: low halves = bodies 0..3
+	if ( indices[1] != B2_NULL_INDEX )
+		_mm256_store_ps( (float*)( states + indices[1] ), _mm256_permute2f128_ps( tt1, tt5, 0x20 ) );
+	if ( indices[2] != B2_NULL_INDEX )
+		_mm256_store_ps( (float*)( states + indices[2] ), _mm256_permute2f128_ps( tt2, tt6, 0x20 ) );
+	if ( indices[3] != B2_NULL_INDEX )
+		_mm256_store_ps( (float*)( states + indices[3] ), _mm256_permute2f128_ps( tt3, tt7, 0x20 ) );
+	if ( indices[4] != B2_NULL_INDEX )
+		_mm256_store_ps( (float*)( states + indices[4] ), _mm256_permute2f128_ps( tt0, tt4, 0x31 ) ); // 0x31: high halves = bodies 4..7
+	if ( indices[5] != B2_NULL_INDEX )
+		_mm256_store_ps( (float*)( states + indices[5] ), _mm256_permute2f128_ps( tt1, tt5, 0x31 ) );
+	if ( indices[6] != B2_NULL_INDEX )
+		_mm256_store_ps( (float*)( states + indices[6] ), _mm256_permute2f128_ps( tt2, tt6, 0x31 ) );
+	if ( indices[7] != B2_NULL_INDEX )
+		_mm256_store_ps( (float*)( states + indices[7] ), _mm256_permute2f128_ps( tt3, tt7, 0x31 ) );
+}
+
+#elif defined( B2_SIMD_NEON )
+
+// This is a load and transpose: 4 body states (two 4-float halves each) become 8 wide values (one field each)
+static b2BodyStateW b2GatherBodies( const b2BodyState* B2_RESTRICT states, int* B2_RESTRICT indices )
+{
+	_Static_assert( sizeof( b2BodyState ) == 32, "b2BodyState not 32 bytes" );
+	B2_ASSERT( ( (uintptr_t)states & 0x1F ) == 0 );
+
+	// [vx vy w flags]
+	b2FloatW identityA = b2ZeroW();
+
+	// [dpx dpy dqc dqs]
+
+	b2FloatW identityB = b2SetW( 0.0f, 0.0f, 1.0f, 0.0f );
+
+	b2FloatW b1a = indices[0] == B2_NULL_INDEX ? identityA : b2LoadW( (float*)( states + indices[0] ) + 0 ); // first half: [vx vy w flags]
+	b2FloatW b1b = indices[0] == B2_NULL_INDEX ? identityB : b2LoadW( (float*)( states + indices[0] ) + 4 ); // second half: [dpx dpy dqc dqs]
+	b2FloatW b2a = indices[1] == B2_NULL_INDEX ? identityA : b2LoadW( (float*)( states + indices[1] ) + 0 );
+	b2FloatW b2b = indices[1] == B2_NULL_INDEX ? identityB : b2LoadW( (float*)( states + indices[1] ) + 4 );
+	b2FloatW b3a = indices[2] == B2_NULL_INDEX ? identityA : b2LoadW( (float*)( states + indices[2] ) + 0 );
+	b2FloatW b3b = indices[2] == B2_NULL_INDEX ? identityB : b2LoadW( (float*)( states + indices[2] ) + 4 );
+	b2FloatW b4a = indices[3] == B2_NULL_INDEX ? identityA : b2LoadW( (float*)( states + indices[3] ) + 0 );
+	b2FloatW b4b = indices[3] == B2_NULL_INDEX ? identityB : b2LoadW( (float*)( states + indices[3] ) + 4 );
+
+	// [vx1 vx3 vy1 vy3]
+	b2FloatW t1a = b2UnpackLoW( b1a, b3a );
+
+	// [vx2 vx4 vy2 vy4]
+	b2FloatW t2a = b2UnpackLoW( b2a, b4a );
+
+	// [w1 w3 f1 f3]
+	b2FloatW t3a = b2UnpackHiW( b1a, b3a );
+
+	// [w2 w4 f2 f4]
+	b2FloatW t4a = b2UnpackHiW( b2a, b4a );
+
+	b2BodyStateW simdBody;
+	simdBody.v.X = b2UnpackLoW( t1a, t2a ); // [vx1 vx2 vx3 vx4]
+	simdBody.v.Y = b2UnpackHiW( t1a, t2a ); // [vy1 vy2 vy3 vy4]
+	simdBody.w = b2UnpackLoW( t3a, t4a ); // [w1 w2 w3 w4]
+	simdBody.flags = b2UnpackHiW( t3a, t4a ); // [f1 f2 f3 f4]
+
+	b2FloatW t1b = b2UnpackLoW( b1b, b3b ); // [dpx1 dpx3 dpy1 dpy3]
+	b2FloatW t2b = b2UnpackLoW( b2b, b4b ); // [dpx2 dpx4 dpy2 dpy4]
+	b2FloatW t3b = b2UnpackHiW( b1b, b3b ); // [dqc1 dqc3 dqs1 dqs3]
+	b2FloatW t4b = b2UnpackHiW( b2b, b4b ); // [dqc2 dqc4 dqs2 dqs4]
+
+	simdBody.dp.X = b2UnpackLoW( t1b, t2b ); // [dpx1 dpx2 dpx3 dpx4]
+	simdBody.dp.Y = b2UnpackHiW( t1b, t2b ); // [dpy1 dpy2 dpy3 dpy4]
+	simdBody.dq.C = b2UnpackLoW( t3b, t4b ); // [dqc1 dqc2 dqc3 dqc4]
+	simdBody.dq.S = b2UnpackHiW( t3b, t4b ); // [dqs1 dqs2 dqs3 dqs4]
+
+	return simdBody;
+}
+
+// This writes only the first four floats [vx vy w flags] back to the solver bodies; dp and dq are never stored
+// https://developer.arm.com/documentation/102107a/0100/Floating-point-4x4-matrix-transposition
+static void b2ScatterBodies( b2BodyState* B2_RESTRICT states, int* B2_RESTRICT indices, const b2BodyStateW* B2_RESTRICT simdBody )
+{
+	_Static_assert( sizeof( b2BodyState ) == 32, "b2BodyState not 32 bytes" );
+	B2_ASSERT( ( (uintptr_t)states & 0x1F ) == 0 );
+
+	//	b2FloatW x = b2SetW(0.0f, 1.0f, 2.0f, 3.0f);
+	//	b2FloatW y = b2SetW(4.0f, 5.0f, 6.0f, 7.0f);
+	//	b2FloatW z = b2SetW(8.0f, 9.0f, 10.0f, 11.0f);
+	//	b2FloatW w = b2SetW(12.0f, 13.0f, 14.0f, 15.0f);
+	//
+	//	float32x4x2_t rr1 = vtrnq_f32( x, y );
+	//	float32x4x2_t rr2 = vtrnq_f32( z, w );
+	//
+	//	float32x4_t b1 = vcombine_f32(vget_low_f32(rr1.val[0]), vget_low_f32(rr2.val[0]));
+	//	float32x4_t b2 = vcombine_f32(vget_low_f32(rr1.val[1]), vget_low_f32(rr2.val[1]));
+	//	float32x4_t b3 = vcombine_f32(vget_high_f32(rr1.val[0]), vget_high_f32(rr2.val[0]));
+	//	float32x4_t b4 = vcombine_f32(vget_high_f32(rr1.val[1]), vget_high_f32(rr2.val[1]));
+
+	// transpose
+	float32x4x2_t r1 = vtrnq_f32( simdBody->v.X, simdBody->v.Y ); // val[0] = [vx1 vy1 vx3 vy3], val[1] = [vx2 vy2 vx4 vy4]
+	float32x4x2_t r2 = vtrnq_f32( simdBody->w, simdBody->flags ); // val[0] = [w1 f1 w3 f3], val[1] = [w2 f2 w4 f4]
+
+	// I don't use any dummy body in the body array because this will lead to multithreaded sharing and the
+	// associated cache flushing.
+	if ( indices[0] != B2_NULL_INDEX )
+	{
+		float32x4_t body1 = vcombine_f32( vget_low_f32( r1.val[0] ), vget_low_f32( r2.val[0] ) ); // [vx1 vy1 w1 f1]
+		b2StoreW( (float*)( states + indices[0] ), body1 );
+	}
+
+	if ( indices[1] != B2_NULL_INDEX )
+	{
+		float32x4_t body2 = vcombine_f32( vget_low_f32( r1.val[1] ), vget_low_f32( r2.val[1] ) ); // [vx2 vy2 w2 f2]
+		b2StoreW( (float*)( states + indices[1] ), body2 );
+	}
+
+	if ( indices[2] != B2_NULL_INDEX )
+	{
+		float32x4_t body3 = vcombine_f32( vget_high_f32( r1.val[0] ), vget_high_f32( r2.val[0] ) ); // [vx3 vy3 w3 f3]
+		b2StoreW( (float*)( states + indices[2] ), body3 );
+	}
+
+	if ( indices[3] != B2_NULL_INDEX )
+	{
+		float32x4_t body4 = vcombine_f32( vget_high_f32( r1.val[1] ), vget_high_f32( r2.val[1] ) ); // [vx4 vy4 w4 f4]
+		b2StoreW( (float*)( states + indices[3] ), body4 );
+	}
+}
+
+#elif defined( B2_SIMD_SSE2 )
+
+// This is a load and transpose: 4 body states (two 4-float halves each) become 8 wide values (one field each)
+static b2BodyStateW b2GatherBodies( const b2BodyState* B2_RESTRICT states, int* B2_RESTRICT indices )
+{
+	_Static_assert( sizeof( b2BodyState ) == 32, "b2BodyState not 32 bytes" );
+	B2_ASSERT( ( (uintptr_t)states & 0x1F ) == 0 ); // also guarantees the 16-byte alignment b2LoadW needs for both halves
+
+	// [vx vy w flags]
+	b2FloatW identityA = b2ZeroW();
+
+	// [dpx dpy dqc dqs]
+	b2FloatW identityB = b2SetW( 0.0f, 0.0f, 1.0f, 0.0f );
+
+	b2FloatW b1a = indices[0] == B2_NULL_INDEX ? identityA : b2LoadW( (float*)( states + indices[0] ) + 0 ); // first half: [vx vy w flags]
+	b2FloatW b1b = indices[0] == B2_NULL_INDEX ? identityB : b2LoadW( (float*)( states + indices[0] ) + 4 ); // second half: [dpx dpy dqc dqs]
+	b2FloatW b2a = indices[1] == B2_NULL_INDEX ? identityA : b2LoadW( (float*)( states + indices[1] ) + 0 );
+	b2FloatW b2b = indices[1] == B2_NULL_INDEX ? identityB : b2LoadW( (float*)( states + indices[1] ) + 4 );
+	b2FloatW b3a = indices[2] == B2_NULL_INDEX ? identityA : b2LoadW( (float*)( states + indices[2] ) + 0 );
+	b2FloatW b3b = indices[2] == B2_NULL_INDEX ? identityB : b2LoadW( (float*)( states + indices[2] ) + 4 );
+	b2FloatW b4a = indices[3] == B2_NULL_INDEX ? identityA : b2LoadW( (float*)( states + indices[3] ) + 0 );
+	b2FloatW b4b = indices[3] == B2_NULL_INDEX ? identityB : b2LoadW( (float*)( states + indices[3] ) + 4 );
+
+	// [vx1 vx3 vy1 vy3]
+	b2FloatW t1a = b2UnpackLoW( b1a, b3a );
+
+	// [vx2 vx4 vy2 vy4]
+	b2FloatW t2a = b2UnpackLoW( b2a, b4a );
+
+	// [w1 w3 f1 f3]
+	b2FloatW t3a = b2UnpackHiW( b1a, b3a );
+
+	// [w2 w4 f2 f4]
+	b2FloatW t4a = b2UnpackHiW( b2a, b4a );
+
+	b2BodyStateW simdBody;
+	simdBody.v.X = b2UnpackLoW( t1a, t2a ); // [vx1 vx2 vx3 vx4]
+	simdBody.v.Y = b2UnpackHiW( t1a, t2a ); // [vy1 vy2 vy3 vy4]
+	simdBody.w = b2UnpackLoW( t3a, t4a ); // [w1 w2 w3 w4]
+	simdBody.flags = b2UnpackHiW( t3a, t4a ); // [f1 f2 f3 f4]
+
+	b2FloatW t1b = b2UnpackLoW( b1b, b3b ); // [dpx1 dpx3 dpy1 dpy3]
+	b2FloatW t2b = b2UnpackLoW( b2b, b4b ); // [dpx2 dpx4 dpy2 dpy4]
+	b2FloatW t3b = b2UnpackHiW( b1b, b3b ); // [dqc1 dqc3 dqs1 dqs3]
+	b2FloatW t4b = b2UnpackHiW( b2b, b4b ); // [dqc2 dqc4 dqs2 dqs4]
+
+	simdBody.dp.X = b2UnpackLoW( t1b, t2b ); // [dpx1 dpx2 dpx3 dpx4]
+	simdBody.dp.Y = b2UnpackHiW( t1b, t2b ); // [dpy1 dpy2 dpy3 dpy4]
+	simdBody.dq.C = b2UnpackLoW( t3b, t4b ); // [dqc1 dqc2 dqc3 dqc4]
+	simdBody.dq.S = b2UnpackHiW( t3b, t4b ); // [dqs1 dqs2 dqs3 dqs4]
+
+	return simdBody;
+}
+
+// This writes only the first four floats [vx vy w flags] back to the solver bodies; dp and dq are never stored
+static void b2ScatterBodies( b2BodyState* B2_RESTRICT states, int* B2_RESTRICT indices, const b2BodyStateW* B2_RESTRICT simdBody )
+{
+	_Static_assert( sizeof( b2BodyState ) == 32, "b2BodyState not 32 bytes" );
+	B2_ASSERT( ( (uintptr_t)states & 0x1F ) == 0 ); // also guarantees the 16-byte alignment b2StoreW needs
+
+	// [vx1 vy1 vx2 vy2]
+	b2FloatW t1 = b2UnpackLoW( simdBody->v.X, simdBody->v.Y );
+	// [vx3 vy3 vx4 vy4]
+	b2FloatW t2 = b2UnpackHiW( simdBody->v.X, simdBody->v.Y );
+	// [w1 f1 w2 f2]
+	b2FloatW t3 = b2UnpackLoW( simdBody->w, simdBody->flags );
+	// [w3 f3 w4 f4]
+	b2FloatW t4 = b2UnpackHiW( simdBody->w, simdBody->flags );
+
+	// I don't use any dummy body in the body array because this will lead to multithreaded sharing and the
+	// associated cache flushing.
+	if ( indices[0] != B2_NULL_INDEX )
+	{
+		// [t1.x t1.y t3.x t3.y] = [vx1 vy1 w1 f1]
+		b2StoreW( (float*)( states + indices[0] ), _mm_shuffle_ps( t1, t3, _MM_SHUFFLE( 1, 0, 1, 0 ) ) );
+	}
+
+	if ( indices[1] != B2_NULL_INDEX )
+	{
+		// [t1.z t1.w t3.z t3.w] = [vx2 vy2 w2 f2]
+		b2StoreW( (float*)( states + indices[1] ), _mm_shuffle_ps( t1, t3, _MM_SHUFFLE( 3, 2, 3, 2 ) ) );
+	}
+
+	if ( indices[2] != B2_NULL_INDEX )
+	{
+		// [t2.x t2.y t4.x t4.y] = [vx3 vy3 w3 f3]
+		b2StoreW( (float*)( states + indices[2] ), _mm_shuffle_ps( t2, t4, _MM_SHUFFLE( 1, 0, 1, 0 ) ) );
+	}
+
+	if ( indices[3] != B2_NULL_INDEX )
+	{
+		// [t2.z t2.w t4.z t4.w] = [vx4 vy4 w4 f4]
+		b2StoreW( (float*)( states + indices[3] ), _mm_shuffle_ps( t2, t4, _MM_SHUFFLE( 3, 2, 3, 2 ) ) );
+	}
+}
+
+#else
+
+// Scalar gather: copies four body states and regroups their fields into 4-lane wide values
+static b2BodyStateW b2GatherBodies( const b2BodyState* B2_RESTRICT states, int* B2_RESTRICT indices )
+{
+	// a null index selects the identity state instead of reading from the array
+	const b2BodyState identity = b2_identityBodyState;
+	b2BodyState lane[4];
+	for ( int k = 0; k < 4; ++k )
+	{
+		lane[k] = indices[k] == B2_NULL_INDEX ? identity : states[indices[k]];
+	}
+
+	return (b2BodyStateW){
+		.v.X = { lane[0].linearVelocity.x, lane[1].linearVelocity.x, lane[2].linearVelocity.x, lane[3].linearVelocity.x },
+		.v.Y = { lane[0].linearVelocity.y, lane[1].linearVelocity.y, lane[2].linearVelocity.y, lane[3].linearVelocity.y },
+		.w = { lane[0].angularVelocity, lane[1].angularVelocity, lane[2].angularVelocity, lane[3].angularVelocity },
+		.flags = { (float)lane[0].flags, (float)lane[1].flags, (float)lane[2].flags, (float)lane[3].flags },
+		.dp.X = { lane[0].deltaPosition.x, lane[1].deltaPosition.x, lane[2].deltaPosition.x, lane[3].deltaPosition.x },
+		.dp.Y = { lane[0].deltaPosition.y, lane[1].deltaPosition.y, lane[2].deltaPosition.y, lane[3].deltaPosition.y },
+		.dq.C = { lane[0].deltaRotation.c, lane[1].deltaRotation.c, lane[2].deltaRotation.c, lane[3].deltaRotation.c },
+		.dq.S = { lane[0].deltaRotation.s, lane[1].deltaRotation.s, lane[2].deltaRotation.s, lane[3].deltaRotation.s },
+	};
+}
+
+// Scalar scatter: stores each lane's linear and angular velocity into its body state, skipping null indices
+static void b2ScatterBodies( b2BodyState* B2_RESTRICT states, int* B2_RESTRICT indices, const b2BodyStateW* B2_RESTRICT simdBody )
+{
+	// todo somehow skip writing to kinematic bodies
+
+	if ( indices[0] != B2_NULL_INDEX )
+	{
+		// lane 0 lives in the .x member of each wide value
+		states[indices[0]].linearVelocity.x = simdBody->v.X.x;
+		states[indices[0]].linearVelocity.y = simdBody->v.Y.x;
+		states[indices[0]].angularVelocity = simdBody->w.x;
+	}
+
+	if ( indices[1] != B2_NULL_INDEX )
+	{
+		// lane 1 lives in the .y member of each wide value
+		states[indices[1]].linearVelocity.x = simdBody->v.X.y;
+		states[indices[1]].linearVelocity.y = simdBody->v.Y.y;
+		states[indices[1]].angularVelocity = simdBody->w.y;
+	}
+
+	if ( indices[2] != B2_NULL_INDEX )
+	{
+		// lane 2 lives in the .z member of each wide value
+		states[indices[2]].linearVelocity.x = simdBody->v.X.z;
+		states[indices[2]].linearVelocity.y = simdBody->v.Y.z;
+		states[indices[2]].angularVelocity = simdBody->w.z;
+	}
+
+	if ( indices[3] != B2_NULL_INDEX )
+	{
+		// lane 3 lives in the .w member of each wide value
+		states[indices[3]].linearVelocity.x = simdBody->v.X.w;
+		states[indices[3]].linearVelocity.y = simdBody->v.Y.w;
+		states[indices[3]].angularVelocity = simdBody->w.w;
+	}
+}
+
+#endif
+
+void b2PrepareContactsTask( int startIndex, int endIndex, b2StepContext* context ) // fills wide constraints [startIndex, endIndex), B2_SIMD_WIDTH contacts per constraint
+{
+	b2TracyCZoneNC( prepare_contact, "Prepare Contact", b2_colorYellow, true );
+	b2World* world = context->world;
+	b2ContactSim** contacts = context->contacts;
+	b2ContactConstraintSIMD* constraints = context->simdContactConstraints;
+	b2BodyState* awakeStates = context->states;
+#if B2_VALIDATE
+	b2Body* bodies = world->bodies.data;
+#endif
+
+	// Stiffer for static contacts to avoid bodies getting pushed through the ground
+	b2Softness contactSoftness = context->contactSoftness;
+	b2Softness staticSoftness = context->staticSoftness;
+
+	float warmStartScale = world->enableWarmStarting ? 1.0f : 0.0f; // a zero scale wipes the impulses carried over in the manifold
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		b2ContactConstraintSIMD* constraint = constraints + i;
+
+		for ( int j = 0; j < B2_SIMD_WIDTH; ++j )
+		{
+			b2ContactSim* contactSim = contacts[B2_SIMD_WIDTH * i + j]; // lane j of constraint i maps to this slot of the flat contact array
+
+			if ( contactSim != NULL ) // NULL marks an unused lane, handled by the else branch below
+			{
+				const b2Manifold* manifold = &contactSim->manifold;
+
+				int indexA = contactSim->bodySimIndexA;
+				int indexB = contactSim->bodySimIndexB;
+
+#if B2_VALIDATE
+				b2Body* bodyA = bodies + contactSim->bodyIdA;
+				int validIndexA = bodyA->setIndex == b2_awakeSet ? bodyA->localIndex : B2_NULL_INDEX;
+				b2Body* bodyB = bodies + contactSim->bodyIdB;
+				int validIndexB = bodyB->setIndex == b2_awakeSet ? bodyB->localIndex : B2_NULL_INDEX;
+
+				B2_ASSERT( indexA == validIndexA );
+				B2_ASSERT( indexB == validIndexB );
+#endif
+				constraint->indexA[j] = indexA;
+				constraint->indexB[j] = indexB;
+
+				b2Vec2 vA = b2Vec2_zero; // velocities stay zero when body A has no awake state
+				float wA = 0.0f;
+				float mA = contactSim->invMassA;
+				float iA = contactSim->invIA;
+				if ( indexA != B2_NULL_INDEX )
+				{
+					b2BodyState* stateA = awakeStates + indexA;
+					vA = stateA->linearVelocity;
+					wA = stateA->angularVelocity;
+				}
+
+				b2Vec2 vB = b2Vec2_zero; // velocities stay zero when body B has no awake state
+				float wB = 0.0f;
+				float mB = contactSim->invMassB;
+				float iB = contactSim->invIB;
+				if ( indexB != B2_NULL_INDEX )
+				{
+					b2BodyState* stateB = awakeStates + indexB;
+					vB = stateB->linearVelocity;
+					wB = stateB->angularVelocity;
+				}
+
+				( (float*)&constraint->invMassA )[j] = mA; // wide fields are written one lane at a time through a float pointer
+				( (float*)&constraint->invMassB )[j] = mB;
+				( (float*)&constraint->invIA )[j] = iA;
+				( (float*)&constraint->invIB )[j] = iB;
+
+				{
+					float k = iA + iB;
+					( (float*)&constraint->rollingMass )[j] = k > 0.0f ? 1.0f / k : 0.0f; // inverse of the summed inverse inertias, zero when the sum is not positive
+				}
+
+				b2Softness soft = ( indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX ) ? staticSoftness : contactSoftness; // static softness when either body has no awake index
+
+				b2Vec2 normal = manifold->normal;
+				( (float*)&constraint->normal.X )[j] = normal.x;
+				( (float*)&constraint->normal.Y )[j] = normal.y;
+
+				( (float*)&constraint->friction )[j] = contactSim->friction;
+				( (float*)&constraint->tangentSpeed )[j] = contactSim->tangentSpeed;
+				( (float*)&constraint->restitution )[j] = contactSim->restitution;
+				( (float*)&constraint->rollingResistance )[j] = contactSim->rollingResistance;
+				( (float*)&constraint->rollingImpulse )[j] = warmStartScale * manifold->rollingImpulse;
+
+				( (float*)&constraint->biasRate )[j] = soft.biasRate;
+				( (float*)&constraint->massScale )[j] = soft.massScale;
+				( (float*)&constraint->impulseScale )[j] = soft.impulseScale;
+
+				b2Vec2 tangent = b2RightPerp( normal );
+
+				{
+					const b2ManifoldPoint* mp = manifold->points + 0; // first manifold point is always prepared
+
+					b2Vec2 rA = mp->anchorA;
+					b2Vec2 rB = mp->anchorB;
+
+					( (float*)&constraint->anchorA1.X )[j] = rA.x;
+					( (float*)&constraint->anchorA1.Y )[j] = rA.y;
+					( (float*)&constraint->anchorB1.X )[j] = rB.x;
+					( (float*)&constraint->anchorB1.Y )[j] = rB.y;
+
+					( (float*)&constraint->baseSeparation1 )[j] = mp->separation - b2Dot( b2Sub( rB, rA ), normal ); // separation minus the anchor offset along the normal
+
+					( (float*)&constraint->normalImpulse1 )[j] = warmStartScale * mp->normalImpulse;
+					( (float*)&constraint->tangentImpulse1 )[j] = warmStartScale * mp->tangentImpulse;
+					( (float*)&constraint->totalNormalImpulse1 )[j] = 0.0f; // running total starts at zero for this step
+
+					float rnA = b2Cross( rA, normal );
+					float rnB = b2Cross( rB, normal );
+					float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB;
+					( (float*)&constraint->normalMass1 )[j] = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; // zero mass instead of dividing by a non-positive sum
+
+					float rtA = b2Cross( rA, tangent );
+					float rtB = b2Cross( rB, tangent );
+					float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB;
+					( (float*)&constraint->tangentMass1 )[j] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f;
+
+					// relative normal velocity at the anchors, saved for the restitution pass
+					b2Vec2 vrA = b2Add( vA, b2CrossSV( wA, rA ) );
+					b2Vec2 vrB = b2Add( vB, b2CrossSV( wB, rB ) );
+					( (float*)&constraint->relativeVelocity1 )[j] = b2Dot( normal, b2Sub( vrB, vrA ) );
+				}
+
+				int pointCount = manifold->pointCount;
+				B2_ASSERT( 0 < pointCount && pointCount <= 2 );
+
+				if ( pointCount == 2 )
+				{
+					const b2ManifoldPoint* mp = manifold->points + 1; // same setup as point 1, written to the *2 fields
+
+					b2Vec2 rA = mp->anchorA;
+					b2Vec2 rB = mp->anchorB;
+
+					( (float*)&constraint->anchorA2.X )[j] = rA.x;
+					( (float*)&constraint->anchorA2.Y )[j] = rA.y;
+					( (float*)&constraint->anchorB2.X )[j] = rB.x;
+					( (float*)&constraint->anchorB2.Y )[j] = rB.y;
+
+					( (float*)&constraint->baseSeparation2 )[j] = mp->separation - b2Dot( b2Sub( rB, rA ), normal );
+
+					( (float*)&constraint->normalImpulse2 )[j] = warmStartScale * mp->normalImpulse;
+					( (float*)&constraint->tangentImpulse2 )[j] = warmStartScale * mp->tangentImpulse;
+					( (float*)&constraint->totalNormalImpulse2 )[j] = 0.0f;
+
+					float rnA = b2Cross( rA, normal );
+					float rnB = b2Cross( rB, normal );
+					float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB;
+					( (float*)&constraint->normalMass2 )[j] = kNormal > 0.0f ? 1.0f / kNormal : 0.0f;
+
+					float rtA = b2Cross( rA, tangent );
+					float rtB = b2Cross( rB, tangent );
+					float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB;
+					( (float*)&constraint->tangentMass2 )[j] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f;
+
+					// relative normal velocity at the anchors, saved for the restitution pass
+					b2Vec2 vrA = b2Add( vA, b2CrossSV( wA, rA ) );
+					b2Vec2 vrB = b2Add( vB, b2CrossSV( wB, rB ) );
+					( (float*)&constraint->relativeVelocity2 )[j] = b2Dot( normal, b2Sub( vrB, vrA ) );
+				}
+				else
+				{
+					// single-point manifold: dummy second point that has no effect (zero masses and impulses)
+					( (float*)&constraint->baseSeparation2 )[j] = 0.0f;
+					( (float*)&constraint->normalImpulse2 )[j] = 0.0f;
+					( (float*)&constraint->tangentImpulse2 )[j] = 0.0f;
+					( (float*)&constraint->totalNormalImpulse2 )[j] = 0.0f;
+					( (float*)&constraint->anchorA2.X )[j] = 0.0f;
+					( (float*)&constraint->anchorA2.Y )[j] = 0.0f;
+					( (float*)&constraint->anchorB2.X )[j] = 0.0f;
+					( (float*)&constraint->anchorB2.Y )[j] = 0.0f;
+					( (float*)&constraint->normalMass2 )[j] = 0.0f;
+					( (float*)&constraint->tangentMass2 )[j] = 0.0f;
+					( (float*)&constraint->relativeVelocity2 )[j] = 0.0f;
+				}
+			}
+			else
+			{
+				// SIMD remainder: unused lane, so both indices are null and every field is zeroed
+				constraint->indexA[j] = B2_NULL_INDEX;
+				constraint->indexB[j] = B2_NULL_INDEX;
+
+				( (float*)&constraint->invMassA )[j] = 0.0f;
+				( (float*)&constraint->invMassB )[j] = 0.0f;
+				( (float*)&constraint->invIA )[j] = 0.0f;
+				( (float*)&constraint->invIB )[j] = 0.0f;
+
+				( (float*)&constraint->normal.X )[j] = 0.0f;
+				( (float*)&constraint->normal.Y )[j] = 0.0f;
+				( (float*)&constraint->friction )[j] = 0.0f;
+				( (float*)&constraint->tangentSpeed )[j] = 0.0f;
+				( (float*)&constraint->rollingResistance )[j] = 0.0f;
+				( (float*)&constraint->rollingMass )[j] = 0.0f;
+				( (float*)&constraint->rollingImpulse )[j] = 0.0f;
+				( (float*)&constraint->biasRate )[j] = 0.0f;
+				( (float*)&constraint->massScale )[j] = 0.0f;
+				( (float*)&constraint->impulseScale )[j] = 0.0f;
+
+				( (float*)&constraint->anchorA1.X )[j] = 0.0f;
+				( (float*)&constraint->anchorA1.Y )[j] = 0.0f;
+				( (float*)&constraint->anchorB1.X )[j] = 0.0f;
+				( (float*)&constraint->anchorB1.Y )[j] = 0.0f;
+				( (float*)&constraint->baseSeparation1 )[j] = 0.0f;
+				( (float*)&constraint->normalImpulse1 )[j] = 0.0f;
+				( (float*)&constraint->tangentImpulse1 )[j] = 0.0f;
+				( (float*)&constraint->totalNormalImpulse1 )[j] = 0.0f;
+				( (float*)&constraint->normalMass1 )[j] = 0.0f;
+				( (float*)&constraint->tangentMass1 )[j] = 0.0f;
+
+				( (float*)&constraint->anchorA2.X )[j] = 0.0f;
+				( (float*)&constraint->anchorA2.Y )[j] = 0.0f;
+				( (float*)&constraint->anchorB2.X )[j] = 0.0f;
+				( (float*)&constraint->anchorB2.Y )[j] = 0.0f;
+				( (float*)&constraint->baseSeparation2 )[j] = 0.0f;
+				( (float*)&constraint->normalImpulse2 )[j] = 0.0f;
+				( (float*)&constraint->tangentImpulse2 )[j] = 0.0f;
+				( (float*)&constraint->totalNormalImpulse2 )[j] = 0.0f;
+				( (float*)&constraint->normalMass2 )[j] = 0.0f;
+				( (float*)&constraint->tangentMass2 )[j] = 0.0f;
+
+				( (float*)&constraint->restitution )[j] = 0.0f;
+				( (float*)&constraint->relativeVelocity1 )[j] = 0.0f;
+				( (float*)&constraint->relativeVelocity2 )[j] = 0.0f;
+			}
+		}
+	}
+
+	b2TracyCZoneEnd( prepare_contact );
+}
+
+void b2WarmStartContactsTask( int startIndex, int endIndex, b2StepContext* context, int colorIndex ) // applies stored impulses for constraints [startIndex, endIndex) of one graph color
+{
+	b2TracyCZoneNC( warm_start_contact, "Warm Start", b2_colorGreen, true );
+
+	b2BodyState* states = context->states;
+	b2ContactConstraintSIMD* constraints = context->graph->colors[colorIndex].simdConstraints;
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		b2ContactConstraintSIMD* c = constraints + i;
+		b2BodyStateW bA = b2GatherBodies( states, c->indexA );
+		b2BodyStateW bB = b2GatherBodies( states, c->indexB );
+
+		b2FloatW tangentX = c->normal.Y; // tangent is ( normal.Y, -normal.X )
+		b2FloatW tangentY = b2SubW( b2ZeroW(), c->normal.X );
+
+		{
+			// fixed anchors, first point
+			b2Vec2W rA = c->anchorA1;
+			b2Vec2W rB = c->anchorB1;
+
+			b2Vec2W P; // P = normalImpulse1 * normal + tangentImpulse1 * tangent
+			P.X = b2AddW( b2MulW( c->normalImpulse1, c->normal.X ), b2MulW( c->tangentImpulse1, tangentX ) );
+			P.Y = b2AddW( b2MulW( c->normalImpulse1, c->normal.Y ), b2MulW( c->tangentImpulse1, tangentY ) );
+			bA.w = b2MulSubW( bA.w, c->invIA, b2CrossW( rA, P ) ); // A side uses b2MulSubW, B side b2MulAddW; presumably a -/+ b * c — confirm in the math header
+			bA.v.X = b2MulSubW( bA.v.X, c->invMassA, P.X );
+			bA.v.Y = b2MulSubW( bA.v.Y, c->invMassA, P.Y );
+			bB.w = b2MulAddW( bB.w, c->invIB, b2CrossW( rB, P ) );
+			bB.v.X = b2MulAddW( bB.v.X, c->invMassB, P.X );
+			bB.v.Y = b2MulAddW( bB.v.Y, c->invMassB, P.Y );
+		}
+
+		{
+			// fixed anchors, second point
+			b2Vec2W rA = c->anchorA2;
+			b2Vec2W rB = c->anchorB2;
+
+			b2Vec2W P; // P = normalImpulse2 * normal + tangentImpulse2 * tangent
+			P.X = b2AddW( b2MulW( c->normalImpulse2, c->normal.X ), b2MulW( c->tangentImpulse2, tangentX ) );
+			P.Y = b2AddW( b2MulW( c->normalImpulse2, c->normal.Y ), b2MulW( c->tangentImpulse2, tangentY ) );
+			bA.w = b2MulSubW( bA.w, c->invIA, b2CrossW( rA, P ) );
+			bA.v.X = b2MulSubW( bA.v.X, c->invMassA, P.X );
+			bA.v.Y = b2MulSubW( bA.v.Y, c->invMassA, P.Y );
+			bB.w = b2MulAddW( bB.w, c->invIB, b2CrossW( rB, P ) );
+			bB.v.X = b2MulAddW( bB.v.X, c->invMassB, P.X );
+			bB.v.Y = b2MulAddW( bB.v.Y, c->invMassB, P.Y );
+		}
+
+		bA.w = b2MulSubW( bA.w, c->invIA, c->rollingImpulse ); // stored rolling impulse touches angular velocity only
+		bB.w = b2MulAddW( bB.w, c->invIB, c->rollingImpulse );
+
+		b2ScatterBodies( states, c->indexA, &bA );
+		b2ScatterBodies( states, c->indexB, &bB );
+	}
+
+	b2TracyCZoneEnd( warm_start_contact );
+}
+
+void b2SolveContactsTask( int startIndex, int endIndex, b2StepContext* context, int colorIndex, bool useBias ) // one solver pass over constraints [startIndex, endIndex) of one graph color
+{
+	b2TracyCZoneNC( solve_contact, "Solve Contact", b2_colorAliceBlue, true );
+
+	b2BodyState* states = context->states;
+	b2ContactConstraintSIMD* constraints = context->graph->colors[colorIndex].simdConstraints;
+	b2FloatW inv_h = b2SplatW( context->inv_h );
+	b2FloatW minBiasVel = b2SplatW( -context->world->maxContactPushSpeed ); // negated push speed, used as the floor of the soft bias
+	b2FloatW oneW = b2SplatW( 1.0f );
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		b2ContactConstraintSIMD* c = constraints + i;
+
+		b2BodyStateW bA = b2GatherBodies( states, c->indexA );
+		b2BodyStateW bB = b2GatherBodies( states, c->indexB );
+
+		b2FloatW biasRate, massScale, impulseScale;
+		if ( useBias )
+		{
+			biasRate = c->biasRate; // softness coefficients stored per lane by the prepare task
+			massScale = c->massScale;
+			impulseScale = c->impulseScale;
+		}
+		else
+		{
+			biasRate = b2ZeroW(); // without bias: zero bias rate, unit mass scale, zero impulse scale
+			massScale = oneW;
+			impulseScale = b2ZeroW();
+		}
+
+		b2FloatW totalNormalImpulse = b2ZeroW(); // sum of both points' normal impulses, used to cap rolling resistance below
+
+		b2Vec2W dp = { b2SubW( bB.dp.X, bA.dp.X ), b2SubW( bB.dp.Y, bA.dp.Y ) }; // position delta of B relative to A
+
+		// point1 non-penetration constraint
+		{
+			// Fixed anchors for impulses
+			b2Vec2W rA = c->anchorA1;
+			b2Vec2W rB = c->anchorB1;
+
+			// Moving anchors for current separation
+			b2Vec2W rsA = b2RotateVectorW( bA.dq, rA );
+			b2Vec2W rsB = b2RotateVectorW( bB.dq, rB );
+
+			// compute current separation
+			// this is subject to round-off error if the anchor is far from the body center of mass
+			b2Vec2W ds = { b2AddW( dp.X, b2SubW( rsB.X, rsA.X ) ), b2AddW( dp.Y, b2SubW( rsB.Y, rsA.Y ) ) };
+			b2FloatW s = b2AddW( b2DotW( c->normal, ds ), c->baseSeparation1 );
+
+			// Apply speculative bias if separation is greater than zero, otherwise apply soft constraint bias
+			// The minBiasVel is meant to limit stiffness, not increase it.
+			b2FloatW mask = b2GreaterThanW( s, b2ZeroW() );
+			b2FloatW specBias = b2MulW( s, inv_h );
+			b2FloatW softBias = b2MaxW( b2MulW( biasRate, s ), minBiasVel );
+
+			// todo try b2MaxW(softBias, specBias);
+			b2FloatW bias = b2BlendW( softBias, specBias, mask ); // specBias in lanes with s > 0, softBias elsewhere
+
+			b2FloatW pointMassScale = b2BlendW( massScale, oneW, mask ); // separated lanes use unit mass scale and zero impulse scale
+			b2FloatW pointImpulseScale = b2BlendW( impulseScale, b2ZeroW(), mask );
+
+			// Relative velocity at contact
+			b2FloatW dvx = b2SubW( b2SubW( bB.v.X, b2MulW( bB.w, rB.Y ) ), b2SubW( bA.v.X, b2MulW( bA.w, rA.Y ) ) );
+			b2FloatW dvy = b2SubW( b2AddW( bB.v.Y, b2MulW( bB.w, rB.X ) ), b2AddW( bA.v.Y, b2MulW( bA.w, rA.X ) ) );
+			b2FloatW vn = b2AddW( b2MulW( dvx, c->normal.X ), b2MulW( dvy, c->normal.Y ) );
+
+			// Compute normal impulse
+			b2FloatW negImpulse = b2AddW( b2MulW( c->normalMass1, b2MulW( pointMassScale, b2AddW( vn, bias ) ) ),
+										  b2MulW( pointImpulseScale, c->normalImpulse1 ) );
+
+			// Clamp the accumulated impulse
+			b2FloatW newImpulse = b2MaxW( b2SubW( c->normalImpulse1, negImpulse ), b2ZeroW() ); // accumulated normal impulse is kept non-negative
+			b2FloatW impulse = b2SubW( newImpulse, c->normalImpulse1 ); // only the change since the last pass is applied
+			c->normalImpulse1 = newImpulse;
+			c->totalNormalImpulse1 = b2AddW( c->totalNormalImpulse1, newImpulse );
+
+			totalNormalImpulse = b2AddW( totalNormalImpulse, newImpulse );
+
+			// Apply contact impulse
+			b2FloatW Px = b2MulW( impulse, c->normal.X );
+			b2FloatW Py = b2MulW( impulse, c->normal.Y );
+
+			bA.v.X = b2MulSubW( bA.v.X, c->invMassA, Px );
+			bA.v.Y = b2MulSubW( bA.v.Y, c->invMassA, Py );
+			bA.w = b2MulSubW( bA.w, c->invIA, b2SubW( b2MulW( rA.X, Py ), b2MulW( rA.Y, Px ) ) );
+
+			bB.v.X = b2MulAddW( bB.v.X, c->invMassB, Px );
+			bB.v.Y = b2MulAddW( bB.v.Y, c->invMassB, Py );
+			bB.w = b2MulAddW( bB.w, c->invIB, b2SubW( b2MulW( rB.X, Py ), b2MulW( rB.Y, Px ) ) );
+		}
+
+		// second point non-penetration constraint
+		{
+			// moving anchors for current separation
+			b2Vec2W rsA = b2RotateVectorW( bA.dq, c->anchorA2 );
+			b2Vec2W rsB = b2RotateVectorW( bB.dq, c->anchorB2 );
+
+			// compute current separation
+			b2Vec2W ds = { b2AddW( dp.X, b2SubW( rsB.X, rsA.X ) ), b2AddW( dp.Y, b2SubW( rsB.Y, rsA.Y ) ) };
+			b2FloatW s = b2AddW( b2DotW( c->normal, ds ), c->baseSeparation2 );
+
+			b2FloatW mask = b2GreaterThanW( s, b2ZeroW() );
+			b2FloatW specBias = b2MulW( s, inv_h );
+			b2FloatW softBias = b2MaxW( b2MulW( biasRate, s ), minBiasVel );
+			b2FloatW bias = b2BlendW( softBias, specBias, mask ); // specBias in lanes with s > 0, softBias elsewhere
+
+			b2FloatW pointMassScale = b2BlendW( massScale, oneW, mask );
+			b2FloatW pointImpulseScale = b2BlendW( impulseScale, b2ZeroW(), mask );
+
+			// fixed anchors for Jacobians
+			b2Vec2W rA = c->anchorA2;
+			b2Vec2W rB = c->anchorB2;
+
+			// Relative velocity at contact
+			b2FloatW dvx = b2SubW( b2SubW( bB.v.X, b2MulW( bB.w, rB.Y ) ), b2SubW( bA.v.X, b2MulW( bA.w, rA.Y ) ) );
+			b2FloatW dvy = b2SubW( b2AddW( bB.v.Y, b2MulW( bB.w, rB.X ) ), b2AddW( bA.v.Y, b2MulW( bA.w, rA.X ) ) );
+			b2FloatW vn = b2AddW( b2MulW( dvx, c->normal.X ), b2MulW( dvy, c->normal.Y ) );
+
+			// Compute normal impulse
+			b2FloatW negImpulse = b2AddW( b2MulW( c->normalMass2, b2MulW( pointMassScale, b2AddW( vn, bias ) ) ),
+										  b2MulW( pointImpulseScale, c->normalImpulse2 ) );
+
+			// Clamp the accumulated impulse
+			b2FloatW newImpulse = b2MaxW( b2SubW( c->normalImpulse2, negImpulse ), b2ZeroW() );
+			b2FloatW impulse = b2SubW( newImpulse, c->normalImpulse2 );
+			c->normalImpulse2 = newImpulse;
+			c->totalNormalImpulse2 = b2AddW( c->totalNormalImpulse2, newImpulse );
+
+			totalNormalImpulse = b2AddW( totalNormalImpulse, newImpulse );
+
+			// Apply contact impulse
+			b2FloatW Px = b2MulW( impulse, c->normal.X );
+			b2FloatW Py = b2MulW( impulse, c->normal.Y );
+
+			bA.v.X = b2MulSubW( bA.v.X, c->invMassA, Px );
+			bA.v.Y = b2MulSubW( bA.v.Y, c->invMassA, Py );
+			bA.w = b2MulSubW( bA.w, c->invIA, b2SubW( b2MulW( rA.X, Py ), b2MulW( rA.Y, Px ) ) );
+
+			bB.v.X = b2MulAddW( bB.v.X, c->invMassB, Px );
+			bB.v.Y = b2MulAddW( bB.v.Y, c->invMassB, Py );
+			bB.w = b2MulAddW( bB.w, c->invIB, b2SubW( b2MulW( rB.X, Py ), b2MulW( rB.Y, Px ) ) );
+		}
+
+		b2FloatW tangentX = c->normal.Y; // tangent is ( normal.Y, -normal.X )
+		b2FloatW tangentY = b2SubW( b2ZeroW(), c->normal.X );
+
+		// point 1 friction constraint
+		{
+			// fixed anchors for Jacobians
+			b2Vec2W rA = c->anchorA1;
+			b2Vec2W rB = c->anchorB1;
+
+			// Relative velocity at contact
+			b2FloatW dvx = b2SubW( b2SubW( bB.v.X, b2MulW( bB.w, rB.Y ) ), b2SubW( bA.v.X, b2MulW( bA.w, rA.Y ) ) );
+			b2FloatW dvy = b2SubW( b2AddW( bB.v.Y, b2MulW( bB.w, rB.X ) ), b2AddW( bA.v.Y, b2MulW( bA.w, rA.X ) ) );
+			b2FloatW vt = b2AddW( b2MulW( dvx, tangentX ), b2MulW( dvy, tangentY ) );
+
+			// Tangent speed (conveyor belt)
+			vt = b2SubW( vt, c->tangentSpeed );
+
+			// Compute tangent force
+			b2FloatW negImpulse = b2MulW( c->tangentMass1, vt );
+
+			// Clamp the accumulated force
+			b2FloatW maxFriction = b2MulW( c->friction, c->normalImpulse1 ); // limit uses the normal impulse just solved for this point
+			b2FloatW newImpulse = b2SubW( c->tangentImpulse1, negImpulse );
+			newImpulse = b2MaxW( b2SubW( b2ZeroW(), maxFriction ), b2MinW( newImpulse, maxFriction ) ); // clamp to [-maxFriction, maxFriction]
+			b2FloatW impulse = b2SubW( newImpulse, c->tangentImpulse1 );
+			c->tangentImpulse1 = newImpulse;
+
+			// Apply contact impulse
+			b2FloatW Px = b2MulW( impulse, tangentX );
+			b2FloatW Py = b2MulW( impulse, tangentY );
+
+			bA.v.X = b2MulSubW( bA.v.X, c->invMassA, Px );
+			bA.v.Y = b2MulSubW( bA.v.Y, c->invMassA, Py );
+			bA.w = b2MulSubW( bA.w, c->invIA, b2SubW( b2MulW( rA.X, Py ), b2MulW( rA.Y, Px ) ) );
+
+			bB.v.X = b2MulAddW( bB.v.X, c->invMassB, Px );
+			bB.v.Y = b2MulAddW( bB.v.Y, c->invMassB, Py );
+			bB.w = b2MulAddW( bB.w, c->invIB, b2SubW( b2MulW( rB.X, Py ), b2MulW( rB.Y, Px ) ) );
+		}
+
+		// second point friction constraint
+		{
+			// fixed anchors for Jacobians
+			b2Vec2W rA = c->anchorA2;
+			b2Vec2W rB = c->anchorB2;
+
+			// Relative velocity at contact
+			b2FloatW dvx = b2SubW( b2SubW( bB.v.X, b2MulW( bB.w, rB.Y ) ), b2SubW( bA.v.X, b2MulW( bA.w, rA.Y ) ) );
+			b2FloatW dvy = b2SubW( b2AddW( bB.v.Y, b2MulW( bB.w, rB.X ) ), b2AddW( bA.v.Y, b2MulW( bA.w, rA.X ) ) );
+			b2FloatW vt = b2AddW( b2MulW( dvx, tangentX ), b2MulW( dvy, tangentY ) );
+
+			// Tangent speed (conveyor belt)
+			vt = b2SubW( vt, c->tangentSpeed );
+
+			// Compute tangent force
+			b2FloatW negImpulse = b2MulW( c->tangentMass2, vt );
+
+			// Clamp the accumulated force
+			b2FloatW maxFriction = b2MulW( c->friction, c->normalImpulse2 );
+			b2FloatW newImpulse = b2SubW( c->tangentImpulse2, negImpulse );
+			newImpulse = b2MaxW( b2SubW( b2ZeroW(), maxFriction ), b2MinW( newImpulse, maxFriction ) ); // clamp to [-maxFriction, maxFriction]
+			b2FloatW impulse = b2SubW( newImpulse, c->tangentImpulse2 );
+			c->tangentImpulse2 = newImpulse;
+
+			// Apply contact impulse
+			b2FloatW Px = b2MulW( impulse, tangentX );
+			b2FloatW Py = b2MulW( impulse, tangentY );
+
+			bA.v.X = b2MulSubW( bA.v.X, c->invMassA, Px );
+			bA.v.Y = b2MulSubW( bA.v.Y, c->invMassA, Py );
+			bA.w = b2MulSubW( bA.w, c->invIA, b2SubW( b2MulW( rA.X, Py ), b2MulW( rA.Y, Px ) ) );
+
+			bB.v.X = b2MulAddW( bB.v.X, c->invMassB, Px );
+			bB.v.Y = b2MulAddW( bB.v.Y, c->invMassB, Py );
+			bB.w = b2MulAddW( bB.w, c->invIB, b2SubW( b2MulW( rB.X, Py ), b2MulW( rB.Y, Px ) ) );
+		}
+
+		// Rolling resistance
+		{
+			b2FloatW deltaLambda = b2MulW( c->rollingMass, b2SubW( bA.w, bB.w ) );
+			b2FloatW lambda = c->rollingImpulse;
+			b2FloatW maxLambda = b2MulW( c->rollingResistance, totalNormalImpulse ); // limit scales with this pass's summed normal impulse
+			c->rollingImpulse = b2SymClampW( b2AddW( lambda, deltaLambda ), maxLambda ); // presumably clamps to [-maxLambda, maxLambda] — confirm in the math header
+			deltaLambda = b2SubW( c->rollingImpulse, lambda );
+
+			bA.w = b2MulSubW( bA.w, c->invIA, deltaLambda );
+			bB.w = b2MulAddW( bB.w, c->invIB, deltaLambda );
+		}
+
+		b2ScatterBodies( states, c->indexA, &bA );
+		b2ScatterBodies( states, c->indexB, &bB );
+	}
+
+	b2TracyCZoneEnd( solve_contact );
+}
+
+void b2ApplyRestitutionTask( int startIndex, int endIndex, b2StepContext* context, int colorIndex ) // restitution pass over constraints [startIndex, endIndex) of one graph color
+{
+	b2TracyCZoneNC( restitution, "Restitution", b2_colorDodgerBlue, true );
+
+	b2BodyState* states = context->states;
+	b2ContactConstraintSIMD* constraints = context->graph->colors[colorIndex].simdConstraints;
+	b2FloatW threshold = b2SplatW( context->world->restitutionThreshold );
+	b2FloatW zero = b2ZeroW();
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		b2ContactConstraintSIMD* c = constraints + i;
+
+		if ( b2AllZeroW( c->restitution ) )
+		{
+			// No lanes have restitution. Common case.
+			continue;
+		}
+
+		// Create a mask based on restitution so that lanes with no restitution are not affected
+		// by the calculations below.
+		b2FloatW restitutionMask = b2EqualsW( c->restitution, zero ); // set in lanes whose restitution is exactly zero
+
+		b2BodyStateW bA = b2GatherBodies( states, c->indexA );
+		b2BodyStateW bB = b2GatherBodies( states, c->indexB );
+
+		// first point non-penetration constraint
+		{
+			// Set effective mass to zero if restitution should not be applied
+			b2FloatW mask1 = b2GreaterThanW( b2AddW( c->relativeVelocity1, threshold ), zero ); // saved normal velocity is above -threshold
+			b2FloatW mask2 = b2EqualsW( c->totalNormalImpulse1, zero ); // point accumulated no normal impulse during the solve passes
+			b2FloatW mask = b2OrW( b2OrW( mask1, mask2 ), restitutionMask );
+			b2FloatW mass = b2BlendW( c->normalMass1, zero, mask );
+
+			// fixed anchors for Jacobians
+			b2Vec2W rA = c->anchorA1;
+			b2Vec2W rB = c->anchorB1;
+
+			// Relative velocity at contact
+			b2FloatW dvx = b2SubW( b2SubW( bB.v.X, b2MulW( bB.w, rB.Y ) ), b2SubW( bA.v.X, b2MulW( bA.w, rA.Y ) ) );
+			b2FloatW dvy = b2SubW( b2AddW( bB.v.Y, b2MulW( bB.w, rB.X ) ), b2AddW( bA.v.Y, b2MulW( bA.w, rA.X ) ) );
+			b2FloatW vn = b2AddW( b2MulW( dvx, c->normal.X ), b2MulW( dvy, c->normal.Y ) );
+
+			// Compute normal impulse
+			b2FloatW negImpulse = b2MulW( mass, b2AddW( vn, b2MulW( c->restitution, c->relativeVelocity1 ) ) ); // target combines current vn with restitution times the saved pre-solve velocity
+
+			// Clamp the accumulated impulse
+			b2FloatW newImpulse = b2MaxW( b2SubW( c->normalImpulse1, negImpulse ), b2ZeroW() );
+			b2FloatW impulse = b2SubW( newImpulse, c->normalImpulse1 );
+			c->normalImpulse1 = newImpulse; // totalNormalImpulse1 is not updated in this pass
+
+			// Apply contact impulse
+			b2FloatW Px = b2MulW( impulse, c->normal.X );
+			b2FloatW Py = b2MulW( impulse, c->normal.Y );
+
+			bA.v.X = b2MulSubW( bA.v.X, c->invMassA, Px );
+			bA.v.Y = b2MulSubW( bA.v.Y, c->invMassA, Py );
+			bA.w = b2MulSubW( bA.w, c->invIA, b2SubW( b2MulW( rA.X, Py ), b2MulW( rA.Y, Px ) ) );
+
+			bB.v.X = b2MulAddW( bB.v.X, c->invMassB, Px );
+			bB.v.Y = b2MulAddW( bB.v.Y, c->invMassB, Py );
+			bB.w = b2MulAddW( bB.w, c->invIB, b2SubW( b2MulW( rB.X, Py ), b2MulW( rB.Y, Px ) ) );
+		}
+
+		// second point non-penetration constraint
+		{
+			// Set effective mass to zero if restitution should not be applied
+			b2FloatW mask1 = b2GreaterThanW( b2AddW( c->relativeVelocity2, threshold ), zero ); // saved normal velocity is above -threshold
+			b2FloatW mask2 = b2EqualsW( c->totalNormalImpulse2, zero ); // point accumulated no normal impulse during the solve passes
+			b2FloatW mask = b2OrW( b2OrW( mask1, mask2 ), restitutionMask );
+			b2FloatW mass = b2BlendW( c->normalMass2, zero, mask );
+
+			// fixed anchors for Jacobians
+			b2Vec2W rA = c->anchorA2;
+			b2Vec2W rB = c->anchorB2;
+
+			// Relative velocity at contact
+			b2FloatW dvx = b2SubW( b2SubW( bB.v.X, b2MulW( bB.w, rB.Y ) ), b2SubW( bA.v.X, b2MulW( bA.w, rA.Y ) ) );
+			b2FloatW dvy = b2SubW( b2AddW( bB.v.Y, b2MulW( bB.w, rB.X ) ), b2AddW( bA.v.Y, b2MulW( bA.w, rA.X ) ) );
+			b2FloatW vn = b2AddW( b2MulW( dvx, c->normal.X ), b2MulW( dvy, c->normal.Y ) );
+
+			// Compute normal impulse
+			b2FloatW negImpulse = b2MulW( mass, b2AddW( vn, b2MulW( c->restitution, c->relativeVelocity2 ) ) );
+
+			// Clamp the accumulated impulse
+			b2FloatW newImpulse = b2MaxW( b2SubW( c->normalImpulse2, negImpulse ), b2ZeroW() );
+			b2FloatW impulse = b2SubW( newImpulse, c->normalImpulse2 );
+			c->normalImpulse2 = newImpulse; // totalNormalImpulse2 is not updated in this pass
+
+			// Apply contact impulse
+			b2FloatW Px = b2MulW( impulse, c->normal.X );
+			b2FloatW Py = b2MulW( impulse, c->normal.Y );
+
+			bA.v.X = b2MulSubW( bA.v.X, c->invMassA, Px );
+			bA.v.Y = b2MulSubW( bA.v.Y, c->invMassA, Py );
+			bA.w = b2MulSubW( bA.w, c->invIA, b2SubW( b2MulW( rA.X, Py ), b2MulW( rA.Y, Px ) ) );
+
+			bB.v.X = b2MulAddW( bB.v.X, c->invMassB, Px );
+			bB.v.Y = b2MulAddW( bB.v.Y, c->invMassB, Py );
+			bB.w = b2MulAddW( bB.w, c->invIB, b2SubW( b2MulW( rB.X, Py ), b2MulW( rB.Y, Px ) ) );
+		}
+
+		b2ScatterBodies( states, c->indexA, &bA );
+		b2ScatterBodies( states, c->indexB, &bB );
+	}
+
+	b2TracyCZoneEnd( restitution );
+}
+
+void b2StoreImpulsesTask( int startIndex, int endIndex, b2StepContext* context ) // copies solved impulses from wide constraints [startIndex, endIndex) back into the contact manifolds
+{
+	b2TracyCZoneNC( store_impulses, "Store", b2_colorFireBrick, true );
+
+	b2ContactSim** contacts = context->contacts;
+	const b2ContactConstraintSIMD* constraints = context->simdContactConstraints;
+
+	b2Manifold dummy = { 0 }; // scratch manifold that absorbs the writes for unused (NULL) lanes
+
+	for ( int constraintIndex = startIndex; constraintIndex < endIndex; ++constraintIndex )
+	{
+		const b2ContactConstraintSIMD* c = constraints + constraintIndex;
+		const float* rollingImpulse = (float*)&c->rollingImpulse; // each wide field is viewed as an array of per-lane floats
+		const float* normalImpulse1 = (float*)&c->normalImpulse1;
+		const float* normalImpulse2 = (float*)&c->normalImpulse2;
+		const float* tangentImpulse1 = (float*)&c->tangentImpulse1;
+		const float* tangentImpulse2 = (float*)&c->tangentImpulse2;
+		const float* totalNormalImpulse1 = (float*)&c->totalNormalImpulse1;
+		const float* totalNormalImpulse2 = (float*)&c->totalNormalImpulse2;
+		const float* normalVelocity1 = (float*)&c->relativeVelocity1; // relativeVelocity is stored back as the point's normalVelocity
+		const float* normalVelocity2 = (float*)&c->relativeVelocity2;
+
+		int baseIndex = B2_SIMD_WIDTH * constraintIndex; // first slot of this constraint in the flat contact array
+
+		for ( int laneIndex = 0; laneIndex < B2_SIMD_WIDTH; ++laneIndex )
+		{
+			b2Manifold* m = contacts[baseIndex + laneIndex] == NULL ? &dummy : &contacts[baseIndex + laneIndex]->manifold;
+			m->rollingImpulse = rollingImpulse[laneIndex];
+
+			m->points[0].normalImpulse = normalImpulse1[laneIndex];
+			m->points[0].tangentImpulse = tangentImpulse1[laneIndex];
+			m->points[0].totalNormalImpulse = totalNormalImpulse1[laneIndex];
+			m->points[0].normalVelocity = normalVelocity1[laneIndex];
+
+			m->points[1].normalImpulse = normalImpulse2[laneIndex]; // second point is written even for one-point manifolds
+			m->points[1].tangentImpulse = tangentImpulse2[laneIndex];
+			m->points[1].totalNormalImpulse = totalNormalImpulse2[laneIndex];
+			m->points[1].normalVelocity = normalVelocity2[laneIndex];
+		}
+	}
+
+	b2TracyCZoneEnd( store_impulses );
+}
diff --git a/src/vendor/box2d/contact_solver.h b/src/vendor/box2d/contact_solver.h
new file mode 100644
index 0000000..61e46c7
--- /dev/null
+++ b/src/vendor/box2d/contact_solver.h
@@ -0,0 +1,54 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "solver.h"
+
+typedef struct b2ContactSim b2ContactSim;
+
+typedef struct b2ContactConstraintPoint // NOTE(review): looks like the scalar per-point counterpart of the wide *1 / *2 fields in contact_solver.c — confirm against the overflow solver
+{
+	b2Vec2 anchorA, anchorB;
+	float baseSeparation;
+	float relativeVelocity;
+	float normalImpulse;
+	float tangentImpulse;
+	float totalNormalImpulse;
+	float normalMass;
+	float tangentMass;
+} b2ContactConstraintPoint;
+
+typedef struct b2ContactConstraint // NOTE(review): presumably the non-SIMD constraint used by the overflow contact functions — usage is not visible here, confirm
+{
+	int indexA;
+	int indexB;
+	b2ContactConstraintPoint points[2]; // room for two points; presumably pointCount says how many are in use — confirm
+	b2Vec2 normal;
+	float invMassA, invMassB;
+	float invIA, invIB;
+	float friction;
+	float restitution;
+	float tangentSpeed;
+	float rollingResistance;
+	float rollingMass;
+	float rollingImpulse;
+	b2Softness softness;
+	int pointCount;
+} b2ContactConstraint;
+
+int b2GetContactConstraintSIMDByteCount( void );
+
+// Overflow contacts don't fit into the constraint graph coloring
+void b2PrepareOverflowContacts( b2StepContext* context );
+void b2WarmStartOverflowContacts( b2StepContext* context );
+void b2SolveOverflowContacts( b2StepContext* context, bool useBias );
+void b2ApplyOverflowRestitution( b2StepContext* context );
+void b2StoreOverflowImpulses( b2StepContext* context );
+
+// Contacts that live within the constraint graph coloring; each task handles the half-open constraint range [startIndex, endIndex)
+void b2PrepareContactsTask( int startIndex, int endIndex, b2StepContext* context ); // builds the wide constraints from context->contacts
+void b2WarmStartContactsTask( int startIndex, int endIndex, b2StepContext* context, int colorIndex ); // applies stored impulses to body velocities
+void b2SolveContactsTask( int startIndex, int endIndex, b2StepContext* context, int colorIndex, bool useBias ); // one normal, friction and rolling pass
+void b2ApplyRestitutionTask( int startIndex, int endIndex, b2StepContext* context, int colorIndex ); // skips constraints whose restitution is zero in every lane
+void b2StoreImpulsesTask( int startIndex, int endIndex, b2StepContext* context ); // writes solved impulses back to the manifolds
diff --git a/src/vendor/box2d/core.c b/src/vendor/box2d/core.c
new file mode 100644
index 0000000..1ecfbb1
--- /dev/null
+++ b/src/vendor/box2d/core.c
@@ -0,0 +1,178 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "core.h"
+
+#if defined( B2_COMPILER_MSVC )
+#define _CRTDBG_MAP_ALLOC
+#include <crtdbg.h>
+#include <stdlib.h>
+#else
+#include <stdlib.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#ifdef BOX2D_PROFILE
+
+#include <tracy/TracyC.h>
+#define b2TracyCAlloc( ptr, size ) TracyCAlloc( ptr, size )
+#define b2TracyCFree( ptr ) TracyCFree( ptr )
+
+#else
+
+#define b2TracyCAlloc( ptr, size )
+#define b2TracyCFree( ptr )
+
+#endif
+
+#include "atomic.h"
+
+// This allows the user to change the length units at runtime
+float b2_lengthUnitsPerMeter = 1.0f;
+
+void b2SetLengthUnitsPerMeter( float lengthUnits ) // overwrites the global b2_lengthUnitsPerMeter
+{
+	B2_ASSERT( b2IsValidFloat( lengthUnits ) && lengthUnits > 0.0f ); // the value must pass b2IsValidFloat and be strictly positive
+	b2_lengthUnitsPerMeter = lengthUnits;
+}
+
+float b2GetLengthUnitsPerMeter( void ) // current global length scale; 1.0f until b2SetLengthUnitsPerMeter changes it
+{
+	return b2_lengthUnitsPerMeter;
+}
+
+static int b2DefaultAssertFcn( const char* condition, const char* fileName, int lineNumber ) // handler used until b2SetAssertFcn replaces it
+{
+	printf( "BOX2D ASSERTION: %s, %s, line %d\n", condition, fileName, lineNumber ); // report goes to stdout
+
+	// return non-zero to break to debugger
+	return 1;
+}
+
+b2AssertFcn* b2AssertHandler = b2DefaultAssertFcn;
+
+void b2SetAssertFcn( b2AssertFcn* assertFcn ) // installs the handler that b2InternalAssertFcn forwards to
+{
+	B2_ASSERT( assertFcn != NULL ); // NULL is only rejected through this assert
+	b2AssertHandler = assertFcn;
+}
+
+#if !defined( NDEBUG ) || defined( B2_ENABLE_ASSERT ) // only compiled for debug builds or when B2_ENABLE_ASSERT is defined
+int b2InternalAssertFcn( const char* condition, const char* fileName, int lineNumber ) // forwards to the current b2AssertHandler
+{
+	return b2AssertHandler( condition, fileName, lineNumber ); // the handler's return value is passed through unchanged
+}
+#endif
+
+b2Version b2GetVersion( void ) // reports the hard-coded library version 3.1.0
+{
+	return (b2Version){
+		.major = 3,
+		.minor = 1,
+		.revision = 0,
+	};
+}
+
+static b2AllocFcn* b2_allocFcn = NULL;
+static b2FreeFcn* b2_freeFcn = NULL;
+
+b2AtomicInt b2_byteCount;
+
+void b2SetAllocator( b2AllocFcn* allocFcn, b2FreeFcn* freeFcn ) // installs user hooks consulted by b2Alloc and b2Free
+{
+	b2_allocFcn = allocFcn; // NULL makes b2Alloc use the built-in platform allocation path
+	b2_freeFcn = freeFcn; // NULL makes b2Free use the built-in platform free path; the two hooks are checked independently
+}
+
+// Use 32 byte alignment for everything. Works with 256bit SIMD.
+#define B2_ALIGNMENT 32
+
+void* b2Alloc( int size ) // returns NULL for size 0, otherwise memory from the user hook or the platform aligned allocator
+{
+	if ( size == 0 )
+	{
+		return NULL;
+	}
+
+	// This could cause some sharing issues, however Box2D rarely calls b2Alloc.
+	b2AtomicFetchAddInt( &b2_byteCount, size ); // counts the requested size, not the rounded size32
+
+	// Allocation must be a multiple of 32 or risk a seg fault
+	// https://en.cppreference.com/w/c/memory/aligned_alloc
+	int size32 = ( ( size - 1 ) | 0x1F ) + 1; // rounds a positive size up to the next multiple of 32
+
+	if ( b2_allocFcn != NULL )
+	{
+		void* ptr = b2_allocFcn( size32, B2_ALIGNMENT ); // user hook installed through b2SetAllocator
+		b2TracyCAlloc( ptr, size );
+
+		B2_ASSERT( ptr != NULL );
+		B2_ASSERT( ( (uintptr_t)ptr & 0x1F ) == 0 ); // the hook is expected to return 32 byte aligned memory
+
+		return ptr;
+	}
+
+#ifdef B2_PLATFORM_WINDOWS
+	void* ptr = _aligned_malloc( size32, B2_ALIGNMENT );
+#elif defined( B2_PLATFORM_ANDROID )
+	void* ptr = NULL;
+	if ( posix_memalign( &ptr, B2_ALIGNMENT, size32 ) != 0 )
+	{
+		// allocation failed, exit the application
+		exit( EXIT_FAILURE );
+	}
+#else
+	void* ptr = aligned_alloc( B2_ALIGNMENT, size32 );
+#endif
+
+	b2TracyCAlloc( ptr, size );
+
+	B2_ASSERT( ptr != NULL ); // only the Android branch exits on failure; the other paths depend on this assert
+	B2_ASSERT( ( (uintptr_t)ptr & 0x1F ) == 0 );
+
+	return ptr;
+}
+
+void b2Free( void* mem, int size ) // releases mem and subtracts size from the byte counter; a NULL mem is ignored
+{
+	if ( mem == NULL )
+	{
+		return; // the byte counter is left untouched in this case
+	}
+
+	b2TracyCFree( mem );
+
+	if ( b2_freeFcn != NULL )
+	{
+		b2_freeFcn( mem ); // user hook installed through b2SetAllocator
+	}
+	else
+	{
+#ifdef B2_PLATFORM_WINDOWS
+		_aligned_free( mem ); // pairs with _aligned_malloc in b2Alloc
+#else
+		free( mem );
+#endif
+	}
+
+	b2AtomicFetchAddInt( &b2_byteCount, -size ); // b2Alloc added the requested size, so size should be that same value
+}
+
+void* b2GrowAlloc( void* oldMem, int oldSize, int newSize ) // allocates newSize bytes, copies oldSize bytes over and frees oldMem
+{
+	B2_ASSERT( newSize > oldSize ); // growth only
+	void* newMem = b2Alloc( newSize );
+	if ( oldSize > 0 ) // with oldSize 0 the old pointer is neither read nor freed
+	{
+		memcpy( newMem, oldMem, oldSize );
+		b2Free( oldMem, oldSize );
+	}
+	return newMem;
+}
+
+int b2GetByteCount( void ) // net total of the sizes passed to b2Alloc minus those passed to b2Free
+{
+	return b2AtomicLoadInt( &b2_byteCount );
+}
diff --git a/src/vendor/box2d/core.h b/src/vendor/box2d/core.h
new file mode 100644
index 0000000..9b2233e
--- /dev/null
+++ b/src/vendor/box2d/core.h
@@ -0,0 +1,143 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "box2d/math_functions.h"
+
+// clang-format off
+
+#define B2_NULL_INDEX ( -1 )
+
+// for performance comparisons
+#define B2_RESTRICT restrict
+
+#ifdef NDEBUG
+	#define B2_DEBUG 0
+#else
+	#define B2_DEBUG 1
+#endif
+
+#if defined( BOX2D_VALIDATE ) && !defined( NDEBUG )
+	#define B2_VALIDATE 1
+#else
+	#define B2_VALIDATE 0
+#endif
+
+// Define platform
+#if defined(_WIN32) || defined(_WIN64)
+	#define B2_PLATFORM_WINDOWS
+#elif defined( __ANDROID__ )
+	#define B2_PLATFORM_ANDROID
+#elif defined( __linux__ )
+	#define B2_PLATFORM_LINUX
+#elif defined( __APPLE__ )
+	#include <TargetConditionals.h>
+	#if defined( TARGET_OS_IPHONE ) && !TARGET_OS_IPHONE
+		#define B2_PLATFORM_MACOS
+	#else
+		#define B2_PLATFORM_IOS
+	#endif
+#elif defined( __EMSCRIPTEN__ )
+	#define B2_PLATFORM_WASM
+#else
+	#define B2_PLATFORM_UNKNOWN
+#endif
+
+// Define CPU
+#if defined( __x86_64__ ) || defined( _M_X64 ) || defined( __i386__ ) || defined( _M_IX86 )
+	#define B2_CPU_X86_X64
+#elif defined( __aarch64__ ) || defined( _M_ARM64 ) || defined( __arm__ ) || defined( _M_ARM )
+	#define B2_CPU_ARM
+#elif defined( __EMSCRIPTEN__ )
+	#define B2_CPU_WASM
+#else
+	#define B2_CPU_UNKNOWN
+#endif
+
+// Define SIMD: exactly one B2_SIMD_* flavor is chosen; B2_SIMD_WIDTH is 8 for AVX2 and 4 in every other case
+#if defined( BOX2D_ENABLE_SIMD )
+	#if defined( B2_CPU_X86_X64 )
+		#if defined( BOX2D_AVX2 )
+			#define B2_SIMD_AVX2
+			#define B2_SIMD_WIDTH 8
+		#else
+			#define B2_SIMD_SSE2
+			#define B2_SIMD_WIDTH 4
+		#endif
+	#elif defined( B2_CPU_ARM )
+		#define B2_SIMD_NEON
+		#define B2_SIMD_WIDTH 4
+	#elif defined( B2_CPU_WASM )
+		#define B2_CPU_WASM // redundant: this branch is only reached when B2_CPU_WASM is already defined
+		#define B2_SIMD_SSE2
+		#define B2_SIMD_WIDTH 4
+	#else
+		#define B2_SIMD_NONE
+		#define B2_SIMD_WIDTH 4
+	#endif
+#else
+	#define B2_SIMD_NONE
+	// note: I tried width of 1 and got no performance change
+	#define B2_SIMD_WIDTH 4
+#endif
+
+// Define compiler
+#if defined( __clang__ )
+	#define B2_COMPILER_CLANG
+#elif defined( __GNUC__ )
+	#define B2_COMPILER_GCC
+#elif defined( _MSC_VER )
+	#define B2_COMPILER_MSVC
+#endif
+
+/// Tracy profiler instrumentation
+/// https://github.com/wolfpld/tracy
+#ifdef BOX2D_PROFILE
+	#include <tracy/TracyC.h>
+	#define b2TracyCZoneC( ctx, color, active ) TracyCZoneC( ctx, color, active )
+	#define b2TracyCZoneNC( ctx, name, color, active ) TracyCZoneNC( ctx, name, color, active )
+	#define b2TracyCZoneEnd( ctx ) TracyCZoneEnd( ctx )
+#else
+	#define b2TracyCZoneC( ctx, color, active )
+	#define b2TracyCZoneNC( ctx, name, color, active )
+	#define b2TracyCZoneEnd( ctx )
+#endif
+
+// clang-format on
+
+// Returns the number of elements of a true array as an int (sizeof based, so it is wrong for pointers)
+#define B2_ARRAY_COUNT( A ) (int)( sizeof( A ) / sizeof( A[0] ) )
+
+// Used to prevent the compiler from warning about unused variables; the arguments sit inside sizeof and are never evaluated
+#define B2_UNUSED( ... ) (void)sizeof( ( __VA_ARGS__, 0 ) )
+
+// Use to validate definitions. Do not take my cookie.
+#define B2_SECRET_COOKIE 1152023
+
+// Snoop counters. These should be disabled in optimized builds because they are expensive.
+#define B2_SNOOP_TABLE_COUNTERS B2_DEBUG
+#define B2_SNOOP_PAIR_COUNTERS B2_DEBUG
+#define B2_SNOOP_TOI_COUNTERS B2_DEBUG
+
+#define B2_CHECK_DEF( DEF ) B2_ASSERT( ( DEF )->internalValue == B2_SECRET_COOKIE ) // DEF is a pointer to a definition struct; parenthesized so arguments like &def expand correctly
+
+typedef struct b2AtomicInt // wrapper around a plain int; core.c accesses it through b2AtomicFetchAddInt and b2AtomicLoadInt
+{
+	int value;
+} b2AtomicInt;
+
+typedef struct b2AtomicU32 // unsigned 32 bit counterpart of b2AtomicInt
+{
+	uint32_t value;
+} b2AtomicU32;
+
+void* b2Alloc( int size );
+#define B2_ALLOC_STRUCT( type ) b2Alloc( sizeof( type ) )
+#define B2_ALLOC_ARRAY( count, type ) b2Alloc( ( count ) * sizeof( type ) ) // count is parenthesized so expressions like n + 1 are scaled as a whole
+
+void b2Free( void* mem, int size );
+#define B2_FREE_STRUCT( mem, type ) b2Free( mem, sizeof(type)); // NOTE(review): the expansion already ends in ';' - kept because callers are not visible here
+#define B2_FREE_ARRAY( mem, count, type ) b2Free( mem, ( count ) * sizeof( type ) ) // count is parenthesized for the same reason as in B2_ALLOC_ARRAY
+
+void* b2GrowAlloc( void* oldMem, int oldSize, int newSize );
diff --git a/src/vendor/box2d/ctz.h b/src/vendor/box2d/ctz.h
new file mode 100644
index 0000000..9959527
--- /dev/null
+++ b/src/vendor/box2d/ctz.h
@@ -0,0 +1,112 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#if defined( _MSC_VER ) && !defined( __clang__ )
+	#include <intrin.h>
+
+// https://en.wikipedia.org/wiki/Find_first_set
+
+static inline uint32_t b2CTZ32( uint32_t block ) // index of the lowest set bit of block
+{
+	unsigned long index;
+	_BitScanForward( &index, block ); // the success flag is ignored, so block is assumed to be non-zero
+	return index;
+}
+
+// This function doesn't need to be fast, so using the Ivy Bridge fallback.
+static inline uint32_t b2CLZ32( uint32_t value ) // number of leading zero bits; 32 when value is 0
+{
+	#if 1
+
+	// Use BSR (Bit Scan Reverse) which is available on Ivy Bridge
+	unsigned long index;
+	if ( _BitScanReverse( &index, value ) )
+	{
+		// BSR gives the index of the most significant 1-bit
+		// We need to invert this to get the number of leading zeros
+		return 31 - index;
+	}
+	else
+	{
+		// If x is 0, BSR sets the zero flag and doesn't modify index
+		// LZCNT should return 32 for an input of 0
+		return 32;
+	}
+
+	#else
+
+	return __lzcnt( value ); // disabled alternative, never compiled because of the #if 1 above
+
+	#endif
+}
+
+static inline uint32_t b2CTZ64( uint64_t block ) // index of the lowest set bit of a 64 bit block
+{
+	unsigned long index;
+
+	#ifdef _WIN64
+	_BitScanForward64( &index, block ); // the success flag is ignored, so block is assumed to be non-zero
+	#else
+	// 32-bit fall back: scan the low half first, then the high half with an offset of 32
+	if ( (uint32_t)block != 0 )
+	{
+		_BitScanForward( &index, (uint32_t)block );
+	}
+	else
+	{
+		_BitScanForward( &index, (uint32_t)( block >> 32 ) );
+		index += 32;
+	}
+	#endif
+
+	return index;
+}
+
+#else
+
+static inline uint32_t b2CTZ32( uint32_t block ) // index of the lowest set bit of block
+{
+	return __builtin_ctz( block ); // no zero check here; NOTE(review): the builtin is documented as undefined for 0 - callers must pass non-zero
+}
+
+static inline uint32_t b2CLZ32( uint32_t value ) // number of leading zero bits; 32 when value is 0
+{ // __builtin_clz( 0 ) is undefined, so zero is answered explicitly to agree with the MSVC branch
+	return value == 0 ? 32u : (uint32_t)__builtin_clz( value );
+}
+
+static inline uint32_t b2CTZ64( uint64_t block ) // index of the lowest set bit of a 64 bit block
+{
+	return __builtin_ctzll( block ); // no zero check here; NOTE(review): the builtin is documented as undefined for 0 - callers must pass non-zero
+}
+
+#endif
+
+static inline bool b2IsPowerOf2( int x ) // true only for 1, 2, 4, ...; zero and negative values are rejected
+{ // the x > 0 guard also keeps x - 1 from overflowing when x is INT_MIN
+	return x > 0 && ( x & ( x - 1 ) ) == 0;
+}
+
+static inline int b2BoundingPowerOf2( int x ) // for x >= 2: smallest exponent e with ( 1 << e ) >= x
+{
+	if ( x <= 1 )
+	{
+		return 1; // note: the answer for x <= 1 is exponent 1, not 0
+	}
+
+	return 32 - (int)b2CLZ32( (uint32_t)x - 1 ); // bit length of x - 1; x - 1 is at least 1 here, so b2CLZ32 never sees 0
+}
+
+static inline int b2RoundUpPowerOf2( int x ) // smallest power of two that is >= x; 1 for x <= 1
+{
+	if ( x <= 1 )
+	{
+		return 1;
+	}
+
+	return 1 << ( 32 - (int)b2CLZ32( (uint32_t)x - 1 ) ); // NOTE(review): for x above 1 << 30 the shift count becomes 31 on a signed int
+}
diff --git a/src/vendor/box2d/distance.c b/src/vendor/box2d/distance.c
new file mode 100644
index 0000000..415ca80
--- /dev/null
+++ b/src/vendor/box2d/distance.c
@@ -0,0 +1,1415 @@
+
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "constants.h"
+#include "core.h"
+
+#include "box2d/collision.h"
+#include "box2d/math_functions.h"
+
+#include <float.h>
+#include <stddef.h>
+
+b2Transform b2GetSweepTransform( const b2Sweep* sweep, float time ) // transform interpolated between the sweep start (time 0) and end (time 1)
+{
+	// https://fgiesen.wordpress.com/2012/08/15/linear-interpolation-past-present-and-future/
+	b2Transform xf;
+	xf.p = b2Add( b2MulSV( 1.0f - time, sweep->c1 ), b2MulSV( time, sweep->c2 ) ); // linear blend of c1 and c2
+
+	b2Rot q = { // component-wise linear blend of q1 and q2, normalized right below
+		( 1.0f - time ) * sweep->q1.c + time * sweep->q2.c,
+		( 1.0f - time ) * sweep->q1.s + time * sweep->q2.s,
+	};
+
+	xf.q = b2NormalizeRot( q );
+
+	// Shift to origin: xf.p held the interpolated center, subtract the rotated localCenter
+	xf.p = b2Sub( xf.p, b2RotateVector( xf.q, sweep->localCenter ) );
+	return xf;
+}
+
+/// Follows Ericson 5.1.9 Closest Points of Two Line Segments
+b2SegmentDistanceResult b2SegmentDistance( b2Vec2 p1, b2Vec2 q1, b2Vec2 p2, b2Vec2 q2 ) // closest points of segments p1-q1 and p2-q2
+{
+	b2SegmentDistanceResult result = { 0 };
+
+	b2Vec2 d1 = b2Sub( q1, p1 ); // direction of segment 1
+	b2Vec2 d2 = b2Sub( q2, p2 ); // direction of segment 2
+	b2Vec2 r = b2Sub( p1, p2 );
+	float dd1 = b2Dot( d1, d1 ); // squared length of segment 1
+	float dd2 = b2Dot( d2, d2 ); // squared length of segment 2
+	float rd1 = b2Dot( r, d1 );
+	float rd2 = b2Dot( r, d2 );
+
+	const float epsSqr = FLT_EPSILON * FLT_EPSILON; // a segment with squared length below this is treated as a point
+
+	if ( dd1 < epsSqr || dd2 < epsSqr )
+	{
+		// Handle all degeneracies
+		if ( dd1 >= epsSqr )
+		{
+			// Segment 2 is degenerate
+			result.fraction1 = b2ClampFloat( -rd1 / dd1, 0.0f, 1.0f );
+			result.fraction2 = 0.0f;
+		}
+		else if ( dd2 >= epsSqr )
+		{
+			// Segment 1 is degenerate
+			result.fraction1 = 0.0f;
+			result.fraction2 = b2ClampFloat( rd2 / dd2, 0.0f, 1.0f );
+		}
+		else
+		{
+			result.fraction1 = 0.0f; // both segments are points
+			result.fraction2 = 0.0f;
+		}
+	}
+	else
+	{
+		// Non-degenerate segments
+		float d12 = b2Dot( d1, d2 );
+
+		float denominator = dd1 * dd2 - d12 * d12;
+
+		// Fraction on segment 1
+		float f1 = 0.0f;
+		if ( denominator != 0.0f )
+		{
+			// not parallel
+			f1 = b2ClampFloat( ( d12 * rd2 - rd1 * dd2 ) / denominator, 0.0f, 1.0f );
+		}
+
+		// Compute point on segment 2 closest to p1 + f1 * d1
+		float f2 = ( d12 * f1 + rd2 ) / dd2;
+
+		// Clamping of segment 2 requires a do over on segment 1
+		if ( f2 < 0.0f )
+		{
+			f2 = 0.0f;
+			f1 = b2ClampFloat( -rd1 / dd1, 0.0f, 1.0f );
+		}
+		else if ( f2 > 1.0f )
+		{
+			f2 = 1.0f;
+			f1 = b2ClampFloat( ( d12 - rd1 ) / dd1, 0.0f, 1.0f );
+		}
+
+		result.fraction1 = f1;
+		result.fraction2 = f2;
+	}
+
+	result.closest1 = b2MulAdd( p1, result.fraction1, d1 ); // p1 + fraction1 * d1
+	result.closest2 = b2MulAdd( p2, result.fraction2, d2 ); // p2 + fraction2 * d2
+	result.distanceSquared = b2DistanceSquared( result.closest1, result.closest2 );
+	return result;
+}
+
+b2ShapeProxy b2MakeProxy( const b2Vec2* points, int count, float radius ) // copies up to B2_MAX_POLYGON_VERTICES points into a proxy
+{
+	count = b2MinInt( count, B2_MAX_POLYGON_VERTICES ); // extra input points are silently dropped
+	b2ShapeProxy proxy; // not zero-initialized: entries of points[] past count are left unset
+	for ( int i = 0; i < count; ++i )
+	{
+		proxy.points[i] = points[i];
+	}
+	proxy.count = count;
+	proxy.radius = radius;
+	return proxy;
+}
+
+b2ShapeProxy b2MakeOffsetProxy( const b2Vec2* points, int count, float radius, b2Vec2 position, b2Rot rotation ) // like b2MakeProxy, with each point transformed first
+{
+	count = b2MinInt( count, B2_MAX_POLYGON_VERTICES ); // extra input points are silently dropped
+	b2Transform transform = {
+		.p = position,
+		.q = rotation,
+	};
+	b2ShapeProxy proxy; // not zero-initialized: entries of points[] past count are left unset
+	for ( int i = 0; i < count; ++i )
+	{
+		proxy.points[i] = b2TransformPoint( transform, points[i] );
+	}
+	proxy.count = count;
+	proxy.radius = radius; // the radius is stored as given
+	return proxy;
+}
+
+static inline b2Vec2 b2Weight2( float a1, b2Vec2 w1, float a2, b2Vec2 w2 ) // weighted sum a1 * w1 + a2 * w2
+{
+	return (b2Vec2){ a1 * w1.x + a2 * w2.x, a1 * w1.y + a2 * w2.y };
+}
+
+static inline b2Vec2 b2Weight3( float a1, b2Vec2 w1, float a2, b2Vec2 w2, float a3, b2Vec2 w3 ) // weighted sum a1 * w1 + a2 * w2 + a3 * w3
+{
+	return (b2Vec2){ a1 * w1.x + a2 * w2.x + a3 * w3.x, a1 * w1.y + a2 * w2.y + a3 * w3.y };
+}
+
+static inline int b2FindSupport( const b2ShapeProxy* proxy, b2Vec2 direction )
+{
+	// Index of the proxy point with the largest dot product against direction.
+	// A later point replaces the current winner only on a strictly greater value.
+
+	int winner = 0;
+	float winnerDot = b2Dot( proxy->points[0], direction );
+	for ( int k = 1; k < proxy->count; ++k )
+	{
+		float candidateDot = b2Dot( proxy->points[k], direction );
+		if ( candidateDot > winnerDot )
+		{
+			winner = k;
+			winnerDot = candidateDot;
+		}
+	}
+
+	return winner;
+}
+
+static b2Simplex b2MakeSimplexFromCache( const b2SimplexCache* cache, const b2ShapeProxy* proxyA, const b2ShapeProxy* proxyB ) // rebuilds a simplex from cached point indices
+{
+	B2_ASSERT( cache->count <= 3 ); // vertices[] below has exactly three slots
+	b2Simplex s; // not zero-initialized: only the first count vertices are filled in
+
+	// Copy data from cache.
+	s.count = cache->count;
+
+	b2SimplexVertex* vertices[] = { &s.v1, &s.v2, &s.v3 };
+	for ( int i = 0; i < s.count; ++i )
+	{
+		b2SimplexVertex* v = vertices[i];
+		v->indexA = cache->indexA[i];
+		v->indexB = cache->indexB[i];
+		v->wA = proxyA->points[v->indexA]; // cached indices are used to look up the current proxy points
+		v->wB = proxyB->points[v->indexB];
+		v->w = b2Sub( v->wA, v->wB );
+
+		// invalid
+		v->a = -1.0f;
+	}
+
+	// If the cache is empty or invalid ...
+	if ( s.count == 0 )
+	{
+		b2SimplexVertex* v = vertices[0]; // start from point 0 of each proxy
+		v->indexA = 0;
+		v->indexB = 0;
+		v->wA = proxyA->points[0];
+		v->wB = proxyB->points[0];
+		v->w = b2Sub( v->wA, v->wB );
+		v->a = 1.0f;
+		s.count = 1;
+	}
+
+	return s;
+}
+
+static void b2MakeSimplexCache( b2SimplexCache* cache, const b2Simplex* simplex ) // stores the simplex point indices so b2MakeSimplexFromCache can rebuild it
+{
+	const b2SimplexVertex* verts[] = { &simplex->v1, &simplex->v2, &simplex->v3 };
+	cache->count = (uint16_t)simplex->count; // only the first count slots of indexA/indexB are written below
+	for ( int k = 0; k < simplex->count; ++k )
+	{
+		cache->indexA[k] = (uint8_t)verts[k]->indexA;
+		cache->indexB[k] = (uint8_t)verts[k]->indexB;
+	}
+}
+
+static void b2ComputeSimplexWitnessPoints( b2Vec2* a, b2Vec2* b, const b2Simplex* s ) // writes the closest points on shape A and shape B
+{
+	switch ( s->count )
+	{
+		case 0:
+			B2_ASSERT( false ); // *a and *b are left unwritten
+			break;
+
+		case 1:
+			*a = s->v1.wA;
+			*b = s->v1.wB;
+			break;
+
+		case 2:
+			*a = b2Weight2( s->v1.a, s->v1.wA, s->v2.a, s->v2.wA ); // blend with the weights set by the simplex solver
+			*b = b2Weight2( s->v1.a, s->v1.wB, s->v2.a, s->v2.wB );
+			break;
+
+		case 3:
+			*a = b2Weight3( s->v1.a, s->v1.wA, s->v2.a, s->v2.wA, s->v3.a, s->v3.wA );
+			// todo why are these not equal?
+			//*b = b2Weight3(s->v1.a, s->v1.wB, s->v2.a, s->v2.wB, s->v3.a, s->v3.wB);
+			*b = *a; // both witness points are forced to the same location for a full triangle
+			break;
+
+		default:
+			B2_ASSERT( false ); // *a and *b are left unwritten
+			break;
+	}
+}
+
+// Solve a line segment using barycentric coordinates.
+//
+// p = a1 * w1 + a2 * w2
+// a1 + a2 = 1
+//
+// The vector from the origin to the closest point on the line is
+// perpendicular to the line.
+// e12 = w2 - w1
+// dot(p, e12) = 0
+// a1 * dot(w1, e12) + a2 * dot(w2, e12) = 0
+//
+// 2-by-2 linear system
+// [1      1     ][a1] = [1]
+// [w1.e12 w2.e12][a2] = [0]
+//
+// Define
+// d12_1 =  dot(w2, e12)
+// d12_2 = -dot(w1, e12)
+// d12 = d12_1 + d12_2
+//
+// Solution
+// a1 = d12_1 / d12
+// a2 = d12_2 / d12
+//
+// returns a vector that points towards the origin; s is reduced in place to the closest feature (1 or 2 vertices)
+static b2Vec2 b2SolveSimplex2( b2Simplex* s )
+{
+	b2Vec2 w1 = s->v1.w;
+	b2Vec2 w2 = s->v2.w;
+	b2Vec2 e12 = b2Sub( w2, w1 );
+
+	// w1 region
+	float d12_2 = -b2Dot( w1, e12 );
+	if ( d12_2 <= 0.0f )
+	{
+		// a2 <= 0, so we clamp it to 0
+		s->v1.a = 1.0f;
+		s->count = 1;
+		return b2Neg( w1 );
+	}
+
+	// w2 region
+	float d12_1 = b2Dot( w2, e12 );
+	if ( d12_1 <= 0.0f )
+	{
+		// a1 <= 0, so we clamp it to 0
+		s->v2.a = 1.0f;
+		s->count = 1;
+		s->v1 = s->v2; // the surviving vertex is moved into slot v1
+		return b2Neg( w2 );
+	}
+
+	// Must be in e12 region.
+	float inv_d12 = 1.0f / ( d12_1 + d12_2 );
+	s->v1.a = d12_1 * inv_d12;
+	s->v2.a = d12_2 * inv_d12;
+	s->count = 2;
+	return b2CrossSV( b2Cross( b2Add( w1, w2 ), e12 ), e12 );
+}
+
+static b2Vec2 b2SolveSimplex3( b2Simplex* s ) // reduces a 3-vertex simplex to its closest feature; returns the next search direction
+{
+	b2Vec2 w1 = s->v1.w;
+	b2Vec2 w2 = s->v2.w;
+	b2Vec2 w3 = s->v3.w;
+
+	// Edge12
+	// [1      1     ][a1] = [1]
+	// [w1.e12 w2.e12][a2] = [0]
+	// a3 = 0
+	b2Vec2 e12 = b2Sub( w2, w1 );
+	float w1e12 = b2Dot( w1, e12 );
+	float w2e12 = b2Dot( w2, e12 );
+	float d12_1 = w2e12;
+	float d12_2 = -w1e12;
+
+	// Edge13
+	// [1      1     ][a1] = [1]
+	// [w1.e13 w3.e13][a3] = [0]
+	// a2 = 0
+	b2Vec2 e13 = b2Sub( w3, w1 );
+	float w1e13 = b2Dot( w1, e13 );
+	float w3e13 = b2Dot( w3, e13 );
+	float d13_1 = w3e13;
+	float d13_2 = -w1e13;
+
+	// Edge23
+	// [1      1     ][a2] = [1]
+	// [w2.e23 w3.e23][a3] = [0]
+	// a1 = 0
+	b2Vec2 e23 = b2Sub( w3, w2 );
+	float w2e23 = b2Dot( w2, e23 );
+	float w3e23 = b2Dot( w3, e23 );
+	float d23_1 = w3e23;
+	float d23_2 = -w2e23;
+
+	// Triangle123
+	float n123 = b2Cross( e12, e13 );
+
+	float d123_1 = n123 * b2Cross( w2, w3 );
+	float d123_2 = n123 * b2Cross( w3, w1 );
+	float d123_3 = n123 * b2Cross( w1, w2 );
+
+	// w1 region
+	if ( d12_2 <= 0.0f && d13_2 <= 0.0f )
+	{
+		s->v1.a = 1.0f;
+		s->count = 1;
+		return b2Neg( w1 );
+	}
+
+	// e12
+	if ( d12_1 > 0.0f && d12_2 > 0.0f && d123_3 <= 0.0f )
+	{
+		float inv_d12 = 1.0f / ( d12_1 + d12_2 );
+		s->v1.a = d12_1 * inv_d12;
+		s->v2.a = d12_2 * inv_d12;
+		s->count = 2;
+		return b2CrossSV( b2Cross( b2Add( w1, w2 ), e12 ), e12 );
+	}
+
+	// e13
+	if ( d13_1 > 0.0f && d13_2 > 0.0f && d123_2 <= 0.0f )
+	{
+		float inv_d13 = 1.0f / ( d13_1 + d13_2 );
+		s->v1.a = d13_1 * inv_d13;
+		s->v3.a = d13_2 * inv_d13;
+		s->count = 2;
+		s->v2 = s->v3; // surviving vertices are packed into slots v1 and v2
+		return b2CrossSV( b2Cross( b2Add( w1, w3 ), e13 ), e13 );
+	}
+
+	// w2 region
+	if ( d12_1 <= 0.0f && d23_2 <= 0.0f )
+	{
+		s->v2.a = 1.0f;
+		s->count = 1;
+		s->v1 = s->v2; // the surviving vertex is moved into slot v1
+		return b2Neg( w2 );
+	}
+
+	// w3 region
+	if ( d13_1 <= 0.0f && d23_1 <= 0.0f )
+	{
+		s->v3.a = 1.0f;
+		s->count = 1;
+		s->v1 = s->v3; // the surviving vertex is moved into slot v1
+		return b2Neg( w3 );
+	}
+
+	// e23
+	if ( d23_1 > 0.0f && d23_2 > 0.0f && d123_1 <= 0.0f )
+	{
+		float inv_d23 = 1.0f / ( d23_1 + d23_2 );
+		s->v2.a = d23_1 * inv_d23;
+		s->v3.a = d23_2 * inv_d23;
+		s->count = 2;
+		s->v1 = s->v3; // v3 overwrites slot v1, so the pair ends up stored as (v3, v2)
+		return b2CrossSV( b2Cross( b2Add( w2, w3 ), e23 ), e23 );
+	}
+
+	// Must be in triangle123
+	float inv_d123 = 1.0f / ( d123_1 + d123_2 + d123_3 );
+	s->v1.a = d123_1 * inv_d123;
+	s->v2.a = d123_2 * inv_d123;
+	s->v3.a = d123_3 * inv_d123;
+	s->count = 3;
+
+	// No search direction
+	return b2Vec2_zero;
+}
+
+// Uses GJK for computing the distance between convex shapes.
+// https://box2d.org/files/ErinCatto_GJK_GDC2010.pdf
+// I spent time optimizing this and could find no further significant gains 3/30/2025
+b2DistanceOutput b2ShapeDistance( const b2DistanceInput* input, b2SimplexCache* cache, b2Simplex* simplexes, int simplexCapacity )
+{
+	B2_UNUSED( simplexes, simplexCapacity ); // still used below; the later recording blocks are compiled out when NDEBUG is defined
+	B2_ASSERT( input->proxyA.count > 0 && input->proxyB.count > 0 );
+	B2_ASSERT( input->proxyA.radius >= 0.0f );
+	B2_ASSERT( input->proxyB.radius >= 0.0f );
+
+	b2DistanceOutput output = { 0 };
+
+	const b2ShapeProxy* proxyA = &input->proxyA;
+
+	// Get proxyB in frame A to avoid further transforms in the main loop.
+	// This is still a performance gain at 8 points.
+	b2ShapeProxy localProxyB;
+	{
+		b2Transform transform = b2InvMulTransforms( input->transformA, input->transformB );
+		localProxyB.count = input->proxyB.count;
+		localProxyB.radius = input->proxyB.radius;
+		for ( int i = 0; i < localProxyB.count; ++i )
+		{
+			localProxyB.points[i] = b2TransformPoint( transform, input->proxyB.points[i] );
+		}
+	}
+
+	// Initialize the simplex.
+	b2Simplex simplex = b2MakeSimplexFromCache( cache, proxyA, &localProxyB );
+
+	int simplexIndex = 0;
+	if ( simplexes != NULL && simplexIndex < simplexCapacity ) // the initial simplex is recorded in every build, unlike the later ones
+	{
+		simplexes[simplexIndex] = simplex;
+		simplexIndex += 1;
+	}
+
+	// Get simplex vertices as an array.
+	b2SimplexVertex* vertices[] = { &simplex.v1, &simplex.v2, &simplex.v3 };
+
+	b2Vec2 nonUnitNormal = b2Vec2_zero;
+
+	// These store the vertices of the last simplex so that we can check for duplicates and prevent cycling.
+	int saveA[3], saveB[3];
+
+	// Main iteration loop. All computations are done in frame A.
+	const int maxIterations = 20;
+	int iteration = 0;
+	while ( iteration < maxIterations )
+	{
+		// Copy simplex so we can identify duplicates.
+		int saveCount = simplex.count;
+		for ( int i = 0; i < saveCount; ++i )
+		{
+			saveA[i] = vertices[i]->indexA;
+			saveB[i] = vertices[i]->indexB;
+		}
+
+		b2Vec2 d = { 0 };
+		switch ( simplex.count )
+		{
+			case 1:
+				d = b2Neg( simplex.v1.w );
+				break;
+
+			case 2:
+				d = b2SolveSimplex2( &simplex ); // may shrink the simplex to a single vertex
+				break;
+
+			case 3:
+				d = b2SolveSimplex3( &simplex ); // may shrink the simplex to one or two vertices
+				break;
+
+			default:
+				B2_ASSERT( false );
+		}
+
+		// If we have 3 points, then the origin is in the corresponding triangle.
+		if ( simplex.count == 3 )
+		{
+			break;
+		}
+
+#ifndef NDEBUG
+		if ( simplexes != NULL && simplexIndex < simplexCapacity )
+		{
+			simplexes[simplexIndex] = simplex;
+			simplexIndex += 1;
+		}
+#endif
+
+		// Save the normal
+		nonUnitNormal = d;
+
+		// Ensure the search direction is numerically fit.
+		if ( b2Dot( d, d ) < FLT_EPSILON * FLT_EPSILON )
+		{
+			// This is unlikely but could lead to bad cycling.
+			// The branch predictor seems to make this check have low cost.
+
+			// The origin is probably contained by a line segment
+			// or triangle. Thus the shapes are overlapped.
+
+			// We can't return zero here even though there may be overlap.
+			// In case the simplex is a point, segment, or triangle it is difficult
+			// to determine if the origin is contained in the CSO or very close to it.
+			break;
+		}
+
+		// Compute a tentative new simplex vertex using support points.
+		// support = support(a, d) - support(b, -d)
+		b2SimplexVertex* vertex = vertices[simplex.count]; // first unused slot; simplex.count is at most 2 here
+		vertex->indexA = b2FindSupport( proxyA, d );
+		vertex->wA = proxyA->points[vertex->indexA];
+		vertex->indexB = b2FindSupport( &localProxyB, b2Neg( d ) );
+		vertex->wB = localProxyB.points[vertex->indexB];
+		vertex->w = b2Sub( vertex->wA, vertex->wB );
+
+		// Iteration count is equated to the number of support point calls.
+		++iteration;
+
+		// Check for duplicate support points. This is the main termination criteria.
+		bool duplicate = false;
+		for ( int i = 0; i < saveCount; ++i )
+		{
+			if ( vertex->indexA == saveA[i] && vertex->indexB == saveB[i] )
+			{
+				duplicate = true;
+				break;
+			}
+		}
+
+		// If we found a duplicate support point we must exit to avoid cycling.
+		if ( duplicate )
+		{
+			break;
+		}
+
+		// New vertex is valid and needed.
+		simplex.count += 1;
+	}
+
+#ifndef NDEBUG
+	if ( simplexes != NULL && simplexIndex < simplexCapacity )
+	{
+		simplexes[simplexIndex] = simplex;
+		simplexIndex += 1;
+	}
+#endif
+
+	// Prepare output
+	b2Vec2 normal = b2Normalize( nonUnitNormal );
+	normal = b2RotateVector( input->transformA.q, normal ); // the loop worked in frame A; rotate by transformA for the output
+
+	b2Vec2 localPointA, localPointB;
+	b2ComputeSimplexWitnessPoints( &localPointA, &localPointB, &simplex );
+	output.normal = normal;
+	output.distance = b2Distance( localPointA, localPointB );
+	output.pointA = b2TransformPoint( input->transformA, localPointA );
+	output.pointB = b2TransformPoint( input->transformA, localPointB ); // transformA again: proxy B was moved into frame A above
+	output.iterations = iteration;
+	output.simplexCount = simplexIndex;
+
+	// Cache the simplex
+	b2MakeSimplexCache( cache, &simplex );
+
+	// Apply radii if requested; skipped when the core distance is at most a tenth of B2_LINEAR_SLOP
+	if ( input->useRadii && output.distance > 0.1f * B2_LINEAR_SLOP )
+	{
+		float radiusA = input->proxyA.radius;
+		float radiusB = input->proxyB.radius;
+		output.distance = b2MaxFloat( 0.0f, output.distance - radiusA - radiusB );
+
+		// Keep closest points on perimeter even if overlapped, this way the points move smoothly.
+		output.pointA = b2MulAdd( output.pointA, radiusA, normal );
+		output.pointB = b2MulSub( output.pointB, radiusB, normal );
+	}
+
+	return output;
+}
+
+// Shape cast using conservative advancement: proxy B is translated along translationB until it is within target distance of proxy A
+b2CastOutput b2ShapeCast( const b2ShapeCastPairInput* input )
+{
+	// Compute tolerance
+	float linearSlop = B2_LINEAR_SLOP;
+	float totalRadius = input->proxyA.radius + input->proxyB.radius;
+	float target = b2MaxFloat( linearSlop, totalRadius - linearSlop ); // separation of the core shapes at which a hit is reported
+	float tolerance = 0.25f * linearSlop;
+
+	B2_ASSERT( target > tolerance );
+
+	// Prepare input for distance query
+	b2SimplexCache cache = { 0 }; // reused across iterations so each distance query starts from the previous simplex
+
+	float alpha = 0.0f; // fraction of translationB applied so far
+
+	b2DistanceInput distanceInput = { 0 };
+	distanceInput.proxyA = input->proxyA;
+	distanceInput.proxyB = input->proxyB;
+	distanceInput.transformA = input->transformA;
+	distanceInput.transformB = input->transformB;
+	distanceInput.useRadii = false; // radii are accounted for through target instead
+
+	b2Vec2 delta2 = input->translationB;
+	b2CastOutput output = { 0 };
+
+	int iteration = 0;
+	int maxIterations = 20;
+	for ( ; iteration < maxIterations; ++iteration )
+	{
+		output.iterations += 1;
+
+		b2DistanceOutput distanceOutput = b2ShapeDistance( &distanceInput, &cache, NULL, 0 );
+
+		if ( distanceOutput.distance < target + tolerance )
+		{
+			if ( iteration == 0 )
+			{
+				if ( input->canEncroach && distanceOutput.distance > 2.0f * linearSlop )
+				{
+					target = distanceOutput.distance - linearSlop; // lower the target below the starting distance and keep advancing
+				}
+				else
+				{
+					if ( distanceOutput.distance == 0.0f )
+					{
+						// Normal may be invalid
+						return output; // output is still all zero apart from iterations
+					}
+
+					// Initial overlap but distance is non-zero due to radius
+					B2_ASSERT( b2IsNormalized( distanceOutput.normal ) );
+					output.fraction = alpha;
+					output.point = b2MulAdd( distanceOutput.pointA, input->proxyA.radius, distanceOutput.normal );
+					output.normal = distanceOutput.normal;
+					output.hit = true;
+					return output;
+				}
+			}
+			else
+			{
+				// Regular hit
+				B2_ASSERT( distanceOutput.distance > 0.0f && b2IsNormalized( distanceOutput.normal ) );
+				output.fraction = alpha;
+				output.point = b2MulAdd( distanceOutput.pointA, input->proxyA.radius, distanceOutput.normal );
+				output.normal = distanceOutput.normal;
+				output.hit = true;
+				return output;
+			}
+		}
+
+		B2_ASSERT( distanceOutput.distance > 0.0f );
+		B2_ASSERT( b2IsNormalized( distanceOutput.normal ) );
+
+		// Check if shapes are approaching each other
+		float denominator = b2Dot( delta2, distanceOutput.normal );
+		if ( denominator >= 0.0f )
+		{
+			// Miss
+			output.fraction = 1.0f;
+			return output;
+		}
+
+		// Advance sweep
+		alpha += ( target - distanceOutput.distance ) / denominator; // denominator is negative here
+		if ( alpha >= input->maxFraction )
+		{
+			// Miss
+			output.fraction = 1.0f;
+			return output;
+		}
+
+		distanceInput.transformB.p = b2MulAdd( input->transformB.p, alpha, delta2 ); // only the position of B changes, not its rotation
+	}
+
+	// Failure! Iteration limit reached: hit stays false and fraction stays 0
+	return output;
+}
+
+#if 0
+static inline b2Vec2 b2ComputeSimplexClosestPoint( const b2Simplex* s ) // inside #if 0, so not compiled: closest simplex point to the origin
+{
+	if ( s->count == 1 )
+	{
+		return s->v1.w;
+	}
+
+	if ( s->count == 2 )
+	{
+		return b2Weight2( s->v1.a, s->v1.w, s->v2.a, s->v2.w );
+	}
+
+	return b2Vec2_zero; // any other count yields the zero vector
+}
+
+typedef struct b2ShapeCastData // inside #if 0, so not compiled: per-iteration debug record filled by b2ShapeCastMerged
+{
+	b2Simplex simplex;
+	b2Vec2 closestA, closestB;
+	b2Vec2 normal;
+	b2Vec2 p0;
+	float fraction;
+} b2ShapeCastData;
+
+// GJK-raycast
+// Algorithm by Gino van den Bergen.
+// "Smooth Mesh Contacts with GJK" in Game Physics Pearls. 2010
+// This needs the simplex of A - B because the translation is for B and this
+// is how the relative motion works out when both shapes are translating.
+// This is similar to ray vs polygon and involves plane clipping. See b2RayCastPolygon.
+// In this case the polygon is just points and there are no planes. This uses a modified
+// version of GJK to generate planes for clipping.
+// The algorithm works by incrementally building clipping planes using GJK. Once a valid
+// clip plane is found the simplex origin is moved to the current fraction on the ray.
+// This resets the simplex after every clip. Later I should compare performance.
+// However, adapting this to work with encroachment is tricky and confusing because encroachment
+// needs distance.
+// Note: this algorithm is difficult to debug and not worth the effort in my opinion 4/1/2025
+b2CastOutput b2ShapeCastMerged( const b2ShapeCastPairInput* input, b2ShapeCastData* debugData, int debugCapacity )
+{
+	B2_UNUSED( debugData, debugCapacity );
+
+	b2CastOutput output = { 0 };
+	output.fraction = input->maxFraction;
+
+	b2ShapeProxy proxyA = input->proxyA;
+
+	b2Transform xf = b2InvMulTransforms( input->transformA, input->transformB );
+
+	// Put proxyB in proxyA's frame to reduce round-off error
+	b2ShapeProxy proxyB;
+	proxyB.count = input->proxyB.count;
+	proxyB.radius = input->proxyB.radius;
+	B2_ASSERT( proxyB.count <= B2_MAX_POLYGON_VERTICES );
+
+	for ( int i = 0; i < proxyB.count; ++i )
+	{
+		proxyB.points[i] = b2TransformPoint( xf, input->proxyB.points[i] );
+	}
+
+	float radius = proxyA.radius + proxyB.radius;
+
+	b2Vec2 r = b2RotateVector( xf.q, input->translationB );
+	float lambda = 0.0f;
+	float maxFraction = input->maxFraction;
+
+	// Initial simplex
+	b2Simplex simplex;
+	simplex.count = 0;
+
+	// Get simplex vertices as an array.
+	b2SimplexVertex* vertices[] = { &simplex.v1, &simplex.v2, &simplex.v3 };
+
+	// Get an initial point in A - B
+	b2Vec2 wA = proxyA.points[0];
+	b2Vec2 wB = proxyB.points[0];
+	b2Vec2 v = b2Sub( wA, wB );
+	b2Vec2 d = b2Neg( v );
+
+	// Sigma is the target distance between proxies
+	const float linearSlop = B2_LINEAR_SLOP;
+	const float sigma = b2MaxFloat( linearSlop, radius - linearSlop );
+	float tolerance = 0.5f * linearSlop;
+	float stolSquared = ( sigma + tolerance ) * ( sigma + tolerance );
+
+	// Main iteration loop.
+	const int maxIterations = 20;
+	int iteration = 0;
+	while ( iteration < maxIterations && b2LengthSquared( v ) > stolSquared )
+	{
+		B2_ASSERT( simplex.count < 3 );
+
+		// Support in direction d (A - B)
+		int indexA = b2FindSupport( &proxyA, d );
+		wA = proxyA.points[indexA];
+		int indexB = b2FindSupport( &proxyB, b2Neg( d ) );
+		wB = proxyB.points[indexB];
+		b2Vec2 p0 = b2Sub( wA, wB );
+
+		// d is a normal at p, normalize to work with sigma
+		b2Vec2 normal = b2Normalize( d );
+
+		// Intersect ray with plane
+		// p = origin + t * r
+		// dot(n, p - p0) = sigma
+		// dot(n, origin - p0) + t * dot(n, r) = sigma
+		// t = ( dot(n, p0) + sigma) / dot(n, r)
+		// if t < (dot(n, p0) + sigma) / dot(n, r) then t can be increased
+		// or (flipping sign because dot(n,r) < 0)
+		// dot(n, p0) + sigma < t * dot(n, r) && dot(n, r) < 0
+		float np0 = b2Dot( normal, p0 );
+		float nr = b2Dot( normal, r );
+		if ( np0 + sigma < lambda * nr )
+		{
+			if ( nr >= 0.0f )
+			{
+				// miss
+				return output;
+			}
+
+			lambda = ( np0 + sigma ) / nr;
+			if ( lambda > maxFraction )
+			{
+				// too far
+				return output;
+			}
+
+			// reset the simplex
+			simplex.count = 0;
+		}
+
+		// Shift by lambda * r because we want the closest point to the current clip point.
+		// Note that the support point p is not shifted because we want the plane equation
+		// to be formed in un-shifted space.
+		b2SimplexVertex* vertex = vertices[simplex.count];
+		vertex->indexA = indexB;
+		vertex->wA = wA;
+		vertex->indexB = indexA;
+		vertex->wB = (b2Vec2){ wB.x + lambda * r.x, wB.y + lambda * r.y };
+		vertex->w = b2Sub( vertex->wA, vertex->wB );
+		vertex->a = 1.0f;
+		simplex.count += 1;
+
+		switch ( simplex.count )
+		{
+			case 1:
+				d = b2Neg( simplex.v1.w );
+				break;
+
+			case 2:
+				d = b2SolveSimplex2( &simplex );
+				break;
+
+			case 3:
+				d = b2SolveSimplex3( &simplex );
+				break;
+
+			default:
+				B2_ASSERT( false );
+		}
+
+#ifndef NDEBUG
+		if ( debugData != NULL && output.iterations < debugCapacity )
+		{
+			debugData[output.iterations].simplex = simplex;
+			debugData[output.iterations].normal = normal;
+			debugData[output.iterations].p0 = p0;
+			b2Vec2 cA, cB;
+			b2ComputeSimplexWitnessPoints( &cA, &cB, &simplex );
+			debugData[output.iterations].closestA = cA;
+			debugData[output.iterations].closestB = cB;
+			debugData[output.iterations].fraction = lambda;
+		}
+#endif
+
+		output.iterations += 1;
+
+		// If we have 3 points, then the origin is in the corresponding triangle.
+		if ( simplex.count == 3 )
+		{
+			// Overlap
+			return output;
+		}
+
+		// Get distance vector
+		v = b2ComputeSimplexClosestPoint( &simplex );
+
+		// Iteration count is equated to the number of support point calls.
+		++iteration;
+	}
+
+	if ( iteration == 0 || lambda == 0.0f )
+	{
+		// Initial overlap
+		return output;
+	}
+
+	// Prepare output.
+	b2Vec2 pointA, pointB;
+	b2ComputeSimplexWitnessPoints( &pointB, &pointA, &simplex );
+
+	b2Vec2 n = b2Normalize( b2Neg( v ) );
+	b2Vec2 point = { pointA.x + proxyA.radius * n.x, pointA.y + proxyA.radius * n.y };
+
+	output.point = b2TransformPoint( input->transformA, point );
+	output.normal = b2RotateVector( input->transformA.q, n );
+	output.fraction = lambda;
+	output.iterations = iteration;
+	output.hit = true;
+	return output;
+}
+#endif
+
+// Optional b2TimeOfImpact statistics (B2_SNOOP_TOI_COUNTERS builds only). Warning: writing to these globals significantly slows multithreading performance
+#if B2_SNOOP_TOI_COUNTERS
+float b2_toiTime, b2_toiMaxTime; // summed and worst single-call duration as reported by b2GetMilliseconds
+int b2_toiCalls, b2_toiDistanceIterations, b2_toiMaxDistanceIterations; // call count, total outer (distance) iterations, max per call
+int b2_toiRootIterations, b2_toiMaxRootIterations; // total root finder iterations, max for a single root solve
+int b2_toiFailedCount; // results reported as b2_toiStateFailed
+int b2_toiOverlappedCount; // results reported as b2_toiStateOverlapped
+int b2_toiHitCount; // results reported as b2_toiStateHit
+int b2_toiSeparatedCount; // results reported as b2_toiStateSeparated
+#endif
+// Kind of separating axis used by b2SeparationFunction, chosen from the simplex cache in b2MakeSeparationFunction.
+typedef enum b2SeparationType
+{
+	b2_pointsType, // one cached vertex per proxy: axis runs from A's vertex to B's vertex
+	b2_faceAType, // two cached vertices on proxy A (one or two on B): axis is the normal of A's edge
+	b2_faceBType // two cached vertices on proxy B and one on A: axis is the normal of B's edge
+} b2SeparationType;
+// Separation measured along a fixed axis while two proxies move along their sweeps (helper for b2TimeOfImpact).
+typedef struct b2SeparationFunction
+{
+	const b2ShapeProxy* proxyA; // borrowed pointer, not owned
+	const b2ShapeProxy* proxyB; // borrowed pointer, not owned
+	b2Sweep sweepA, sweepB; // copies of the caller's sweeps
+	b2Vec2 localPoint; // edge midpoint in the reference proxy's local frame; zero for b2_pointsType
+	b2Vec2 axis; // world-space unit axis for b2_pointsType, otherwise a local-space edge normal
+	b2SeparationType type;
+} b2SeparationFunction;
+// Builds the separation function from a simplex cache holding 1 or 2 vertices, with both sweeps evaluated at time t1.
+static b2SeparationFunction b2MakeSeparationFunction( const b2SimplexCache* cache, const b2ShapeProxy* proxyA,
+													  const b2Sweep* sweepA, const b2ShapeProxy* proxyB, const b2Sweep* sweepB,
+													  float t1 )
+{
+	b2SeparationFunction f;
+
+	f.proxyA = proxyA;
+	f.proxyB = proxyB;
+	int count = cache->count;
+	B2_ASSERT( 0 < count && count < 3 ); // only caches with one or two vertices are handled
+
+	f.sweepA = *sweepA;
+	f.sweepB = *sweepB;
+
+	b2Transform xfA = b2GetSweepTransform( sweepA, t1 );
+	b2Transform xfB = b2GetSweepTransform( sweepB, t1 );
+
+	if ( count == 1 )
+	{
+		f.type = b2_pointsType;
+		b2Vec2 localPointA = proxyA->points[cache->indexA[0]];
+		b2Vec2 localPointB = proxyB->points[cache->indexB[0]];
+		b2Vec2 pointA = b2TransformPoint( xfA, localPointA );
+		b2Vec2 pointB = b2TransformPoint( xfB, localPointB );
+		f.axis = b2Normalize( b2Sub( pointB, pointA ) ); // world-space direction from A's vertex toward B's vertex
+		f.localPoint = b2Vec2_zero; // not read for b2_pointsType
+		return f;
+	}
+
+	if ( cache->indexA[0] == cache->indexA[1] )
+	{
+		// Two points on B and one on A.
+		f.type = b2_faceBType;
+		b2Vec2 localPointB1 = proxyB->points[cache->indexB[0]];
+		b2Vec2 localPointB2 = proxyB->points[cache->indexB[1]];
+
+		f.axis = b2CrossVS( b2Sub( localPointB2, localPointB1 ), 1.0f ); // perpendicular to B's edge, in B's local frame
+		f.axis = b2Normalize( f.axis );
+		b2Vec2 normal = b2RotateVector( xfB.q, f.axis );
+
+		f.localPoint = (b2Vec2){ 0.5f * ( localPointB1.x + localPointB2.x ), 0.5f * ( localPointB1.y + localPointB2.y ) };
+		b2Vec2 pointB = b2TransformPoint( xfB, f.localPoint );
+
+		b2Vec2 localPointA = proxyA->points[cache->indexA[0]];
+		b2Vec2 pointA = b2TransformPoint( xfA, localPointA );
+
+		float s = b2Dot( b2Sub( pointA, pointB ), normal );
+		if ( s < 0.0f )
+		{
+			f.axis = b2Neg( f.axis ); // flip so the separation at t1 is non-negative
+		}
+		return f;
+	}
+
+	// Two points on A and one or two points on B.
+	f.type = b2_faceAType;
+	b2Vec2 localPointA1 = proxyA->points[cache->indexA[0]];
+	b2Vec2 localPointA2 = proxyA->points[cache->indexA[1]];
+
+	f.axis = b2CrossVS( b2Sub( localPointA2, localPointA1 ), 1.0f ); // perpendicular to A's edge, in A's local frame
+	f.axis = b2Normalize( f.axis );
+	b2Vec2 normal = b2RotateVector( xfA.q, f.axis );
+
+	f.localPoint = (b2Vec2){ 0.5f * ( localPointA1.x + localPointA2.x ), 0.5f * ( localPointA1.y + localPointA2.y ) };
+	b2Vec2 pointA = b2TransformPoint( xfA, f.localPoint );
+
+	b2Vec2 localPointB = proxyB->points[cache->indexB[0]];
+	b2Vec2 pointB = b2TransformPoint( xfB, localPointB );
+
+	float s = b2Dot( b2Sub( pointB, pointA ), normal );
+	if ( s < 0.0f )
+	{
+		f.axis = b2Neg( f.axis ); // flip so the separation at t1 is non-negative
+	}
+	return f;
+}
+// Returns the separation along f's axis at time t using support points; writes their vertex indices (-1 on the reference face side).
+static float b2FindMinSeparation( const b2SeparationFunction* f, int* indexA, int* indexB, float t )
+{
+	b2Transform xfA = b2GetSweepTransform( &f->sweepA, t );
+	b2Transform xfB = b2GetSweepTransform( &f->sweepB, t );
+
+	switch ( f->type )
+	{
+		case b2_pointsType:
+		{
+			b2Vec2 axisA = b2InvRotateVector( xfA.q, f->axis ); // world axis expressed in A's local frame
+			b2Vec2 axisB = b2InvRotateVector( xfB.q, b2Neg( f->axis ) ); // opposite direction in B's local frame
+
+			*indexA = b2FindSupport( f->proxyA, axisA );
+			*indexB = b2FindSupport( f->proxyB, axisB );
+
+			b2Vec2 localPointA = f->proxyA->points[*indexA];
+			b2Vec2 localPointB = f->proxyB->points[*indexB];
+
+			b2Vec2 pointA = b2TransformPoint( xfA, localPointA );
+			b2Vec2 pointB = b2TransformPoint( xfB, localPointB );
+
+			float separation = b2Dot( b2Sub( pointB, pointA ), f->axis );
+			return separation;
+		}
+
+		case b2_faceAType:
+		{
+			b2Vec2 normal = b2RotateVector( xfA.q, f->axis ); // A's edge normal in world space at time t
+			b2Vec2 pointA = b2TransformPoint( xfA, f->localPoint );
+
+			b2Vec2 axisB = b2InvRotateVector( xfB.q, b2Neg( normal ) );
+
+			*indexA = -1; // the reference face has no single witness vertex
+			*indexB = b2FindSupport( f->proxyB, axisB );
+
+			b2Vec2 localPointB = f->proxyB->points[*indexB];
+			b2Vec2 pointB = b2TransformPoint( xfB, localPointB );
+
+			float separation = b2Dot( b2Sub( pointB, pointA ), normal );
+			return separation;
+		}
+
+		case b2_faceBType:
+		{
+			b2Vec2 normal = b2RotateVector( xfB.q, f->axis ); // B's edge normal in world space at time t
+			b2Vec2 pointB = b2TransformPoint( xfB, f->localPoint );
+
+			b2Vec2 axisA = b2InvRotateVector( xfA.q, b2Neg( normal ) );
+
+			*indexB = -1; // the reference face has no single witness vertex
+			*indexA = b2FindSupport( f->proxyA, axisA );
+
+			b2Vec2 localPointA = f->proxyA->points[*indexA];
+			b2Vec2 pointA = b2TransformPoint( xfA, localPointA );
+
+			float separation = b2Dot( b2Sub( pointA, pointB ), normal );
+			return separation;
+		}
+
+		default:
+			B2_ASSERT( false ); // unknown separation type
+			*indexA = -1;
+			*indexB = -1;
+			return 0.0f;
+	}
+}
+
+// Returns the separation along f's axis at time t for fixed witness vertices indexA/indexB (no support search).
+static float b2EvaluateSeparation( const b2SeparationFunction* f, int indexA, int indexB, float t )
+{
+	b2Transform xfA = b2GetSweepTransform( &f->sweepA, t );
+	b2Transform xfB = b2GetSweepTransform( &f->sweepB, t );
+
+	switch ( f->type )
+	{
+		case b2_pointsType:
+		{
+			b2Vec2 localPointA = f->proxyA->points[indexA];
+			b2Vec2 localPointB = f->proxyB->points[indexB];
+
+			b2Vec2 pointA = b2TransformPoint( xfA, localPointA );
+			b2Vec2 pointB = b2TransformPoint( xfB, localPointB );
+
+			float separation = b2Dot( b2Sub( pointB, pointA ), f->axis ); // axis is already in world space
+			return separation;
+		}
+
+		case b2_faceAType:
+		{
+			b2Vec2 normal = b2RotateVector( xfA.q, f->axis );
+			b2Vec2 pointA = b2TransformPoint( xfA, f->localPoint );
+
+			b2Vec2 localPointB = f->proxyB->points[indexB]; // indexA is not read in this case
+			b2Vec2 pointB = b2TransformPoint( xfB, localPointB );
+
+			float separation = b2Dot( b2Sub( pointB, pointA ), normal );
+			return separation;
+		}
+
+		case b2_faceBType:
+		{
+			b2Vec2 normal = b2RotateVector( xfB.q, f->axis );
+			b2Vec2 pointB = b2TransformPoint( xfB, f->localPoint );
+
+			b2Vec2 localPointA = f->proxyA->points[indexA]; // indexB is not read in this case
+			b2Vec2 pointA = b2TransformPoint( xfA, localPointA );
+
+			float separation = b2Dot( b2Sub( pointA, pointB ), normal );
+			return separation;
+		}
+
+		default:
+			B2_ASSERT( false ); // unknown separation type
+			return 0.0f;
+	}
+}
+
+// CCD via the local separating axis method. This seeks progression by computing the largest time at which separation is maintained.
+// Output: overlapped (fraction 0), hit or failed (fraction t1, the last time reached), or separated (fraction input->maxFraction).
+b2TOIOutput b2TimeOfImpact( const b2TOIInput* input )
+{
+#if B2_SNOOP_TOI_COUNTERS
+	uint64_t ticks = b2GetTicks();
+	++b2_toiCalls;
+#endif
+
+	b2TOIOutput output;
+	output.state = b2_toiStateUnknown;
+	output.fraction = input->maxFraction;
+
+	b2Sweep sweepA = input->sweepA;
+	b2Sweep sweepB = input->sweepB;
+	B2_ASSERT( b2IsNormalizedRot( sweepA.q1 ) && b2IsNormalizedRot( sweepA.q2 ) );
+	B2_ASSERT( b2IsNormalizedRot( sweepB.q1 ) && b2IsNormalizedRot( sweepB.q2 ) );
+
+	// todo_erin
+	// c1 can be at the origin yet the points are far away
+	// b2Vec2 origin = b2Add(sweepA.c1, input->proxyA.points[0]);
+
+	const b2ShapeProxy* proxyA = &input->proxyA;
+	const b2ShapeProxy* proxyB = &input->proxyB;
+
+	float tMax = input->maxFraction;
+
+	float totalRadius = proxyA->radius + proxyB->radius;
+	// todo_erin consider different target
+	// float target = b2MaxFloat( B2_LINEAR_SLOP, totalRadius );
+	float target = b2MaxFloat( B2_LINEAR_SLOP, totalRadius - B2_LINEAR_SLOP ); // separation the root finder aims for
+	float tolerance = 0.25f * B2_LINEAR_SLOP;
+	B2_ASSERT( target > tolerance );
+
+	float t1 = 0.0f;
+	const int k_maxIterations = 20; // cap on outer (distance query) iterations
+	int distanceIterations = 0;
+
+	// Prepare input for distance query.
+	b2SimplexCache cache = { 0 };
+	b2DistanceInput distanceInput;
+	distanceInput.proxyA = input->proxyA;
+	distanceInput.proxyB = input->proxyB;
+	distanceInput.useRadii = false; // the radii enter through target (totalRadius) instead
+
+	// The outer loop progressively attempts to compute new separating axes.
+	// This loop terminates when an axis is repeated (no progress is made).
+	for ( ;; )
+	{
+		b2Transform xfA = b2GetSweepTransform( &sweepA, t1 );
+		b2Transform xfB = b2GetSweepTransform( &sweepB, t1 );
+
+		// Get the distance between shapes. We can also use the results
+		// to get a separating axis.
+		distanceInput.transformA = xfA;
+		distanceInput.transformB = xfB;
+		b2DistanceOutput distanceOutput = b2ShapeDistance( &distanceInput, &cache, NULL, 0 );
+
+		// Progressive time of impact. This handles slender geometry well but introduces
+		// significant time loss.
+		// if (distanceIterations == 0)
+		//{
+		//	if ( distanceOutput.distance > totalRadius + B2_SPECULATIVE_DISTANCE )
+		//	{
+		//		target = totalRadius + B2_SPECULATIVE_DISTANCE - tolerance;
+		//	}
+		//	else
+		//	{
+		//		target = distanceOutput.distance - 1.5f * tolerance;
+		//		target = b2MaxFloat( target, 2.0f * tolerance );
+		//	}
+		//}
+
+		distanceIterations += 1;
+#if B2_SNOOP_TOI_COUNTERS
+		b2_toiDistanceIterations += 1;
+#endif
+
+		// If the shapes are overlapped, we give up on continuous collision.
+		if ( distanceOutput.distance <= 0.0f )
+		{
+			// Failure!
+			output.state = b2_toiStateOverlapped;
+#if B2_SNOOP_TOI_COUNTERS
+			b2_toiOverlappedCount += 1;
+#endif
+			output.fraction = 0.0f;
+			break;
+		}
+
+		if ( distanceOutput.distance <= target + tolerance )
+		{
+			// Victory!
+			output.state = b2_toiStateHit;
+#if B2_SNOOP_TOI_COUNTERS
+			b2_toiHitCount += 1;
+#endif
+			output.fraction = t1;
+			break;
+		}
+
+		// Initialize the separating axis.
+		b2SeparationFunction fcn = b2MakeSeparationFunction( &cache, proxyA, &sweepA, proxyB, &sweepB, t1 );
+#if 0
+		// Dump the curve seen by the root finder
+		{
+			const int N = 100;
+			float dx = 1.0f / N;
+			float xs[N + 1];
+			float fs[N + 1];
+
+			float x = 0.0f;
+
+			for (int i = 0; i <= N; ++i)
+			{
+				sweepA.GetTransform(&xfA, x);
+				sweepB.GetTransform(&xfB, x);
+				float f = fcn.Evaluate(xfA, xfB) - target;
+
+				printf("%g %g\n", x, f);
+
+				xs[i] = x;
+				fs[i] = f;
+
+				x += dx;
+			}
+		}
+#endif
+
+		// Compute the TOI on the separating axis. We do this by successively
+		// resolving the deepest point. This loop is bounded by the number of vertices.
+		bool done = false;
+		float t2 = tMax;
+		int pushBackIterations = 0;
+		for ( ;; )
+		{
+			// Find the deepest point at t2. Store the witness point indices.
+			int indexA, indexB;
+			float s2 = b2FindMinSeparation( &fcn, &indexA, &indexB, t2 );
+
+			// Is the final configuration separated?
+			if ( s2 > target + tolerance )
+			{
+				// Victory!
+				output.state = b2_toiStateSeparated;
+#if B2_SNOOP_TOI_COUNTERS
+				b2_toiSeparatedCount += 1;
+#endif
+				output.fraction = tMax;
+				done = true;
+				break;
+			}
+
+			// Has the separation reached tolerance?
+			if ( s2 > target - tolerance )
+			{
+				// Advance the sweeps
+				t1 = t2;
+				break;
+			}
+
+			// Compute the initial separation of the witness points.
+			float s1 = b2EvaluateSeparation( &fcn, indexA, indexB, t1 );
+
+			// Check for initial overlap. This might happen if the root finder
+			// runs out of iterations.
+			if ( s1 < target - tolerance )
+			{
+				output.state = b2_toiStateFailed;
+#if B2_SNOOP_TOI_COUNTERS
+				b2_toiFailedCount += 1;
+#endif
+				output.fraction = t1;
+				done = true;
+				break;
+			}
+
+			// Check for touching
+			if ( s1 <= target + tolerance )
+			{
+				// Victory! t1 should hold the TOI (could be 0.0).
+				output.state = b2_toiStateHit;
+#if B2_SNOOP_TOI_COUNTERS
+				b2_toiHitCount += 1;
+#endif
+				output.fraction = t1;
+				done = true;
+				break;
+			}
+
+			// Compute 1D root of: f(x) - target = 0
+			int rootIterationCount = 0;
+			float a1 = t1, a2 = t2; // bracket [a1, a2] with s1 above target and s2 below it
+			for ( ;; )
+			{
+				// Use a mix of the secant rule and bisection.
+				float t;
+				if ( rootIterationCount & 1 )
+				{
+					// Secant rule to improve convergence.
+					t = a1 + ( target - s1 ) * ( a2 - a1 ) / ( s2 - s1 );
+				}
+				else
+				{
+					// Bisection to guarantee progress.
+					t = 0.5f * ( a1 + a2 );
+				}
+
+				rootIterationCount += 1;
+
+#if B2_SNOOP_TOI_COUNTERS
+				++b2_toiRootIterations;
+#endif
+
+				float s = b2EvaluateSeparation( &fcn, indexA, indexB, t );
+
+				if ( b2AbsFloat( s - target ) < tolerance )
+				{
+					// t2 holds a tentative value for t1
+					t2 = t;
+					break;
+				}
+
+				// Ensure we continue to bracket the root.
+				if ( s > target )
+				{
+					a1 = t;
+					s1 = s;
+				}
+				else
+				{
+					a2 = t;
+					s2 = s;
+				}
+
+				if ( rootIterationCount == 50 ) // hard cap on root finder iterations; t2 is left unchanged
+				{
+					break;
+				}
+			}
+
+#if B2_SNOOP_TOI_COUNTERS
+			b2_toiMaxRootIterations = b2MaxInt( b2_toiMaxRootIterations, rootIterationCount );
+#endif
+
+			pushBackIterations += 1;
+
+			if ( pushBackIterations == B2_MAX_POLYGON_VERTICES ) // bound on deepest-point resolutions per axis
+			{
+				break;
+			}
+		}
+
+		if ( done )
+		{
+			break;
+		}
+
+		if ( distanceIterations == k_maxIterations )
+		{
+			// Root finder got stuck. Semi-victory.
+			output.state = b2_toiStateFailed;
+#if B2_SNOOP_TOI_COUNTERS
+			b2_toiFailedCount += 1;
+#endif
+			output.fraction = t1;
+			break;
+		}
+	}
+
+#if B2_SNOOP_TOI_COUNTERS
+	b2_toiMaxDistanceIterations = b2MaxInt( b2_toiMaxDistanceIterations, distanceIterations );
+
+	float time = b2GetMilliseconds( ticks );
+	b2_toiMaxTime = b2MaxFloat( b2_toiMaxTime, time );
+	b2_toiTime += time;
+#endif
+
+	return output;
+}
diff --git a/src/vendor/box2d/distance_joint.c b/src/vendor/box2d/distance_joint.c
new file mode 100644
index 0000000..016e8ac
--- /dev/null
+++ b/src/vendor/box2d/distance_joint.c
@@ -0,0 +1,556 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#if defined( _MSC_VER ) && !defined( _CRT_SECURE_NO_WARNINGS )
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include "body.h"
+#include "core.h"
+#include "joint.h"
+#include "solver.h"
+#include "solver_set.h"
+#include "world.h"
+
+// needed for dll export
+#include "box2d/box2d.h"
+
+#include <stdio.h>
+// Sets the rest length, clamped to [B2_LINEAR_SLOP, B2_HUGE], and clears the accumulated spring and limit impulses.
+void b2DistanceJoint_SetLength( b2JointId jointId, float length )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	b2DistanceJoint* joint = &base->distanceJoint;
+
+	joint->length = b2ClampFloat( length, B2_LINEAR_SLOP, B2_HUGE );
+	joint->impulse = 0.0f; // motorImpulse is left untouched
+	joint->lowerImpulse = 0.0f;
+	joint->upperImpulse = 0.0f;
+}
+// Returns the stored rest length of the distance joint.
+float b2DistanceJoint_GetLength( b2JointId jointId )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	b2DistanceJoint* joint = &base->distanceJoint;
+	return joint->length;
+}
+// Stores the enableLimit flag that b2SolveDistanceJoint reads to decide whether min/max length limits apply.
+void b2DistanceJoint_EnableLimit( b2JointId jointId, bool enableLimit )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	b2DistanceJoint* joint = &base->distanceJoint;
+	joint->enableLimit = enableLimit;
+}
+// Returns the joint's enableLimit flag.
+bool b2DistanceJoint_IsLimitEnabled( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	return joint->distanceJoint.enableLimit;
+}
+// Sets the length limits; each bound is clamped to [B2_LINEAR_SLOP, B2_HUGE] and the pair is ordered, so swapped arguments are accepted.
+void b2DistanceJoint_SetLengthRange( b2JointId jointId, float minLength, float maxLength )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	b2DistanceJoint* joint = &base->distanceJoint;
+
+	minLength = b2ClampFloat( minLength, B2_LINEAR_SLOP, B2_HUGE );
+	maxLength = b2ClampFloat( maxLength, B2_LINEAR_SLOP, B2_HUGE );
+	joint->minLength = b2MinFloat( minLength, maxLength ); // smaller value becomes the lower bound
+	joint->maxLength = b2MaxFloat( minLength, maxLength ); // larger value becomes the upper bound
+	joint->impulse = 0.0f; // accumulated spring and limit impulses are cleared; motorImpulse is left untouched
+	joint->lowerImpulse = 0.0f;
+	joint->upperImpulse = 0.0f;
+}
+// Returns the stored lower length limit.
+float b2DistanceJoint_GetMinLength( b2JointId jointId )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	b2DistanceJoint* joint = &base->distanceJoint;
+	return joint->minLength;
+}
+// Returns the stored upper length limit.
+float b2DistanceJoint_GetMaxLength( b2JointId jointId )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	b2DistanceJoint* joint = &base->distanceJoint;
+	return joint->maxLength;
+}
+// Returns the current world-space distance between the two joint anchors, or 0 if the world is locked.
+float b2DistanceJoint_GetCurrentLength( b2JointId jointId )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+
+	b2World* world = b2GetWorld( jointId.world0 );
+	B2_ASSERT( world->locked == false );
+	if ( world->locked ) // presumably the fallback for builds where B2_ASSERT compiles to nothing
+	{
+		return 0.0f;
+	}
+
+	b2Transform transformA = b2GetBodyTransform( world, base->bodyIdA );
+	b2Transform transformB = b2GetBodyTransform( world, base->bodyIdB );
+
+	b2Vec2 pA = b2TransformPoint( transformA, base->localOriginAnchorA ); // anchor A in world space
+	b2Vec2 pB = b2TransformPoint( transformB, base->localOriginAnchorB ); // anchor B in world space
+	b2Vec2 d = b2Sub( pB, pA );
+	float length = b2Length( d );
+	return length;
+}
+// Stores the enableSpring flag; b2SolveDistanceJoint uses it when choosing between the soft and rigid paths.
+void b2DistanceJoint_EnableSpring( b2JointId jointId, bool enableSpring )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	base->distanceJoint.enableSpring = enableSpring;
+}
+// Returns the joint's enableSpring flag.
+bool b2DistanceJoint_IsSpringEnabled( b2JointId jointId )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	return base->distanceJoint.enableSpring;
+}
+// Stores the spring frequency as given (no validation); it feeds b2MakeSoft in b2PrepareDistanceJoint.
+void b2DistanceJoint_SetSpringHertz( b2JointId jointId, float hertz )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	base->distanceJoint.hertz = hertz;
+}
+// Stores the spring damping ratio as given (no validation); it feeds b2MakeSoft in b2PrepareDistanceJoint.
+void b2DistanceJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	base->distanceJoint.dampingRatio = dampingRatio;
+}
+// Returns the stored spring frequency.
+float b2DistanceJoint_GetSpringHertz( b2JointId jointId )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	b2DistanceJoint* joint = &base->distanceJoint;
+	return joint->hertz;
+}
+// Returns the stored spring damping ratio.
+float b2DistanceJoint_GetSpringDampingRatio( b2JointId jointId )
+{
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	b2DistanceJoint* joint = &base->distanceJoint;
+	return joint->dampingRatio;
+}
+// Enables or disables the motor; the accumulated motor impulse is cleared only when the flag actually changes.
+void b2DistanceJoint_EnableMotor( b2JointId jointId, bool enableMotor )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	if ( enableMotor != joint->distanceJoint.enableMotor )
+	{
+		joint->distanceJoint.enableMotor = enableMotor;
+		joint->distanceJoint.motorImpulse = 0.0f; // drop the impulse accumulated under the previous setting
+	}
+}
+// Returns the joint's enableMotor flag.
+bool b2DistanceJoint_IsMotorEnabled( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	return joint->distanceJoint.enableMotor;
+}
+// Stores the target motor speed; the solver drives the relative velocity along the joint axis toward it.
+void b2DistanceJoint_SetMotorSpeed( b2JointId jointId, float motorSpeed )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	joint->distanceJoint.motorSpeed = motorSpeed;
+}
+// Returns the stored target motor speed.
+float b2DistanceJoint_GetMotorSpeed( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	return joint->distanceJoint.motorSpeed;
+}
+// Returns the accumulated motor impulse multiplied by world->inv_h (presumably the inverse step duration, giving a force).
+float b2DistanceJoint_GetMotorForce( b2JointId jointId )
+{
+	b2World* world = b2GetWorld( jointId.world0 );
+	b2JointSim* base = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	return world->inv_h * base->distanceJoint.motorImpulse;
+}
+// Stores the motor force limit; the solver clamps the motor impulse to +/- context->h * maxMotorForce.
+void b2DistanceJoint_SetMaxMotorForce( b2JointId jointId, float force )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	joint->distanceJoint.maxMotorForce = force;
+}
+// Returns the stored motor force limit.
+float b2DistanceJoint_GetMaxMotorForce( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_distanceJoint );
+	return joint->distanceJoint.maxMotorForce;
+}
+// Returns the joint's net accumulated impulse scaled by world->inv_h, as a vector along the current anchor-to-anchor axis.
+b2Vec2 b2GetDistanceJointForce( b2World* world, b2JointSim* base )
+{
+	b2DistanceJoint* joint = &base->distanceJoint;
+
+	b2Transform transformA = b2GetBodyTransform( world, base->bodyIdA );
+	b2Transform transformB = b2GetBodyTransform( world, base->bodyIdB );
+
+	b2Vec2 pA = b2TransformPoint( transformA, base->localOriginAnchorA );
+	b2Vec2 pB = b2TransformPoint( transformB, base->localOriginAnchorB );
+	b2Vec2 d = b2Sub( pB, pA );
+	b2Vec2 axis = b2Normalize( d ); // unit direction from anchor A to anchor B
+	float force = ( joint->impulse + joint->lowerImpulse - joint->upperImpulse + joint->motorImpulse ) * world->inv_h; // upper limit impulse acts in the opposite direction
+	return b2MulSV( force, axis );
+}
+
+// 1-D constrained system
+// m (v2 - v1) = lambda
+// v2 + (beta/h) * x1 + gamma * lambda = 0, gamma has units of inverse mass.
+// x2 = x1 + h * v2
+
+// 1-D mass-damper-spring system
+// m (v2 - v1) + h * d * v2 + h * k * x2 = 0
+
+// C = norm(p2 - p1) - L
+// u = (p2 - p1) / norm(p2 - p1)
+// Cdot = dot(u, v2 + cross(w2, r2) - v1 - cross(w1, r1))
+// J = [-u -cross(r1, u) u cross(r2, u)]
+// K = J * invM * JT
+//   = invMass1 + invI1 * cross(r1, u)^2 + invMass2 + invI2 * cross(r2, u)^2
+// Caches per-step solver data for a distance joint: inverse masses, state indices, anchors, axial effective mass and spring softness.
+void b2PrepareDistanceJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_distanceJoint );
+
+	// chase body id to the solver set where the body lives
+	int idA = base->bodyIdA;
+	int idB = base->bodyIdB;
+
+	b2World* world = context->world;
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, idA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, idB );
+
+	B2_ASSERT( bodyA->setIndex == b2_awakeSet || bodyB->setIndex == b2_awakeSet ); // at least one body must be awake
+
+	b2SolverSet* setA = b2SolverSetArray_Get( &world->solverSets, bodyA->setIndex );
+	b2SolverSet* setB = b2SolverSetArray_Get( &world->solverSets, bodyB->setIndex );
+
+	int localIndexA = bodyA->localIndex;
+	int localIndexB = bodyB->localIndex;
+
+	b2BodySim* bodySimA = b2BodySimArray_Get( &setA->bodySims, localIndexA );
+	b2BodySim* bodySimB = b2BodySimArray_Get( &setB->bodySims, localIndexB );
+
+	float mA = bodySimA->invMass;
+	float iA = bodySimA->invInertia;
+	float mB = bodySimB->invMass;
+	float iB = bodySimB->invInertia;
+
+	base->invMassA = mA;
+	base->invMassB = mB;
+	base->invIA = iA;
+	base->invIB = iB;
+
+	b2DistanceJoint* joint = &base->distanceJoint;
+
+	joint->indexA = bodyA->setIndex == b2_awakeSet ? localIndexA : B2_NULL_INDEX; // B2_NULL_INDEX makes the solver use a dummy state
+	joint->indexB = bodyB->setIndex == b2_awakeSet ? localIndexB : B2_NULL_INDEX;
+
+	// initial anchors in world space
+	joint->anchorA = b2RotateVector( bodySimA->transform.q, b2Sub( base->localOriginAnchorA, bodySimA->localCenter ) );
+	joint->anchorB = b2RotateVector( bodySimB->transform.q, b2Sub( base->localOriginAnchorB, bodySimB->localCenter ) );
+	joint->deltaCenter = b2Sub( bodySimB->center, bodySimA->center );
+
+	b2Vec2 rA = joint->anchorA;
+	b2Vec2 rB = joint->anchorB;
+	b2Vec2 separation = b2Add( b2Sub( rB, rA ), joint->deltaCenter ); // vector from anchor A to anchor B
+	b2Vec2 axis = b2Normalize( separation );
+
+	// compute effective mass
+	float crA = b2Cross( rA, axis );
+	float crB = b2Cross( rB, axis );
+	float k = mA + mB + iA * crA * crA + iB * crB * crB;
+	joint->axialMass = k > 0.0f ? 1.0f / k : 0.0f; // zero when k is not positive, avoiding a division by zero
+
+	joint->distanceSoftness = b2MakeSoft( joint->hertz, joint->dampingRatio, context->h );
+
+	if ( context->enableWarmStarting == false )
+	{
+		joint->impulse = 0.0f; // without warm starting every accumulated impulse starts from zero
+		joint->lowerImpulse = 0.0f;
+		joint->upperImpulse = 0.0f;
+		joint->motorImpulse = 0.0f;
+	}
+}
+// Applies the joint's accumulated axial impulse (impulse + lower - upper + motor) to both body states along the current axis.
+void b2WarmStartDistanceJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_distanceJoint );
+
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2DistanceJoint* joint = &base->distanceJoint;
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA ); // anchors rotated by each state's deltaRotation
+	b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+	b2Vec2 ds = b2Add( b2Sub( stateB->deltaPosition, stateA->deltaPosition ), b2Sub( rB, rA ) );
+	b2Vec2 separation = b2Add( joint->deltaCenter, ds ); // current vector from anchor A to anchor B
+	b2Vec2 axis = b2Normalize( separation );
+
+	float axialImpulse = joint->impulse + joint->lowerImpulse - joint->upperImpulse + joint->motorImpulse;
+	b2Vec2 P = b2MulSV( axialImpulse, axis );
+
+	stateA->linearVelocity = b2MulSub( stateA->linearVelocity, mA, P ); // body A receives -P
+	stateA->angularVelocity -= iA * b2Cross( rA, P );
+	stateB->linearVelocity = b2MulAdd( stateB->linearVelocity, mB, P ); // body B receives +P
+	stateB->angularVelocity += iB * b2Cross( rB, P );
+}
+// Runs one velocity iteration for a distance joint: spring + limits + motor when soft, otherwise a rigid length constraint.
+void b2SolveDistanceJoint( b2JointSim* base, b2StepContext* context, bool useBias )
+{
+	B2_ASSERT( base->type == b2_distanceJoint );
+
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2DistanceJoint* joint = &base->distanceJoint;
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 vA = stateA->linearVelocity;
+	float wA = stateA->angularVelocity;
+	b2Vec2 vB = stateB->linearVelocity;
+	float wB = stateB->angularVelocity;
+
+	// current anchors
+	b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA );
+	b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+	// current separation
+	b2Vec2 ds = b2Add( b2Sub( stateB->deltaPosition, stateA->deltaPosition ), b2Sub( rB, rA ) );
+	b2Vec2 separation = b2Add( joint->deltaCenter, ds );
+
+	float length = b2Length( separation );
+	b2Vec2 axis = b2Normalize( separation );
+
+	// joint is soft if
+	// - spring is enabled
+	// - and (joint limit is disabled or limits are not equal)
+	if ( joint->enableSpring && ( joint->minLength < joint->maxLength || joint->enableLimit == false ) )
+	{
+		// spring
+		if ( joint->hertz > 0.0f )
+		{
+			// Cdot = dot(u, v + cross(w, r))
+			b2Vec2 vr = b2Add( b2Sub( vB, vA ), b2Sub( b2CrossSV( wB, rB ), b2CrossSV( wA, rA ) ) );
+			float Cdot = b2Dot( axis, vr );
+			float C = length - joint->length;
+			float bias = joint->distanceSoftness.biasRate * C;
+
+			float m = joint->distanceSoftness.massScale * joint->axialMass;
+			float impulse = -m * ( Cdot + bias ) - joint->distanceSoftness.impulseScale * joint->impulse;
+			joint->impulse += impulse;
+
+			b2Vec2 P = b2MulSV( impulse, axis );
+			vA = b2MulSub( vA, mA, P );
+			wA -= iA * b2Cross( rA, P );
+			vB = b2MulAdd( vB, mB, P );
+			wB += iB * b2Cross( rB, P );
+		}
+
+		if ( joint->enableLimit )
+		{
+			// lower limit
+			{
+				b2Vec2 vr = b2Add( b2Sub( vB, vA ), b2Sub( b2CrossSV( wB, rB ), b2CrossSV( wA, rA ) ) );
+				float Cdot = b2Dot( axis, vr );
+
+				float C = length - joint->minLength; // positive while the joint is longer than minLength
+
+				float bias = 0.0f;
+				float massCoeff = 1.0f;
+				float impulseCoeff = 0.0f;
+				if ( C > 0.0f )
+				{
+					// speculative
+					bias = C * context->inv_h;
+				}
+				else if ( useBias )
+				{
+					bias = context->jointSoftness.biasRate * C;
+					massCoeff = context->jointSoftness.massScale;
+					impulseCoeff = context->jointSoftness.impulseScale;
+				}
+
+				float impulse = -massCoeff * joint->axialMass * ( Cdot + bias ) - impulseCoeff * joint->lowerImpulse;
+				float newImpulse = b2MaxFloat( 0.0f, joint->lowerImpulse + impulse ); // accumulated limit impulse may only push
+				impulse = newImpulse - joint->lowerImpulse;
+				joint->lowerImpulse = newImpulse;
+
+				b2Vec2 P = b2MulSV( impulse, axis );
+				vA = b2MulSub( vA, mA, P );
+				wA -= iA * b2Cross( rA, P );
+				vB = b2MulAdd( vB, mB, P );
+				wB += iB * b2Cross( rB, P );
+			}
+
+			// upper
+			{
+				b2Vec2 vr = b2Add( b2Sub( vA, vB ), b2Sub( b2CrossSV( wA, rA ), b2CrossSV( wB, rB ) ) ); // A relative to B, sign mirrored versus the lower limit
+				float Cdot = b2Dot( axis, vr );
+
+				float C = joint->maxLength - length; // positive while the joint is shorter than maxLength
+
+				float bias = 0.0f;
+				float massScale = 1.0f;
+				float impulseScale = 0.0f;
+				if ( C > 0.0f )
+				{
+					// speculative
+					bias = C * context->inv_h;
+				}
+				else if ( useBias )
+				{
+					bias = context->jointSoftness.biasRate * C;
+					massScale = context->jointSoftness.massScale;
+					impulseScale = context->jointSoftness.impulseScale;
+				}
+
+				float impulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * joint->upperImpulse;
+				float newImpulse = b2MaxFloat( 0.0f, joint->upperImpulse + impulse ); // accumulated limit impulse may only pull
+				impulse = newImpulse - joint->upperImpulse;
+				joint->upperImpulse = newImpulse;
+
+				b2Vec2 P = b2MulSV( -impulse, axis ); // negated because the upper limit acts against the axis
+				vA = b2MulSub( vA, mA, P );
+				wA -= iA * b2Cross( rA, P );
+				vB = b2MulAdd( vB, mB, P );
+				wB += iB * b2Cross( rB, P );
+			}
+		}
+
+		if ( joint->enableMotor )
+		{
+			b2Vec2 vr = b2Add( b2Sub( vB, vA ), b2Sub( b2CrossSV( wB, rB ), b2CrossSV( wA, rA ) ) );
+			float Cdot = b2Dot( axis, vr );
+			float impulse = joint->axialMass * ( joint->motorSpeed - Cdot );
+			float oldImpulse = joint->motorImpulse;
+			float maxImpulse = context->h * joint->maxMotorForce; // force limit converted to an impulse limit for this step
+			joint->motorImpulse = b2ClampFloat( joint->motorImpulse + impulse, -maxImpulse, maxImpulse );
+			impulse = joint->motorImpulse - oldImpulse;
+
+			b2Vec2 P = b2MulSV( impulse, axis );
+			vA = b2MulSub( vA, mA, P );
+			wA -= iA * b2Cross( rA, P );
+			vB = b2MulAdd( vB, mB, P );
+			wB += iB * b2Cross( rB, P );
+		}
+	}
+	else
+	{
+		// rigid constraint
+		b2Vec2 vr = b2Add( b2Sub( vB, vA ), b2Sub( b2CrossSV( wB, rB ), b2CrossSV( wA, rA ) ) );
+		float Cdot = b2Dot( axis, vr );
+
+		float C = length - joint->length;
+
+		float bias = 0.0f;
+		float massScale = 1.0f;
+		float impulseScale = 0.0f;
+		if ( useBias )
+		{
+			bias = context->jointSoftness.biasRate * C;
+			massScale = context->jointSoftness.massScale;
+			impulseScale = context->jointSoftness.impulseScale;
+		}
+
+		float impulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * joint->impulse;
+		joint->impulse += impulse;
+
+		b2Vec2 P = b2MulSV( impulse, axis );
+		vA = b2MulSub( vA, mA, P );
+		wA -= iA * b2Cross( rA, P );
+		vB = b2MulAdd( vB, mB, P );
+		wB += iB * b2Cross( rB, P );
+	}
+
+	stateA->linearVelocity = vA; // write the updated velocities back (into the dummy state for B2_NULL_INDEX bodies)
+	stateA->angularVelocity = wA;
+	stateB->linearVelocity = vB;
+	stateB->angularVelocity = wB;
+}
+
+#if 0 // disabled: C++ member-function syntax (b2DistanceJoint::Dump, m_ fields) that is never compiled in this C file
+void b2DistanceJoint::Dump()
+{
+	int32 indexA = m_bodyA->m_islandIndex;
+	int32 indexB = m_bodyB->m_islandIndex;
+
+	b2Dump("  b2DistanceJointDef jd;\n");
+	b2Dump("  jd.bodyA = sims[%d];\n", indexA);
+	b2Dump("  jd.bodyB = sims[%d];\n", indexB);
+	b2Dump("  jd.collideConnected = bool(%d);\n", m_collideConnected);
+	b2Dump("  jd.localAnchorA.Set(%.9g, %.9g);\n", m_localAnchorA.x, m_localAnchorA.y);
+	b2Dump("  jd.localAnchorB.Set(%.9g, %.9g);\n", m_localAnchorB.x, m_localAnchorB.y);
+	b2Dump("  jd.length = %.9g;\n", m_length);
+	b2Dump("  jd.minLength = %.9g;\n", m_minLength);
+	b2Dump("  jd.maxLength = %.9g;\n", m_maxLength);
+	b2Dump("  jd.stiffness = %.9g;\n", m_stiffness);
+	b2Dump("  jd.damping = %.9g;\n", m_damping);
+	b2Dump("  joints[%d] = m_world->CreateJoint(&jd);\n", m_index);
+}
+#endif
+// Debug-draws a distance joint: the anchor segment and points, limit markers when limits are active, and the spring rest point.
+void b2DrawDistanceJoint( b2DebugDraw* draw, b2JointSim* base, b2Transform transformA, b2Transform transformB )
+{
+	B2_ASSERT( base->type == b2_distanceJoint );
+
+	b2DistanceJoint* joint = &base->distanceJoint;
+
+	b2Vec2 pA = b2TransformPoint( transformA, base->localOriginAnchorA );
+	b2Vec2 pB = b2TransformPoint( transformB, base->localOriginAnchorB );
+
+	b2Vec2 axis = b2Normalize( b2Sub( pB, pA ) );
+
+	if ( joint->minLength < joint->maxLength && joint->enableLimit )
+	{
+		b2Vec2 pMin = b2MulAdd( pA, joint->minLength, axis ); // points on the axis at the limit distances from anchor A
+		b2Vec2 pMax = b2MulAdd( pA, joint->maxLength, axis );
+		b2Vec2 offset = b2MulSV( 0.05f * b2_lengthUnitsPerMeter, b2RightPerp( axis ) ); // half-length of the perpendicular tick marks
+
+		if ( joint->minLength > B2_LINEAR_SLOP ) // skip the marker when minLength is at its clamped minimum
+		{
+			// draw->DrawPoint(pMin, 4.0f, c2, draw->context);
+			draw->DrawSegmentFcn( b2Sub( pMin, offset ), b2Add( pMin, offset ), b2_colorLightGreen, draw->context );
+		}
+
+		if ( joint->maxLength < B2_HUGE ) // skip the marker when maxLength is at its clamped maximum
+		{
+			// draw->DrawPoint(pMax, 4.0f, c3, draw->context);
+			draw->DrawSegmentFcn( b2Sub( pMax, offset ), b2Add( pMax, offset ), b2_colorRed, draw->context );
+		}
+
+		if ( joint->minLength > B2_LINEAR_SLOP && joint->maxLength < B2_HUGE )
+		{
+			draw->DrawSegmentFcn( pMin, pMax, b2_colorGray, draw->context ); // allowed range between the two limits
+		}
+	}
+
+	draw->DrawSegmentFcn( pA, pB, b2_colorWhite, draw->context );
+	draw->DrawPointFcn( pA, 4.0f, b2_colorWhite, draw->context );
+	draw->DrawPointFcn( pB, 4.0f, b2_colorWhite, draw->context );
+
+	if ( joint->hertz > 0.0f && joint->enableSpring )
+	{
+		b2Vec2 pRest = b2MulAdd( pA, joint->length, axis ); // where anchor B would sit at the rest length
+		draw->DrawPointFcn( pRest, 4.0f, b2_colorBlue, draw->context );
+	}
+}
diff --git a/src/vendor/box2d/dynamic_tree.c b/src/vendor/box2d/dynamic_tree.c
new file mode 100644
index 0000000..03f9325
--- /dev/null
+++ b/src/vendor/box2d/dynamic_tree.c
@@ -0,0 +1,1989 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "aabb.h"
+#include "constants.h"
+#include "core.h"
+
+#include "box2d/collision.h"
+#include "box2d/math_functions.h"
+
+#include <float.h>
+#include <string.h>
+
+#define B2_TREE_STACK_SIZE 1024
+
+// todo externalize this to visualize internal nodes and speed up FindPairs
+
+// A node in the dynamic tree. Leaves and internal nodes share this layout; the first union is read per node kind.
+typedef struct b2TreeNode
+{
+	// The node bounding box
+	b2AABB aabb; // 16 bytes
+
+	// Category bits for collision filtering
+	uint64_t categoryBits; // 8 bytes
+
+	union
+	{
+		// Children (internal node)
+		struct
+		{
+			int32_t child1, child2;
+		} children;
+
+		// User data (leaf node). Shares storage with the child indices above.
+		uint64_t userData;
+	}; // 8 bytes
+
+	union
+	{
+		// The node parent index (allocated node)
+		int32_t parent;
+
+		// The node freelist next index (free node)
+		int32_t next;
+	}; // 4 bytes
+
+	uint16_t height; // 2 bytes; proxies (leaves) are created with height 0
+	uint16_t flags;	 // 2 bytes; bit flags such as b2_allocatedNode, b2_leafNode and b2_enlargedNode
+} b2TreeNode;
+
+// Template copied into every node handed out by b2AllocateNode.
+// It describes an allocated node with an all-zero box, no children, no parent and zero height.
+static b2TreeNode b2_defaultTreeNode = {
+	.aabb = { .lowerBound = { .x = 0.0f, .y = 0.0f }, .upperBound = { .x = 0.0f, .y = 0.0f } },
+	.categoryBits = B2_DEFAULT_CATEGORY_BITS,
+	// Both child slots start empty; a proxy later overwrites this storage with its user data.
+	.children = { .child1 = B2_NULL_INDEX, .child2 = B2_NULL_INDEX },
+	.parent = B2_NULL_INDEX,
+	.height = 0,
+	// Only the allocated bit is set here; b2DynamicTree_CreateProxy adds b2_leafNode.
+	.flags = b2_allocatedNode,
+};
+
+static bool b2IsLeaf( const b2TreeNode* node )
+{
+	return ( node->flags & b2_leafNode ) != 0; // true when the leaf flag is set
+}
+
+static bool b2IsAllocated( const b2TreeNode* node )
+{
+	return ( node->flags & b2_allocatedNode ) != 0; // b2FreeNode zeroes flags, so free slots report false
+}
+
+static uint16_t b2MaxUInt16( uint16_t a, uint16_t b )
+{
+	return ( b < a ) ? a : b; // larger of the two; b when they are equal
+}
+
+b2DynamicTree b2DynamicTree_Create( void )
+{
+	// Returns an empty tree that owns a zeroed pool of 16 nodes, all of them on the free list.
+	b2DynamicTree tree;
+	tree.root = B2_NULL_INDEX;
+	tree.nodeCount = 0;
+	tree.proxyCount = 0;
+
+	tree.nodeCapacity = 16;
+	tree.nodes = (b2TreeNode*)b2Alloc( tree.nodeCapacity * sizeof( b2TreeNode ) );
+	memset( tree.nodes, 0, tree.nodeCapacity * sizeof( b2TreeNode ) );
+
+	// Chain the slots together: slot i links to slot i + 1 and the final slot ends the list.
+	int lastSlot = tree.nodeCapacity - 1;
+	for ( int slot = 0; slot < lastSlot; ++slot )
+	{
+		tree.nodes[slot].next = slot + 1;
+	}
+	tree.nodes[lastSlot].next = B2_NULL_INDEX;
+	tree.freeList = 0;
+
+	// No rebuild scratch buffers exist yet.
+	tree.leafIndices = NULL;
+	tree.leafBoxes = NULL;
+	tree.leafCenters = NULL;
+	tree.binIndices = NULL;
+	tree.rebuildCapacity = 0;
+	return tree;
+}
+
+void b2DynamicTree_Destroy( b2DynamicTree* tree )
+{
+	int scratchCount = tree->rebuildCapacity; // element count used to size all four rebuild scratch arrays
+	b2Free( tree->nodes, tree->nodeCapacity * sizeof( b2TreeNode ) );
+	b2Free( tree->leafIndices, scratchCount * sizeof( int32_t ) );
+	b2Free( tree->leafBoxes, scratchCount * sizeof( b2AABB ) );
+	b2Free( tree->leafCenters, scratchCount * sizeof( b2Vec2 ) );
+	b2Free( tree->binIndices, scratchCount * sizeof( int32_t ) );
+	memset( tree, 0, sizeof( *tree ) ); // zero the struct so no freed pointer remains in it
+}
+
+// Allocate a node from the pool. Grow the pool if necessary.
+static int b2AllocateNode( b2DynamicTree* tree )
+{
+	// Expand the node pool as needed.
+	if ( tree->freeList == B2_NULL_INDEX )
+	{
+		B2_ASSERT( tree->nodeCount == tree->nodeCapacity );
+
+		// The free list is empty. Move the live nodes into a pool that is 50% larger; the new tail is zeroed.
+		int oldCapacity = tree->nodeCapacity;
+		int newCapacity = oldCapacity + ( oldCapacity >> 1 );
+		b2TreeNode* oldNodes = tree->nodes;
+		b2TreeNode* newNodes = (b2TreeNode*)b2Alloc( newCapacity * sizeof( b2TreeNode ) );
+		B2_ASSERT( oldNodes != NULL );
+		memcpy( newNodes, oldNodes, tree->nodeCount * sizeof( b2TreeNode ) );
+		memset( newNodes + tree->nodeCount, 0, ( newCapacity - tree->nodeCount ) * sizeof( b2TreeNode ) );
+		b2Free( oldNodes, oldCapacity * sizeof( b2TreeNode ) );
+
+		// Chain the new tail slots into a free list; "next" shares its storage with the parent index.
+		int lastSlot = newCapacity - 1;
+		for ( int slot = tree->nodeCount; slot < lastSlot; ++slot )
+		{
+			newNodes[slot].next = slot + 1;
+		}
+		newNodes[lastSlot].next = B2_NULL_INDEX;
+		tree->nodes = newNodes;
+		tree->nodeCapacity = newCapacity;
+		tree->freeList = tree->nodeCount;
+	}
+
+	// Peel a node off the free list and reset it to the default allocated state.
+	int nodeIndex = tree->freeList;
+	tree->freeList = tree->nodes[nodeIndex].next;
+	tree->nodes[nodeIndex] = b2_defaultTreeNode;
+	tree->nodeCount += 1;
+	return nodeIndex;
+}
+
+// Return a node to the pool.
+static void b2FreeNode( b2DynamicTree* tree, int nodeId )
+{
+	B2_ASSERT( 0 <= nodeId && nodeId < tree->nodeCapacity );
+	B2_ASSERT( 0 < tree->nodeCount );
+	tree->nodes[nodeId].flags = 0; // drops b2_allocatedNode along with every other flag
+	tree->nodes[nodeId].next = tree->freeList; // push the slot onto the head of the free list
+	tree->freeList = nodeId;
+	tree->nodeCount -= 1;
+}
+
+// Greedy algorithm for sibling selection using the SAH
+// We have three nodes A-(B,C) and want to add a leaf D, there are three choices.
+// 1: make a new parent for A and D : E-(A-(B,C), D)
+// 2: associate D with B
+//   a: B is a leaf : A-(E-(B,D), C)
+//   b: B is an internal node: A-(B{D},C)
+// 3: associate D with C
+//   a: C is a leaf : A-(B, E-(C,D))
+//   b: C is an internal node: A-(B, C{D})
+// All of these have a clear cost except when B or C is an internal node. Hence we need to be greedy.
+
+// The cost for cases 1, 2a, and 3a can be computed using the sibling cost formula.
+// cost of sibling H = area(union(H, D)) + increased area of ancestors
+
+// Suppose B (or C) is an internal node, then the lowest cost would be one of two cases:
+// case1: D becomes a sibling of B
+// case2: D becomes a descendant of B along with a new internal node of area(D).
+static int b2FindBestSibling( const b2DynamicTree* tree, b2AABB boxD )
+{
+	b2Vec2 centerD = b2AABB_Center( boxD );
+	float areaD = b2Perimeter( boxD );
+	// Returns the index of the node chosen as sibling for a new leaf with box boxD. Reads nodes[tree->root], so the tree must not be empty.
+	const b2TreeNode* nodes = tree->nodes;
+	int rootIndex = tree->root;
+
+	b2AABB rootBox = nodes[rootIndex].aabb;
+
+	// Area of current node (every "area" in this function is a b2Perimeter value)
+	float areaBase = b2Perimeter( rootBox );
+
+	// Area of inflated node
+	float directCost = b2Perimeter( b2AABB_Union( rootBox, boxD ) );
+	float inheritedCost = 0.0f;
+
+	int bestSibling = rootIndex;
+	float bestCost = directCost;
+
+	// Descend the tree from root, following a single greedy path.
+	int index = rootIndex;
+	while ( nodes[index].height > 0 )
+	{
+		int child1 = nodes[index].children.child1;
+		int child2 = nodes[index].children.child2;
+
+		// Cost of creating a new parent for this node and the new leaf
+		float cost = directCost + inheritedCost;
+
+		// Keep this node as the best sibling when pairing with it is strictly cheaper than anything seen so far.
+		// (Equal lower bounds for the two children are resolved further below using the centroid distance.)
+		if ( cost < bestCost )
+		{
+			bestSibling = index;
+			bestCost = cost;
+		}
+
+		// Inheritance cost seen by children
+		inheritedCost += directCost - areaBase;
+
+		bool leaf1 = nodes[child1].height == 0;
+		bool leaf2 = nodes[child2].height == 0;
+
+		// Cost of descending into child 1
+		float lowerCost1 = FLT_MAX;
+		b2AABB box1 = nodes[child1].aabb;
+		float directCost1 = b2Perimeter( b2AABB_Union( box1, boxD ) );
+		float area1 = 0.0f;
+		if ( leaf1 )
+		{
+			// Child 1 is a leaf
+			// Cost of creating new node and increasing area of node P
+			float cost1 = directCost1 + inheritedCost;
+
+			// Leaves are scored here because the while condition above never iterates on a leaf
+			if ( cost1 < bestCost )
+			{
+				bestSibling = child1;
+				bestCost = cost1;
+			}
+		}
+		else
+		{
+			// Child 1 is an internal node
+			area1 = b2Perimeter( box1 );
+
+			// Lower bound cost of inserting under child 1. The minimum accounts for two possibilities:
+			// 1. Child1 could be the sibling with cost1 = inheritedCost + directCost1
+			// 2. A descendent of child1 could be the sibling with the lower bound cost of
+			//       cost1 = inheritedCost + (directCost1 - area1) + areaD
+			// This minimum here leads to the minimum of these two costs.
+			lowerCost1 = inheritedCost + directCost1 + b2MinFloat( areaD - area1, 0.0f );
+		}
+
+		// Cost of descending into child 2 (same scheme as child 1)
+		float lowerCost2 = FLT_MAX;
+		b2AABB box2 = nodes[child2].aabb;
+		float directCost2 = b2Perimeter( b2AABB_Union( box2, boxD ) );
+		float area2 = 0.0f;
+		if ( leaf2 )
+		{
+			float cost2 = directCost2 + inheritedCost;
+
+			if ( cost2 < bestCost )
+			{
+				bestSibling = child2;
+				bestCost = cost2;
+			}
+		}
+		else
+		{
+			area2 = b2Perimeter( box2 );
+			lowerCost2 = inheritedCost + directCost2 + b2MinFloat( areaD - area2, 0.0f );
+		}
+
+		if ( leaf1 && leaf2 )
+		{
+			break;
+		}
+
+		// Can the cost possibly be decreased?
+		if ( bestCost <= lowerCost1 && bestCost <= lowerCost2 )
+		{
+			break;
+		}
+
+		if ( lowerCost1 == lowerCost2 && leaf1 == false )
+		{
+			B2_ASSERT( lowerCost1 < FLT_MAX );
+			B2_ASSERT( lowerCost2 < FLT_MAX );
+
+			// No clear choice based on lower bound surface area. This can happen when both
+			// children fully contain D. Fall back to node distance.
+			b2Vec2 d1 = b2Sub( b2AABB_Center( box1 ), centerD );
+			b2Vec2 d2 = b2Sub( b2AABB_Center( box2 ), centerD );
+			lowerCost1 = b2LengthSquared( d1 );
+			lowerCost2 = b2LengthSquared( d2 );
+		}
+
+		// Descend into the child with the smaller lower bound; child 2 is taken when child 1 is a leaf or on a tie
+		if ( lowerCost1 < lowerCost2 && leaf1 == false )
+		{
+			index = child1;
+			areaBase = area1;
+			directCost = directCost1;
+		}
+		else
+		{
+			index = child2;
+			areaBase = area2;
+			directCost = directCost2;
+		}
+
+		B2_ASSERT( nodes[index].height > 0 );
+	}
+
+	return bestSibling;
+}
+
+enum b2RotateType // candidate rotations compared by b2RotateNodes when both children of A are internal
+{
+	b2_rotateNone, // keep the current arrangement
+	b2_rotateBF, // swap child B with grandchild F
+	b2_rotateBG, // swap child B with grandchild G
+	b2_rotateCD, // swap child C with grandchild D
+	b2_rotateCE // swap child C with grandchild E
+};
+
+// Try to lower the perimeter cost below node A by swapping one of A's children with a grandchild from the
+// other side. A keeps its index and its parent link; nothing is returned. Nodes with height < 2 are left alone.
+static void b2RotateNodes( b2DynamicTree* tree, int iA )
+{
+	B2_ASSERT( iA != B2_NULL_INDEX );
+
+	b2TreeNode* nodes = tree->nodes;
+
+	b2TreeNode* A = nodes + iA;
+	if ( A->height < 2 )
+	{
+		return;
+	}
+
+	int iB = A->children.child1;
+	int iC = A->children.child2;
+	B2_ASSERT( 0 <= iB && iB < tree->nodeCapacity );
+	B2_ASSERT( 0 <= iC && iC < tree->nodeCapacity );
+
+	b2TreeNode* B = nodes + iB;
+	b2TreeNode* C = nodes + iC;
+
+	if ( B->height == 0 )
+	{
+		// B is a leaf and C is internal
+		B2_ASSERT( C->height > 0 );
+
+		int iF = C->children.child1;
+		int iG = C->children.child2;
+		b2TreeNode* F = nodes + iF;
+		b2TreeNode* G = nodes + iG;
+		B2_ASSERT( 0 <= iF && iF < tree->nodeCapacity );
+		B2_ASSERT( 0 <= iG && iG < tree->nodeCapacity );
+
+		// Base cost
+		float costBase = b2Perimeter( C->aabb );
+
+		// Cost of swapping B and F: C would then bound B and G
+		b2AABB aabbBG = b2AABB_Union( B->aabb, G->aabb );
+		float costBF = b2Perimeter( aabbBG );
+
+		// Cost of swapping B and G: C would then bound B and F
+		b2AABB aabbBF = b2AABB_Union( B->aabb, F->aabb );
+		float costBG = b2Perimeter( aabbBF );
+
+		if ( costBase < costBF && costBase < costBG )
+		{
+			// Rotation does not improve cost
+			return;
+		}
+
+		if ( costBF < costBG )
+		{
+			// Swap B and F
+			A->children.child1 = iF;
+			C->children.child1 = iB;
+
+			B->parent = iC;
+			F->parent = iA;
+
+			C->aabb = aabbBG;
+
+			C->height = 1 + b2MaxUInt16( B->height, G->height );
+			A->height = 1 + b2MaxUInt16( C->height, F->height );
+			C->categoryBits = B->categoryBits | G->categoryBits;
+			A->categoryBits = C->categoryBits | F->categoryBits;
+			C->flags |= ( B->flags | G->flags ) & b2_enlargedNode;
+			A->flags |= ( C->flags | F->flags ) & b2_enlargedNode;
+		}
+		else
+		{
+			// Swap B and G
+			A->children.child1 = iG;
+			C->children.child2 = iB;
+
+			B->parent = iC;
+			G->parent = iA;
+
+			C->aabb = aabbBF;
+
+			C->height = 1 + b2MaxUInt16( B->height, F->height );
+			A->height = 1 + b2MaxUInt16( C->height, G->height );
+			C->categoryBits = B->categoryBits | F->categoryBits;
+			A->categoryBits = C->categoryBits | G->categoryBits;
+			C->flags |= ( B->flags | F->flags ) & b2_enlargedNode;
+			A->flags |= ( C->flags | G->flags ) & b2_enlargedNode;
+		}
+	}
+	else if ( C->height == 0 )
+	{
+		// C is a leaf and B is internal
+		B2_ASSERT( B->height > 0 );
+
+		int iD = B->children.child1;
+		int iE = B->children.child2;
+		b2TreeNode* D = nodes + iD;
+		b2TreeNode* E = nodes + iE;
+		B2_ASSERT( 0 <= iD && iD < tree->nodeCapacity );
+		B2_ASSERT( 0 <= iE && iE < tree->nodeCapacity );
+
+		// Base cost
+		float costBase = b2Perimeter( B->aabb );
+
+		// Cost of swapping C and D: B would then bound C and E
+		b2AABB aabbCE = b2AABB_Union( C->aabb, E->aabb );
+		float costCD = b2Perimeter( aabbCE );
+
+		// Cost of swapping C and E: B would then bound C and D
+		b2AABB aabbCD = b2AABB_Union( C->aabb, D->aabb );
+		float costCE = b2Perimeter( aabbCD );
+
+		if ( costBase < costCD && costBase < costCE )
+		{
+			// Rotation does not improve cost
+			return;
+		}
+
+		if ( costCD < costCE )
+		{
+			// Swap C and D
+			A->children.child2 = iD;
+			B->children.child1 = iC;
+
+			C->parent = iB;
+			D->parent = iA;
+
+			B->aabb = aabbCE;
+
+			B->height = 1 + b2MaxUInt16( C->height, E->height );
+			A->height = 1 + b2MaxUInt16( B->height, D->height );
+			B->categoryBits = C->categoryBits | E->categoryBits;
+			A->categoryBits = B->categoryBits | D->categoryBits;
+			B->flags |= ( C->flags | E->flags ) & b2_enlargedNode;
+			A->flags |= ( B->flags | D->flags ) & b2_enlargedNode;
+		}
+		else
+		{
+			// Swap C and E
+			A->children.child2 = iE;
+			B->children.child2 = iC;
+
+			C->parent = iB;
+			E->parent = iA;
+
+			B->aabb = aabbCD;
+			B->height = 1 + b2MaxUInt16( C->height, D->height );
+			A->height = 1 + b2MaxUInt16( B->height, E->height );
+			B->categoryBits = C->categoryBits | D->categoryBits;
+			A->categoryBits = B->categoryBits | E->categoryBits;
+			B->flags |= ( C->flags | D->flags ) & b2_enlargedNode;
+			A->flags |= ( B->flags | E->flags ) & b2_enlargedNode;
+		}
+	}
+	else
+	{
+		int iD = B->children.child1;
+		int iE = B->children.child2;
+		int iF = C->children.child1;
+		int iG = C->children.child2;
+		// Both children are internal: the four child/grandchild swaps are all candidates.
+		b2TreeNode* D = nodes + iD;
+		b2TreeNode* E = nodes + iE;
+		b2TreeNode* F = nodes + iF;
+		b2TreeNode* G = nodes + iG;
+
+		B2_ASSERT( 0 <= iD && iD < tree->nodeCapacity );
+		B2_ASSERT( 0 <= iE && iE < tree->nodeCapacity );
+		B2_ASSERT( 0 <= iF && iF < tree->nodeCapacity );
+		B2_ASSERT( 0 <= iG && iG < tree->nodeCapacity );
+
+		// Base cost
+		float areaB = b2Perimeter( B->aabb );
+		float areaC = b2Perimeter( C->aabb );
+		float costBase = areaB + areaC;
+		enum b2RotateType bestRotation = b2_rotateNone;
+		float bestCost = costBase;
+
+		// Cost of swapping B and F: C would then bound B and G
+		b2AABB aabbBG = b2AABB_Union( B->aabb, G->aabb );
+		float costBF = areaB + b2Perimeter( aabbBG );
+		if ( costBF < bestCost )
+		{
+			bestRotation = b2_rotateBF;
+			bestCost = costBF;
+		}
+
+		// Cost of swapping B and G: C would then bound B and F
+		b2AABB aabbBF = b2AABB_Union( B->aabb, F->aabb );
+		float costBG = areaB + b2Perimeter( aabbBF );
+		if ( costBG < bestCost )
+		{
+			bestRotation = b2_rotateBG;
+			bestCost = costBG;
+		}
+
+		// Cost of swapping C and D: B would then bound C and E
+		b2AABB aabbCE = b2AABB_Union( C->aabb, E->aabb );
+		float costCD = areaC + b2Perimeter( aabbCE );
+		if ( costCD < bestCost )
+		{
+			bestRotation = b2_rotateCD;
+			bestCost = costCD;
+		}
+
+		// Cost of swapping C and E: B would then bound C and D
+		b2AABB aabbCD = b2AABB_Union( C->aabb, D->aabb );
+		float costCE = areaC + b2Perimeter( aabbCD );
+		if ( costCE < bestCost )
+		{
+			bestRotation = b2_rotateCE;
+			// bestCost is not updated: this is the last candidate
+		}
+
+		switch ( bestRotation )
+		{
+			case b2_rotateNone:
+				break;
+
+			case b2_rotateBF:
+				A->children.child1 = iF;
+				C->children.child1 = iB;
+
+				B->parent = iC;
+				F->parent = iA;
+
+				C->aabb = aabbBG;
+				C->height = 1 + b2MaxUInt16( B->height, G->height );
+				A->height = 1 + b2MaxUInt16( C->height, F->height );
+				C->categoryBits = B->categoryBits | G->categoryBits;
+				A->categoryBits = C->categoryBits | F->categoryBits;
+				C->flags |= ( B->flags | G->flags ) & b2_enlargedNode;
+				A->flags |= ( C->flags | F->flags ) & b2_enlargedNode;
+				break;
+
+			case b2_rotateBG:
+				A->children.child1 = iG;
+				C->children.child2 = iB;
+
+				B->parent = iC;
+				G->parent = iA;
+
+				C->aabb = aabbBF;
+				C->height = 1 + b2MaxUInt16( B->height, F->height );
+				A->height = 1 + b2MaxUInt16( C->height, G->height );
+				C->categoryBits = B->categoryBits | F->categoryBits;
+				A->categoryBits = C->categoryBits | G->categoryBits;
+				C->flags |= ( B->flags | F->flags ) & b2_enlargedNode;
+				A->flags |= ( C->flags | G->flags ) & b2_enlargedNode;
+				break;
+
+			case b2_rotateCD:
+				A->children.child2 = iD;
+				B->children.child1 = iC;
+
+				C->parent = iB;
+				D->parent = iA;
+
+				B->aabb = aabbCE;
+				B->height = 1 + b2MaxUInt16( C->height, E->height );
+				A->height = 1 + b2MaxUInt16( B->height, D->height );
+				B->categoryBits = C->categoryBits | E->categoryBits;
+				A->categoryBits = B->categoryBits | D->categoryBits;
+				B->flags |= ( C->flags | E->flags ) & b2_enlargedNode;
+				A->flags |= ( B->flags | D->flags ) & b2_enlargedNode;
+				break;
+
+			case b2_rotateCE:
+				A->children.child2 = iE;
+				B->children.child2 = iC;
+
+				C->parent = iB;
+				E->parent = iA;
+
+				B->aabb = aabbCD;
+				B->height = 1 + b2MaxUInt16( C->height, D->height );
+				A->height = 1 + b2MaxUInt16( B->height, E->height );
+				B->categoryBits = C->categoryBits | D->categoryBits;
+				A->categoryBits = B->categoryBits | E->categoryBits;
+				B->flags |= ( C->flags | D->flags ) & b2_enlargedNode;
+				A->flags |= ( B->flags | E->flags ) & b2_enlargedNode;
+				break;
+
+			default:
+				B2_ASSERT( false );
+				break;
+		}
+	}
+}
+
+static void b2InsertLeaf( b2DynamicTree* tree, int leaf, bool shouldRotate )
+{
+	// Link an allocated leaf into the tree: choose a sibling, pair the two under a new internal node,
+	// then refit (and optionally rotate) every ancestor up to the root.
+	// The leaf must already hold its final box and category bits; both are read below.
+	if ( tree->root == B2_NULL_INDEX )
+	{
+		// Empty tree: the leaf is the whole tree.
+		tree->root = leaf;
+		tree->nodes[leaf].parent = B2_NULL_INDEX;
+		return;
+	}
+
+	// Stage 1: find the best sibling for this node
+	b2AABB leafBox = tree->nodes[leaf].aabb;
+	int sibling = b2FindBestSibling( tree, leafBox );
+	int formerParent = tree->nodes[sibling].parent;
+
+	// Stage 2: create the internal node that will own the leaf and the sibling.
+	// The node array may be reallocated by b2AllocateNode, so it is fetched only after the call.
+	int pairIndex = b2AllocateNode( tree );
+	b2TreeNode* nodes = tree->nodes;
+	b2TreeNode* pair = nodes + pairIndex;
+
+	pair->parent = formerParent;
+	pair->userData = UINT64_MAX;
+	pair->aabb = b2AABB_Union( leafBox, nodes[sibling].aabb );
+	pair->categoryBits = nodes[leaf].categoryBits | nodes[sibling].categoryBits;
+	pair->height = nodes[sibling].height + 1;
+
+	// The child indices share storage with userData, so they are written after it.
+	pair->children.child1 = sibling;
+	pair->children.child2 = leaf;
+	nodes[sibling].parent = pairIndex;
+	nodes[leaf].parent = pairIndex;
+
+	if ( formerParent == B2_NULL_INDEX )
+	{
+		// The sibling was the root.
+		tree->root = pairIndex;
+	}
+	else if ( nodes[formerParent].children.child1 == sibling )
+	{
+		// The sibling was not the root: the new node replaces it under its former parent.
+		nodes[formerParent].children.child1 = pairIndex;
+	}
+	else
+	{
+		// Same replacement, with the sibling having been the second child.
+		nodes[formerParent].children.child2 = pairIndex;
+	}
+
+	// Stage 3: walk back up the tree fixing heights and AABBs
+	// Category bits and the enlarged flag are refreshed on the way up as well.
+	for ( int index = pairIndex; index != B2_NULL_INDEX; index = nodes[index].parent )
+	{
+		b2TreeNode* node = nodes + index;
+		int child1 = node->children.child1;
+		int child2 = node->children.child2;
+
+		B2_ASSERT( child1 != B2_NULL_INDEX );
+		B2_ASSERT( child2 != B2_NULL_INDEX );
+
+		node->aabb = b2AABB_Union( nodes[child1].aabb, nodes[child2].aabb );
+		node->categoryBits = nodes[child1].categoryBits | nodes[child2].categoryBits;
+		node->height = 1 + b2MaxUInt16( nodes[child1].height, nodes[child2].height );
+		node->flags |= ( nodes[child1].flags | nodes[child2].flags ) & b2_enlargedNode;
+
+		// Rotations only relink nodes below 'index'; its own parent link is unchanged afterwards.
+		if ( shouldRotate )
+		{
+			b2RotateNodes( tree, index );
+		}
+	}
+}
+
+static void b2RemoveLeaf( b2DynamicTree* tree, int leaf )
+{
+	// Detach a leaf from the tree. The leaf node itself is not freed and keeps its old parent index;
+	// the callers in this file either re-insert it (b2DynamicTree_MoveProxy) or free it
+	// (b2DynamicTree_DestroyProxy). The leaf's parent, which only existed to pair the leaf with its
+	// sibling, goes back to the pool and the sibling moves up into the parent's place.
+	// The remaining ancestors are then refitted.
+	if ( leaf == tree->root )
+	{
+		// The leaf was the only node in the tree.
+		tree->root = B2_NULL_INDEX;
+		return;
+	}
+
+	// Both indices are captured up front: b2FreeNode later reuses the parent's parent/next slot for the
+	// free list link.
+	b2TreeNode* nodes = tree->nodes;
+	int parent = nodes[leaf].parent;
+	int grandParent = nodes[parent].parent;
+
+	// The sibling is whichever child of the parent is not the leaf.
+	int sibling = nodes[parent].children.child1;
+	if ( sibling == leaf )
+	{
+		sibling = nodes[parent].children.child2;
+	}
+
+	if ( grandParent == B2_NULL_INDEX )
+	{
+		// The parent was the root: the sibling becomes the new root.
+		// No ancestors remain above it, so nothing needs refitting.
+		tree->root = sibling;
+		nodes[sibling].parent = B2_NULL_INDEX;
+		b2FreeNode( tree, parent );
+		return;
+	}
+
+	// Destroy parent and connect sibling to grandParent.
+	if ( nodes[grandParent].children.child1 == parent )
+	{
+		// The parent was the first child.
+		nodes[grandParent].children.child1 = sibling;
+	}
+	else
+	{
+		// The parent was the second child.
+		nodes[grandParent].children.child2 = sibling;
+	}
+
+	nodes[sibling].parent = grandParent;
+	b2FreeNode( tree, parent );
+
+	// Adjust ancestor bounds from the grandparent up to the root. Box, category bits and height are
+	// recomputed from the two children; node flags are not modified by this walk.
+	for ( int index = grandParent; index != B2_NULL_INDEX; index = nodes[index].parent )
+	{
+		b2TreeNode* node = nodes + index;
+		const b2TreeNode* first = nodes + node->children.child1;
+		const b2TreeNode* second = nodes + node->children.child2;
+
+		// Union of the two child boxes.
+		node->aabb = b2AABB_Union( first->aabb, second->aabb );
+		node->categoryBits = first->categoryBits | second->categoryBits;
+		node->height = 1 + b2MaxUInt16( first->height, second->height );
+	}
+}
+
+// Create a proxy in the tree as a leaf node. We return the index of the node instead of a pointer so that we can grow
+// the node pool.
+int b2DynamicTree_CreateProxy( b2DynamicTree* tree, b2AABB aabb, uint64_t categoryBits, uint64_t userData )
+{
+	B2_ASSERT( -B2_HUGE < aabb.lowerBound.x && aabb.lowerBound.x < B2_HUGE );
+	B2_ASSERT( -B2_HUGE < aabb.lowerBound.y && aabb.lowerBound.y < B2_HUGE );
+	B2_ASSERT( -B2_HUGE < aabb.upperBound.x && aabb.upperBound.x < B2_HUGE );
+	B2_ASSERT( -B2_HUGE < aabb.upperBound.y && aabb.upperBound.y < B2_HUGE );
+
+	// Take a node from the pool and fill it in as a leaf carrying the caller's data.
+	int proxyId = b2AllocateNode( tree );
+	b2TreeNode* leaf = tree->nodes + proxyId;
+	leaf->flags = b2_allocatedNode | b2_leafNode;
+	leaf->height = 0;
+	leaf->categoryBits = categoryBits;
+	leaf->userData = userData;
+	leaf->aabb = aabb;
+
+	// New proxies are inserted with rotations enabled.
+	b2InsertLeaf( tree, proxyId, true );
+	tree->proxyCount += 1;
+
+	// The node index is the proxy id; it stays valid when the node pool is reallocated.
+	return proxyId;
+}
+
+void b2DynamicTree_DestroyProxy( b2DynamicTree* tree, int proxyId )
+{
+	B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity );
+	B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) );
+
+	// Unlink the leaf from the hierarchy first, then recycle its slot.
+	b2RemoveLeaf( tree, proxyId );
+	b2FreeNode( tree, proxyId );
+	B2_ASSERT( tree->proxyCount > 0 );
+	--tree->proxyCount;
+}
+
+int b2DynamicTree_GetProxyCount( const b2DynamicTree* tree )
+{
+	return tree->proxyCount; // incremented by b2DynamicTree_CreateProxy, decremented by b2DynamicTree_DestroyProxy
+}
+
+void b2DynamicTree_MoveProxy( b2DynamicTree* tree, int proxyId, b2AABB aabb )
+{
+	B2_ASSERT( b2IsValidAABB( aabb ) );
+	B2_ASSERT( aabb.upperBound.x - aabb.lowerBound.x < B2_HUGE );
+	B2_ASSERT( aabb.upperBound.y - aabb.lowerBound.y < B2_HUGE );
+	B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity );
+	B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) );
+
+	// Re-seat the leaf: detach it, store the new box, then insert it again without rotations.
+	b2RemoveLeaf( tree, proxyId );
+	b2TreeNode* leaf = tree->nodes + proxyId;
+	leaf->aabb = aabb;
+
+	b2InsertLeaf( tree, proxyId, false );
+}
+
+// Replace a leaf's box with a box it did not previously contain, without changing the tree topology.
+// Ancestor boxes are grown through b2EnlargeAABB and the ancestors are flagged with b2_enlargedNode.
+// The caller is responsible for passing a box that the current leaf box does not contain.
+void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int proxyId, b2AABB aabb )
+{
+	b2TreeNode* nodes = tree->nodes;
+
+	B2_ASSERT( b2IsValidAABB( aabb ) );
+	B2_ASSERT( aabb.upperBound.x - aabb.lowerBound.x < B2_HUGE );
+	B2_ASSERT( aabb.upperBound.y - aabb.lowerBound.y < B2_HUGE );
+	B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity );
+	B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) );
+
+	// Caller must ensure this
+	B2_ASSERT( b2AABB_Contains( nodes[proxyId].aabb, aabb ) == false );
+
+	// The leaf simply takes the new box.
+	nodes[proxyId].aabb = aabb;
+	int ancestor = nodes[proxyId].parent;
+
+	// Phase 1: grow ancestor boxes. Every visited ancestor is flagged as enlarged, including the
+	// first one that b2EnlargeAABB reports as unchanged, which is where this phase stops.
+	bool grew = true;
+	while ( grew && ancestor != B2_NULL_INDEX )
+	{
+		b2TreeNode* node = nodes + ancestor;
+		grew = b2EnlargeAABB( &node->aabb, aabb );
+		node->flags |= b2_enlargedNode;
+		ancestor = node->parent;
+	}
+
+	// Phase 2: the remaining ancestors need no box change, only the flag. Stop early at an ancestor
+	// that is already flagged, because it was previously ascended and marked as enlarged.
+	// Reaching the root ends the walk as well.
+	while ( ancestor != B2_NULL_INDEX && ( nodes[ancestor].flags & b2_enlargedNode ) == 0 )
+	{
+		nodes[ancestor].flags |= b2_enlargedNode;
+		ancestor = nodes[ancestor].parent;
+	}
+}
+
+void b2DynamicTree_SetCategoryBits( b2DynamicTree* tree, int proxyId, uint64_t categoryBits )
+{
+	// Change the category bits of a leaf proxy and refresh the combined bits stored on its ancestors.
+	b2TreeNode* nodes = tree->nodes;
+
+	// A leaf keeps userData in the same union as the child indices, so the child indices of a leaf are
+	// not meaningful and must not be asserted on. Only the index range and the leaf flag are checked.
+	B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity );
+	B2_ASSERT( ( nodes[proxyId].flags & b2_leafNode ) == b2_leafNode );
+
+	nodes[proxyId].categoryBits = categoryBits;
+
+	// Each ancestor stores the OR of its two children's bits: recompute them all the way up to the root.
+	int nodeIndex = nodes[proxyId].parent;
+	while ( nodeIndex != B2_NULL_INDEX )
+	{
+		b2TreeNode* node = nodes + nodeIndex;
+		int child1 = node->children.child1;
+		B2_ASSERT( child1 != B2_NULL_INDEX );
+		int child2 = node->children.child2;
+		B2_ASSERT( child2 != B2_NULL_INDEX );
+		node->categoryBits = nodes[child1].categoryBits | nodes[child2].categoryBits;
+		nodeIndex = node->parent;
+	}
+}
+
+uint64_t b2DynamicTree_GetCategoryBits( b2DynamicTree* tree, int proxyId )
+{
+	B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity );
+	return ( tree->nodes + proxyId )->categoryBits; // bits stored on the node; no leaf check is made
+}
+
+int b2DynamicTree_GetHeight( const b2DynamicTree* tree )
+{
+	// Height stored on the root node.
+	// An empty tree reports 0.
+	int rootIndex = tree->root;
+	bool isEmpty = rootIndex == B2_NULL_INDEX;
+	int height = isEmpty ? 0 : tree->nodes[rootIndex].height;
+	return height;
+}
+
+float b2DynamicTree_GetAreaRatio( const b2DynamicTree* tree )
+{
+	// Ratio of the summed perimeters of the non-root internal nodes to the root perimeter.
+	// Returns 0 for an empty tree and for a root box with no extent.
+	if ( tree->root == B2_NULL_INDEX )
+	{
+		return 0.0f;
+	}
+
+	float rootArea = b2Perimeter( tree->nodes[tree->root].aabb );
+	if ( rootArea <= 0.0f )
+	{
+		// Avoid 0/0 (NaN) when the root box is degenerate, e.g. a lone proxy with a point-sized box.
+		return 0.0f;
+	}
+	float totalArea = 0.0f;
+	for ( int i = 0; i < tree->nodeCapacity; ++i )
+	{
+		// Free slots, leaves and the root itself do not contribute to the sum.
+		const b2TreeNode* node = tree->nodes + i;
+		bool counted = b2IsAllocated( node ) && b2IsLeaf( node ) == false && i != tree->root;
+		totalArea += counted ? b2Perimeter( node->aabb ) : 0.0f;
+	}
+	return totalArea / rootArea;
+}
+
+b2AABB b2DynamicTree_GetRootBounds( const b2DynamicTree* tree )
+{
+	// Box stored on the root node, or an all-zero box when the tree is empty.
+	if ( tree->root == B2_NULL_INDEX )
+	{
+		b2AABB empty = { b2Vec2_zero, b2Vec2_zero };
+		return empty;
+	}
+	return tree->nodes[tree->root].aabb;
+}
+
+#if B2_VALIDATE
+// Compute the height of a sub-tree.
+static int b2ComputeHeight( const b2DynamicTree* tree, int nodeId )
+{
+	B2_ASSERT( 0 <= nodeId && nodeId < tree->nodeCapacity );
+	const b2TreeNode* node = tree->nodes + nodeId;
+	if ( b2IsLeaf( node ) )
+	{
+		// Leaves end the recursion.
+		return 0;
+	}
+	// An internal node sits one level above its taller child.
+	int firstHeight = b2ComputeHeight( tree, node->children.child1 );
+	int secondHeight = b2ComputeHeight( tree, node->children.child2 );
+	return 1 + b2MaxInt( firstHeight, secondHeight );
+}
+
+static void b2ValidateStructure( const b2DynamicTree* tree, int index )
+{
+	if ( index == B2_NULL_INDEX )
+	{
+		return;
+	}
+	// Recursively asserts parent/child links and flag consistency for the sub-tree at 'index'. The root must have no parent.
+	if ( index == tree->root )
+	{
+		B2_ASSERT( tree->nodes[index].parent == B2_NULL_INDEX );
+	}
+
+	const b2TreeNode* node = tree->nodes + index;
+	// A visited node either has no flags at all or carries the allocated flag.
+	B2_ASSERT( node->flags == 0 || ( node->flags & b2_allocatedNode ) != 0 );
+
+	if ( b2IsLeaf( node ) )
+	{
+		B2_ASSERT( node->height == 0 );
+		return;
+	}
+
+	int child1 = node->children.child1;
+	int child2 = node->children.child2;
+
+	B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity );
+	B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity );
+	// Both children must point back at this node.
+	B2_ASSERT( tree->nodes[child1].parent == index );
+	B2_ASSERT( tree->nodes[child2].parent == index );
+	// An enlarged child implies an enlarged parent.
+	if ( ( tree->nodes[child1].flags | tree->nodes[child2].flags ) & b2_enlargedNode )
+	{
+		B2_ASSERT( node->flags & b2_enlargedNode );
+	}
+
+	b2ValidateStructure( tree, child1 );
+	b2ValidateStructure( tree, child2 );
+}
+
+static void b2ValidateMetrics( const b2DynamicTree* tree, int index )
+{
+	if ( index == B2_NULL_INDEX )
+	{
+		return;
+	}
+	// Recursively asserts that stored height, box and category bits agree with the children of each internal node.
+	const b2TreeNode* node = tree->nodes + index;
+
+	if ( b2IsLeaf( node ) )
+	{
+		B2_ASSERT( node->height == 0 );
+		return;
+	}
+
+	int child1 = node->children.child1;
+	int child2 = node->children.child2;
+
+	B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity );
+	B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity );
+	// The stored height must be one more than the taller child's stored height.
+	int height1 = tree->nodes[child1].height;
+	int height2 = tree->nodes[child2].height;
+	int height = 1 + b2MaxInt( height1, height2 );
+	B2_ASSERT( node->height == height );
+
+	// The stored box must contain both child boxes.
+
+	B2_ASSERT( b2AABB_Contains( node->aabb, tree->nodes[child1].aabb ) );
+	B2_ASSERT( b2AABB_Contains( node->aabb, tree->nodes[child2].aabb ) );
+
+	// Containment, not equality, is what gets checked. The exact-union comparison is disabled; note that
+	// b2DynamicTree_EnlargeProxy unions the new leaf box into ancestor boxes, so an ancestor can end up
+	// larger than the union of its current children.
+	// NOTE(review): presumably that is why the equality asserts were turned off - confirm.
+
+	uint64_t categoryBits = tree->nodes[child1].categoryBits | tree->nodes[child2].categoryBits;
+	B2_ASSERT( node->categoryBits == categoryBits );
+
+	b2ValidateMetrics( tree, child1 );
+	b2ValidateMetrics( tree, child2 );
+}
+#endif
+
+void b2DynamicTree_Validate( const b2DynamicTree* tree )
+{
+#if B2_VALIDATE
+	// Nothing is checked for an empty tree.
+	if ( tree->root == B2_NULL_INDEX )
+	{
+		return;
+	}
+
+	// Links, flags, stored heights, boxes and category bits of every node reachable from the root.
+	b2ValidateStructure( tree, tree->root );
+	b2ValidateMetrics( tree, tree->root );
+
+	// Count the free list; together with the live nodes it must account for every slot in the pool.
+	int freeCount = 0;
+	for ( int freeIndex = tree->freeList; freeIndex != B2_NULL_INDEX; freeIndex = tree->nodes[freeIndex].next )
+	{
+		B2_ASSERT( 0 <= freeIndex && freeIndex < tree->nodeCapacity );
+		freeCount += 1;
+	}
+
+	// The height cached on the root must match a fresh recursive count.
+	int height = b2DynamicTree_GetHeight( tree );
+	int computedHeight = b2ComputeHeight( tree, tree->root );
+	B2_ASSERT( height == computedHeight );
+	B2_ASSERT( tree->nodeCount + freeCount == tree->nodeCapacity );
+#else
+	B2_UNUSED( tree );
+#endif
+}
+
+void b2DynamicTree_ValidateNoEnlarged(const b2DynamicTree* tree)
+{
+#if B2_VALIDATE == 1
+	// Asserts that no allocated node still carries the b2_enlargedNode flag.
+	for ( int i = 0; i < tree->nodeCapacity; ++i )
+	{
+		const b2TreeNode* node = tree->nodes + i;
+		if ( ( node->flags & b2_allocatedNode ) == 0 )
+		{
+			continue;
+		}
+		B2_ASSERT( ( node->flags & b2_enlargedNode ) == 0 );
+	}
+#else
+	B2_UNUSED( tree );
+#endif
+}
+
+int b2DynamicTree_GetByteCount( const b2DynamicTree* tree )
+{
+	// Tree struct + node pool + the per-entry cost of the rebuild scratch arrays.
+	size_t perEntry = sizeof( int ) + sizeof( b2AABB ) + sizeof( b2Vec2 ) + sizeof( int );
+	size_t total = sizeof( b2DynamicTree ) + sizeof( b2TreeNode ) * tree->nodeCapacity + tree->rebuildCapacity * perEntry;
+	return (int)total;
+}
+
+uint64_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int proxyId )
+{
+	B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity );
+	return ( tree->nodes + proxyId )->userData; // shares storage with the child indices on internal nodes
+}
+
+b2AABB b2DynamicTree_GetAABB( const b2DynamicTree* tree, int proxyId )
+{
+	B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity );
+	return ( tree->nodes + proxyId )->aabb; // box currently stored on the node, returned by value
+}
+
+b2TreeStats b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback,
+								 void* context )
+{
+	// Reports every leaf whose box overlaps aabb and whose category bits intersect maskBits; a false return from the callback stops the query.
+	b2TreeStats result = { 0 };
+	if ( tree->nodeCount == 0 )
+	{
+		return result;
+	}
+
+	int stack[B2_TREE_STACK_SIZE];
+	int stackCount = 0;
+	stack[stackCount++] = tree->root;
+
+	while ( stackCount > 0 )
+	{
+		int nodeId = stack[--stackCount];
+		if ( nodeId == B2_NULL_INDEX )
+		{
+			// Not expected: only the root and the children of internal nodes are pushed.
+			B2_ASSERT( false );
+			continue;
+		}
+
+		const b2TreeNode* node = tree->nodes + nodeId;
+		result.nodeVisits += 1;
+
+		// Prune sub-trees that miss the query box or are fully masked out.
+		if ( b2AABB_Overlaps( node->aabb, aabb ) == false || ( node->categoryBits & maskBits ) == 0 )
+		{
+			continue;
+		}
+		if ( b2IsLeaf( node ) == false )
+		{
+			// Internal node: both children are pushed, or neither when fewer than two slots remain.
+			if ( stackCount < B2_TREE_STACK_SIZE - 1 )
+			{
+				stack[stackCount++] = node->children.child1;
+				stack[stackCount++] = node->children.child2;
+			}
+			else
+			{
+				B2_ASSERT( stackCount < B2_TREE_STACK_SIZE - 1 );
+			}
+			continue;
+		}
+
+		// Leaf: callback to user code with the proxy id and its user data.
+		bool proceed = callback( nodeId, node->userData, context );
+		result.leafVisits += 1;
+		if ( proceed == false )
+		{
+			return result;
+		}
+	}
+	return result;
+}
+// Casts a ray through the tree, calls callback on candidate leaves, and clips the ray by the fraction the callback returns.
+b2TreeStats b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits,
+								   b2TreeRayCastCallbackFcn* callback, void* context )
+{
+	b2TreeStats result = { 0 };
+
+	if ( tree->nodeCount == 0 )
+	{
+		return result;
+	}
+
+	b2Vec2 p1 = input->origin;
+	b2Vec2 d = input->translation;
+
+	b2Vec2 r = b2Normalize( d );
+
+	// v is perpendicular to the segment.
+	b2Vec2 v = b2CrossSV( 1.0f, r );
+	b2Vec2 abs_v = b2Abs( v );
+
+	// Separating axis for segment (Gino, p80).
+	// |dot(v, p1 - c)| > dot(|v|, h), where c and h are the center and extents of a node AABB (evaluated per node below)
+
+	float maxFraction = input->maxFraction;
+
+	b2Vec2 p2 = b2MulAdd( p1, maxFraction, d );
+
+	// Build a bounding box for the segment.
+	b2AABB segmentAABB = { b2Min( p1, p2 ), b2Max( p1, p2 ) };
+
+	int stack[B2_TREE_STACK_SIZE];
+	int stackCount = 0;
+	stack[stackCount++] = tree->root;
+
+	const b2TreeNode* nodes = tree->nodes;
+
+	b2RayCastInput subInput = *input;
+
+	while ( stackCount > 0 )
+	{
+		int nodeId = stack[--stackCount];
+		if ( nodeId == B2_NULL_INDEX )
+		{
+			// todo is this possible? assert, then skip the entry
+			B2_ASSERT( false );
+			continue;
+		}
+
+		const b2TreeNode* node = nodes + nodeId;
+		result.nodeVisits += 1;
+
+		b2AABB nodeAABB = node->aabb;
+
+		if ( ( node->categoryBits & maskBits ) == 0 || b2AABB_Overlaps( nodeAABB, segmentAABB ) == false )
+		{
+			continue;
+		}
+
+		// Separating axis for segment (Gino, p80).
+		// |dot(v, p1 - c)| > dot(|v|, h)
+		// a ray has no extent of its own, so the node extents h are used without any extension
+		b2Vec2 c = b2AABB_Center( nodeAABB );
+		b2Vec2 h = b2AABB_Extents( nodeAABB );
+		float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) );
+		float term2 = b2Dot( abs_v, h );
+		if ( term2 < term1 )
+		{
+			continue;
+		}
+
+		if ( b2IsLeaf( node ) )
+		{
+			subInput.maxFraction = maxFraction;
+
+			float value = callback( &subInput, nodeId, node->userData, context );
+			result.leafVisits += 1;
+
+			// The user may return -1 to indicate this shape should be skipped
+			// (any value that is negative or above maxFraction leaves the ray unchanged)
+			if ( value == 0.0f )
+			{
+				// The client has terminated the ray cast.
+				return result;
+			}
+
+			if ( 0.0f < value && value <= maxFraction )
+			{
+				// Update segment bounding box.
+				maxFraction = value;
+				p2 = b2MulAdd( p1, maxFraction, d );
+				segmentAABB.lowerBound = b2Min( p1, p2 );
+				segmentAABB.upperBound = b2Max( p1, p2 );
+			}
+		}
+		else
+		{
+			if ( stackCount < B2_TREE_STACK_SIZE - 1 )
+			{
+				b2Vec2 c1 = b2AABB_Center( nodes[node->children.child1].aabb );
+				b2Vec2 c2 = b2AABB_Center( nodes[node->children.child2].aabb );
+				if ( b2DistanceSquared( c1, p1 ) < b2DistanceSquared( c2, p1 ) ) // push the farther child first so the nearer one is popped first
+				{
+					stack[stackCount++] = node->children.child2;
+					stack[stackCount++] = node->children.child1;
+				}
+				else
+				{
+					stack[stackCount++] = node->children.child1;
+					stack[stackCount++] = node->children.child2;
+				}
+			}
+			else
+			{
+				B2_ASSERT( stackCount < B2_TREE_STACK_SIZE - 1 ); // stack full: this subtree is not visited
+			}
+		}
+	}
+
+	return result;
+}
+// Sweeps the AABB of the input proxy through the tree, calls callback on candidate leaves, and clips the sweep by the returned fraction.
+b2TreeStats b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits,
+									 b2TreeShapeCastCallbackFcn* callback, void* context )
+{
+	b2TreeStats stats = { 0 };
+
+	if ( tree->nodeCount == 0 || input->proxy.count == 0 )
+	{
+		return stats;
+	}
+
+	b2AABB originAABB = { input->proxy.points[0], input->proxy.points[0] }; // bounds of the proxy points at the start of the cast
+	for ( int i = 1; i < input->proxy.count; ++i )
+	{
+		originAABB.lowerBound = b2Min( originAABB.lowerBound, input->proxy.points[i] );
+		originAABB.upperBound = b2Max( originAABB.upperBound, input->proxy.points[i] );
+	}
+
+	b2Vec2 radius = { input->proxy.radius, input->proxy.radius };
+
+	originAABB.lowerBound = b2Sub( originAABB.lowerBound, radius );
+	originAABB.upperBound = b2Add( originAABB.upperBound, radius );
+
+	b2Vec2 p1 = b2AABB_Center( originAABB );
+	b2Vec2 extension = b2AABB_Extents( originAABB );
+
+	// v is perpendicular to the segment (the translation is used as-is here, it is not normalized).
+	b2Vec2 r = input->translation;
+	b2Vec2 v = b2CrossSV( 1.0f, r );
+	b2Vec2 abs_v = b2Abs( v );
+
+	// Separating axis for segment (Gino, p80).
+	// |dot(v, p1 - c)| > dot(|v|, h)
+
+	float maxFraction = input->maxFraction;
+
+	// Build total box for the shape cast
+	b2Vec2 t = b2MulSV( maxFraction, input->translation );
+	b2AABB totalAABB = {
+		b2Min( originAABB.lowerBound, b2Add( originAABB.lowerBound, t ) ),
+		b2Max( originAABB.upperBound, b2Add( originAABB.upperBound, t ) ),
+	};
+
+	b2ShapeCastInput subInput = *input;
+	const b2TreeNode* nodes = tree->nodes;
+
+	int stack[B2_TREE_STACK_SIZE];
+	int stackCount = 0;
+	stack[stackCount++] = tree->root;
+
+	while ( stackCount > 0 )
+	{
+		int nodeId = stack[--stackCount];
+		if ( nodeId == B2_NULL_INDEX )
+		{
+			// todo is this possible? assert, then skip the entry
+			B2_ASSERT( false );
+			continue;
+		}
+
+		const b2TreeNode* node = nodes + nodeId;
+		stats.nodeVisits += 1;
+
+		if ( ( node->categoryBits & maskBits ) == 0 || b2AABB_Overlaps( node->aabb, totalAABB ) == false )
+		{
+			continue;
+		}
+
+		// Separating axis for segment (Gino, p80).
+		// |dot(v, p1 - c)| > dot(|v|, h)
+		// radius extension is added to the node in this case
+		b2Vec2 c = b2AABB_Center( node->aabb );
+		b2Vec2 h = b2Add( b2AABB_Extents( node->aabb ), extension );
+		float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) );
+		float term2 = b2Dot( abs_v, h );
+		if ( term2 < term1 )
+		{
+			continue;
+		}
+
+		if ( b2IsLeaf( node ) )
+		{
+			subInput.maxFraction = maxFraction;
+
+			float value = callback( &subInput, nodeId, node->userData, context );
+			stats.leafVisits += 1;
+
+			if ( value == 0.0f )
+			{
+				// The client has terminated the shape cast.
+				return stats;
+			}
+
+			if ( 0.0f < value && value < maxFraction )
+			{
+				// Update the total bounding box of the cast.
+				maxFraction = value;
+				t = b2MulSV( maxFraction, input->translation );
+				totalAABB.lowerBound = b2Min( originAABB.lowerBound, b2Add( originAABB.lowerBound, t ) );
+				totalAABB.upperBound = b2Max( originAABB.upperBound, b2Add( originAABB.upperBound, t ) );
+			}
+		}
+		else
+		{
+			if ( stackCount < B2_TREE_STACK_SIZE - 1 )
+			{
+				b2Vec2 c1 = b2AABB_Center( nodes[node->children.child1].aabb );
+				b2Vec2 c2 = b2AABB_Center( nodes[node->children.child2].aabb );
+				if ( b2DistanceSquared( c1, p1 ) < b2DistanceSquared( c2, p1 ) ) // push the farther child first so the nearer one is popped first
+				{
+					stack[stackCount++] = node->children.child2;
+					stack[stackCount++] = node->children.child1;
+				}
+				else
+				{
+					stack[stackCount++] = node->children.child1;
+					stack[stackCount++] = node->children.child2;
+				}
+			}
+			else
+			{
+				B2_ASSERT( stackCount < B2_TREE_STACK_SIZE - 1 ); // stack full: this subtree is not visited
+			}
+		}
+	}
+
+	return stats;
+}
+
+// Median split == 0, Surface area heuristic == 1
+#define B2_TREE_HEURISTIC 0
+
+#if B2_TREE_HEURISTIC == 0
+
+// Median split heuristic: permutes indices/centers in place about the midpoint of the longest axis of the centers' bounds; returns the left count
+static int b2PartitionMid( int* indices, b2Vec2* centers, int count )
+{
+	// Handle trivial case
+	if ( count <= 2 )
+	{
+		return count / 2;
+	}
+
+	b2Vec2 lowerBound = centers[0];
+	b2Vec2 upperBound = centers[0];
+
+	for ( int i = 1; i < count; ++i )
+	{
+		lowerBound = b2Min( lowerBound, centers[i] );
+		upperBound = b2Max( upperBound, centers[i] );
+	}
+
+	b2Vec2 d = b2Sub( upperBound, lowerBound );
+	b2Vec2 c = { 0.5f * ( lowerBound.x + upperBound.x ), 0.5f * ( lowerBound.y + upperBound.y ) };
+
+	// Partition longest axis using the Hoare partition scheme
+	// https://en.wikipedia.org/wiki/Quicksort
+	// https://nicholasvadivelu.com/2021/01/11/array-partition/
+	int i1 = 0, i2 = count;
+	if ( d.x > d.y )
+	{
+		float pivot = c.x;
+
+		while ( i1 < i2 )
+		{
+			while ( i1 < i2 && centers[i1].x < pivot )
+			{
+				i1 += 1;
+			};
+
+			while ( i1 < i2 && centers[i2 - 1].x >= pivot )
+			{
+				i2 -= 1;
+			};
+
+			if ( i1 < i2 )
+			{
+				// Swap indices
+				{
+					int temp = indices[i1];
+					indices[i1] = indices[i2 - 1];
+					indices[i2 - 1] = temp;
+				}
+
+				// Swap centers
+				{
+					b2Vec2 temp = centers[i1];
+					centers[i1] = centers[i2 - 1];
+					centers[i2 - 1] = temp;
+				}
+
+				i1 += 1;
+				i2 -= 1;
+			}
+		}
+	}
+	else
+	{
+		float pivot = c.y;
+
+		while ( i1 < i2 )
+		{
+			while ( i1 < i2 && centers[i1].y < pivot )
+			{
+				i1 += 1;
+			};
+
+			while ( i1 < i2 && centers[i2 - 1].y >= pivot )
+			{
+				i2 -= 1;
+			};
+
+			if ( i1 < i2 )
+			{
+				// Swap indices
+				{
+					int temp = indices[i1];
+					indices[i1] = indices[i2 - 1];
+					indices[i2 - 1] = temp;
+				}
+
+				// Swap centers
+				{
+					b2Vec2 temp = centers[i1];
+					centers[i1] = centers[i2 - 1];
+					centers[i2 - 1] = temp;
+				}
+
+				i1 += 1;
+				i2 -= 1;
+			}
+		}
+	}
+	B2_ASSERT( i1 == i2 );
+
+	if ( i1 > 0 && i1 < count ) // both sides of the split are non-empty
+	{
+		return i1;
+	}
+
+	return count / 2; // one side came out empty: fall back to an even split
+}
+
+#else
+
+#define B2_BIN_COUNT 64
+// One bin along the split axis, filled by b2PartitionSAH
+typedef struct b2TreeBin
+{
+	b2AABB aabb; // union of the boxes assigned to this bin
+	int count; // number of boxes assigned to this bin
+} b2TreeBin;
+// Candidate split between two adjacent bins: plane i has bins [0, i] on the left and bins [i + 1, B2_BIN_COUNT - 1] on the right
+typedef struct b2TreePlane
+{
+	b2AABB leftAABB; // union of the bin boxes on the left side
+	b2AABB rightAABB; // union of the bin boxes on the right side
+	int leftCount; // number of boxes on the left side
+	int rightCount; // number of boxes on the right side
+} b2TreePlane;
+
+// "On Fast Construction of SAH-based Bounding Volume Hierarchies" by Ingo Wald
+// Bins the boxes along the longest centroid axis, picks the cheapest plane, partitions indices/boxes in place. Returns the left child count
+static int b2PartitionSAH( int* indices, int* binIndices, b2AABB* boxes, int count )
+{
+	B2_ASSERT( count > 0 );
+
+	b2TreeBin bins[B2_BIN_COUNT];
+	b2TreePlane planes[B2_BIN_COUNT - 1];
+
+	b2Vec2 center = b2AABB_Center( boxes[0] );
+	b2AABB centroidAABB;
+	centroidAABB.lowerBound = center;
+	centroidAABB.upperBound = center;
+
+	for ( int i = 1; i < count; ++i )
+	{
+		center = b2AABB_Center( boxes[i] );
+		centroidAABB.lowerBound = b2Min( centroidAABB.lowerBound, center );
+		centroidAABB.upperBound = b2Max( centroidAABB.upperBound, center );
+	}
+
+	b2Vec2 d = b2Sub( centroidAABB.upperBound, centroidAABB.lowerBound );
+
+	// Find longest axis (invD temporarily holds the axis length and is inverted below)
+	int axisIndex;
+	float invD;
+	if ( d.x > d.y )
+	{
+		axisIndex = 0;
+		invD = d.x;
+	}
+	else
+	{
+		axisIndex = 1;
+		invD = d.y;
+	}
+
+	invD = invD > 0.0f ? 1.0f / invD : 0.0f;
+
+	// Initialize bin bounds and count
+	for ( int i = 0; i < B2_BIN_COUNT; ++i )
+	{
+		bins[i].aabb.lowerBound = ( b2Vec2 ){ FLT_MAX, FLT_MAX };
+		bins[i].aabb.upperBound = ( b2Vec2 ){ -FLT_MAX, -FLT_MAX };
+		bins[i].count = 0;
+	}
+
+	// Assign boxes to bins and compute bin boxes
+	// TODO_ERIN optimize
+	float binCount = B2_BIN_COUNT;
+	float lowerBoundArray[2] = { centroidAABB.lowerBound.x, centroidAABB.lowerBound.y };
+	float minC = lowerBoundArray[axisIndex];
+	for ( int i = 0; i < count; ++i )
+	{
+		b2Vec2 c = b2AABB_Center( boxes[i] );
+		float cArray[2] = { c.x, c.y };
+		int binIndex = (int)( binCount * ( cArray[axisIndex] - minC ) * invD );
+		binIndex = b2ClampInt( binIndex, 0, B2_BIN_COUNT - 1 );
+		binIndices[i] = binIndex;
+		bins[binIndex].count += 1;
+		bins[binIndex].aabb = b2AABB_Union( bins[binIndex].aabb, boxes[i] );
+	}
+
+	int planeCount = B2_BIN_COUNT - 1;
+
+	// Prepare all the left planes, candidates for left child (plane i accumulates bins 0..i)
+	planes[0].leftCount = bins[0].count;
+	planes[0].leftAABB = bins[0].aabb;
+	for ( int i = 1; i < planeCount; ++i )
+	{
+		planes[i].leftCount = planes[i - 1].leftCount + bins[i].count;
+		planes[i].leftAABB = b2AABB_Union( planes[i - 1].leftAABB, bins[i].aabb );
+	}
+
+	// Prepare all the right planes, candidates for right child (plane i accumulates bins i+1..last)
+	planes[planeCount - 1].rightCount = bins[planeCount].count;
+	planes[planeCount - 1].rightAABB = bins[planeCount].aabb;
+	for ( int i = planeCount - 2; i >= 0; --i )
+	{
+		planes[i].rightCount = planes[i + 1].rightCount + bins[i + 1].count;
+		planes[i].rightAABB = b2AABB_Union( planes[i + 1].rightAABB, bins[i + 1].aabb );
+	}
+
+	// Find best split to minimize SAH
+	float minCost = FLT_MAX;
+	int bestPlane = 0;
+	for ( int i = 0; i < planeCount; ++i )
+	{
+		float leftArea = b2Perimeter( planes[i].leftAABB );
+		float rightArea = b2Perimeter( planes[i].rightAABB );
+		int leftCount = planes[i].leftCount;
+		int rightCount = planes[i].rightCount;
+
+		float cost = leftCount * leftArea + rightCount * rightArea;
+		if ( cost < minCost )
+		{
+			bestPlane = i;
+			minCost = cost;
+		}
+	}
+
+	// Partition node indices and boxes using the Hoare partition scheme
+	// https://en.wikipedia.org/wiki/Quicksort
+	// Bins 0..bestPlane go left and the rest go right, which is the split that planes[bestPlane] was costed for above
+	int i1 = 0, i2 = count;
+	while ( i1 < i2 )
+	{
+		while ( i1 < i2 && binIndices[i1] <= bestPlane )
+		{
+			i1 += 1;
+		};
+
+		while ( i1 < i2 && binIndices[i2 - 1] > bestPlane )
+		{
+			i2 -= 1;
+		};
+
+		if ( i1 < i2 )
+		{
+			// Swap indices
+			{
+				int temp = indices[i1];
+				indices[i1] = indices[i2 - 1];
+				indices[i2 - 1] = temp;
+			}
+
+			// Swap boxes (binIndices is not swapped: both positions are stepped past right after)
+			{
+				b2AABB temp = boxes[i1];
+				boxes[i1] = boxes[i2 - 1];
+				boxes[i2 - 1] = temp;
+			}
+
+			i1 += 1;
+			i2 -= 1;
+		}
+	}
+	B2_ASSERT( i1 == i2 );
+
+	if ( i1 > 0 && i1 < count )
+	{
+		return i1;
+	}
+	else
+	{
+		return count / 2; // one side came out empty: fall back to an even split
+	}
+}
+
+#endif
+
+// Temporary data used to track the rebuild of a tree node (one entry of the explicit stack in b2BuildTree)
+struct b2RebuildItem
+{
+	int nodeIndex; // internal node being assembled by this entry
+	int childCount; // starts at -1; 0 and 1 select the child being built, 2 means both children are done
+
+	// Leaf indices: the first child covers [startIndex, splitIndex), the second covers [splitIndex, endIndex)
+	int startIndex;
+	int splitIndex;
+	int endIndex;
+};
+
+// Builds the hierarchy over the first leafCount entries of tree->leafIndices with an explicit stack. Returns root node index
+static int b2BuildTree( b2DynamicTree* tree, int leafCount )
+{
+	b2TreeNode* nodes = tree->nodes; // NOTE(review): cached across b2AllocateNode calls; assumes the pool is not reallocated during the build - confirm
+	int* leafIndices = tree->leafIndices;
+
+	if ( leafCount == 1 )
+	{
+		nodes[leafIndices[0]].parent = B2_NULL_INDEX;
+		return leafIndices[0];
+	}
+
+#if B2_TREE_HEURISTIC == 0
+	b2Vec2* leafCenters = tree->leafCenters;
+#else
+	b2AABB* leafBoxes = tree->leafBoxes;
+	int* binIndices = tree->binIndices;
+#endif
+
+	// todo large stack item
+	struct b2RebuildItem stack[B2_TREE_STACK_SIZE];
+	int top = 0;
+
+	stack[0].nodeIndex = b2AllocateNode( tree );
+	stack[0].childCount = -1;
+	stack[0].startIndex = 0;
+	stack[0].endIndex = leafCount;
+#if B2_TREE_HEURISTIC == 0
+	stack[0].splitIndex = b2PartitionMid( leafIndices, leafCenters, leafCount );
+#else
+	stack[0].splitIndex = b2PartitionSAH( leafIndices, binIndices, leafBoxes, leafCount );
+#endif
+
+	while ( true )
+	{
+		struct b2RebuildItem* item = stack + top;
+
+		item->childCount += 1;
+
+		if ( item->childCount == 2 )
+		{
+			// This internal node has both children established
+
+			if ( top == 0 )
+			{
+				// all done (the root's box, height and category bits are filled in after the loop)
+				break;
+			}
+
+			struct b2RebuildItem* parentItem = stack + ( top - 1 );
+			b2TreeNode* parentNode = nodes + parentItem->nodeIndex;
+
+			if ( parentItem->childCount == 0 )
+			{
+				B2_ASSERT( parentNode->children.child1 == B2_NULL_INDEX );
+				parentNode->children.child1 = item->nodeIndex;
+			}
+			else
+			{
+				B2_ASSERT( parentItem->childCount == 1 );
+				B2_ASSERT( parentNode->children.child2 == B2_NULL_INDEX );
+				parentNode->children.child2 = item->nodeIndex;
+			}
+
+			b2TreeNode* node = nodes + item->nodeIndex;
+
+			B2_ASSERT( node->parent == B2_NULL_INDEX );
+			node->parent = parentItem->nodeIndex;
+
+			B2_ASSERT( node->children.child1 != B2_NULL_INDEX );
+			B2_ASSERT( node->children.child2 != B2_NULL_INDEX );
+			b2TreeNode* child1 = nodes + node->children.child1;
+			b2TreeNode* child2 = nodes + node->children.child2;
+
+			node->aabb = b2AABB_Union( child1->aabb, child2->aabb );
+			node->height = 1 + b2MaxUInt16( child1->height, child2->height );
+			node->categoryBits = child1->categoryBits | child2->categoryBits;
+
+			// Pop stack
+			top -= 1;
+		}
+		else
+		{
+			int startIndex, endIndex;
+			if ( item->childCount == 0 )
+			{
+				startIndex = item->startIndex;
+				endIndex = item->splitIndex;
+			}
+			else
+			{
+				B2_ASSERT( item->childCount == 1 );
+				startIndex = item->splitIndex;
+				endIndex = item->endIndex;
+			}
+
+			int count = endIndex - startIndex;
+
+			if ( count == 1 )
+			{
+				int childIndex = leafIndices[startIndex];
+				b2TreeNode* node = nodes + item->nodeIndex;
+
+				if ( item->childCount == 0 )
+				{
+					B2_ASSERT( node->children.child1 == B2_NULL_INDEX );
+					node->children.child1 = childIndex;
+				}
+				else
+				{
+					B2_ASSERT( item->childCount == 1 );
+					B2_ASSERT( node->children.child2 == B2_NULL_INDEX );
+					node->children.child2 = childIndex;
+				}
+
+				b2TreeNode* childNode = nodes + childIndex;
+				B2_ASSERT( childNode->parent == B2_NULL_INDEX );
+				childNode->parent = item->nodeIndex;
+			}
+			else
+			{
+				B2_ASSERT( count > 0 );
+				B2_ASSERT( top < B2_TREE_STACK_SIZE - 1 ); // top is incremented below and must stay a valid stack index
+
+				top += 1;
+				struct b2RebuildItem* newItem = stack + top;
+				newItem->nodeIndex = b2AllocateNode( tree );
+				newItem->childCount = -1;
+				newItem->startIndex = startIndex;
+				newItem->endIndex = endIndex;
+#if B2_TREE_HEURISTIC == 0
+				newItem->splitIndex = b2PartitionMid( leafIndices + startIndex, leafCenters + startIndex, count );
+#else
+				newItem->splitIndex =
+					b2PartitionSAH( leafIndices + startIndex, binIndices + startIndex, leafBoxes + startIndex, count );
+#endif
+				newItem->splitIndex += startIndex; // the partition result is relative to the sub-range
+			}
+		}
+	}
+
+	b2TreeNode* rootNode = nodes + stack[0].nodeIndex;
+	B2_ASSERT( rootNode->parent == B2_NULL_INDEX );
+	B2_ASSERT( rootNode->children.child1 != B2_NULL_INDEX );
+	B2_ASSERT( rootNode->children.child2 != B2_NULL_INDEX );
+
+	b2TreeNode* child1 = nodes + rootNode->children.child1;
+	b2TreeNode* child2 = nodes + rootNode->children.child2;
+
+	rootNode->aabb = b2AABB_Union( child1->aabb, child2->aabb );
+	rootNode->height = 1 + b2MaxUInt16( child1->height, child2->height );
+	rootNode->categoryBits = child1->categoryBits | child2->categoryBits;
+
+	return stack[0].nodeIndex;
+}
+// Rebuilds the tree from its enlarged nodes (or from all proxies when fullBuild is true). Returns the number of leaves used in the build.
+// Not safe to access tree during this operation because it may grow
+int b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild )
+{
+	int proxyCount = tree->proxyCount;
+	if ( proxyCount == 0 )
+	{
+		return 0;
+	}
+
+	// Ensure capacity for rebuild space (old contents are discarded, the arrays are refilled below)
+	if ( proxyCount > tree->rebuildCapacity )
+	{
+		int newCapacity = proxyCount + proxyCount / 2;
+
+		b2Free( tree->leafIndices, tree->rebuildCapacity * sizeof( int ) );
+		tree->leafIndices = b2Alloc( newCapacity * sizeof( int ) );
+
+#if B2_TREE_HEURISTIC == 0
+		b2Free( tree->leafCenters, tree->rebuildCapacity * sizeof( b2Vec2 ) );
+		tree->leafCenters = b2Alloc( newCapacity * sizeof( b2Vec2 ) );
+#else
+		b2Free( tree->leafBoxes, tree->rebuildCapacity * sizeof( b2AABB ) );
+		tree->leafBoxes = b2Alloc( newCapacity * sizeof( b2AABB ) );
+		b2Free( tree->binIndices, tree->rebuildCapacity * sizeof( int ) );
+		tree->binIndices = b2Alloc( newCapacity * sizeof( int ) );
+#endif
+		tree->rebuildCapacity = newCapacity;
+	}
+
+	int leafCount = 0;
+	int stack[B2_TREE_STACK_SIZE];
+	int stackCount = 0;
+
+	int nodeIndex = tree->root;
+	b2TreeNode* nodes = tree->nodes;
+	b2TreeNode* node = nodes + nodeIndex;
+
+	// These are the nodes that get sorted to rebuild the tree.
+	// I'm using indices because the node pool may grow during the build.
+	int* leafIndices = tree->leafIndices;
+
+#if B2_TREE_HEURISTIC == 0
+	b2Vec2* leafCenters = tree->leafCenters;
+#else
+	b2AABB* leafBoxes = tree->leafBoxes;
+#endif
+
+	// Gather all proxy nodes that have grown and all internal nodes that haven't grown. Both are
+	// considered leaves in the tree rebuild.
+	// Free all internal nodes that have grown.
+	// todo use a node growth metric instead of simply enlarged to reduce rebuild size and frequency
+	// this should be weighed against B2_AABB_MARGIN
+	while ( true )
+	{
+		if ( node->height == 0 || ( ( node->flags & b2_enlargedNode ) == 0 && fullBuild == false ) )
+		{
+			leafIndices[leafCount] = nodeIndex;
+#if B2_TREE_HEURISTIC == 0
+			leafCenters[leafCount] = b2AABB_Center( node->aabb );
+#else
+			leafBoxes[leafCount] = node->aabb;
+#endif
+			leafCount += 1;
+
+			// Detach
+			node->parent = B2_NULL_INDEX;
+		}
+		else
+		{
+			int doomedNodeIndex = nodeIndex;
+
+			// Handle children: descend into child1 now and defer child2 on the stack
+			nodeIndex = node->children.child1;
+
+			if ( stackCount < B2_TREE_STACK_SIZE )
+			{
+				stack[stackCount++] = node->children.child2;
+			}
+			else
+			{
+				B2_ASSERT( stackCount < B2_TREE_STACK_SIZE ); // stack full: child2 is not gathered
+			}
+
+			node = nodes + nodeIndex;
+
+			// Remove doomed node
+			b2FreeNode( tree, doomedNodeIndex );
+
+			continue;
+		}
+
+		if ( stackCount == 0 )
+		{
+			break;
+		}
+
+		nodeIndex = stack[--stackCount];
+		node = nodes + nodeIndex;
+	}
+
+#if B2_VALIDATE == 1
+	int capacity = tree->nodeCapacity;
+	for ( int i = 0; i < capacity; ++i )
+	{
+		if ( nodes[i].flags & b2_allocatedNode )
+		{
+			B2_ASSERT( ( nodes[i].flags & b2_enlargedNode ) == 0 );
+		}
+	}
+#endif
+
+	B2_ASSERT( leafCount <= proxyCount );
+
+	tree->root = b2BuildTree( tree, leafCount );
+
+	b2DynamicTree_Validate( tree );
+
+	return leafCount;
+}
diff --git a/src/vendor/box2d/geometry.c b/src/vendor/box2d/geometry.c
new file mode 100644
index 0000000..a6a5919
--- /dev/null
+++ b/src/vendor/box2d/geometry.c
@@ -0,0 +1,1028 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "constants.h"
+#include "shape.h"
+
+#include "box2d/collision.h"
+#include "box2d/math_functions.h"
+
+#include <float.h>
+#include <stddef.h>
+
+_Static_assert( B2_MAX_POLYGON_VERTICES > 2, "must be 3 or more" );
+// A ray is valid when its origin and translation are valid vectors and maxFraction is a valid float in [0, B2_HUGE).
+bool b2IsValidRay( const b2RayCastInput* input )
+{
+	const float fraction = input->maxFraction;
+	return b2IsValidVec2( input->origin ) && b2IsValidVec2( input->translation ) && b2IsValidFloat( fraction ) &&
+		   0.0f <= fraction && fraction < B2_HUGE;
+}
+// Area-weighted centroid of a polygon, accumulated over the triangle fan anchored at vertices[0].
+static b2Vec2 b2ComputePolygonCentroid( const b2Vec2* vertices, int count )
+{
+	b2Vec2 center = { 0.0f, 0.0f };
+	float area = 0.0f;
+
+	// Get a reference point for forming triangles.
+	// Use the first vertex to reduce round-off errors.
+	b2Vec2 origin = vertices[0];
+
+	const float inv3 = 1.0f / 3.0f;
+
+	for ( int i = 1; i < count - 1; ++i )
+	{
+		// Triangle edges
+		b2Vec2 e1 = b2Sub( vertices[i], origin );
+		b2Vec2 e2 = b2Sub( vertices[i + 1], origin );
+		float a = 0.5f * b2Cross( e1, e2 );
+
+		// Area weighted centroid (relative to origin, whose own offset from itself is zero)
+		center = b2MulAdd( center, a * inv3, b2Add( e1, e2 ) );
+		area += a;
+	}
+
+	B2_ASSERT( area > FLT_EPSILON );
+	float invArea = 1.0f / area;
+	center.x *= invArea;
+	center.y *= invArea;
+
+	// Restore offset
+	center = b2Add( origin, center );
+
+	return center;
+}
+// Builds a polygon with the given radius from a hull: copies the points, then computes the edge normals and the centroid.
+b2Polygon b2MakePolygon( const b2Hull* hull, float radius )
+{
+	B2_ASSERT( b2ValidateHull( hull ) );
+
+	if ( hull->count < 3 )
+	{
+		// Handle a bad hull when assertions are disabled
+		return b2MakeSquare( 0.5f );
+	}
+
+	b2Polygon shape = { 0 };
+	shape.count = hull->count;
+	shape.radius = radius;
+
+	// Copy vertices
+	for ( int i = 0; i < shape.count; ++i )
+	{
+		shape.vertices[i] = hull->points[i];
+	}
+
+	// Compute normals. Ensure the edges have non-zero length.
+	for ( int i = 0; i < shape.count; ++i )
+	{
+		int i1 = i;
+		int i2 = i + 1 < shape.count ? i + 1 : 0; // wrap around to close the loop
+		b2Vec2 edge = b2Sub( shape.vertices[i2], shape.vertices[i1] );
+		B2_ASSERT( b2Dot( edge, edge ) > FLT_EPSILON * FLT_EPSILON );
+		shape.normals[i] = b2Normalize( b2CrossVS( edge, 1.0f ) );
+	}
+
+	shape.centroid = b2ComputePolygonCentroid( shape.vertices, shape.count );
+
+	return shape;
+}
+// Same as b2MakeOffsetRoundedPolygon with a radius of zero.
+b2Polygon b2MakeOffsetPolygon( const b2Hull* hull, b2Vec2 position, b2Rot rotation )
+{
+	return b2MakeOffsetRoundedPolygon( hull, position, rotation, 0.0f );
+}
+// Builds a polygon with the given radius from a hull whose points are first transformed by position and rotation.
+b2Polygon b2MakeOffsetRoundedPolygon( const b2Hull* hull, b2Vec2 position, b2Rot rotation, float radius )
+{
+	B2_ASSERT( b2ValidateHull( hull ) );
+
+	if ( hull->count < 3 )
+	{
+		// Handle a bad hull when assertions are disabled
+		return b2MakeSquare( 0.5f );
+	}
+
+	b2Transform transform = { position, rotation };
+
+	b2Polygon shape = { 0 };
+	shape.count = hull->count;
+	shape.radius = radius;
+
+	// Copy vertices, applying the transform to each hull point
+	for ( int i = 0; i < shape.count; ++i )
+	{
+		shape.vertices[i] = b2TransformPoint( transform, hull->points[i] );
+	}
+
+	// Compute normals. Ensure the edges have non-zero length.
+	for ( int i = 0; i < shape.count; ++i )
+	{
+		int i1 = i;
+		int i2 = i + 1 < shape.count ? i + 1 : 0; // wrap around to close the loop
+		b2Vec2 edge = b2Sub( shape.vertices[i2], shape.vertices[i1] );
+		B2_ASSERT( b2Dot( edge, edge ) > FLT_EPSILON * FLT_EPSILON );
+		shape.normals[i] = b2Normalize( b2CrossVS( edge, 1.0f ) );
+	}
+
+	shape.centroid = b2ComputePolygonCentroid( shape.vertices, shape.count );
+
+	return shape;
+}
+// Square box polygon: b2MakeBox with the same value for the half-width and the half-height.
+b2Polygon b2MakeSquare( float halfWidth )
+{
+	return b2MakeBox( halfWidth, halfWidth );
+}
+// Box polygon centered on the local origin with the given half-extents (both must be valid and positive) and zero radius.
+b2Polygon b2MakeBox( float halfWidth, float halfHeight )
+{
+	B2_ASSERT( b2IsValidFloat( halfWidth ) && halfWidth > 0.0f );
+	B2_ASSERT( b2IsValidFloat( halfHeight ) && halfHeight > 0.0f );
+
+	b2Polygon shape = { 0 };
+	shape.count = 4;
+	shape.vertices[0] = (b2Vec2){ -halfWidth, -halfHeight };
+	shape.vertices[1] = (b2Vec2){ halfWidth, -halfHeight };
+	shape.vertices[2] = (b2Vec2){ halfWidth, halfHeight };
+	shape.vertices[3] = (b2Vec2){ -halfWidth, halfHeight };
+	shape.normals[0] = (b2Vec2){ 0.0f, -1.0f }; // normal i belongs to the edge from vertex i to vertex i + 1
+	shape.normals[1] = (b2Vec2){ 1.0f, 0.0f };
+	shape.normals[2] = (b2Vec2){ 0.0f, 1.0f };
+	shape.normals[3] = (b2Vec2){ -1.0f, 0.0f };
+	shape.radius = 0.0f;
+	shape.centroid = b2Vec2_zero;
+	return shape;
+}
+// Box polygon from b2MakeBox with its radius set to the given non-negative value.
+b2Polygon b2MakeRoundedBox( float halfWidth, float halfHeight, float radius )
+{
+	B2_ASSERT( b2IsValidFloat( radius ) && radius >= 0.0f );
+	b2Polygon shape = b2MakeBox( halfWidth, halfHeight );
+	shape.radius = radius;
+	return shape;
+}
+// Box polygon with the given half-extents, placed at center and rotated by rotation, with zero radius.
+b2Polygon b2MakeOffsetBox( float halfWidth, float halfHeight, b2Vec2 center, b2Rot rotation )
+{
+	b2Transform xf = { center, rotation };
+	// NOTE(review): unlike b2MakeBox, the half-extents are not asserted to be valid and positive here
+	b2Polygon shape = { 0 };
+	shape.count = 4;
+	shape.vertices[0] = b2TransformPoint( xf, (b2Vec2){ -halfWidth, -halfHeight } );
+	shape.vertices[1] = b2TransformPoint( xf, (b2Vec2){ halfWidth, -halfHeight } );
+	shape.vertices[2] = b2TransformPoint( xf, (b2Vec2){ halfWidth, halfHeight } );
+	shape.vertices[3] = b2TransformPoint( xf, (b2Vec2){ -halfWidth, halfHeight } );
+	shape.normals[0] = b2RotateVector( xf.q, (b2Vec2){ 0.0f, -1.0f } );
+	shape.normals[1] = b2RotateVector( xf.q, (b2Vec2){ 1.0f, 0.0f } );
+	shape.normals[2] = b2RotateVector( xf.q, (b2Vec2){ 0.0f, 1.0f } );
+	shape.normals[3] = b2RotateVector( xf.q, (b2Vec2){ -1.0f, 0.0f } );
+	shape.radius = 0.0f;
+	shape.centroid = xf.p;
+	return shape;
+}
+// Box polygon with the given half-extents, placed at center, rotated by rotation, and given the non-negative radius.
+b2Polygon b2MakeOffsetRoundedBox( float halfWidth, float halfHeight, b2Vec2 center, b2Rot rotation, float radius )
+{
+	B2_ASSERT( b2IsValidFloat( radius ) && radius >= 0.0f );
+	const b2Transform xf = { center, rotation };
+	// Corners and edge normals in the box's local frame, in the same order b2MakeBox uses
+	const b2Vec2 localCorners[4] = { { -halfWidth, -halfHeight }, { halfWidth, -halfHeight },
+									 { halfWidth, halfHeight }, { -halfWidth, halfHeight } };
+	const b2Vec2 localNormals[4] = { { 0.0f, -1.0f }, { 1.0f, 0.0f }, { 0.0f, 1.0f }, { -1.0f, 0.0f } };
+	b2Polygon shape = { 0 };
+	shape.count = 4;
+	for ( int i = 0; i < 4; ++i )
+	{
+		shape.vertices[i] = b2TransformPoint( xf, localCorners[i] );
+		shape.normals[i] = b2RotateVector( xf.q, localNormals[i] );
+	}
+	shape.radius = radius;
+	shape.centroid = xf.p;
+	return shape;
+}
+// Returns a copy of polygon with its vertices and centroid transformed and its normals rotated.
+b2Polygon b2TransformPolygon( b2Transform transform, const b2Polygon* polygon )
+{
+	// Start from a full copy so every field that is not rewritten below carries over
+	b2Polygon result = *polygon;
+	result.centroid = b2TransformPoint( transform, polygon->centroid );
+
+	const int vertexCount = polygon->count;
+	for ( int index = 0; index < vertexCount; ++index )
+	{
+		result.vertices[index] = b2TransformPoint( transform, polygon->vertices[index] );
+		result.normals[index] = b2RotateVector( transform.q, polygon->normals[index] );
+	}
+	return result;
+}
+// Mass, center and rotational inertia of a circle of uniform density.
+b2MassData b2ComputeCircleMass( const b2Circle* shape, float density )
+{
+	float rr = shape->radius * shape->radius;
+
+	b2MassData massData;
+	massData.mass = density * B2_PI * rr;
+	massData.center = shape->center;
+
+	// inertia about the local origin: 0.5 * m * r^2 about the center plus m * |center|^2 for the offset
+	massData.rotationalInertia = massData.mass * ( 0.5f * rr + b2Dot( shape->center, shape->center ) );
+
+	return massData;
+}
+// Mass, center and rotational inertia of a capsule of uniform density, treated as a rectangle plus two half circles.
+b2MassData b2ComputeCapsuleMass( const b2Capsule* shape, float density )
+{
+	float radius = shape->radius;
+	float rr = radius * radius;
+	b2Vec2 p1 = shape->center1;
+	b2Vec2 p2 = shape->center2;
+	float length = b2Length( b2Sub( p2, p1 ) );
+	float ll = length * length;
+
+	float circleMass = density * ( B2_PI * radius * radius );
+	float boxMass = density * ( 2.0f * radius * length );
+
+	b2MassData massData;
+	massData.mass = circleMass + boxMass;
+	massData.center.x = 0.5f * ( p1.x + p2.x );
+	massData.center.y = 0.5f * ( p1.y + p2.y );
+
+	// two offset half circles, both halves add up to full circle and each half is offset by half length
+	// semi-circle centroid = 4 r / 3 pi
+	// Need to apply parallel-axis theorem twice:
+	// 1. shift semi-circle centroid to origin
+	// 2. shift semi-circle to box end
+	// m * ((h + lc)^2 - lc^2) = m * (h^2 + 2 * h * lc)
+	// See: https://en.wikipedia.org/wiki/Parallel_axis_theorem
+	// I verified this formula by computing the convex hull of a 128 vertex capsule
+
+	// half circle centroid
+	float lc = 4.0f * radius / ( 3.0f * B2_PI );
+
+	// half length of rectangular portion of capsule
+	float h = 0.5f * length;
+
+	float circleInertia = circleMass * ( 0.5f * rr + h * h + 2.0f * h * lc );
+	float boxInertia = boxMass * ( 4.0f * rr + ll ) / 12.0f;
+	massData.rotationalInertia = circleInertia + boxInertia;
+
+	// inertia about the local origin (the value above is about the capsule center)
+	massData.rotationalInertia += massData.mass * b2Dot( massData.center, massData.center );
+
+	return massData;
+}
+// Mass, center and rotational inertia of a polygon of uniform density. One and two vertex polygons are treated as a circle and a capsule.
+b2MassData b2ComputePolygonMass( const b2Polygon* shape, float density )
+{
+	// Polygon mass, centroid, and inertia.
+	// Let rho be the polygon density in mass per unit area.
+	// Then:
+	// mass = rho * int(dA)
+	// centroid.x = (1/mass) * rho * int(x * dA)
+	// centroid.y = (1/mass) * rho * int(y * dA)
+	// I = rho * int((x*x + y*y) * dA)
+	//
+	// We can compute these integrals by summing all the integrals
+	// for each triangle of the polygon. To evaluate the integral
+	// for a single triangle, we make a change of variables to
+	// the (u,v) coordinates of the triangle:
+	// x = x0 + e1x * u + e2x * v
+	// y = y0 + e1y * u + e2y * v
+	// where 0 <= u && 0 <= v && u + v <= 1.
+	//
+	// We integrate u from [0,1-v] and then v from [0,1].
+	// We also need to use the Jacobian of the transformation:
+	// D = cross(e1, e2)
+	//
+	// Simplification: triangle centroid = (1/3) * (p1 + p2 + p3)
+	//
+	// The rest of the derivation is handled by computer algebra.
+
+	B2_ASSERT( shape->count > 0 );
+
+	if ( shape->count == 1 )
+	{
+		b2Circle circle;
+		circle.center = shape->vertices[0];
+		circle.radius = shape->radius;
+		return b2ComputeCircleMass( &circle, density );
+	}
+
+	if ( shape->count == 2 )
+	{
+		b2Capsule capsule;
+		capsule.center1 = shape->vertices[0];
+		capsule.center2 = shape->vertices[1];
+		capsule.radius = shape->radius;
+		return b2ComputeCapsuleMass( &capsule, density );
+	}
+
+	b2Vec2 vertices[B2_MAX_POLYGON_VERTICES] = { 0 };
+	int count = shape->count;
+	float radius = shape->radius;
+
+	if ( radius > 0.0f )
+	{
+		// Approximate mass of rounded polygons by pushing out the vertices.
+		float sqrt2 = 1.41421356f;
+		for ( int i = 0; i < count; ++i )
+		{
+			int j = i == 0 ? count - 1 : i - 1;
+			b2Vec2 n1 = shape->normals[j];
+			b2Vec2 n2 = shape->normals[i];
+
+			b2Vec2 mid = b2Normalize( b2Add( n1, n2 ) ); // direction between the normals of the two edges meeting at vertex i
+			vertices[i] = b2MulAdd( shape->vertices[i], sqrt2 * radius, mid );
+		}
+	}
+	else
+	{
+		for ( int i = 0; i < count; ++i )
+		{
+			vertices[i] = shape->vertices[i];
+		}
+	}
+
+	b2Vec2 center = { 0.0f, 0.0f };
+	float area = 0.0f;
+	float rotationalInertia = 0.0f;
+
+	// Get a reference point for forming triangles.
+	// Use the first vertex to reduce round-off errors.
+	b2Vec2 r = vertices[0];
+
+	const float inv3 = 1.0f / 3.0f;
+
+	for ( int i = 1; i < count - 1; ++i )
+	{
+		// Triangle edges
+		b2Vec2 e1 = b2Sub( vertices[i], r );
+		b2Vec2 e2 = b2Sub( vertices[i + 1], r );
+
+		float D = b2Cross( e1, e2 );
+
+		float triangleArea = 0.5f * D;
+		area += triangleArea;
+
+		// Area weighted centroid, r at origin
+		center = b2MulAdd( center, triangleArea * inv3, b2Add( e1, e2 ) );
+
+		float ex1 = e1.x, ey1 = e1.y;
+		float ex2 = e2.x, ey2 = e2.y;
+
+		float intx2 = ex1 * ex1 + ex2 * ex1 + ex2 * ex2;
+		float inty2 = ey1 * ey1 + ey2 * ey1 + ey2 * ey2;
+
+		rotationalInertia += ( 0.25f * inv3 * D ) * ( intx2 + inty2 );
+	}
+
+	b2MassData massData;
+
+	// Total mass
+	massData.mass = density * area;
+
+	// Center of mass, shift back from origin at r
+	B2_ASSERT( area > FLT_EPSILON );
+	float invArea = 1.0f / area;
+	center.x *= invArea;
+	center.y *= invArea;
+	massData.center = b2Add( r, center );
+
+	// Inertia about the reference point r (the triangle edges above are measured from r).
+	massData.rotationalInertia = density * rotationalInertia;
+
+	// Shift to center of mass then to original body origin.
+	massData.rotationalInertia += massData.mass * ( b2Dot( massData.center, massData.center ) - b2Dot( center, center ) );
+
+	return massData;
+}
+
+// Bounding box of a circle whose center is transformed by xf: the center expanded by the radius on both axes.
+b2AABB b2ComputeCircleAABB( const b2Circle* shape, b2Transform xf )
+{
+	const float radius = shape->radius;
+	const b2Vec2 center = b2TransformPoint( xf, shape->center );
+	b2AABB box = { { center.x - radius, center.y - radius }, { center.x + radius, center.y + radius } };
+	return box;
+}
+
+// Bounding box of a capsule under xf: the box around both transformed end centers, grown by the radius.
+b2AABB b2ComputeCapsuleAABB( const b2Capsule* shape, b2Transform xf )
+{
+	const b2Vec2 extent = { shape->radius, shape->radius };
+	const b2Vec2 c1 = b2TransformPoint( xf, shape->center1 );
+	const b2Vec2 c2 = b2TransformPoint( xf, shape->center2 );
+
+	b2AABB box;
+	box.lowerBound = b2Sub( b2Min( c1, c2 ), extent );
+	box.upperBound = b2Add( b2Max( c1, c2 ), extent );
+	return box;
+}
+
+// Bounding box of a polygon under xf: min/max over the transformed vertices, grown by the polygon radius.
+b2AABB b2ComputePolygonAABB( const b2Polygon* shape, b2Transform xf )
+{
+	B2_ASSERT( shape->count > 0 );
+
+	// Seed both corners with the first vertex, then widen them with the remaining vertices.
+	b2Vec2 minCorner = b2TransformPoint( xf, shape->vertices[0] );
+	b2Vec2 maxCorner = minCorner;
+	for ( int index = 1; index < shape->count; ++index )
+	{
+		b2Vec2 transformed = b2TransformPoint( xf, shape->vertices[index] );
+		minCorner = b2Min( minCorner, transformed );
+		maxCorner = b2Max( maxCorner, transformed );
+	}
+
+	// The radius pads the vertex bounds equally in x and y.
+	const b2Vec2 extent = { shape->radius, shape->radius };
+	b2AABB box = { b2Sub( minCorner, extent ), b2Add( maxCorner, extent ) };
+	return box;
+}
+
+// Bounding box of a segment under xf: the component-wise min and max of its two transformed end points.
+b2AABB b2ComputeSegmentAABB( const b2Segment* shape, b2Transform xf )
+{
+	const b2Vec2 a = b2TransformPoint( xf, shape->point1 );
+	const b2Vec2 b = b2TransformPoint( xf, shape->point2 );
+
+	b2AABB box;
+	box.lowerBound = b2Min( a, b );
+	box.upperBound = b2Max( a, b );
+	return box;
+}
+
+// True when point is inside or on the circle; compares squared distance against squared radius.
+bool b2PointInCircle( b2Vec2 point, const b2Circle* shape )
+{
+	return b2DistanceSquared( point, shape->center ) <= shape->radius * shape->radius;
+}
+
+// True when point lies within the capsule radius of the segment center1-center2 (boundary included).
+bool b2PointInCapsule( b2Vec2 point, const b2Capsule* shape )
+{
+	const float radiusSqr = shape->radius * shape->radius;
+	const b2Vec2 start = shape->center1;
+	// Segment axis from center1 to center2 and its squared length
+	const b2Vec2 axis = b2Sub( shape->center2, start );
+	const float axisSqr = b2Dot( axis, axis );
+
+	if ( axisSqr == 0.0f )
+	{
+		// Both centers coincide, so the capsule degenerates to a circle
+		return b2DistanceSquared( point, start ) <= radiusSqr;
+	}
+
+	// Closest point on the segment: closest = start + u * axis, with u clamped to [0, 1].
+	// The unclamped u makes (point - closest) perpendicular to axis:
+	// dot(point - start - u * axis, axis) = 0
+	// u = dot(point - start, axis) / dot(axis, axis)
+	float u = b2Dot( b2Sub( point, start ), axis ) / axisSqr;
+	u = b2ClampFloat( u, 0.0f, 1.0f );
+	const b2Vec2 closest = b2MulAdd( start, u, axis );
+
+	// Inside when the closest segment point is no farther away than the radius
+	return b2DistanceSquared( point, closest ) <= radiusSqr;
+}
+
+// True when point is inside the polygon or within shape->radius of it, measured with b2ShapeDistance.
+bool b2PointInPolygon( b2Vec2 point, const b2Polygon* shape )
+{
+	// Both proxies are built with zero radius; the polygon radius is applied to the distance below.
+	b2DistanceInput query = { 0 };
+	query.useRadii = false;
+	query.transformA = b2Transform_identity;
+	query.transformB = b2Transform_identity;
+	query.proxyA = b2MakeProxy( shape->vertices, shape->count, 0.0f );
+	query.proxyB = b2MakeProxy( &point, 1, 0.0f );
+
+	b2SimplexCache simplexCache = { 0 };
+	return b2ShapeDistance( &query, &simplexCache, NULL, 0 ).distance <= shape->radius;
+}
+
+// Ray vs circle, following: Precision Improvements for Ray / Sphere Intersection - Ray Tracing Gems 2019
+// http://www.codercorner.com/blog/?p=321
+b2CastOutput b2RayCastCircle( const b2RayCastInput* input, const b2Circle* shape )
+{
+	B2_ASSERT( b2IsValidRay( input ) );
+
+	b2Vec2 p = shape->center;
+
+	b2CastOutput output = { 0 }; // returned unchanged (hit stays zero) on every miss path
+
+	// Shift ray so circle center is the origin
+	b2Vec2 s = b2Sub( input->origin, p );
+	float length;
+	b2Vec2 d = b2GetLengthAndNormalize( &length, input->translation );
+	if ( length == 0.0f )
+	{
+		// zero length ray: there is no direction to cast along, report a miss
+		return output;
+	}
+
+	// Find closest point on ray to origin
+
+	// solve: dot(s + t * d, d) = 0 (d is presumably unit length, making t a distance along the ray)
+	float t = -b2Dot( s, d );
+
+	// c is the closest point on the line to the origin
+	b2Vec2 c = b2MulAdd( s, t, d );
+
+	float cc = b2Dot( c, c );
+	float r = shape->radius;
+	float rr = r * r;
+
+	if ( cc > rr )
+	{
+		// closest point is outside the circle
+		return output;
+	}
+
+	// Pythagoras: h is half the length of the chord the line cuts through the circle
+	float h = sqrtf( rr - cc );
+
+	float fraction = t - h; // distance to the entry point; divided by length before it is stored
+
+	if ( fraction < 0.0f || input->maxFraction * length < fraction )
+	{
+		// outside the range of the ray segment (negative when the entry point is behind the origin)
+		return output;
+	}
+
+	// hit point relative to center
+	b2Vec2 hitPoint = b2MulAdd( s, fraction, d );
+
+	output.fraction = fraction / length;
+	output.normal = b2Normalize( hitPoint );
+	output.point = b2MulAdd( p, shape->radius, output.normal ); // rebuilt from center + radius * normal
+	output.hit = true;
+
+	return output;
+}
+
+// Ray vs capsule. Falls back to b2RayCastCircle for the end caps; a ray starting inside the capsule reports no hit.
+b2CastOutput b2RayCastCapsule( const b2RayCastInput* input, const b2Capsule* shape )
+{
+	B2_ASSERT( b2IsValidRay( input ) );
+	b2CastOutput output = { 0 };
+
+	b2Vec2 v1 = shape->center1;
+	b2Vec2 v2 = shape->center2;
+
+	b2Vec2 e = b2Sub( v2, v1 );
+
+	float capsuleLength;
+	b2Vec2 a = b2GetLengthAndNormalize( &capsuleLength, e );
+
+	if ( capsuleLength < FLT_EPSILON )
+	{
+		// Capsule is really a circle
+		b2Circle circle = { v1, shape->radius };
+		return b2RayCastCircle( input, &circle );
+	}
+
+	b2Vec2 p1 = input->origin;
+	b2Vec2 d = input->translation;
+
+	// Ray from capsule start to ray start; qa is its length along the capsule axis a, measured from v1
+	b2Vec2 q = b2Sub( p1, v1 );
+	float qa = b2Dot( q, a );
+
+	// Vector to ray start that is perpendicular to capsule axis
+	b2Vec2 qp = b2MulAdd( q, -qa, a );
+
+	float radius = shape->radius;
+
+	// Does the ray start within the infinite length capsule?
+	if ( b2Dot( qp, qp ) < radius * radius )
+	{
+		if ( qa < 0.0f )
+		{
+			// start point behind capsule segment
+			b2Circle circle = { v1, shape->radius };
+			return b2RayCastCircle( input, &circle );
+		}
+
+		if ( qa > capsuleLength )
+		{
+			// start point ahead of capsule segment (qa is a distance, so it is compared with the segment length)
+			b2Circle circle = { v2, shape->radius };
+			return b2RayCastCircle( input, &circle );
+		}
+
+		// ray starts inside capsule -> no hit
+		return output;
+	}
+
+	// Perpendicular to capsule axis, pointing right
+	b2Vec2 n = { a.y, -a.x };
+
+	float rayLength;
+	b2Vec2 u = b2GetLengthAndNormalize( &rayLength, d );
+
+	// Intersect ray with infinite length capsule
+	// v1 + radius * n + s1 * a = p1 + s2 * u
+	// v1 - radius * n + s1 * a = p1 + s2 * u
+
+	// s1 * a - s2 * u = b
+	// b = q - radius * n
+	// or
+	// b = q + radius * n
+
+	// Cramer's rule [a -u]
+	float den = -a.x * u.y + u.x * a.y;
+	if ( -FLT_EPSILON < den && den < FLT_EPSILON )
+	{
+		// Ray is parallel to capsule and outside infinite length capsule
+		return output;
+	}
+
+	b2Vec2 b1 = b2MulSub( q, radius, n );
+	b2Vec2 b2 = b2MulAdd( q, radius, n );
+
+	float invDen = 1.0f / den;
+
+	// Cramer's rule [a b1]
+	float s21 = ( a.x * b1.y - b1.x * a.y ) * invDen;
+
+	// Cramer's rule [a b2]
+	float s22 = ( a.x * b2.y - b2.x * a.y ) * invDen;
+
+	float s2;
+	b2Vec2 b;
+	if ( s21 < s22 )
+	{
+		s2 = s21;
+		b = b1;
+	}
+	else
+	{
+		s2 = s22;
+		b = b2;
+		n = b2Neg( n );
+	}
+
+	if ( s2 < 0.0f || input->maxFraction * rayLength < s2 )
+	{
+		return output;
+	}
+
+	// Cramer's rule [b -u]
+	float s1 = ( -b.x * u.y + u.x * b.y ) * invDen;
+
+	if ( s1 < 0.0f )
+	{
+		// ray passes behind capsule segment
+		b2Circle circle = { v1, shape->radius };
+		return b2RayCastCircle( input, &circle );
+	}
+	else if ( capsuleLength < s1 )
+	{
+		// ray passes ahead of capsule segment
+		b2Circle circle = { v2, shape->radius };
+		return b2RayCastCircle( input, &circle );
+	}
+	else
+	{
+		// ray hits capsule side
+		output.fraction = s2 / rayLength;
+		output.point = b2Add( b2Lerp( v1, v2, s1 / capsuleLength ), b2MulSV( shape->radius, n ) );
+		output.normal = n;
+		output.hit = true;
+		return output;
+	}
+}
+
+// Ray vs line segment. When oneSided is set, a ray whose origin yields a negative cross product below is a miss.
+b2CastOutput b2RayCastSegment( const b2RayCastInput* input, const b2Segment* shape, bool oneSided )
+{
+	if ( oneSided )
+	{
+		// Skip left-side collision
+		float offset = b2Cross( b2Sub( input->origin, shape->point1 ), b2Sub( shape->point2, shape->point1 ) );
+		if ( offset < 0.0f )
+		{
+			b2CastOutput output = { 0 };
+			return output;
+		}
+	}
+
+	// Put the ray into the edge's frame of reference.
+	b2Vec2 p1 = input->origin;
+	b2Vec2 d = input->translation;
+
+	b2Vec2 v1 = shape->point1;
+	b2Vec2 v2 = shape->point2;
+	b2Vec2 e = b2Sub( v2, v1 );
+
+	b2CastOutput output = { 0 }; // returned unchanged (hit stays zero) on every miss path
+
+	float length;
+	b2Vec2 eUnit = b2GetLengthAndNormalize( &length, e );
+	if ( length == 0.0f )
+	{
+		return output; // degenerate segment: both end points coincide
+	}
+
+	// Normal points to the right, looking from v1 towards v2
+	b2Vec2 normal = b2RightPerp( eUnit );
+
+	// Intersect ray with infinite segment using normal
+	// Similar to intersecting a ray with an infinite plane
+	// p = p1 + t * d
+	// dot(normal, p - v1) = 0
+	// dot(normal, p1 - v1) + t * dot(normal, d) = 0
+	float numerator = b2Dot( normal, b2Sub( v1, p1 ) );
+	float denominator = b2Dot( normal, d );
+
+	if ( denominator == 0.0f )
+	{
+		// parallel
+		return output;
+	}
+
+	float t = numerator / denominator; // d is not normalized here, so t is already a fraction of the translation
+	if ( t < 0.0f || input->maxFraction < t )
+	{
+		// out of ray range
+		return output;
+	}
+
+	// Intersection point on infinite segment
+	b2Vec2 p = b2MulAdd( p1, t, d );
+
+	// Compute position of p along segment
+	// p = v1 + s * e
+	// s = dot(p - v1, e) / dot(e, e)
+
+	float s = b2Dot( b2Sub( p, v1 ), eUnit ); // projected on the unit edge, so s is compared against length
+	if ( s < 0.0f || length < s )
+	{
+		// out of segment range
+		return output;
+	}
+
+	if ( numerator > 0.0f )
+	{
+		normal = b2Neg( normal ); // dot(normal, p1 - v1) < 0: flip so the normal faces the ray origin
+	}
+
+	output.fraction = t;
+	output.point = p;
+	output.normal = normal;
+	output.hit = true;
+
+	return output;
+}
+
+// Ray vs polygon. A zero-radius polygon is clipped edge by edge; otherwise the ray is cast as a point via b2ShapeCast.
+b2CastOutput b2RayCastPolygon( const b2RayCastInput* input, const b2Polygon* shape )
+{
+	B2_ASSERT( b2IsValidRay( input ) );
+	if ( shape->radius == 0.0f )
+	{
+		// Put the ray into the polygon's frame of reference.
+		b2Vec2 p1 = input->origin;
+		b2Vec2 d = input->translation;
+
+		float lower = 0.0f, upper = input->maxFraction; // surviving parameter interval along the ray
+
+		int index = -1; // edge that last raised lower; stays -1 when no edge did
+
+		b2CastOutput output = { 0 };
+
+		for ( int i = 0; i < shape->count; ++i )
+		{
+			// p = p1 + a * d
+			// dot(normal, p - v) = 0
+			// dot(normal, p1 - v) + a * dot(normal, d) = 0
+			float numerator = b2Dot( shape->normals[i], b2Sub( shape->vertices[i], p1 ) );
+			float denominator = b2Dot( shape->normals[i], d );
+
+			if ( denominator == 0.0f )
+			{
+				if ( numerator < 0.0f )
+				{
+					return output; // parallel to this edge with the origin on its normal side: miss
+				}
+			}
+			else
+			{
+				// Note: we want this predicate without division:
+				// lower < numerator / denominator, where denominator < 0
+				// Since denominator < 0, we have to flip the inequality:
+				// lower < numerator / denominator <==> denominator * lower > numerator.
+				if ( denominator < 0.0f && numerator < lower * denominator )
+				{
+					// Increase lower.
+					// The segment enters this half-space.
+					lower = numerator / denominator;
+					index = i;
+				}
+				else if ( denominator > 0.0f && numerator < upper * denominator )
+				{
+					// Decrease upper.
+					// The segment exits this half-space.
+					upper = numerator / denominator;
+				}
+			}
+
+			// The use of epsilon here causes the B2_ASSERT on lower to trip
+			// in some cases. Apparently the use of epsilon was to make edge
+			// shapes work, but now those are handled separately.
+			// if (upper < lower - b2_epsilon)
+			if ( upper < lower )
+			{
+				return output; // interval became empty: the ray misses the polygon
+			}
+		}
+
+		B2_ASSERT( 0.0f <= lower && lower <= input->maxFraction );
+
+		if ( index >= 0 )
+		{
+			output.fraction = lower;
+			output.normal = shape->normals[index];
+			output.point = b2MulAdd( p1, lower, d );
+			output.hit = true;
+		}
+
+		return output; // hit is still zero here when index was never set
+	}
+
+	// TODO_ERIN this is not working for ray vs box (zero radii)
+	b2ShapeCastPairInput castInput;
+	castInput.proxyA = b2MakeProxy( shape->vertices, shape->count, shape->radius );
+	castInput.proxyB = b2MakeProxy( &input->origin, 1, 0.0f ); // the ray origin as a single point with no radius
+	castInput.transformA = b2Transform_identity;
+	castInput.transformB = b2Transform_identity;
+	castInput.translationB = input->translation;
+	castInput.maxFraction = input->maxFraction;
+	castInput.canEncroach = false;
+	return b2ShapeCast( &castInput );
+}
+
+// Casts input->proxy along input->translation against the circle, modelled as a one-point proxy with a radius.
+b2CastOutput b2ShapeCastCircle( const b2ShapeCastInput* input, const b2Circle* shape )
+{
+	b2ShapeCastPairInput pair;
+	pair.transformA = b2Transform_identity;
+	pair.transformB = b2Transform_identity;
+	pair.proxyA = b2MakeProxy( &shape->center, 1, shape->radius );
+	pair.proxyB = input->proxy;
+	pair.translationB = input->translation;
+	pair.maxFraction = input->maxFraction;
+	pair.canEncroach = input->canEncroach;
+
+	return b2ShapeCast( &pair );
+}
+
+// Casts input->proxy against the capsule; reads two points from &center1 (assumes center2 directly follows it).
+b2CastOutput b2ShapeCastCapsule( const b2ShapeCastInput* input, const b2Capsule* shape )
+{
+	b2ShapeCastPairInput pair;
+	pair.transformA = b2Transform_identity;
+	pair.transformB = b2Transform_identity;
+	pair.proxyA = b2MakeProxy( &shape->center1, 2, shape->radius );
+	pair.proxyB = input->proxy;
+	pair.translationB = input->translation;
+	pair.maxFraction = input->maxFraction;
+	pair.canEncroach = input->canEncroach;
+
+	return b2ShapeCast( &pair );
+}
+
+// Casts input->proxy against the segment; reads two points from &point1 (assumes point2 directly follows it).
+b2CastOutput b2ShapeCastSegment( const b2ShapeCastInput* input, const b2Segment* shape )
+{
+	b2ShapeCastPairInput pair;
+	pair.transformA = b2Transform_identity;
+	pair.transformB = b2Transform_identity;
+	pair.proxyA = b2MakeProxy( &shape->point1, 2, 0.0f );
+	pair.proxyB = input->proxy;
+	pair.translationB = input->translation;
+	pair.maxFraction = input->maxFraction;
+	pair.canEncroach = input->canEncroach;
+
+	return b2ShapeCast( &pair );
+}
+
+// Casts input->proxy along input->translation against the polygon, using its vertices and radius as proxy A.
+b2CastOutput b2ShapeCastPolygon( const b2ShapeCastInput* input, const b2Polygon* shape )
+{
+	b2ShapeCastPairInput pair;
+	pair.transformA = b2Transform_identity;
+	pair.transformB = b2Transform_identity;
+	pair.proxyA = b2MakeProxy( shape->vertices, shape->count, shape->radius );
+	pair.proxyB = input->proxy;
+	pair.translationB = input->translation;
+	pair.maxFraction = input->maxFraction;
+	pair.canEncroach = input->canEncroach;
+
+	return b2ShapeCast( &pair );
+}
+
+// Tests the capsule mover against a circle. On overlap the result holds a plane (normal, depth) and hit = true.
+b2PlaneResult b2CollideMoverAndCircle( const b2Circle* shape, const b2Capsule* mover )
+{
+	const float combinedRadius = mover->radius + shape->radius;
+
+	b2DistanceInput query;
+	query.proxyA = b2MakeProxy( &shape->center, 1, 0.0f );
+	query.proxyB = b2MakeProxy( &mover->center1, 2, mover->radius );
+	query.transformA = b2Transform_identity;
+	query.transformB = b2Transform_identity;
+	query.useRadii = false;
+
+	b2SimplexCache simplexCache = { 0 };
+	const b2DistanceOutput result = b2ShapeDistance( &query, &simplexCache, NULL, 0 );
+
+	// Stays all-zero (no hit) unless the measured distance is within the summed radii.
+	b2PlaneResult planeResult = { 0 };
+	if ( result.distance <= combinedRadius )
+	{
+		planeResult.plane = ( b2Plane ){ result.normal, combinedRadius - result.distance };
+		planeResult.hit = true;
+	}
+
+	return planeResult;
+}
+
+// Tests the capsule mover against a capsule. On overlap the result holds a plane (normal, depth) and hit = true.
+b2PlaneResult b2CollideMoverAndCapsule( const b2Capsule* shape, const b2Capsule* mover )
+{
+	const float combinedRadius = mover->radius + shape->radius;
+
+	b2DistanceInput query;
+	query.proxyA = b2MakeProxy( &shape->center1, 2, 0.0f );
+	query.proxyB = b2MakeProxy( &mover->center1, 2, mover->radius );
+	query.transformA = b2Transform_identity;
+	query.transformB = b2Transform_identity;
+	query.useRadii = false;
+
+	b2SimplexCache simplexCache = { 0 };
+	const b2DistanceOutput result = b2ShapeDistance( &query, &simplexCache, NULL, 0 );
+
+	// Stays all-zero (no hit) unless the measured distance is within the summed radii.
+	b2PlaneResult planeResult = { 0 };
+	if ( result.distance <= combinedRadius )
+	{
+		planeResult.plane = ( b2Plane ){ result.normal, combinedRadius - result.distance };
+		planeResult.hit = true;
+	}
+
+	return planeResult;
+}
+
+// Tests the capsule mover against a polygon. On overlap the result holds a plane (normal, depth) and hit = true.
+b2PlaneResult b2CollideMoverAndPolygon( const b2Polygon* shape, const b2Capsule* mover )
+{
+	const float combinedRadius = mover->radius + shape->radius;
+
+	b2DistanceInput query;
+	query.proxyA = b2MakeProxy( shape->vertices, shape->count, shape->radius );
+	query.proxyB = b2MakeProxy( &mover->center1, 2, mover->radius );
+	query.transformA = b2Transform_identity;
+	query.transformB = b2Transform_identity;
+	query.useRadii = false;
+
+	b2SimplexCache simplexCache = { 0 };
+	const b2DistanceOutput result = b2ShapeDistance( &query, &simplexCache, NULL, 0 );
+
+	// Stays all-zero (no hit) unless the measured distance is within the summed radii.
+	b2PlaneResult planeResult = { 0 };
+	if ( result.distance <= combinedRadius )
+	{
+		planeResult.plane = ( b2Plane ){ result.normal, combinedRadius - result.distance };
+		planeResult.hit = true;
+	}
+
+	return planeResult;
+}
+
+// Tests the capsule mover against a segment. On overlap the result holds a plane (normal, depth) and hit = true.
+b2PlaneResult b2CollideMoverAndSegment( const b2Segment* shape, const b2Capsule* mover )
+{
+	const float moverRadius = mover->radius;
+
+	b2DistanceInput query;
+	query.proxyA = b2MakeProxy( &shape->point1, 2, 0.0f );
+	query.proxyB = b2MakeProxy( &mover->center1, 2, mover->radius );
+	query.transformA = b2Transform_identity;
+	query.transformB = b2Transform_identity;
+	query.useRadii = false;
+
+	b2SimplexCache simplexCache = { 0 };
+	const b2DistanceOutput result = b2ShapeDistance( &query, &simplexCache, NULL, 0 );
+
+	// Stays all-zero (no hit) unless the measured distance is within the mover radius.
+	b2PlaneResult planeResult = { 0 };
+	if ( result.distance <= moverRadius )
+	{
+		planeResult.plane = ( b2Plane ){ result.normal, moverRadius - result.distance };
+		planeResult.hit = true;
+	}
+
+	return planeResult;
+}
diff --git a/src/vendor/box2d/hull.c b/src/vendor/box2d/hull.c
new file mode 100644
index 0000000..27cc395
--- /dev/null
+++ b/src/vendor/box2d/hull.c
@@ -0,0 +1,328 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "constants.h"
+#include "core.h"
+
+#include "box2d/collision.h"
+#include "box2d/math_functions.h"
+
+#include <float.h>
+
+// quickhull recursion: returns the hull points to the right of the edge p1->p2, excluding p1 and p2 themselves
+static b2Hull b2RecurseHull( b2Vec2 p1, b2Vec2 p2, b2Vec2* ps, int count )
+{
+	b2Hull hull;
+	hull.count = 0; // only count is set; points[] entries are written as they are appended
+
+	if ( count == 0 )
+	{
+		return hull;
+	}
+
+	// create an edge vector pointing from p1 to p2
+	b2Vec2 e = b2Normalize( b2Sub( p2, p1 ) );
+
+	// discard points left of e and find point furthest to the right of e
+	b2Vec2 rightPoints[B2_MAX_POLYGON_VERTICES];
+	int rightCount = 0;
+
+	int bestIndex = 0;
+	float bestDistance = b2Cross( b2Sub( ps[bestIndex], p1 ), e ); // signed distance from the line, positive on the right
+	if ( bestDistance > 0.0f )
+	{
+		rightPoints[rightCount++] = ps[bestIndex];
+	}
+
+	for ( int i = 1; i < count; ++i )
+	{
+		float distance = b2Cross( b2Sub( ps[i], p1 ), e );
+		if ( distance > bestDistance )
+		{
+			bestIndex = i;
+			bestDistance = distance;
+		}
+
+		if ( distance > 0.0f )
+		{
+			rightPoints[rightCount++] = ps[i];
+		}
+	}
+
+	if ( bestDistance < 2.0f * B2_LINEAR_SLOP )
+	{
+		return hull; // no point is far enough from the edge to count as a hull vertex
+	}
+
+	b2Vec2 bestPoint = ps[bestIndex];
+
+	// compute hull to the right of p1-bestPoint
+	b2Hull hull1 = b2RecurseHull( p1, bestPoint, rightPoints, rightCount );
+
+	// compute hull to the right of bestPoint-p2
+	b2Hull hull2 = b2RecurseHull( bestPoint, p2, rightPoints, rightCount );
+
+	// stitch together hulls
+	for ( int i = 0; i < hull1.count; ++i )
+	{
+		hull.points[hull.count++] = hull1.points[i];
+	}
+
+	hull.points[hull.count++] = bestPoint;
+
+	for ( int i = 0; i < hull2.count; ++i )
+	{
+		hull.points[hull.count++] = hull2.points[i];
+	}
+
+	B2_ASSERT( hull.count < B2_MAX_POLYGON_VERTICES );
+
+	return hull;
+}
+
+// quickhull algorithm
+// - merges vertices based on B2_LINEAR_SLOP
+// - removes collinear points using B2_LINEAR_SLOP
+// - returns an empty hull (count == 0) if it fails
+b2Hull b2ComputeHull( const b2Vec2* points, int count )
+{
+	b2Hull hull;
+	hull.count = 0;
+
+	if ( count < 3 || count > B2_MAX_POLYGON_VERTICES )
+	{
+		// check your data
+		return hull;
+	}
+
+	count = b2MinInt( count, B2_MAX_POLYGON_VERTICES ); // no effect after the range check above
+
+	b2AABB aabb = { { FLT_MAX, FLT_MAX }, { -FLT_MAX, -FLT_MAX } };
+
+	// Perform aggressive point welding. First point always remains.
+	// Also compute the bounding box for later.
+	b2Vec2 ps[B2_MAX_POLYGON_VERTICES];
+	int n = 0;
+	const float linearSlop = B2_LINEAR_SLOP;
+	const float tolSqr = 16.0f * linearSlop * linearSlop; // squared weld distance, i.e. (4 * linearSlop)^2
+	for ( int i = 0; i < count; ++i )
+	{
+		aabb.lowerBound = b2Min( aabb.lowerBound, points[i] );
+		aabb.upperBound = b2Max( aabb.upperBound, points[i] );
+
+		b2Vec2 vi = points[i];
+
+		bool unique = true;
+		for ( int j = 0; j < i; ++j )
+		{
+			b2Vec2 vj = points[j]; // compared against every earlier input point, kept or not
+
+			float distSqr = b2DistanceSquared( vi, vj );
+			if ( distSqr < tolSqr )
+			{
+				unique = false;
+				break;
+			}
+		}
+
+		if ( unique )
+		{
+			ps[n++] = vi;
+		}
+	}
+
+	if ( n < 3 )
+	{
+		// all points very close together, check your data and check your scale
+		return hull;
+	}
+
+	// Find an extreme point as the first point on the hull
+	b2Vec2 c = b2AABB_Center( aabb );
+	int f1 = 0;
+	float dsq1 = b2DistanceSquared( c, ps[f1] );
+	for ( int i = 1; i < n; ++i )
+	{
+		float dsq = b2DistanceSquared( c, ps[i] );
+		if ( dsq > dsq1 )
+		{
+			f1 = i;
+			dsq1 = dsq;
+		}
+	}
+
+	// remove p1 from working set
+	b2Vec2 p1 = ps[f1];
+	ps[f1] = ps[n - 1]; // swap-remove: the last point takes the vacated slot
+	n = n - 1;
+
+	int f2 = 0; // p2 is the remaining point farthest from p1
+	float dsq2 = b2DistanceSquared( p1, ps[f2] );
+	for ( int i = 1; i < n; ++i )
+	{
+		float dsq = b2DistanceSquared( p1, ps[i] );
+		if ( dsq > dsq2 )
+		{
+			f2 = i;
+			dsq2 = dsq;
+		}
+	}
+
+	// remove p2 from working set
+	b2Vec2 p2 = ps[f2];
+	ps[f2] = ps[n - 1];
+	n = n - 1;
+
+	// split the points into points that are left and right of the line p1-p2.
+	b2Vec2 rightPoints[B2_MAX_POLYGON_VERTICES - 2];
+	int rightCount = 0;
+
+	b2Vec2 leftPoints[B2_MAX_POLYGON_VERTICES - 2];
+	int leftCount = 0;
+
+	b2Vec2 e = b2Normalize( b2Sub( p2, p1 ) );
+
+	for ( int i = 0; i < n; ++i )
+	{
+		float d = b2Cross( b2Sub( ps[i], p1 ), e );
+
+		// slop used here to skip points that are very close to the line p1-p2
+		if ( d >= 2.0f * linearSlop )
+		{
+			rightPoints[rightCount++] = ps[i];
+		}
+		else if ( d <= -2.0f * linearSlop )
+		{
+			leftPoints[leftCount++] = ps[i];
+		}
+	}
+
+	// compute hulls on right and left
+	b2Hull hull1 = b2RecurseHull( p1, p2, rightPoints, rightCount );
+	b2Hull hull2 = b2RecurseHull( p2, p1, leftPoints, leftCount ); // reversed edge, so "right" of it is the left side
+
+	if ( hull1.count == 0 && hull2.count == 0 )
+	{
+		// all points collinear
+		return hull;
+	}
+
+	// stitch hulls together, preserving CCW winding order
+	hull.points[hull.count++] = p1;
+
+	for ( int i = 0; i < hull1.count; ++i )
+	{
+		hull.points[hull.count++] = hull1.points[i];
+	}
+
+	hull.points[hull.count++] = p2;
+
+	for ( int i = 0; i < hull2.count; ++i )
+	{
+		hull.points[hull.count++] = hull2.points[i];
+	}
+
+	B2_ASSERT( hull.count <= B2_MAX_POLYGON_VERTICES );
+
+	// merge collinear: drop one midpoint per pass and rescan until a full pass removes nothing
+	bool searching = true;
+	while ( searching && hull.count > 2 )
+	{
+		searching = false;
+
+		for ( int i = 0; i < hull.count; ++i )
+		{
+			int i1 = i;
+			int i2 = ( i + 1 ) % hull.count;
+			int i3 = ( i + 2 ) % hull.count;
+
+			b2Vec2 s1 = hull.points[i1];
+			b2Vec2 s2 = hull.points[i2];
+			b2Vec2 s3 = hull.points[i3];
+
+			// unit edge vector for s1-s3
+			b2Vec2 r = b2Normalize( b2Sub( s3, s1 ) );
+
+			float distance = b2Cross( b2Sub( s2, s1 ), r ); // signed distance of s2 from the line s1-s3
+			if ( distance <= 2.0f * linearSlop )
+			{
+				// remove midpoint from hull
+				for ( int j = i2; j < hull.count - 1; ++j )
+				{
+					hull.points[j] = hull.points[j + 1];
+				}
+				hull.count -= 1;
+
+				// continue searching for collinear points
+				searching = true;
+
+				break;
+			}
+		}
+	}
+
+	if ( hull.count < 3 )
+	{
+		// all points collinear, shouldn't be reached since this was validated above
+		hull.count = 0;
+	}
+
+	return hull;
+}
+
+// Returns true when hull has 3..B2_MAX_POLYGON_VERTICES points, every point passes the edge test, and none is collinear.
+bool b2ValidateHull( const b2Hull* hull )
+{
+	if ( hull->count < 3 || B2_MAX_POLYGON_VERTICES < hull->count )
+	{
+		return false;
+	}
+	// test that every point is behind every edge
+	for ( int i = 0; i < hull->count; ++i )
+	{
+		// create an edge vector
+		int i1 = i;
+		int i2 = i < hull->count - 1 ? i1 + 1 : 0; // wrap from the last point back to the first
+		b2Vec2 p = hull->points[i1];
+		b2Vec2 e = b2Normalize( b2Sub( hull->points[i2], p ) );
+
+		for ( int j = 0; j < hull->count; ++j )
+		{
+			// skip points that subtend the current edge
+			if ( j == i1 || j == i2 )
+			{
+				continue;
+			}
+
+			float distance = b2Cross( b2Sub( hull->points[j], p ), e );
+			if ( distance >= 0.0f )
+			{
+				return false; // point j is on or to the right of edge i1-i2
+			}
+		}
+	}
+
+	// test for collinear points
+	const float linearSlop = B2_LINEAR_SLOP;
+	for ( int i = 0; i < hull->count; ++i )
+	{
+		int i1 = i;
+		int i2 = ( i + 1 ) % hull->count;
+		int i3 = ( i + 2 ) % hull->count;
+
+		b2Vec2 p1 = hull->points[i1];
+		b2Vec2 p2 = hull->points[i2];
+		b2Vec2 p3 = hull->points[i3];
+
+		b2Vec2 e = b2Normalize( b2Sub( p3, p1 ) );
+
+		float distance = b2Cross( b2Sub( p2, p1 ), e ); // signed distance of the middle point from the line p1-p3
+		if ( distance <= linearSlop )
+		{
+			// p1-p2-p3 are collinear
+			return false;
+		}
+	}
+
+	return true;
+}
diff --git a/src/vendor/box2d/id.h b/src/vendor/box2d/id.h
new file mode 100644
index 0000000..4daaaf2
--- /dev/null
+++ b/src/vendor/box2d/id.h
@@ -0,0 +1,144 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "base.h"
+
+#include <stdint.h>
+
+/**
+ * @defgroup id Ids
+ * These ids serve as handles to internal Box2D objects.
+ * These should be considered opaque data and passed by value.
+ * Include this header if you need the id types and not the whole Box2D API.
+ * All ids are considered null if initialized to zero.
+ *
+ * For example in C++:
+ *
+ * @code{.cxx}
+ * b2WorldId worldId = {};
+ * @endcode
+ *
+ * Or in C:
+ *
+ * @code{.c}
+ * b2WorldId worldId = {0};
+ * @endcode
+ *
+ * These are both considered null.
+ *
+ * @warning Do not use the internals of these ids. They are subject to change. Ids should be treated as opaque objects.
+ * @warning You should use ids to access objects in Box2D. Do not access files within the src folder. Such usage is unsupported.
+ * @{
+ */
+
+/// World id references a world instance. This should be treated as an opaque handle.
+typedef struct b2WorldId
+{
+	uint16_t index1; // zero marks a null id (tested by B2_IS_NULL)
+	uint16_t generation; // NOTE(review): presumably distinguishes reuse of the same index - confirm
+} b2WorldId;
+
+/// Body id references a body instance. This should be treated as an opaque handle.
+typedef struct b2BodyId
+{
+	int32_t index1; // zero marks a null id (tested by B2_IS_NULL)
+	uint16_t world0; // NOTE(review): presumably the index of the owning world - confirm
+	uint16_t generation; // NOTE(review): presumably distinguishes reuse of the same index - confirm
+} b2BodyId;
+
+/// Shape id references a shape instance. This should be treated as an opaque handle.
+typedef struct b2ShapeId
+{
+	int32_t index1; // zero marks a null id (tested by B2_IS_NULL)
+	uint16_t world0; // NOTE(review): presumably the index of the owning world - confirm
+	uint16_t generation; // NOTE(review): presumably distinguishes reuse of the same index - confirm
+} b2ShapeId;
+
+/// Chain id references a chain instance. This should be treated as an opaque handle.
+typedef struct b2ChainId
+{
+	int32_t index1; // zero marks a null id (tested by B2_IS_NULL)
+	uint16_t world0; // NOTE(review): presumably the index of the owning world - confirm
+	uint16_t generation; // NOTE(review): presumably distinguishes reuse of the same index - confirm
+} b2ChainId;
+
+/// Joint id references a joint instance. This should be treated as an opaque handle.
+typedef struct b2JointId
+{
+	int32_t index1; // zero marks a null id (tested by B2_IS_NULL)
+	uint16_t world0; // NOTE(review): presumably the index of the owning world - confirm
+	uint16_t generation; // NOTE(review): presumably distinguishes reuse of the same index - confirm
+} b2JointId;
+
+/// Null id constants, one per id type. Assign one of these to make an identifier null.
+/// You may also use zero initialization to get null.
+static const b2WorldId b2_nullWorldId = B2_ZERO_INIT;
+static const b2BodyId b2_nullBodyId = B2_ZERO_INIT;
+static const b2ShapeId b2_nullShapeId = B2_ZERO_INIT;
+static const b2ChainId b2_nullChainId = B2_ZERO_INIT;
+static const b2JointId b2_nullJointId = B2_ZERO_INIT;
+
+/// Macro to determine if any id is null (index1 == 0). The argument may be any expression, e.g. *idPointer.
+#define B2_IS_NULL( id ) ( ( id ).index1 == 0 )
+
+/// Macro to determine if any id is non-null (index1 != 0). The argument may be any expression, e.g. *idPointer.
+#define B2_IS_NON_NULL( id ) ( ( id ).index1 != 0 )
+
+/// Compare two ids for equality. Doesn't work for b2WorldId (no world0). Arguments are expanded three times each.
+#define B2_ID_EQUALS( id1, id2 ) ( ( id1 ).index1 == ( id2 ).index1 && ( id1 ).world0 == ( id2 ).world0 && ( id1 ).generation == ( id2 ).generation )
+
+/// Pack a body id into a uint64_t: index1 in bits 32-63, world0 in bits 16-31, generation in bits 0-15.
+B2_INLINE uint64_t b2StoreBodyId( b2BodyId id )
+{
+	return ( (uint64_t)id.index1 << 32 ) | ( (uint64_t)id.world0 << 16 ) | (uint64_t)id.generation;
+}
+
+/// Unpack a uint64_t laid out as b2StoreBodyId writes it back into a body id.
+B2_INLINE b2BodyId b2LoadBodyId( uint64_t x )
+{
+	b2BodyId unpacked = { (int32_t)( x >> 32 ), (uint16_t)( ( x >> 16 ) & 0xFFFFu ), (uint16_t)( x & 0xFFFFu ) };
+	return unpacked;
+}
+
+/// Pack a shape id into a uint64_t: index1 in bits 32-63, world0 in bits 16-31, generation in bits 0-15.
+B2_INLINE uint64_t b2StoreShapeId( b2ShapeId id )
+{
+	return ( (uint64_t)id.index1 << 32 ) | ( (uint64_t)id.world0 << 16 ) | (uint64_t)id.generation;
+}
+
+/// Unpack a uint64_t laid out as b2StoreShapeId writes it back into a shape id.
+B2_INLINE b2ShapeId b2LoadShapeId( uint64_t x )
+{
+	b2ShapeId unpacked = { (int32_t)( x >> 32 ), (uint16_t)( ( x >> 16 ) & 0xFFFFu ), (uint16_t)( x & 0xFFFFu ) };
+	return unpacked;
+}
+
+/// Pack a chain id into a uint64_t: index1 in bits 32-63, world0 in bits 16-31, generation in bits 0-15.
+B2_INLINE uint64_t b2StoreChainId( b2ChainId id )
+{
+	return ( (uint64_t)id.index1 << 32 ) | ( (uint64_t)id.world0 << 16 ) | (uint64_t)id.generation;
+}
+
+/// Unpack a uint64_t laid out as b2StoreChainId writes it back into a chain id.
+B2_INLINE b2ChainId b2LoadChainId( uint64_t x )
+{
+	b2ChainId unpacked = { (int32_t)( x >> 32 ), (uint16_t)( ( x >> 16 ) & 0xFFFFu ), (uint16_t)( x & 0xFFFFu ) };
+	return unpacked;
+}
+
+/// Pack a joint id into a uint64_t: index1 in bits 32-63, world0 in bits 16-31, generation in bits 0-15.
+B2_INLINE uint64_t b2StoreJointId( b2JointId id )
+{
+	return ( (uint64_t)id.index1 << 32 ) | ( (uint64_t)id.world0 << 16 ) | (uint64_t)id.generation;
+}
+
+/// Unpack a uint64_t laid out as b2StoreJointId writes it back into a joint id.
+B2_INLINE b2JointId b2LoadJointId( uint64_t x )
+{
+	b2JointId unpacked = { (int32_t)( x >> 32 ), (uint16_t)( ( x >> 16 ) & 0xFFFFu ), (uint16_t)( x & 0xFFFFu ) };
+	return unpacked;
+}
+
+/**@}*/
diff --git a/src/vendor/box2d/id_pool.c b/src/vendor/box2d/id_pool.c
new file mode 100644
index 0000000..03ef200
--- /dev/null
+++ b/src/vendor/box2d/id_pool.c
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "id_pool.h"
+
+// Returns a fresh pool: nextIndex is 0 and the free list comes from b2IntArray_Create( 32 ).
+b2IdPool b2CreateIdPool( void )
+{
+	b2IdPool created = { .freeArray = b2IntArray_Create( 32 ), .nextIndex = 0 };
+	return created;
+}
+
+void b2DestroyIdPool( b2IdPool* pool ) // destroys the free list, then zeroes the whole pool
+{
+	b2IntArray_Destroy( &pool->freeArray );
+	*pool = ( b2IdPool ){ 0 }; // also resets nextIndex to 0
+}
+
+// Hands out an id: a previously freed one when the free list is non-empty, otherwise the next unused index.
+int b2AllocId( b2IdPool* pool )
+{
+	// Recycle a freed id first
+	if ( pool->freeArray.count > 0 )
+	{
+		return b2IntArray_Pop( &pool->freeArray );
+	}
+
+	// Nothing to recycle: take nextIndex and advance it
+	const int freshId = pool->nextIndex++;
+	return freshId;
+}
+
+void b2FreeId( b2IdPool* pool, int id ) // returns id to the pool by pushing it on the free list
+{
+	B2_ASSERT( pool->nextIndex > 0 ); // at least one id must have been handed out
+	B2_ASSERT( 0 <= id && id < pool->nextIndex ); // id must lie in the range allocated so far
+	b2IntArray_Push( &pool->freeArray, id ); // no check here that id is not already in the free list
+}
+
+#if B2_VALIDATE
+// Validation build: linear scans of the free list that assert when an id is in the wrong state.
+void b2ValidateFreeId( b2IdPool* pool, int id ) // asserts unless id is currently in the free list
+{
+	int freeCount = pool->freeArray.count;
+	for ( int i = 0; i < freeCount; ++i )
+	{
+		if ( pool->freeArray.data[i] == id )
+		{
+			return;
+		}
+	}
+
+	B2_ASSERT( 0 ); // reached only when id was not found in the free list
+}
+
+void b2ValidateUsedId( b2IdPool* pool, int id ) // asserts if id is currently in the free list
+{
+	int freeCount = pool->freeArray.count;
+	for ( int i = 0; i < freeCount; ++i )
+	{
+		if ( pool->freeArray.data[i] == id )
+		{
+			B2_ASSERT( 0 );
+		}
+	}
+}
+
+#else
+// Non-validation build: both functions only mark their parameters as unused.
+void b2ValidateFreeId( b2IdPool* pool, int id )
+{
+	B2_UNUSED( pool, id );
+}
+
+void b2ValidateUsedId( b2IdPool* pool, int id )
+{
+	B2_UNUSED( pool, id );
+}
+#endif
diff --git a/src/vendor/box2d/id_pool.h b/src/vendor/box2d/id_pool.h
new file mode 100644
index 0000000..16a3188
--- /dev/null
+++ b/src/vendor/box2d/id_pool.h
@@ -0,0 +1,35 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+
+typedef struct b2IdPool // integer id allocator that recycles freed ids
+{
+	b2IntArray freeArray; // ids returned through b2FreeId, popped again by b2AllocId
+	int nextIndex; // next never-used id; b2AllocId returns it and increments when freeArray is empty
+} b2IdPool;
+
+b2IdPool b2CreateIdPool( void ); // pool with nextIndex 0 and a newly created free list
+void b2DestroyIdPool( b2IdPool* pool ); // destroys the free list and zeroes the pool
+
+int b2AllocId( b2IdPool* pool ); // pops a freed id if one exists, else returns nextIndex and increments it
+void b2FreeId( b2IdPool* pool, int id ); // pushes id on the free list
+void b2ValidateFreeId( b2IdPool* pool, int id ); // with B2_VALIDATE: asserts unless id is in the free list
+void b2ValidateUsedId( b2IdPool* pool, int id ); // with B2_VALIDATE: asserts if id is in the free list
+
+static inline int b2GetIdCount( b2IdPool* pool ) // ids handed out and not currently in the free list
+{
+	return pool->nextIndex - pool->freeArray.count;
+}
+
+static inline int b2GetIdCapacity( b2IdPool* pool ) // one past the largest id ever handed out
+{
+	return pool->nextIndex;
+}
+
+static inline int b2GetIdBytes( b2IdPool* pool ) // byte count that b2IntArray_ByteCount reports for the free list
+{
+	return b2IntArray_ByteCount( &pool->freeArray );
+}
diff --git a/src/vendor/box2d/island.c b/src/vendor/box2d/island.c
new file mode 100644
index 0000000..95c5a2f
--- /dev/null
+++ b/src/vendor/box2d/island.c
@@ -0,0 +1,977 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "island.h"
+
+#include "body.h"
+#include "contact.h"
+#include "core.h"
+#include "joint.h"
+#include "solver_set.h"
+#include "world.h"
+
+#include <stddef.h>
+
+B2_ARRAY_SOURCE( b2Island, b2Island )
+B2_ARRAY_SOURCE( b2IslandSim, b2IslandSim )
+
+b2Island* b2CreateIsland( b2World* world, int setIndex )
+{
+	// Islands are only created in the awake set or in one of the sleeping sets.
+	B2_ASSERT( setIndex == b2_awakeSet || setIndex >= b2_firstSleepingSet );
+
+	int newId = b2AllocId( &world->islandIdPool );
+	if ( newId != world->islands.count )
+	{
+		// Recycled id: the slot already exists and must have been released.
+		B2_ASSERT( world->islands.data[newId].setIndex == B2_NULL_INDEX );
+	}
+	else
+	{
+		// Fresh id: append a zeroed slot so the id indexes a valid element.
+		b2Island blank = { 0 };
+		b2IslandArray_Push( &world->islands, blank );
+	}
+
+	// Look up the destination solver set and the island slot.
+	b2SolverSet* targetSet = b2SolverSetArray_Get( &world->solverSets, setIndex );
+	b2Island* island = b2IslandArray_Get( &world->islands, newId );
+
+	// Identity and placement; localIndex is the sim array's count just before the sim entry is added.
+	island->islandId = newId;
+	island->setIndex = setIndex;
+	island->localIndex = targetSet->islandSims.count;
+
+	// Start with empty body, contact, and joint lists and no union-find parent.
+	island->headBody = island->tailBody = B2_NULL_INDEX;
+	island->headContact = island->tailContact = B2_NULL_INDEX;
+	island->headJoint = island->tailJoint = B2_NULL_INDEX;
+	island->bodyCount = island->contactCount = island->jointCount = 0;
+	island->parentIsland = B2_NULL_INDEX;
+	island->constraintRemoveCount = 0;
+
+	b2IslandSim* sim = b2IslandSimArray_Add( &targetSet->islandSims );
+	sim->islandId = newId;
+	return island;
+}
+
+void b2DestroyIsland( b2World* world, int islandId )
+{
+	// Drop the pending split request if it targets the island being destroyed.
+	if ( world->splitIslandId == islandId )
+	{
+		world->splitIslandId = B2_NULL_INDEX;
+	}
+
+	// The island is assumed to be empty; only its bookkeeping is torn down.
+	b2Island* doomed = b2IslandArray_Get( &world->islands, islandId );
+	b2SolverSet* owner = b2SolverSetArray_Get( &world->solverSets, doomed->setIndex );
+	int slot = doomed->localIndex;
+	int swappedFrom = b2IslandSimArray_RemoveSwap( &owner->islandSims, slot );
+	if ( swappedFrom != B2_NULL_INDEX )
+	{
+		// Another sim was moved into the vacated slot: repoint its island at that slot.
+		int swappedId = owner->islandSims.data[slot].islandId;
+		b2Island* swapped = b2IslandArray_Get( &world->islands, swappedId );
+		B2_ASSERT( swapped->localIndex == swappedFrom );
+		swapped->localIndex = slot;
+	}
+
+	// Mark the slot as unused and hand the id back to the pool.
+	doomed->islandId = B2_NULL_INDEX;
+	doomed->setIndex = doomed->localIndex = B2_NULL_INDEX;
+	b2FreeId( &world->islandIdPool, islandId );
+}
+
+static void b2AddContactToIsland( b2World* world, int islandId, b2Contact* contact )
+{
+	// The contact must not already be linked into any island list.
+	B2_ASSERT( contact->islandId == B2_NULL_INDEX );
+	B2_ASSERT( contact->islandPrev == B2_NULL_INDEX );
+	B2_ASSERT( contact->islandNext == B2_NULL_INDEX );
+
+	b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+	int newId = contact->contactId;
+	int oldHead = island->headContact;
+
+	// Push onto the front of the island's doubly linked contact list.
+	if ( oldHead != B2_NULL_INDEX )
+	{
+		contact->islandNext = oldHead;
+		b2ContactArray_Get( &world->contacts, oldHead )->islandPrev = newId;
+	}
+	island->headContact = newId;
+	if ( island->tailContact == B2_NULL_INDEX )
+	{
+		island->tailContact = newId;
+	}
+	contact->islandId = islandId;
+	island->contactCount += 1;
+	b2ValidateIsland( world, islandId );
+}
+
+// Link a touching contact into the island graph, waking a sleeping body when the other body is awake.
+// Finds the union-find root of each body's island (with path compression), records root B as a child of root A when the
+// roots differ, and adds the contact to that root. https://en.wikipedia.org/wiki/Disjoint-set_data_structure
+void b2LinkContact( b2World* world, b2Contact* contact )
+{
+	B2_ASSERT( ( contact->flags & b2_contactTouchingFlag ) != 0 );
+
+	int bodyIdA = contact->edges[0].bodyId;
+	int bodyIdB = contact->edges[1].bodyId;
+
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, bodyIdA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, bodyIdB );
+
+	B2_ASSERT( bodyA->setIndex != b2_disabledSet && bodyB->setIndex != b2_disabledSet );
+	B2_ASSERT( bodyA->setIndex != b2_staticSet || bodyB->setIndex != b2_staticSet );
+
+	// Wake bodyB if bodyA is awake and bodyB is sleeping
+	if ( bodyA->setIndex == b2_awakeSet && bodyB->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeSolverSet( world, bodyB->setIndex );
+	}
+
+	// Wake bodyA if bodyB is awake and bodyA is sleeping
+	if ( bodyB->setIndex == b2_awakeSet && bodyA->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeSolverSet( world, bodyA->setIndex );
+	}
+
+	int islandIdA = bodyA->islandId;
+	int islandIdB = bodyB->islandId;
+
+	// Static bodies have null island indices, and at least one body must be in an island.
+	B2_ASSERT( bodyA->setIndex != b2_staticSet || islandIdA == B2_NULL_INDEX );
+	B2_ASSERT( bodyB->setIndex != b2_staticSet || islandIdB == B2_NULL_INDEX );
+	B2_ASSERT( islandIdA != B2_NULL_INDEX || islandIdB != B2_NULL_INDEX );
+
+	if ( islandIdA == islandIdB )
+	{
+		// Contact in same island
+		b2AddContactToIsland( world, islandIdA, contact );
+		return;
+	}
+
+	// Union-find root of islandA (islandIdA tracks the id of the island islandA points at)
+	b2Island* islandA = NULL;
+	if ( islandIdA != B2_NULL_INDEX )
+	{
+		islandA = b2IslandArray_Get( &world->islands, islandIdA );
+		int parentId = islandA->parentIsland;
+		while ( parentId != B2_NULL_INDEX )
+		{
+			b2Island* parent = b2IslandArray_Get( &world->islands, parentId );
+			if ( parent->parentIsland != B2_NULL_INDEX )
+			{
+				// path compression
+				islandA->parentIsland = parent->parentIsland;
+			}
+
+			islandA = parent;
+			islandIdA = parentId;
+			parentId = islandA->parentIsland;
+		}
+	}
+
+	// Union-find root of islandB, walked with the same parentId cursor as islandA above
+	b2Island* islandB = NULL;
+	if ( islandIdB != B2_NULL_INDEX )
+	{
+		islandB = b2IslandArray_Get( &world->islands, islandIdB );
+		int parentId = islandB->parentIsland;
+		while ( parentId != B2_NULL_INDEX )
+		{
+			b2Island* parent = b2IslandArray_Get( &world->islands, parentId );
+			if ( parent->parentIsland != B2_NULL_INDEX )
+			{
+				// path compression
+				islandB->parentIsland = parent->parentIsland;
+			}
+
+			islandB = parent;
+			islandIdB = parentId;
+			parentId = islandB->parentIsland;
+		}
+	}
+
+	B2_ASSERT( islandA != NULL || islandB != NULL );
+
+	// Union-Find link island roots
+	if ( islandA != islandB && islandA != NULL && islandB != NULL )
+	{
+		// Two distinct roots: hang B under A. The lists themselves are merged later by b2MergeAwakeIslands.
+		B2_ASSERT( islandB->parentIsland == B2_NULL_INDEX );
+		islandB->parentIsland = islandIdA;
+	}
+
+	if ( islandA != NULL )
+	{
+		b2AddContactToIsland( world, islandIdA, contact );
+	}
+	else
+	{
+		b2AddContactToIsland( world, islandIdB, contact );
+	}
+
+	// todo why not merge the islands right here?
+}
+
+// Called when a contact loses its contact points or is destroyed: removes it from its island's contact list.
+void b2UnlinkContact( b2World* world, b2Contact* contact )
+{
+	B2_ASSERT( contact->islandId != B2_NULL_INDEX );
+
+	// Capture the contact's links up front; they are cleared at the end.
+	int islandId = contact->islandId;
+	int goneId = contact->contactId;
+	int prevId = contact->islandPrev;
+	int nextId = contact->islandNext;
+	b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+
+	// Make the neighbors point past the removed contact.
+	if ( prevId != B2_NULL_INDEX )
+	{
+		b2Contact* before = b2ContactArray_Get( &world->contacts, prevId );
+		B2_ASSERT( before->islandNext == goneId );
+		before->islandNext = nextId;
+	}
+	if ( nextId != B2_NULL_INDEX )
+	{
+		b2Contact* after = b2ContactArray_Get( &world->contacts, nextId );
+		B2_ASSERT( after->islandPrev == goneId );
+		after->islandPrev = prevId;
+	}
+
+	// Retarget the island's head/tail if either one was the removed contact.
+	if ( island->headContact == goneId )
+	{
+		island->headContact = nextId;
+	}
+	if ( island->tailContact == goneId )
+	{
+		island->tailContact = prevId;
+	}
+
+	B2_ASSERT( island->contactCount > 0 );
+	island->contactCount -= 1;
+	island->constraintRemoveCount += 1;
+
+	contact->islandId = contact->islandPrev = contact->islandNext = B2_NULL_INDEX;
+	b2ValidateIsland( world, islandId );
+}
+
+static void b2AddJointToIsland( b2World* world, int islandId, b2Joint* joint )
+{
+	// Only a joint that is currently outside every island list may be added.
+	B2_ASSERT( joint->islandId == B2_NULL_INDEX );
+	B2_ASSERT( joint->islandPrev == B2_NULL_INDEX );
+	B2_ASSERT( joint->islandNext == B2_NULL_INDEX );
+
+	b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+	int addedId = joint->jointId;
+	int formerHead = island->headJoint;
+
+	// Insert at the head of the island's doubly linked joint list.
+	if ( formerHead != B2_NULL_INDEX )
+	{
+		joint->islandNext = formerHead;
+		b2JointArray_Get( &world->joints, formerHead )->islandPrev = addedId;
+	}
+	island->headJoint = addedId;
+	if ( island->tailJoint == B2_NULL_INDEX )
+	{
+		island->tailJoint = addedId;
+	}
+	joint->islandId = islandId;
+	island->jointCount += 1;
+	b2ValidateIsland( world, islandId );
+}
+
+void b2LinkJoint( b2World* world, b2Joint* joint, bool mergeIslands )
+{
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, joint->edges[0].bodyId );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, joint->edges[1].bodyId );
+	// Link the joint into the island graph. First: if one body is awake and the other sleeping, wake the sleeping set.
+	if ( bodyA->setIndex == b2_awakeSet && bodyB->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeSolverSet( world, bodyB->setIndex );
+	}
+	else if ( bodyB->setIndex == b2_awakeSet && bodyA->setIndex >= b2_firstSleepingSet )
+	{
+		b2WakeSolverSet( world, bodyA->setIndex );
+	}
+
+	int islandIdA = bodyA->islandId;
+	int islandIdB = bodyB->islandId;
+	// At least one of the two bodies must already belong to an island.
+	B2_ASSERT( islandIdA != B2_NULL_INDEX || islandIdB != B2_NULL_INDEX );
+
+	if ( islandIdA == islandIdB )
+	{
+		// Joint in same island
+		b2AddJointToIsland( world, islandIdA, joint );
+		return;
+	}
+
+	// Union-find root of islandA; islandIdA ends up as the root's id once the loop finishes
+	b2Island* islandA = NULL;
+	if ( islandIdA != B2_NULL_INDEX )
+	{
+		islandA = b2IslandArray_Get( &world->islands, islandIdA );
+		while ( islandA->parentIsland != B2_NULL_INDEX )
+		{
+			b2Island* parent = b2IslandArray_Get( &world->islands, islandA->parentIsland );
+			if ( parent->parentIsland != B2_NULL_INDEX )
+			{
+				// path compression
+				islandA->parentIsland = parent->parentIsland;
+			}
+
+			islandIdA = islandA->parentIsland;
+			islandA = parent;
+		}
+	}
+
+	// Union-find root of islandB; islandIdB ends up as the root's id once the loop finishes
+	b2Island* islandB = NULL;
+	if ( islandIdB != B2_NULL_INDEX )
+	{
+		islandB = b2IslandArray_Get( &world->islands, islandIdB );
+		while ( islandB->parentIsland != B2_NULL_INDEX )
+		{
+			b2Island* parent = b2IslandArray_Get( &world->islands, islandB->parentIsland );
+			if ( parent->parentIsland != B2_NULL_INDEX )
+			{
+				// path compression
+				islandB->parentIsland = parent->parentIsland;
+			}
+
+			islandIdB = islandB->parentIsland;
+			islandB = parent;
+		}
+	}
+
+	B2_ASSERT( islandA != NULL || islandB != NULL );
+
+	// Union-Find link island roots: root B becomes a child of root A when both exist and differ
+	if ( islandA != islandB && islandA != NULL && islandB != NULL )
+	{
+		B2_ASSERT( islandA != islandB );
+		B2_ASSERT( islandB->parentIsland == B2_NULL_INDEX );
+		islandB->parentIsland = islandIdA;
+	}
+	// The joint goes into root A when A exists, otherwise into root B.
+	if ( islandA != NULL )
+	{
+		b2AddJointToIsland( world, islandIdA, joint );
+	}
+	else
+	{
+		b2AddJointToIsland( world, islandIdB, joint );
+	}
+
+	// Joints need to have islands merged immediately when they are created
+	// to keep the island graph valid.
+	// However, when a body type is being changed the merge can be deferred until
+	// all joints are linked.
+	if (mergeIslands)
+	{
+		b2MergeAwakeIslands( world );
+	}
+}
+
+void b2UnlinkJoint( b2World* world, b2Joint* joint )
+{
+	B2_ASSERT( joint->islandId != B2_NULL_INDEX );
+
+	// Snapshot the joint's links before they are rewritten.
+	int islandId = joint->islandId;
+	int removedId = joint->jointId;
+	int beforeId = joint->islandPrev;
+	int afterId = joint->islandNext;
+	b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+
+	// Bridge the neighbors over the removed joint.
+	if ( beforeId != B2_NULL_INDEX )
+	{
+		b2Joint* before = b2JointArray_Get( &world->joints, beforeId );
+		B2_ASSERT( before->islandNext == removedId );
+		before->islandNext = afterId;
+	}
+	if ( afterId != B2_NULL_INDEX )
+	{
+		b2Joint* after = b2JointArray_Get( &world->joints, afterId );
+		B2_ASSERT( after->islandPrev == removedId );
+		after->islandPrev = beforeId;
+	}
+
+	// Move the island's head/tail off the removed joint.
+	if ( island->headJoint == removedId )
+	{
+		island->headJoint = afterId;
+	}
+	if ( island->tailJoint == removedId )
+	{
+		island->tailJoint = beforeId;
+	}
+
+	// One joint fewer; the removal is counted so the island can be considered for splitting.
+	B2_ASSERT( island->jointCount > 0 );
+	island->jointCount -= 1;
+	island->constraintRemoveCount += 1;
+	joint->islandId = joint->islandPrev = joint->islandNext = B2_NULL_INDEX;
+	b2ValidateIsland( world, islandId );
+}
+
+// Merge an island into its root island: retag its members with the root id, then append its lists to the root's lists.
+// todo we can assume all islands are awake here
+static void b2MergeIsland( b2World* world, b2Island* island )
+{
+	B2_ASSERT( island->parentIsland != B2_NULL_INDEX );
+
+	int rootId = island->parentIsland;
+	b2Island* rootIsland = b2IslandArray_Get( &world->islands, rootId );
+	B2_ASSERT( rootIsland->parentIsland == B2_NULL_INDEX );
+
+	// remap island indices of every body, contact, and joint in the merged island
+	int bodyId = island->headBody;
+	while ( bodyId != B2_NULL_INDEX )
+	{
+		b2Body* body = b2BodyArray_Get( &world->bodies, bodyId );
+		body->islandId = rootId;
+		bodyId = body->islandNext;
+	}
+
+	int contactId = island->headContact;
+	while ( contactId != B2_NULL_INDEX )
+	{
+		b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+		contact->islandId = rootId;
+		contactId = contact->islandNext;
+	}
+
+	int jointId = island->headJoint;
+	while ( jointId != B2_NULL_INDEX )
+	{
+		b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+		joint->islandId = rootId;
+		jointId = joint->islandNext;
+	}
+
+	// connect body lists (both islands are required to have at least one body)
+	B2_ASSERT( rootIsland->tailBody != B2_NULL_INDEX );
+	b2Body* tailBody = b2BodyArray_Get( &world->bodies, rootIsland->tailBody );
+	B2_ASSERT( tailBody->islandNext == B2_NULL_INDEX );
+	tailBody->islandNext = island->headBody;
+
+	B2_ASSERT( island->headBody != B2_NULL_INDEX );
+	b2Body* headBody = b2BodyArray_Get( &world->bodies, island->headBody );
+	B2_ASSERT( headBody->islandPrev == B2_NULL_INDEX );
+	headBody->islandPrev = rootIsland->tailBody;
+
+	rootIsland->tailBody = island->tailBody;
+	rootIsland->bodyCount += island->bodyCount;
+
+	// connect contact lists; nothing to do when only the root has contacts
+	if ( rootIsland->headContact == B2_NULL_INDEX )
+	{
+		// Root island has no contacts
+		B2_ASSERT( rootIsland->tailContact == B2_NULL_INDEX && rootIsland->contactCount == 0 );
+		rootIsland->headContact = island->headContact;
+		rootIsland->tailContact = island->tailContact;
+		rootIsland->contactCount = island->contactCount;
+	}
+	else if ( island->headContact != B2_NULL_INDEX )
+	{
+		// Both islands have contacts
+		B2_ASSERT( island->tailContact != B2_NULL_INDEX && island->contactCount > 0 );
+		B2_ASSERT( rootIsland->tailContact != B2_NULL_INDEX && rootIsland->contactCount > 0 );
+
+		b2Contact* tailContact = b2ContactArray_Get( &world->contacts, rootIsland->tailContact );
+		B2_ASSERT( tailContact->islandNext == B2_NULL_INDEX );
+		tailContact->islandNext = island->headContact;
+
+		b2Contact* headContact = b2ContactArray_Get( &world->contacts, island->headContact );
+		B2_ASSERT( headContact->islandPrev == B2_NULL_INDEX );
+		headContact->islandPrev = rootIsland->tailContact;
+
+		rootIsland->tailContact = island->tailContact;
+		rootIsland->contactCount += island->contactCount;
+	}
+	// connect joint lists; nothing to do when only the root has joints
+	if ( rootIsland->headJoint == B2_NULL_INDEX )
+	{
+		// Root island has no joints
+		B2_ASSERT( rootIsland->tailJoint == B2_NULL_INDEX && rootIsland->jointCount == 0 );
+		rootIsland->headJoint = island->headJoint;
+		rootIsland->tailJoint = island->tailJoint;
+		rootIsland->jointCount = island->jointCount;
+	}
+	else if ( island->headJoint != B2_NULL_INDEX )
+	{
+		// Both islands have joints
+		B2_ASSERT( island->tailJoint != B2_NULL_INDEX && island->jointCount > 0 );
+		B2_ASSERT( rootIsland->tailJoint != B2_NULL_INDEX && rootIsland->jointCount > 0 );
+
+		b2Joint* tailJoint = b2JointArray_Get( &world->joints, rootIsland->tailJoint );
+		B2_ASSERT( tailJoint->islandNext == B2_NULL_INDEX );
+		tailJoint->islandNext = island->headJoint;
+
+		b2Joint* headJoint = b2JointArray_Get( &world->joints, island->headJoint );
+		B2_ASSERT( headJoint->islandPrev == B2_NULL_INDEX );
+		headJoint->islandPrev = rootIsland->tailJoint;
+
+		rootIsland->tailJoint = island->tailJoint;
+		rootIsland->jointCount += island->jointCount;
+	}
+
+	// Track removed constraints
+	rootIsland->constraintRemoveCount += island->constraintRemoveCount;
+	// The merged island itself is not released here; the caller destroys it.
+	b2ValidateIsland( world, rootId );
+}
+
+// Iterate over all awake islands and merge any that need merging
+// Islands that get merged into a root island will be removed from the awake island array
+// and returned to the pool.
+// todo this might be faster if b2IslandSim held the connectivity data
+void b2MergeAwakeIslands( b2World* world )
+{
+	b2TracyCZoneNC( merge_islands, "Merge Islands", b2_colorMediumTurquoise, true );
+
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	b2IslandSim* islandSims = awakeSet->islandSims.data;
+	int awakeIslandCount = awakeSet->islandSims.count;
+
+	// Step 1: Ensure every child island points to its root island. This avoids merging a child island with
+	// a parent island that has already been merged with a grand-parent island.
+	for ( int i = 0; i < awakeIslandCount; ++i )
+	{
+		int islandId = islandSims[i].islandId;
+
+		b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+
+		// find the root island, compressing parent links along the way
+		int rootId = islandId;
+		b2Island* rootIsland = island;
+		while ( rootIsland->parentIsland != B2_NULL_INDEX )
+		{
+			b2Island* parent = b2IslandArray_Get( &world->islands, rootIsland->parentIsland );
+			if ( parent->parentIsland != B2_NULL_INDEX )
+			{
+				// path compression
+				rootIsland->parentIsland = parent->parentIsland;
+			}
+
+			rootId = rootIsland->parentIsland;
+			rootIsland = parent;
+		}
+		// a non-root island is pointed directly at the root that was found
+		if ( rootIsland != island )
+		{
+			island->parentIsland = rootId;
+		}
+	}
+
+	// Step 2: merge every awake island into its parent (which must be a root island)
+	// Reverse to support removal from awake array.
+	for ( int i = awakeIslandCount - 1; i >= 0; --i )
+	{
+		int islandId = islandSims[i].islandId;
+		b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+
+		if ( island->parentIsland == B2_NULL_INDEX )
+		{
+			continue; // root islands stay where they are
+		}
+
+		b2MergeIsland( world, island );
+
+		// this call does a remove swap from the end of the island sim array
+		b2DestroyIsland( world, islandId );
+	}
+
+	b2ValidateConnectivity( world );
+
+	b2TracyCZoneEnd( merge_islands );
+}
+
+#define B2_CONTACT_REMOVE_THRESHOLD 1 // NOTE(review): not referenced anywhere in island.c - confirm it is still needed
+
+void b2SplitIsland( b2World* world, int baseId )
+{
+	b2Island* baseIsland = b2IslandArray_Get( &world->islands, baseId );
+	int setIndex = baseIsland->setIndex;
+	// Replace an awake base island by new islands found with a DFS over its bodies' contacts and joints.
+	if ( setIndex != b2_awakeSet )
+	{
+		// can only split awake island
+		return;
+	}
+
+	if ( baseIsland->constraintRemoveCount == 0 )
+	{
+		// this island doesn't need to be split
+		return;
+	}
+
+	b2ValidateIsland( world, baseId );
+
+	int bodyCount = baseIsland->bodyCount;
+
+	b2Body* bodies = world->bodies.data;
+	b2ArenaAllocator* alloc = &world->arena;
+
+	// No lock is needed because I ensure the allocator is not used while this task is active.
+	int* stack = b2AllocateArenaItem( alloc, bodyCount * sizeof( int ), "island stack" );
+	int* bodyIds = b2AllocateArenaItem( alloc, bodyCount * sizeof( int ), "body ids" );
+
+	// Build array containing all body indices from base island. These
+	// serve as seed bodies for the depth first search (DFS).
+	int index = 0;
+	int nextBody = baseIsland->headBody;
+	while ( nextBody != B2_NULL_INDEX )
+	{
+		bodyIds[index++] = nextBody;
+		b2Body* body = bodies + nextBody;
+
+		// Clear visitation mark
+		body->isMarked = false;
+
+		nextBody = body->islandNext;
+	}
+	B2_ASSERT( index == bodyCount );
+
+	// Clear contact island flags. Only need to consider contacts
+	// already in the base island.
+	int nextContactId = baseIsland->headContact;
+	while ( nextContactId != B2_NULL_INDEX )
+	{
+		b2Contact* contact = b2ContactArray_Get( &world->contacts, nextContactId );
+		contact->isMarked = false;
+		nextContactId = contact->islandNext;
+	}
+
+	// Clear joint island flags.
+	int nextJoint = baseIsland->headJoint;
+	while ( nextJoint != B2_NULL_INDEX )
+	{
+		b2Joint* joint = b2JointArray_Get( &world->joints, nextJoint );
+		joint->isMarked = false;
+		nextJoint = joint->islandNext;
+	}
+
+	// Done with the base split island. Its members are re-linked into the new islands created below.
+	b2DestroyIsland( world, baseId );
+
+	// Each island is found as a depth first search starting from a seed body
+	for ( int i = 0; i < bodyCount; ++i )
+	{
+		int seedIndex = bodyIds[i];
+		b2Body* seed = bodies + seedIndex;
+		B2_ASSERT( seed->setIndex == setIndex );
+
+		if ( seed->isMarked == true )
+		{
+			// The body has already been visited
+			continue;
+		}
+
+		int stackCount = 0;
+		stack[stackCount++] = seedIndex;
+		seed->isMarked = true;
+
+		// Create new island
+		// No lock needed because only a single island can split per time step. No islands are being used during the constraint
+		// solve. However, islands are touched during body finalization.
+		b2Island* island = b2CreateIsland( world, setIndex );
+
+		int islandId = island->islandId;
+
+		// Perform a depth first search (DFS) on the constraint graph.
+		while ( stackCount > 0 )
+		{
+			// Grab the next body off the stack and add it to the island.
+			int bodyId = stack[--stackCount];
+			b2Body* body = bodies + bodyId;
+			B2_ASSERT( body->setIndex == b2_awakeSet );
+			B2_ASSERT( body->isMarked == true );
+
+			// Add body to island (appended at the tail of the body list)
+			body->islandId = islandId;
+			if ( island->tailBody != B2_NULL_INDEX )
+			{
+				bodies[island->tailBody].islandNext = bodyId;
+			}
+			body->islandPrev = island->tailBody;
+			body->islandNext = B2_NULL_INDEX;
+			island->tailBody = bodyId;
+
+			if ( island->headBody == B2_NULL_INDEX )
+			{
+				island->headBody = bodyId;
+			}
+
+			island->bodyCount += 1;
+
+			// Search all contacts connected to this body.
+			int contactKey = body->headContactKey;
+			while ( contactKey != B2_NULL_INDEX )
+			{
+				int contactId = contactKey >> 1; // key = ( contact id << 1 ) | edge index
+				int edgeIndex = contactKey & 1;
+
+				b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+				B2_ASSERT( contact->contactId == contactId );
+
+				// Next key
+				contactKey = contact->edges[edgeIndex].nextKey;
+
+				// Has this contact already been added to this island?
+				if ( contact->isMarked )
+				{
+					continue;
+				}
+
+				// Is this contact enabled and touching?
+				if ( ( contact->flags & b2_contactTouchingFlag ) == 0 )
+				{
+					continue;
+				}
+
+				contact->isMarked = true;
+
+				int otherEdgeIndex = edgeIndex ^ 1;
+				int otherBodyId = contact->edges[otherEdgeIndex].bodyId;
+				b2Body* otherBody = bodies + otherBodyId;
+
+				// Maybe add other body to stack
+				if ( otherBody->isMarked == false && otherBody->setIndex != b2_staticSet )
+				{
+					B2_ASSERT( stackCount < bodyCount );
+					stack[stackCount++] = otherBodyId;
+					otherBody->isMarked = true;
+				}
+
+				// Add contact to island (appended at the tail of the contact list)
+				contact->islandId = islandId;
+				if ( island->tailContact != B2_NULL_INDEX )
+				{
+					b2Contact* tailContact = b2ContactArray_Get( &world->contacts, island->tailContact );
+					tailContact->islandNext = contactId;
+				}
+				contact->islandPrev = island->tailContact;
+				contact->islandNext = B2_NULL_INDEX;
+				island->tailContact = contactId;
+
+				if ( island->headContact == B2_NULL_INDEX )
+				{
+					island->headContact = contactId;
+				}
+
+				island->contactCount += 1;
+			}
+
+			// Search all joints connected to this body.
+			int jointKey = body->headJointKey;
+			while ( jointKey != B2_NULL_INDEX )
+			{
+				int jointId = jointKey >> 1; // key = ( joint id << 1 ) | edge index
+				int edgeIndex = jointKey & 1;
+
+				b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+				B2_ASSERT( joint->jointId == jointId );
+
+				// Next key
+				jointKey = joint->edges[edgeIndex].nextKey;
+
+				// Has this joint already been added to this island?
+				if ( joint->isMarked )
+				{
+					continue;
+				}
+
+				joint->isMarked = true;
+
+				int otherEdgeIndex = edgeIndex ^ 1;
+				int otherBodyId = joint->edges[otherEdgeIndex].bodyId;
+				b2Body* otherBody = bodies + otherBodyId;
+
+				// Don't simulate joints connected to disabled bodies.
+				if ( otherBody->setIndex == b2_disabledSet )
+				{
+					continue;
+				}
+
+				// Maybe add other body to stack
+				if ( otherBody->isMarked == false && otherBody->setIndex == b2_awakeSet )
+				{
+					B2_ASSERT( stackCount < bodyCount );
+					stack[stackCount++] = otherBodyId;
+					otherBody->isMarked = true;
+				}
+
+				// Add joint to island (appended at the tail of the joint list)
+				joint->islandId = islandId;
+				if ( island->tailJoint != B2_NULL_INDEX )
+				{
+					b2Joint* tailJoint = b2JointArray_Get( &world->joints, island->tailJoint );
+					tailJoint->islandNext = jointId;
+				}
+				joint->islandPrev = island->tailJoint;
+				joint->islandNext = B2_NULL_INDEX;
+				island->tailJoint = jointId;
+
+				if ( island->headJoint == B2_NULL_INDEX )
+				{
+					island->headJoint = jointId;
+				}
+
+				island->jointCount += 1;
+			}
+		}
+
+		b2ValidateIsland( world, islandId );
+	}
+	// Release the scratch arrays in the reverse order of their allocation.
+	b2FreeArenaItem( alloc, bodyIds );
+	b2FreeArenaItem( alloc, stack );
+}
+
+// Task entry point: split the island recorded in world->splitIslandId because contacts and/or joints were removed from it.
+// This is called during the constraint solve while islands are not being touched. This uses DFS and touches a lot of memory,
+// so it can be quite slow.
+// Note: contacts/joints connected to static bodies must belong to an island but don't affect island connectivity
+// Note: static bodies are never in an island
+// Note: this task interacts with some allocators without locks under the assumption that no other tasks
+// are interacting with these data structures.
+void b2SplitIslandTask( int startIndex, int endIndex, uint32_t threadIndex, void* context )
+{
+	b2TracyCZoneNC( split, "Split Island", b2_colorOlive, true );
+
+	B2_UNUSED( startIndex, endIndex, threadIndex ); // only context is used by this task
+
+	uint64_t ticks = b2GetTicks();
+	b2World* world = context; // context is the b2World being stepped
+
+	B2_ASSERT( world->splitIslandId != B2_NULL_INDEX );
+
+	b2SplitIsland( world, world->splitIslandId );
+	// Accumulate the b2GetMilliseconds( ticks ) reading into the split-island profile counter.
+	world->profile.splitIslands += b2GetMilliseconds( ticks );
+	b2TracyCZoneEnd( split );
+}
+
+#if B2_VALIDATE
+void b2ValidateIsland( b2World* world, int islandId )
+{
+	b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+	B2_ASSERT( island->islandId == islandId );
+	B2_ASSERT( island->setIndex != B2_NULL_INDEX );
+	B2_ASSERT( island->headBody != B2_NULL_INDEX );
+	// Validation build only: walk the body, contact, and joint lists and assert head/tail/count agree with them.
+	{
+		B2_ASSERT( island->tailBody != B2_NULL_INDEX );
+		B2_ASSERT( island->bodyCount > 0 );
+		if ( island->bodyCount > 1 )
+		{
+			B2_ASSERT( island->tailBody != island->headBody );
+		}
+		B2_ASSERT( island->bodyCount <= b2GetIdCount( &world->bodyIdPool ) );
+
+		int count = 0;
+		int bodyId = island->headBody;
+		while ( bodyId != B2_NULL_INDEX )
+		{
+			b2Body* body = b2BodyArray_Get(&world->bodies, bodyId);
+			B2_ASSERT( body->islandId == islandId );
+			B2_ASSERT( body->setIndex == island->setIndex );
+			count += 1;
+
+			if ( count == island->bodyCount )
+			{
+				B2_ASSERT( bodyId == island->tailBody ); // the last counted body must be the recorded tail
+			}
+
+			bodyId = body->islandNext;
+		}
+		B2_ASSERT( count == island->bodyCount );
+	}
+	// Contacts are optional: either a consistent non-empty list or a fully empty one.
+	if ( island->headContact != B2_NULL_INDEX )
+	{
+		B2_ASSERT( island->tailContact != B2_NULL_INDEX );
+		B2_ASSERT( island->contactCount > 0 );
+		if ( island->contactCount > 1 )
+		{
+			B2_ASSERT( island->tailContact != island->headContact );
+		}
+		B2_ASSERT( island->contactCount <= b2GetIdCount( &world->contactIdPool ) );
+
+		int count = 0;
+		int contactId = island->headContact;
+		while ( contactId != B2_NULL_INDEX )
+		{
+			b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+			B2_ASSERT( contact->setIndex == island->setIndex );
+			B2_ASSERT( contact->islandId == islandId );
+			count += 1;
+
+			if ( count == island->contactCount )
+			{
+				B2_ASSERT( contactId == island->tailContact );
+			}
+
+			contactId = contact->islandNext;
+		}
+		B2_ASSERT( count == island->contactCount );
+	}
+	else
+	{
+		B2_ASSERT( island->tailContact == B2_NULL_INDEX );
+		B2_ASSERT( island->contactCount == 0 );
+	}
+	// Joints are optional as well and follow the same rules as contacts.
+	if ( island->headJoint != B2_NULL_INDEX )
+	{
+		B2_ASSERT( island->tailJoint != B2_NULL_INDEX );
+		B2_ASSERT( island->jointCount > 0 );
+		if ( island->jointCount > 1 )
+		{
+			B2_ASSERT( island->tailJoint != island->headJoint );
+		}
+		B2_ASSERT( island->jointCount <= b2GetIdCount( &world->jointIdPool ) );
+
+		int count = 0;
+		int jointId = island->headJoint;
+		while ( jointId != B2_NULL_INDEX )
+		{
+			b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+			B2_ASSERT( joint->setIndex == island->setIndex ); // NOTE(review): joint->islandId is not checked, unlike bodies/contacts - confirm
+			count += 1;
+
+			if ( count == island->jointCount )
+			{
+				B2_ASSERT( jointId == island->tailJoint );
+			}
+
+			jointId = joint->islandNext;
+		}
+		B2_ASSERT( count == island->jointCount );
+	}
+	else
+	{
+		B2_ASSERT( island->tailJoint == B2_NULL_INDEX );
+		B2_ASSERT( island->jointCount == 0 );
+	}
+}
+
+#else
+
+void b2ValidateIsland( b2World* world, int islandId )
+{
+	B2_UNUSED( world ); // no-op variant compiled when B2_VALIDATE is off
+	B2_UNUSED( islandId );
+}
+#endif
diff --git a/src/vendor/box2d/island.h b/src/vendor/box2d/island.h
new file mode 100644
index 0000000..0243c01
--- /dev/null
+++ b/src/vendor/box2d/island.h
@@ -0,0 +1,89 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+typedef struct b2Contact b2Contact;
+typedef struct b2Joint b2Joint;
+typedef struct b2World b2World;
+
+// Deterministic solver
+//
+// Collide all awake contacts
+// Use bit array to emit start/stop touching events in defined order, per thread. Try using contact index, assuming contacts are
+// created in a deterministic order. bit-wise OR together bit arrays and issue changes:
+// - start touching: merge islands - temporary linked list - mark root island dirty - wake all - largest island is root
+// - stop touching: increment constraintRemoveCount
+
+// Persistent island for awake bodies, joints, and contacts
+// https://en.wikipedia.org/wiki/Component_(graph_theory)
+// https://en.wikipedia.org/wiki/Dynamic_connectivity
+// map from int to solver set and index
+typedef struct b2Island
+{
+	// index of solver set stored in b2World
+	// may be B2_NULL_INDEX
+	int setIndex;
+
+	// index of this island's b2IslandSim within the set's islandSims array
+	// may be B2_NULL_INDEX
+	int localIndex;
+
+	int islandId; // own index in the world's island array; B2_NULL_INDEX after b2DestroyIsland
+
+	int headBody; // body list, chained through the bodies' islandPrev / islandNext
+	int tailBody;
+	int bodyCount;
+
+	int headContact; // contact list, chained through the contacts' islandPrev / islandNext
+	int tailContact;
+	int contactCount;
+
+	int headJoint; // joint list, chained through the joints' islandPrev / islandNext
+	int tailJoint;
+	int jointCount;
+
+	// Union find: id of the island this one is to be merged into, or B2_NULL_INDEX for a root
+	// todo this could go away if islands are merged immediately with b2LinkJoint and b2LinkContact
+	int parentIsland;
+
+	// Keeps track of how many contacts and joints have been removed from this island.
+	// This is used to determine if an island is a candidate for splitting.
+	int constraintRemoveCount;
+} b2Island;
+
+// This is used to move islands across solver sets
+typedef struct b2IslandSim
+{
+	int islandId; // id of the b2Island this entry stands for
+} b2IslandSim;
+
+b2Island* b2CreateIsland( b2World* world, int setIndex );
+void b2DestroyIsland( b2World* world, int islandId );
+
+// Link a contact into the island graph when it starts having contact points
+void b2LinkContact( b2World* world, b2Contact* contact );
+
+// Unlink contact from the island graph when it stops having contact points
+void b2UnlinkContact( b2World* world, b2Contact* contact );
+
+// Link a joint into the island graph when it is created
+void b2LinkJoint( b2World* world, b2Joint* joint, bool mergeIslands );
+
+// Unlink a joint from the island graph when it is destroyed
+void b2UnlinkJoint( b2World* world, b2Joint* joint );
+
+void b2MergeAwakeIslands( b2World* world );
+
+void b2SplitIsland( b2World* world, int baseId );
+void b2SplitIslandTask( int startIndex, int endIndex, uint32_t threadIndex, void* context );
+
+void b2ValidateIsland( b2World* world, int islandId );
+
+B2_ARRAY_INLINE( b2Island, b2Island )
+B2_ARRAY_INLINE( b2IslandSim, b2IslandSim )
diff --git a/src/vendor/box2d/joint.c b/src/vendor/box2d/joint.c
new file mode 100644
index 0000000..9ab6207
--- /dev/null
+++ b/src/vendor/box2d/joint.c
@@ -0,0 +1,1268 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "joint.h"
+
+#include "body.h"
+#include "contact.h"
+#include "core.h"
+#include "island.h"
+#include "shape.h"
+#include "solver.h"
+#include "solver_set.h"
+#include "world.h"
+
+// needed for dll export
+#include "box2d/box2d.h"
+
+#include <stddef.h>
+#include <string.h>
+
+B2_ARRAY_SOURCE( b2Joint, b2Joint )
+B2_ARRAY_SOURCE( b2JointSim, b2JointSim )
+
+b2DistanceJointDef b2DefaultDistanceJointDef( void )
+{
+	// Fields not named here start at zero; internalValue carries B2_SECRET_COOKIE.
+	b2DistanceJointDef distanceDef = { .internalValue = B2_SECRET_COOKIE };
+	distanceDef.length = 1.0f;
+	distanceDef.maxLength = B2_HUGE;
+	return distanceDef;
+}
+
+b2MotorJointDef b2DefaultMotorJointDef( void )
+{
+	// Fields not named here start at zero; internalValue carries B2_SECRET_COOKIE.
+	b2MotorJointDef motorDef = { .internalValue = B2_SECRET_COOKIE };
+	motorDef.maxForce = 1.0f;
+	motorDef.maxTorque = 1.0f;
+	motorDef.correctionFactor = 0.3f;
+	return motorDef;
+}
+
+b2MouseJointDef b2DefaultMouseJointDef( void )
+{
+	// Fields not named here start at zero; internalValue carries B2_SECRET_COOKIE.
+	b2MouseJointDef mouseDef = { .internalValue = B2_SECRET_COOKIE };
+	mouseDef.hertz = 4.0f;
+	mouseDef.dampingRatio = 1.0f;
+	mouseDef.maxForce = 1.0f;
+	return mouseDef;
+}
+
+b2FilterJointDef b2DefaultFilterJointDef( void )
+{
+	// Everything is zero except internalValue, which carries B2_SECRET_COOKIE.
+	b2FilterJointDef filterDef = { .internalValue = B2_SECRET_COOKIE };
+	return filterDef;
+}
+
+b2PrismaticJointDef b2DefaultPrismaticJointDef( void )
+{
+	// Default axis is local (1, 0); other fields start at zero apart from the cookie.
+	b2PrismaticJointDef prismaticDef = { .internalValue = B2_SECRET_COOKIE };
+	prismaticDef.localAxisA = (b2Vec2){ 1.0f, 0.0f };
+	return prismaticDef;
+}
+
+b2RevoluteJointDef b2DefaultRevoluteJointDef( void )
+{
+	// Fields not named here start at zero; internalValue carries B2_SECRET_COOKIE.
+	b2RevoluteJointDef revoluteDef = { .internalValue = B2_SECRET_COOKIE };
+	revoluteDef.drawSize = 0.25f;
+	return revoluteDef;
+}
+
+b2WeldJointDef b2DefaultWeldJointDef( void )
+{
+	// Everything is zero except internalValue, which carries B2_SECRET_COOKIE.
+	b2WeldJointDef weldDef = { .internalValue = B2_SECRET_COOKIE };
+	return weldDef;
+}
+
+b2WheelJointDef b2DefaultWheelJointDef( void )
+{
+	// Default axis is local (0, 1) with the spring switched on; other fields start at zero apart from the cookie.
+	b2WheelJointDef wheelDef = { .internalValue = B2_SECRET_COOKIE };
+	wheelDef.localAxisA.y = 1.0f;
+	wheelDef.enableSpring = true;
+	wheelDef.hertz = 1.0f;
+	wheelDef.dampingRatio = 0.7f;
+	return wheelDef;
+}
+
+b2ExplosionDef b2DefaultExplosionDef( void )
+{
+	// Only the mask is non-zero; note that no internalValue cookie is set on this def.
+	b2ExplosionDef explosionDef = { .maskBits = B2_DEFAULT_MASK_BITS };
+	return explosionDef;
+}
+
+b2Joint* b2GetJointFullId( b2World* world, b2JointId jointId )
+{
+	int id = jointId.index1 - 1; // index1 is the array index plus one (ids are built in this file with `jointId + 1`)
+	b2Joint* joint = b2JointArray_Get( &world->joints, id );
+	B2_ASSERT( joint->jointId == id && joint->generation == jointId.generation ); // slot must be live and of the same generation as the handle
+	return joint;
+}
+
+b2JointSim* b2GetJointSim( b2World* world, b2Joint* joint )
+{
+	if ( joint->setIndex == b2_awakeSet ) // awake joints are looked up in the constraint graph, by color
+	{
+		B2_ASSERT( 0 <= joint->colorIndex && joint->colorIndex < B2_GRAPH_COLOR_COUNT );
+		b2GraphColor* color = world->constraintGraph.colors + joint->colorIndex;
+		return b2JointSimArray_Get( &color->jointSims, joint->localIndex );
+	}
+
+	b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, joint->setIndex ); // any other set holds its joint sims itself
+	return b2JointSimArray_Get( &set->jointSims, joint->localIndex );
+}
+
+b2JointSim* b2GetJointSimCheckType( b2JointId jointId, b2JointType type )
+{
+	B2_UNUSED( type ); // type is otherwise only read inside B2_ASSERT below
+
+	b2World* world = b2GetWorld( jointId.world0 );
+	B2_ASSERT( world->locked == false );
+	if ( world->locked )
+	{
+		return NULL; // callers get NULL instead of a sim while the world is locked
+	}
+
+	b2Joint* joint = b2GetJointFullId( world, jointId );
+	B2_ASSERT( joint->type == type );
+	b2JointSim* jointSim = b2GetJointSim( world, joint );
+	B2_ASSERT( jointSim->type == type ); // the joint record and its sim must agree on the type
+	return jointSim;
+}
+
+typedef struct b2JointPair
+{
+	b2Joint* joint; // record stored in world->joints
+	b2JointSim* jointSim; // matching sim data stored in a solver set or in the constraint graph
+} b2JointPair;
+
+static b2JointPair b2CreateJoint( b2World* world, b2Body* bodyA, b2Body* bodyB, void* userData, float drawSize, b2JointType type,
+								  bool collideConnected )
+{
+	int bodyIdA = bodyA->id;
+	int bodyIdB = bodyB->id;
+	int maxSetIndex = b2MaxInt( bodyA->setIndex, bodyB->setIndex ); // used below to wake a sleeping set or to pick the sleeping set
+
+	// Create joint id and joint
+	int jointId = b2AllocId( &world->jointIdPool );
+	if ( jointId == world->joints.count ) // the pool returned an index one past the end, so grow the array
+	{
+		b2JointArray_Push( &world->joints, (b2Joint){ 0 } );
+	}
+
+	b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+	joint->jointId = jointId;
+	joint->userData = userData;
+	joint->generation += 1; // incremented, not reset: b2GetJointFullId compares it with the handle's generation
+	joint->setIndex = B2_NULL_INDEX;
+	joint->colorIndex = B2_NULL_INDEX;
+	joint->localIndex = B2_NULL_INDEX;
+	joint->islandId = B2_NULL_INDEX;
+	joint->islandPrev = B2_NULL_INDEX;
+	joint->islandNext = B2_NULL_INDEX;
+	joint->drawSize = drawSize;
+	joint->type = type;
+	joint->collideConnected = collideConnected;
+	joint->isMarked = false;
+
+	// Doubly linked list on bodyA
+	joint->edges[0].bodyId = bodyIdA;
+	joint->edges[0].prevKey = B2_NULL_INDEX;
+	joint->edges[0].nextKey = bodyA->headJointKey;
+
+	int keyA = ( jointId << 1 ) | 0; // edge key: joint id shifted left by one, edge index (0 for A, 1 for B) in the low bit
+	if ( bodyA->headJointKey != B2_NULL_INDEX )
+	{
+		b2Joint* jointA = b2JointArray_Get( &world->joints, bodyA->headJointKey >> 1 );
+		b2JointEdge* edgeA = jointA->edges + ( bodyA->headJointKey & 1 );
+		edgeA->prevKey = keyA;
+	}
+	bodyA->headJointKey = keyA; // the new joint becomes the head of body A's list
+	bodyA->jointCount += 1;
+
+	// Doubly linked list on bodyB
+	joint->edges[1].bodyId = bodyIdB;
+	joint->edges[1].prevKey = B2_NULL_INDEX;
+	joint->edges[1].nextKey = bodyB->headJointKey;
+
+	int keyB = ( jointId << 1 ) | 1;
+	if ( bodyB->headJointKey != B2_NULL_INDEX )
+	{
+		b2Joint* jointB = b2JointArray_Get( &world->joints, bodyB->headJointKey >> 1 );
+		b2JointEdge* edgeB = jointB->edges + ( bodyB->headJointKey & 1 );
+		edgeB->prevKey = keyB;
+	}
+	bodyB->headJointKey = keyB; // the new joint becomes the head of body B's list
+	bodyB->jointCount += 1;
+
+	b2JointSim* jointSim;
+
+	if ( bodyA->setIndex == b2_disabledSet || bodyB->setIndex == b2_disabledSet )
+	{
+		// if either body is disabled, create in disabled set
+		b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, b2_disabledSet );
+		joint->setIndex = b2_disabledSet;
+		joint->localIndex = set->jointSims.count;
+
+		jointSim = b2JointSimArray_Add( &set->jointSims );
+		memset( jointSim, 0, sizeof( b2JointSim ) );
+
+		jointSim->jointId = jointId;
+		jointSim->bodyIdA = bodyIdA;
+		jointSim->bodyIdB = bodyIdB;
+	}
+	else if ( bodyA->setIndex == b2_staticSet && bodyB->setIndex == b2_staticSet )
+	{
+		// joint is connecting static bodies
+		b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, b2_staticSet );
+		joint->setIndex = b2_staticSet;
+		joint->localIndex = set->jointSims.count;
+
+		jointSim = b2JointSimArray_Add( &set->jointSims );
+		memset( jointSim, 0, sizeof( b2JointSim ) );
+
+		jointSim->jointId = jointId;
+		jointSim->bodyIdA = bodyIdA;
+		jointSim->bodyIdB = bodyIdB;
+	}
+	else if ( bodyA->setIndex == b2_awakeSet || bodyB->setIndex == b2_awakeSet )
+	{
+		// if either body is sleeping, wake it
+		if ( maxSetIndex >= b2_firstSleepingSet )
+		{
+			b2WakeSolverSet( world, maxSetIndex );
+		}
+
+		joint->setIndex = b2_awakeSet;
+
+		jointSim = b2CreateJointInGraph( world, joint ); // awake joints live in the constraint graph rather than a solver set
+		jointSim->jointId = jointId;
+		jointSim->bodyIdA = bodyIdA;
+		jointSim->bodyIdB = bodyIdB;
+	}
+	else
+	{
+		// joint connected between sleeping and/or static bodies
+		B2_ASSERT( bodyA->setIndex >= b2_firstSleepingSet || bodyB->setIndex >= b2_firstSleepingSet );
+		B2_ASSERT( bodyA->setIndex != b2_staticSet || bodyB->setIndex != b2_staticSet );
+
+		// joint should go into the sleeping set (not static set)
+		int setIndex = maxSetIndex;
+
+		b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, setIndex );
+		joint->setIndex = setIndex;
+		joint->localIndex = set->jointSims.count;
+
+		jointSim = b2JointSimArray_Add( &set->jointSims );
+		memset( jointSim, 0, sizeof( b2JointSim ) );
+
+		jointSim->jointId = jointId;
+		jointSim->bodyIdA = bodyIdA;
+		jointSim->bodyIdB = bodyIdB;
+
+		if ( bodyA->setIndex != bodyB->setIndex && bodyA->setIndex >= b2_firstSleepingSet &&
+			 bodyB->setIndex >= b2_firstSleepingSet )
+		{
+			// merge sleeping sets
+			b2MergeSolverSets( world, bodyA->setIndex, bodyB->setIndex );
+			B2_ASSERT( bodyA->setIndex == bodyB->setIndex );
+
+			// fix potentially invalid set index
+			setIndex = bodyA->setIndex;
+
+			b2SolverSet* mergedSet = b2SolverSetArray_Get( &world->solverSets, setIndex );
+
+			// Careful! The joint sim pointer was orphaned by the set merge.
+			jointSim = b2JointSimArray_Get( &mergedSet->jointSims, joint->localIndex );
+		}
+
+		B2_ASSERT( joint->setIndex == setIndex );
+	}
+
+	B2_ASSERT( jointSim->jointId == jointId );
+	B2_ASSERT( jointSim->bodyIdA == bodyIdA );
+	B2_ASSERT( jointSim->bodyIdB == bodyIdB );
+
+	if ( joint->setIndex > b2_disabledSet ) // joints in sets at or below b2_disabledSet are not linked into an island
+	{
+		// Add edge to island graph
+		bool mergeIslands = true;
+		b2LinkJoint( world, joint, mergeIslands );
+	}
+
+	b2ValidateSolverSets( world );
+
+	return (b2JointPair){ joint, jointSim };
+}
+
+static void b2DestroyContactsBetweenBodies( b2World* world, b2Body* bodyA, b2Body* bodyB )
+{
+	int contactKey;
+	int otherBodyId;
+
+	// use the smaller of the two contact lists
+	if ( bodyA->contactCount < bodyB->contactCount )
+	{
+		contactKey = bodyA->headContactKey;
+		otherBodyId = bodyB->id;
+	}
+	else
+	{
+		contactKey = bodyB->headContactKey;
+		otherBodyId = bodyA->id;
+	}
+
+	// no need to wake bodies when a joint removes collision between them
+	bool wakeBodies = false;
+
+	// destroy the contacts
+	while ( contactKey != B2_NULL_INDEX )
+	{
+		int contactId = contactKey >> 1; // contact keys pack the contact id above a one-bit edge index
+		int edgeIndex = contactKey & 1;
+
+		b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+		contactKey = contact->edges[edgeIndex].nextKey; // advance first: the contact may be destroyed below
+
+		int otherEdgeIndex = edgeIndex ^ 1;
+		if ( contact->edges[otherEdgeIndex].bodyId == otherBodyId ) // only contacts shared with the other body are removed
+		{
+			// Careful, this removes the contact from the current doubly linked list
+			b2DestroyContact( world, contact, wakeBodies );
+		}
+	}
+
+	b2ValidateSolverSets( world );
+}
+
+b2JointId b2CreateDistanceJoint( b2WorldId worldId, const b2DistanceJointDef* def )
+{
+	B2_CHECK_DEF( def );
+	b2World* world = b2GetWorldFromId( worldId );
+
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked )
+	{
+		return (b2JointId){ 0 }; // zeroed id is returned while the world is locked
+	}
+
+	B2_ASSERT( b2Body_IsValid( def->bodyIdA ) );
+	B2_ASSERT( b2Body_IsValid( def->bodyIdB ) );
+	B2_ASSERT( b2IsValidFloat( def->length ) && def->length > 0.0f );
+
+	b2Body* bodyA = b2GetBodyFullId( world, def->bodyIdA );
+	b2Body* bodyB = b2GetBodyFullId( world, def->bodyIdB );
+
+	b2JointPair pair = b2CreateJoint( world, bodyA, bodyB, def->userData, 1.0f, b2_distanceJoint, def->collideConnected );
+
+	b2JointSim* joint = pair.jointSim;
+	joint->type = b2_distanceJoint;
+	joint->localOriginAnchorA = def->localAnchorA;
+	joint->localOriginAnchorB = def->localAnchorB;
+
+	b2DistanceJoint empty = { 0 };
+	joint->distanceJoint = empty;
+	joint->distanceJoint.length = b2MaxFloat( def->length, B2_LINEAR_SLOP ); // rest length is never below the linear slop
+	joint->distanceJoint.hertz = def->hertz;
+	joint->distanceJoint.dampingRatio = def->dampingRatio;
+	joint->distanceJoint.minLength = b2MaxFloat( def->minLength, B2_LINEAR_SLOP );
+	joint->distanceJoint.maxLength = b2MaxFloat( joint->distanceJoint.minLength, def->maxLength ); // bound by the stored (clamped) minimum so max >= min always holds
+	joint->distanceJoint.maxMotorForce = def->maxMotorForce;
+	joint->distanceJoint.motorSpeed = def->motorSpeed;
+	joint->distanceJoint.enableSpring = def->enableSpring;
+	joint->distanceJoint.enableLimit = def->enableLimit;
+	joint->distanceJoint.enableMotor = def->enableMotor;
+	joint->distanceJoint.impulse = 0.0f;
+	joint->distanceJoint.lowerImpulse = 0.0f;
+	joint->distanceJoint.upperImpulse = 0.0f;
+	joint->distanceJoint.motorImpulse = 0.0f;
+
+	// If the joint prevents collisions, then destroy all contacts between attached bodies
+	if ( def->collideConnected == false )
+	{
+		b2DestroyContactsBetweenBodies( world, bodyA, bodyB );
+	}
+
+	b2JointId jointId = { joint->jointId + 1, world->worldId, pair.joint->generation }; // public index is the internal id plus one
+	return jointId;
+}
+
+b2JointId b2CreateMotorJoint( b2WorldId worldId, const b2MotorJointDef* def )
+{
+	B2_CHECK_DEF( def );
+	b2World* world = b2GetWorldFromId( worldId );
+
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked )
+	{
+		return (b2JointId){ 0 }; // zeroed id is returned while the world is locked
+	}
+
+	b2Body* bodyA = b2GetBodyFullId( world, def->bodyIdA );
+	b2Body* bodyB = b2GetBodyFullId( world, def->bodyIdB );
+
+	b2JointPair pair = b2CreateJoint( world, bodyA, bodyB, def->userData, 1.0f, b2_motorJoint, def->collideConnected );
+	b2JointSim* joint = pair.jointSim;
+
+	joint->type = b2_motorJoint;
+	joint->localOriginAnchorA = (b2Vec2){ 0.0f, 0.0f };
+	joint->localOriginAnchorB = (b2Vec2){ 0.0f, 0.0f };
+	joint->motorJoint = (b2MotorJoint){ 0 };
+	joint->motorJoint.linearOffset = def->linearOffset;
+	joint->motorJoint.angularOffset = def->angularOffset;
+	joint->motorJoint.maxForce = def->maxForce;
+	joint->motorJoint.maxTorque = def->maxTorque;
+	joint->motorJoint.correctionFactor = b2ClampFloat( def->correctionFactor, 0.0f, 1.0f ); // stored value is limited to [0, 1]
+
+	// If the joint prevents collisions, then destroy all contacts between attached bodies
+	if ( def->collideConnected == false )
+	{
+		b2DestroyContactsBetweenBodies( world, bodyA, bodyB );
+	}
+
+	b2JointId jointId = { joint->jointId + 1, world->worldId, pair.joint->generation }; // public index is the internal id plus one
+	return jointId;
+}
+
+b2JointId b2CreateMouseJoint( b2WorldId worldId, const b2MouseJointDef* def )
+{
+	B2_CHECK_DEF( def );
+	b2World* world = b2GetWorldFromId( worldId );
+
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked )
+	{
+		return (b2JointId){ 0 }; // zeroed id is returned while the world is locked
+	}
+
+	b2Body* bodyA = b2GetBodyFullId( world, def->bodyIdA );
+	b2Body* bodyB = b2GetBodyFullId( world, def->bodyIdB );
+
+	b2Transform transformA = b2GetBodyTransformQuick( world, bodyA );
+	b2Transform transformB = b2GetBodyTransformQuick( world, bodyB );
+
+	b2JointPair pair = b2CreateJoint( world, bodyA, bodyB, def->userData, 1.0f, b2_mouseJoint, def->collideConnected );
+
+	b2JointSim* joint = pair.jointSim;
+	joint->type = b2_mouseJoint;
+	joint->localOriginAnchorA = b2InvTransformPoint( transformA, def->target ); // def->target mapped through the inverse of each body transform
+	joint->localOriginAnchorB = b2InvTransformPoint( transformB, def->target );
+
+	b2MouseJoint empty = { 0 };
+	joint->mouseJoint = empty;
+	joint->mouseJoint.targetA = def->target;
+	joint->mouseJoint.hertz = def->hertz;
+	joint->mouseJoint.dampingRatio = def->dampingRatio;
+	joint->mouseJoint.maxForce = def->maxForce;
+	// NOTE(review): unlike most creators in this file, existing contacts are not destroyed here when collideConnected is false - confirm intended
+	b2JointId jointId = { joint->jointId + 1, world->worldId, pair.joint->generation };
+	return jointId;
+}
+
+b2JointId b2CreateFilterJoint( b2WorldId worldId, const b2FilterJointDef* def )
+{
+	B2_CHECK_DEF( def );
+	b2World* world = b2GetWorldFromId( worldId );
+	B2_ASSERT( world->locked == false );
+	if ( world->locked )
+	{
+		return (b2JointId){ 0 }; // zeroed id is returned while the world is locked
+	}
+
+	b2Body* bodyA = b2GetBodyFullId( world, def->bodyIdA );
+	b2Body* bodyB = b2GetBodyFullId( world, def->bodyIdB );
+
+	bool collideConnected = false; // a filter joint is always created with collision between its bodies switched off
+	b2JointPair pair = b2CreateJoint( world, bodyA, bodyB, def->userData, 1.0f, b2_filterJoint, collideConnected );
+
+	b2JointSim* joint = pair.jointSim;
+	joint->type = b2_filterJoint;
+	joint->localOriginAnchorA = b2Vec2_zero;
+	joint->localOriginAnchorB = b2Vec2_zero;
+
+	// The joint prevents collisions, so destroy all contacts that already exist between the attached bodies
+	b2DestroyContactsBetweenBodies( world, bodyA, bodyB );
+	b2JointId jointId = { joint->jointId + 1, world->worldId, pair.joint->generation }; // public index is the internal id plus one
+	return jointId;
+}
+
+b2JointId b2CreateRevoluteJoint( b2WorldId worldId, const b2RevoluteJointDef* def )
+{
+	B2_CHECK_DEF( def );
+	B2_ASSERT( def->lowerAngle <= def->upperAngle );
+	B2_ASSERT( def->lowerAngle >= -0.95f * B2_PI ); // limits are asserted to stay within +/- 0.95 * pi
+	B2_ASSERT( def->upperAngle <= 0.95f * B2_PI );
+
+	b2World* world = b2GetWorldFromId( worldId );
+
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked )
+	{
+		return (b2JointId){ 0 }; // zeroed id is returned while the world is locked
+	}
+
+	b2Body* bodyA = b2GetBodyFullId( world, def->bodyIdA );
+	b2Body* bodyB = b2GetBodyFullId( world, def->bodyIdB );
+
+	b2JointPair pair =
+		b2CreateJoint( world, bodyA, bodyB, def->userData, def->drawSize, b2_revoluteJoint, def->collideConnected );
+
+	b2JointSim* joint = pair.jointSim;
+	joint->type = b2_revoluteJoint;
+	joint->localOriginAnchorA = def->localAnchorA;
+	joint->localOriginAnchorB = def->localAnchorB;
+
+	b2RevoluteJoint empty = { 0 };
+	joint->revoluteJoint = empty;
+
+	joint->revoluteJoint.referenceAngle = b2ClampFloat( def->referenceAngle, -B2_PI, B2_PI ); // stored value is limited to [-pi, pi]
+	joint->revoluteJoint.linearImpulse = b2Vec2_zero;
+	joint->revoluteJoint.axialMass = 0.0f;
+	joint->revoluteJoint.springImpulse = 0.0f;
+	joint->revoluteJoint.motorImpulse = 0.0f;
+	joint->revoluteJoint.lowerImpulse = 0.0f;
+	joint->revoluteJoint.upperImpulse = 0.0f;
+	joint->revoluteJoint.hertz = def->hertz;
+	joint->revoluteJoint.dampingRatio = def->dampingRatio;
+	joint->revoluteJoint.lowerAngle = def->lowerAngle;
+	joint->revoluteJoint.upperAngle = def->upperAngle;
+	joint->revoluteJoint.maxMotorTorque = def->maxMotorTorque;
+	joint->revoluteJoint.motorSpeed = def->motorSpeed;
+	joint->revoluteJoint.enableSpring = def->enableSpring;
+	joint->revoluteJoint.enableLimit = def->enableLimit;
+	joint->revoluteJoint.enableMotor = def->enableMotor;
+
+	// If the joint prevents collisions, then destroy all contacts between attached bodies
+	if ( def->collideConnected == false )
+	{
+		b2DestroyContactsBetweenBodies( world, bodyA, bodyB );
+	}
+
+	b2JointId jointId = { joint->jointId + 1, world->worldId, pair.joint->generation }; // public index is the internal id plus one
+	return jointId;
+}
+
+b2JointId b2CreatePrismaticJoint( b2WorldId worldId, const b2PrismaticJointDef* def )
+{
+	B2_CHECK_DEF( def );
+	b2World* world = b2GetWorldFromId( worldId );
+
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked )
+	{
+		return (b2JointId){ 0 }; // zeroed id is returned while the world is locked
+	}
+
+	b2Body* bodyA = b2GetBodyFullId( world, def->bodyIdA );
+	b2Body* bodyB = b2GetBodyFullId( world, def->bodyIdB );
+
+	b2JointPair pair = b2CreateJoint( world, bodyA, bodyB, def->userData, 1.0f, b2_prismaticJoint, def->collideConnected );
+
+	b2JointSim* joint = pair.jointSim;
+	joint->type = b2_prismaticJoint;
+	joint->localOriginAnchorA = def->localAnchorA;
+	joint->localOriginAnchorB = def->localAnchorB;
+
+	b2PrismaticJoint empty = { 0 };
+	joint->prismaticJoint = empty;
+
+	joint->prismaticJoint.localAxisA = b2Normalize( def->localAxisA ); // the axis is stored after passing through b2Normalize
+	joint->prismaticJoint.referenceAngle = def->referenceAngle;
+	joint->prismaticJoint.impulse = b2Vec2_zero;
+	joint->prismaticJoint.axialMass = 0.0f;
+	joint->prismaticJoint.springImpulse = 0.0f;
+	joint->prismaticJoint.motorImpulse = 0.0f;
+	joint->prismaticJoint.lowerImpulse = 0.0f;
+	joint->prismaticJoint.upperImpulse = 0.0f;
+	joint->prismaticJoint.hertz = def->hertz;
+	joint->prismaticJoint.dampingRatio = def->dampingRatio;
+	joint->prismaticJoint.lowerTranslation = def->lowerTranslation;
+	joint->prismaticJoint.upperTranslation = def->upperTranslation;
+	joint->prismaticJoint.maxMotorForce = def->maxMotorForce;
+	joint->prismaticJoint.motorSpeed = def->motorSpeed;
+	joint->prismaticJoint.enableSpring = def->enableSpring;
+	joint->prismaticJoint.enableLimit = def->enableLimit;
+	joint->prismaticJoint.enableMotor = def->enableMotor;
+
+	// If the joint prevents collisions, then destroy all contacts between attached bodies
+	if ( def->collideConnected == false )
+	{
+		b2DestroyContactsBetweenBodies( world, bodyA, bodyB );
+	}
+
+	b2JointId jointId = { joint->jointId + 1, world->worldId, pair.joint->generation }; // public index is the internal id plus one
+	return jointId;
+}
+
+b2JointId b2CreateWeldJoint( b2WorldId worldId, const b2WeldJointDef* def )
+{
+	B2_CHECK_DEF( def );
+	b2World* world = b2GetWorldFromId( worldId );
+
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked )
+	{
+		return (b2JointId){ 0 }; // zeroed id is returned while the world is locked
+	}
+
+	b2Body* bodyA = b2GetBodyFullId( world, def->bodyIdA );
+	b2Body* bodyB = b2GetBodyFullId( world, def->bodyIdB );
+
+	b2JointPair pair = b2CreateJoint( world, bodyA, bodyB, def->userData, 1.0f, b2_weldJoint, def->collideConnected );
+
+	b2JointSim* joint = pair.jointSim;
+	joint->type = b2_weldJoint;
+	joint->localOriginAnchorA = def->localAnchorA;
+	joint->localOriginAnchorB = def->localAnchorB;
+
+	b2WeldJoint empty = { 0 };
+	joint->weldJoint = empty; // reset the whole weld payload before copying the def values in
+	joint->weldJoint.referenceAngle = def->referenceAngle;
+	joint->weldJoint.linearHertz = def->linearHertz;
+	joint->weldJoint.linearDampingRatio = def->linearDampingRatio;
+	joint->weldJoint.angularHertz = def->angularHertz;
+	joint->weldJoint.angularDampingRatio = def->angularDampingRatio;
+	joint->weldJoint.linearImpulse = b2Vec2_zero;
+	joint->weldJoint.angularImpulse = 0.0f;
+
+	// If the joint prevents collisions, then destroy all contacts between attached bodies
+	if ( def->collideConnected == false )
+	{
+		b2DestroyContactsBetweenBodies( world, bodyA, bodyB );
+	}
+
+	b2JointId jointId = { joint->jointId + 1, world->worldId, pair.joint->generation }; // public index is the internal id plus one
+	return jointId;
+}
+
+b2JointId b2CreateWheelJoint( b2WorldId worldId, const b2WheelJointDef* def )
+{
+	B2_CHECK_DEF( def );
+	b2World* world = b2GetWorldFromId( worldId );
+
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked )
+	{
+		return (b2JointId){ 0 }; // zeroed id is returned while the world is locked
+	}
+
+	b2Body* bodyA = b2GetBodyFullId( world, def->bodyIdA );
+	b2Body* bodyB = b2GetBodyFullId( world, def->bodyIdB );
+
+	b2JointPair pair = b2CreateJoint( world, bodyA, bodyB, def->userData, 1.0f, b2_wheelJoint, def->collideConnected );
+
+	b2JointSim* joint = pair.jointSim;
+	joint->type = b2_wheelJoint;
+	joint->localOriginAnchorA = def->localAnchorA;
+	joint->localOriginAnchorB = def->localAnchorB;
+
+	joint->wheelJoint = (b2WheelJoint){ 0 };
+	joint->wheelJoint.localAxisA = b2Normalize( def->localAxisA ); // the axis is stored after passing through b2Normalize
+	joint->wheelJoint.perpMass = 0.0f;
+	joint->wheelJoint.axialMass = 0.0f;
+	joint->wheelJoint.motorImpulse = 0.0f;
+	joint->wheelJoint.lowerImpulse = 0.0f;
+	joint->wheelJoint.upperImpulse = 0.0f;
+	joint->wheelJoint.lowerTranslation = def->lowerTranslation;
+	joint->wheelJoint.upperTranslation = def->upperTranslation;
+	joint->wheelJoint.maxMotorTorque = def->maxMotorTorque;
+	joint->wheelJoint.motorSpeed = def->motorSpeed;
+	joint->wheelJoint.hertz = def->hertz;
+	joint->wheelJoint.dampingRatio = def->dampingRatio;
+	joint->wheelJoint.enableSpring = def->enableSpring;
+	joint->wheelJoint.enableLimit = def->enableLimit;
+	joint->wheelJoint.enableMotor = def->enableMotor;
+
+	// If the joint prevents collisions, then destroy all contacts between attached bodies
+	if ( def->collideConnected == false )
+	{
+		b2DestroyContactsBetweenBodies( world, bodyA, bodyB );
+	}
+
+	b2JointId jointId = { joint->jointId + 1, world->worldId, pair.joint->generation }; // public index is the internal id plus one
+	return jointId;
+}
+
+void b2DestroyJointInternal( b2World* world, b2Joint* joint, bool wakeBodies )
+{
+	int jointId = joint->jointId; // saved now because joint->jointId is overwritten before the id is freed
+
+	b2JointEdge* edgeA = joint->edges + 0;
+	b2JointEdge* edgeB = joint->edges + 1;
+
+	int idA = edgeA->bodyId;
+	int idB = edgeB->bodyId;
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, idA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, idB );
+
+	// Remove from body A
+	if ( edgeA->prevKey != B2_NULL_INDEX )
+	{
+		b2Joint* prevJoint = b2JointArray_Get( &world->joints, edgeA->prevKey >> 1 );
+		b2JointEdge* prevEdge = prevJoint->edges + ( edgeA->prevKey & 1 );
+		prevEdge->nextKey = edgeA->nextKey;
+	}
+
+	if ( edgeA->nextKey != B2_NULL_INDEX )
+	{
+		b2Joint* nextJoint = b2JointArray_Get( &world->joints, edgeA->nextKey >> 1 );
+		b2JointEdge* nextEdge = nextJoint->edges + ( edgeA->nextKey & 1 );
+		nextEdge->prevKey = edgeA->prevKey;
+	}
+
+	int edgeKeyA = ( jointId << 1 ) | 0; // same key encoding as at creation: joint id shifted left, edge index in the low bit
+	if ( bodyA->headJointKey == edgeKeyA )
+	{
+		bodyA->headJointKey = edgeA->nextKey;
+	}
+
+	bodyA->jointCount -= 1;
+
+	// Remove from body B
+	if ( edgeB->prevKey != B2_NULL_INDEX )
+	{
+		b2Joint* prevJoint = b2JointArray_Get( &world->joints, edgeB->prevKey >> 1 );
+		b2JointEdge* prevEdge = prevJoint->edges + ( edgeB->prevKey & 1 );
+		prevEdge->nextKey = edgeB->nextKey;
+	}
+
+	if ( edgeB->nextKey != B2_NULL_INDEX )
+	{
+		b2Joint* nextJoint = b2JointArray_Get( &world->joints, edgeB->nextKey >> 1 );
+		b2JointEdge* nextEdge = nextJoint->edges + ( edgeB->nextKey & 1 );
+		nextEdge->prevKey = edgeB->prevKey;
+	}
+
+	int edgeKeyB = ( jointId << 1 ) | 1;
+	if ( bodyB->headJointKey == edgeKeyB )
+	{
+		bodyB->headJointKey = edgeB->nextKey;
+	}
+
+	bodyB->jointCount -= 1;
+
+	if ( joint->islandId != B2_NULL_INDEX ) // only joints that were linked into an island need unlinking
+	{
+		B2_ASSERT( joint->setIndex > b2_disabledSet );
+		b2UnlinkJoint( world, joint );
+	}
+	else
+	{
+		B2_ASSERT( joint->setIndex <= b2_disabledSet );
+	}
+
+	// Remove joint from solver set that owns it
+	int setIndex = joint->setIndex;
+	int localIndex = joint->localIndex;
+
+	if ( setIndex == b2_awakeSet )
+	{
+		b2RemoveJointFromGraph( world, joint->edges[0].bodyId, joint->edges[1].bodyId, joint->colorIndex, localIndex );
+	}
+	else
+	{
+		b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, setIndex );
+		int movedIndex = b2JointSimArray_RemoveSwap( &set->jointSims, localIndex );
+		if ( movedIndex != B2_NULL_INDEX )
+		{
+			// Fix moved joint
+			b2JointSim* movedJointSim = set->jointSims.data + localIndex; // the relocated sim now sits in the vacated slot
+			int movedId = movedJointSim->jointId;
+			b2Joint* movedJoint = b2JointArray_Get( &world->joints, movedId );
+			B2_ASSERT( movedJoint->localIndex == movedIndex );
+			movedJoint->localIndex = localIndex;
+		}
+	}
+
+	// Free joint and id (preserve joint generation)
+	joint->setIndex = B2_NULL_INDEX;
+	joint->localIndex = B2_NULL_INDEX;
+	joint->colorIndex = B2_NULL_INDEX;
+	joint->jointId = B2_NULL_INDEX;
+	b2FreeId( &world->jointIdPool, jointId );
+
+	if ( wakeBodies ) // waking is optional; b2DestroyJoint in this file always requests it
+	{
+		b2WakeBody( world, bodyA );
+		b2WakeBody( world, bodyB );
+	}
+
+	b2ValidateSolverSets( world );
+}
+
+void b2DestroyJoint( b2JointId jointId )
+{
+	b2World* world = b2GetWorld( jointId.world0 );
+	B2_ASSERT( world->locked == false );
+
+	// Past the assert, a locked world turns this call into a no-op.
+	if ( world->locked == false )
+	{
+		// Destruction through the public entry point always wakes both attached bodies.
+		const bool wakeBodies = true;
+		b2Joint* joint = b2GetJointFullId( world, jointId );
+		b2DestroyJointInternal( world, joint, wakeBodies );
+	}
+}
+
+b2JointType b2Joint_GetType( b2JointId jointId )
+{
+	// Resolve the owning world first, then read the type off the joint record for this id.
+	b2World* owner = b2GetWorld( jointId.world0 );
+	return b2GetJointFullId( owner, jointId )->type;
+}
+
+b2BodyId b2Joint_GetBodyA( b2JointId jointId )
+{
+	b2World* owner = b2GetWorld( jointId.world0 );
+	int bodyIndexA = b2GetJointFullId( owner, jointId )->edges[0].bodyId; // edge 0 belongs to body A
+	return b2MakeBodyId( owner, bodyIndexA );
+}
+
+b2BodyId b2Joint_GetBodyB( b2JointId jointId )
+{
+	b2World* owner = b2GetWorld( jointId.world0 );
+	int bodyIndexB = b2GetJointFullId( owner, jointId )->edges[1].bodyId; // edge 1 belongs to body B
+	return b2MakeBodyId( owner, bodyIndexB );
+}
+
+b2WorldId b2Joint_GetWorld( b2JointId jointId )
+{
+	b2WorldId worldId = { jointId.world0 + 1, b2GetWorld( jointId.world0 )->generation };
+	return worldId; // first field is world0 plus one; generation is read from the world itself
+}
+
+b2Vec2 b2Joint_GetLocalAnchorA( b2JointId jointId )
+{
+	// The anchor is kept on the joint sim, so go world -> joint record -> sim.
+	b2World* owner = b2GetWorld( jointId.world0 );
+	b2JointSim* sim = b2GetJointSim( owner, b2GetJointFullId( owner, jointId ) );
+	return sim->localOriginAnchorA;
+}
+
+b2Vec2 b2Joint_GetLocalAnchorB( b2JointId jointId )
+{
+	// The anchor is kept on the joint sim, so go world -> joint record -> sim.
+	b2World* owner = b2GetWorld( jointId.world0 );
+	b2JointSim* sim = b2GetJointSim( owner, b2GetJointFullId( owner, jointId ) );
+	return sim->localOriginAnchorB;
+}
+
+void b2Joint_SetCollideConnected( b2JointId jointId, bool shouldCollide )
+{
+	b2World* world = b2GetWorldLocked( jointId.world0 );
+	if ( world == NULL )
+	{
+		return;
+	}
+
+	b2Joint* joint = b2GetJointFullId( world, jointId );
+	if ( joint->collideConnected == shouldCollide ) // nothing to do when the flag already has the requested value
+	{
+		return;
+	}
+
+	joint->collideConnected = shouldCollide;
+
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, joint->edges[0].bodyId );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, joint->edges[1].bodyId );
+
+	if ( shouldCollide )
+	{
+		// need to tell the broad-phase to look for new pairs for one of the
+		// two bodies. Pick the one with the fewest shapes.
+		int shapeCountA = bodyA->shapeCount;
+		int shapeCountB = bodyB->shapeCount;
+
+		int shapeId = shapeCountA < shapeCountB ? bodyA->headShapeId : bodyB->headShapeId; // on a tie body B's list is walked
+		while ( shapeId != B2_NULL_INDEX )
+		{
+			b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+
+			if ( shape->proxyKey != B2_NULL_INDEX ) // shapes without a broad-phase proxy are skipped
+			{
+				b2BufferMove( &world->broadPhase, shape->proxyKey );
+			}
+
+			shapeId = shape->nextShapeId;
+		}
+	}
+	else
+	{
+		b2DestroyContactsBetweenBodies( world, bodyA, bodyB ); // collision was just switched off, so drop the existing contacts
+	}
+}
+
+bool b2Joint_GetCollideConnected( b2JointId jointId )
+{
+	// The flag is kept on the joint record itself, not on the joint sim.
+	b2World* owner = b2GetWorld( jointId.world0 );
+	return b2GetJointFullId( owner, jointId )->collideConnected;
+}
+
+void b2Joint_SetUserData( b2JointId jointId, void* userData )
+{
+	// The pointer is stored as given on the joint record.
+	b2World* owner = b2GetWorld( jointId.world0 );
+	b2GetJointFullId( owner, jointId )->userData = userData;
+}
+
+void* b2Joint_GetUserData( b2JointId jointId )
+{
+	// Hands back whatever pointer is currently stored on the joint record.
+	b2World* owner = b2GetWorld( jointId.world0 );
+	return b2GetJointFullId( owner, jointId )->userData;
+}
+
+void b2Joint_WakeBodies( b2JointId jointId )
+{
+	b2World* world = b2GetWorldLocked( jointId.world0 );
+	if ( world == NULL ) // NOTE(review): presumably NULL means the world is locked - confirm in world.c
+	{
+		return;
+	}
+
+	b2Joint* joint = b2GetJointFullId( world, jointId );
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, joint->edges[0].bodyId );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, joint->edges[1].bodyId );
+
+	b2WakeBody( world, bodyA ); // both attached bodies are woken, A first
+	b2WakeBody( world, bodyB );
+}
+
+b2Vec2 b2Joint_GetConstraintForce( b2JointId jointId )
+{
+	b2World* world = b2GetWorld( jointId.world0 );
+	b2Joint* joint = b2GetJointFullId( world, jointId );
+	b2JointSim* base = b2GetJointSim( world, joint );
+
+	switch ( joint->type ) // dispatch to the per-type force query
+	{
+		case b2_distanceJoint:
+			return b2GetDistanceJointForce( world, base );
+
+		case b2_motorJoint:
+			return b2GetMotorJointForce( world, base );
+
+		case b2_mouseJoint:
+			return b2GetMouseJointForce( world, base );
+
+		case b2_filterJoint:
+			return b2Vec2_zero; // filter joints always report zero force
+
+		case b2_prismaticJoint:
+			return b2GetPrismaticJointForce( world, base );
+
+		case b2_revoluteJoint:
+			return b2GetRevoluteJointForce( world, base );
+
+		case b2_weldJoint:
+			return b2GetWeldJointForce( world, base );
+
+		case b2_wheelJoint:
+			return b2GetWheelJointForce( world, base );
+
+		default:
+			B2_ASSERT( false ); // unknown joint type
+			return b2Vec2_zero;
+	}
+}
+
+float b2Joint_GetConstraintTorque( b2JointId jointId )
+{
+	b2World* world = b2GetWorld( jointId.world0 );
+	b2Joint* joint = b2GetJointFullId( world, jointId );
+	b2JointSim* base = b2GetJointSim( world, joint );
+
+	switch ( joint->type ) // dispatch to the per-type torque query
+	{
+		case b2_distanceJoint:
+			return 0.0f; // distance joints always report zero torque
+
+		case b2_motorJoint:
+			return b2GetMotorJointTorque( world, base );
+
+		case b2_mouseJoint:
+			return b2GetMouseJointTorque( world, base );
+
+		case b2_filterJoint:
+			return 0.0f; // filter joints always report zero torque
+
+		case b2_prismaticJoint:
+			return b2GetPrismaticJointTorque( world, base );
+
+		case b2_revoluteJoint:
+			return b2GetRevoluteJointTorque( world, base );
+
+		case b2_weldJoint:
+			return b2GetWeldJointTorque( world, base );
+
+		case b2_wheelJoint:
+			return b2GetWheelJointTorque( world, base );
+
+		default:
+			B2_ASSERT( false ); // unknown joint type
+			return 0.0f;
+	}
+}
+
+void b2PrepareJoint( b2JointSim* joint, b2StepContext* context )
+{
+	switch ( joint->type ) // forward to the prepare routine of the joint's type
+	{
+		case b2_distanceJoint:
+			b2PrepareDistanceJoint( joint, context );
+			break;
+
+		case b2_motorJoint:
+			b2PrepareMotorJoint( joint, context );
+			break;
+
+		case b2_mouseJoint:
+			b2PrepareMouseJoint( joint, context );
+			break;
+
+		case b2_filterJoint:
+			break; // filter joints have no prepare step
+
+		case b2_prismaticJoint:
+			b2PreparePrismaticJoint( joint, context );
+			break;
+
+		case b2_revoluteJoint:
+			b2PrepareRevoluteJoint( joint, context );
+			break;
+
+		case b2_weldJoint:
+			b2PrepareWeldJoint( joint, context );
+			break;
+
+		case b2_wheelJoint:
+			b2PrepareWheelJoint( joint, context );
+			break;
+
+		default:
+			B2_ASSERT( false ); // unknown joint type
+	}
+}
+
+void b2WarmStartJoint( b2JointSim* joint, b2StepContext* context )
+{
+	switch ( joint->type ) // forward to the warm-start routine of the joint's type
+	{
+		case b2_distanceJoint:
+			b2WarmStartDistanceJoint( joint, context );
+			break;
+
+		case b2_motorJoint:
+			b2WarmStartMotorJoint( joint, context );
+			break;
+
+		case b2_mouseJoint:
+			b2WarmStartMouseJoint( joint, context );
+			break;
+
+		case b2_filterJoint:
+			break; // filter joints have no warm-start step
+
+		case b2_prismaticJoint:
+			b2WarmStartPrismaticJoint( joint, context );
+			break;
+
+		case b2_revoluteJoint:
+			b2WarmStartRevoluteJoint( joint, context );
+			break;
+
+		case b2_weldJoint:
+			b2WarmStartWeldJoint( joint, context );
+			break;
+
+		case b2_wheelJoint:
+			b2WarmStartWheelJoint( joint, context );
+			break;
+
+		default:
+			B2_ASSERT( false ); // unknown joint type
+	}
+}
+
+void b2SolveJoint( b2JointSim* joint, b2StepContext* context, bool useBias )
+{
+	// Route the joint to the solve routine that matches its type
+	switch ( joint->type )
+	{
+		case b2_filterJoint:
+			return; // nothing is called for filter joints
+
+		case b2_distanceJoint:
+			b2SolveDistanceJoint( joint, context, useBias );
+			return;
+
+		case b2_motorJoint:
+			b2SolveMotorJoint( joint, context, useBias );
+			return;
+
+		case b2_mouseJoint:
+			b2SolveMouseJoint( joint, context ); // this routine takes no useBias argument
+			return;
+
+		case b2_prismaticJoint:
+			b2SolvePrismaticJoint( joint, context, useBias );
+			return;
+
+		case b2_revoluteJoint:
+			b2SolveRevoluteJoint( joint, context, useBias );
+			return;
+
+		case b2_weldJoint:
+			b2SolveWeldJoint( joint, context, useBias );
+			return;
+
+		case b2_wheelJoint:
+			b2SolveWheelJoint( joint, context, useBias );
+			return;
+		default:
+			B2_ASSERT( false );
+	}
+}
+
+void b2PrepareOverflowJoints( b2StepContext* context )
+{
+	b2TracyCZoneNC( prepare_joints, "PrepJoints", b2_colorOldLace, true );
+
+	// Prepare every joint sim stored in the overflow color of the constraint graph
+	b2JointSim* jointSims = context->graph->colors[B2_OVERFLOW_INDEX].jointSims.data;
+	int count = context->graph->colors[B2_OVERFLOW_INDEX].jointSims.count;
+
+	for ( int index = 0; index < count; ++index )
+	{
+		// b2PrepareJoint dispatches on the joint type
+		b2PrepareJoint( jointSims + index, context );
+	}
+
+	b2TracyCZoneEnd( prepare_joints );
+}
+
+void b2WarmStartOverflowJoints( b2StepContext* context )
+{
+	b2TracyCZoneNC( warm_joints, "WarmJoints", b2_colorOldLace, true );
+	// Warm start every joint sim in the overflow color; the zone has its own label so it is not reported as "PrepJoints"
+	b2ConstraintGraph* graph = context->graph;
+	b2JointSim* joints = graph->colors[B2_OVERFLOW_INDEX].jointSims.data;
+	int jointCount = graph->colors[B2_OVERFLOW_INDEX].jointSims.count;
+
+	for ( int i = 0; i < jointCount; ++i )
+	{
+		b2JointSim* joint = joints + i;
+		b2WarmStartJoint( joint, context );
+	}
+
+	b2TracyCZoneEnd( warm_joints );
+}
+
+void b2SolveOverflowJoints( b2StepContext* context, bool useBias )
+{
+	b2TracyCZoneNC( solve_joints, "SolveJoints", b2_colorLemonChiffon, true );
+
+	// Solve every joint sim stored in the overflow color of the constraint graph
+	b2JointSim* jointSims = context->graph->colors[B2_OVERFLOW_INDEX].jointSims.data;
+	int count = context->graph->colors[B2_OVERFLOW_INDEX].jointSims.count;
+
+	for ( int index = 0; index < count; ++index )
+	{
+		// b2SolveJoint dispatches on the joint type
+		b2SolveJoint( jointSims + index, context, useBias );
+	}
+
+	b2TracyCZoneEnd( solve_joints );
+}
+
+void b2DrawJoint( b2DebugDraw* draw, b2World* world, b2Joint* joint )
+{
+	// Both bodies are fetched first; a joint touching a disabled body is not drawn
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, joint->edges[0].bodyId );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, joint->edges[1].bodyId );
+	bool anyDisabled = bodyA->setIndex == b2_disabledSet || bodyB->setIndex == b2_disabledSet;
+	if ( anyDisabled )
+	{
+		return;
+	}
+
+	b2JointSim* sim = b2GetJointSim( world, joint );
+
+	// Joint anchors transformed by the current body transforms
+	b2Transform xfA = b2GetBodyTransformQuick( world, bodyA );
+	b2Transform xfB = b2GetBodyTransformQuick( world, bodyB );
+	b2Vec2 anchorA = b2TransformPoint( xfA, sim->localOriginAnchorA );
+	b2Vec2 anchorB = b2TransformPoint( xfB, sim->localOriginAnchorB );
+
+	// Types with a dedicated draw routine forward to it, the rest are drawn inline
+	switch ( joint->type )
+	{
+		case b2_distanceJoint:
+			b2DrawDistanceJoint( draw, sim, xfA, xfB );
+			break;
+
+		case b2_prismaticJoint:
+			b2DrawPrismaticJoint( draw, sim, xfA, xfB );
+			break;
+
+		case b2_revoluteJoint:
+			b2DrawRevoluteJoint( draw, sim, xfA, xfB, joint->drawSize );
+			break;
+
+		case b2_wheelJoint:
+			b2DrawWheelJoint( draw, sim, xfA, xfB );
+			break;
+
+		case b2_mouseJoint:
+		{
+			// Target and body B anchor as green points joined by a light gray segment
+			b2Vec2 target = sim->mouseJoint.targetA;
+			draw->DrawPointFcn( target, 4.0f, b2_colorGreen, draw->context );
+			draw->DrawPointFcn( anchorB, 4.0f, b2_colorGreen, draw->context );
+			draw->DrawSegmentFcn( target, anchorB, b2_colorLightGray, draw->context );
+		}
+		break;
+
+		case b2_filterJoint:
+			// A single gold segment between the two anchors
+			draw->DrawSegmentFcn( anchorA, anchorB, b2_colorGold, draw->context );
+			break;
+
+		default:
+		{
+			// Generic drawing: body A origin -> anchor A -> anchor B <- body B origin
+			b2HexColor color = b2_colorDarkSeaGreen;
+			draw->DrawSegmentFcn( xfA.p, anchorA, color, draw->context );
+			draw->DrawSegmentFcn( anchorA, anchorB, color, draw->context );
+			draw->DrawSegmentFcn( xfB.p, anchorB, color, draw->context );
+		}
+		break;
+	}
+
+	if ( draw->drawGraphColors && joint->colorIndex != B2_NULL_INDEX )
+	{
+		// Mark the midpoint between the anchors with the color for the joint's graph color index
+		b2HexColor palette[B2_GRAPH_COLOR_COUNT] = { b2_colorRed, b2_colorOrange, b2_colorYellow, b2_colorGreen,
+													 b2_colorCyan, b2_colorBlue, b2_colorViolet, b2_colorPink,
+													 b2_colorChocolate, b2_colorGoldenRod, b2_colorCoral, b2_colorBlack };
+
+		b2Vec2 midpoint = b2Lerp( anchorA, anchorB, 0.5f );
+		draw->DrawPointFcn( midpoint, 5.0f, palette[joint->colorIndex], draw->context );
+	}
+}
diff --git a/src/vendor/box2d/joint.h b/src/vendor/box2d/joint.h
new file mode 100644
index 0000000..e123661
--- /dev/null
+++ b/src/vendor/box2d/joint.h
@@ -0,0 +1,335 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+#pragma once
+
+#include "array.h"
+#include "solver.h"
+
+#include "box2d/types.h"
+
+typedef struct b2DebugDraw b2DebugDraw; // forward declarations: this header only uses pointers to these three types
+typedef struct b2StepContext b2StepContext;
+typedef struct b2World b2World;
+
+/// A joint edge is used to connect bodies and joints together
+/// in a joint graph where each body is a node and each joint
+/// is an edge. A joint edge belongs to a doubly linked list
+/// maintained in each attached body. Each joint has two joint
+/// nodes, one for each attached body.
+typedef struct b2JointEdge
+{
+	int bodyId; // id of the attached body; b2DrawJoint uses it to look the body up in world->bodies
+	int prevKey; // NOTE(review): presumably the key of the previous edge in the body's joint list - confirm encoding in joint.c
+	int nextKey; // NOTE(review): presumably the key of the next edge in the body's joint list - confirm encoding in joint.c
+} b2JointEdge;
+
+// Map from b2JointId to b2Joint in the solver sets
+typedef struct b2Joint
+{
+	void* userData;
+
+	// index of simulation set stored in b2World
+	// B2_NULL_INDEX when slot is free
+	int setIndex;
+
+	// index into the constraint graph color array, may be B2_NULL_INDEX for sleeping/disabled joints
+	// B2_NULL_INDEX when slot is free
+	int colorIndex;
+
+	// joint index within set or graph color
+	// B2_NULL_INDEX when slot is free
+	int localIndex;
+
+	b2JointEdge edges[2]; // b2DrawJoint reads edges[0] as body A and edges[1] as body B
+
+	int jointId;
+	int islandId;
+	int islandPrev;
+	int islandNext;
+
+	float drawSize; // forwarded to b2DrawRevoluteJoint by b2DrawJoint
+
+	b2JointType type; // selects the per-type drawing in b2DrawJoint
+
+	// This is monotonically advanced when a joint is allocated in this slot
+	// Used to check for invalid b2JointId
+	uint16_t generation;
+
+	bool isMarked;
+	bool collideConnected;
+
+} b2Joint;
+
+typedef struct b2DistanceJoint // stored in the b2JointSim union as 'distanceJoint'
+{
+	float length;
+	float hertz;
+	float dampingRatio;
+	float minLength;
+	float maxLength;
+
+	float maxMotorForce;
+	float motorSpeed;
+
+	float impulse; // NOTE(review): the four impulse fields are presumably accumulated solver impulses - confirm in the distance joint source
+	float lowerImpulse;
+	float upperImpulse;
+	float motorImpulse;
+
+	int indexA; // NOTE(review): indexA through axialMass look like per-step solver data - confirm against b2PrepareDistanceJoint
+	int indexB;
+	b2Vec2 anchorA;
+	b2Vec2 anchorB;
+	b2Vec2 deltaCenter;
+	b2Softness distanceSoftness;
+	float axialMass;
+
+	bool enableSpring;
+	bool enableLimit;
+	bool enableMotor;
+} b2DistanceJoint;
+
+typedef struct b2MotorJoint // stored in the b2JointSim union as 'motorJoint'
+{
+	b2Vec2 linearOffset;
+	float angularOffset;
+	b2Vec2 linearImpulse; // NOTE(review): impulse fields are presumably accumulated solver impulses - confirm in the motor joint source
+	float angularImpulse;
+	float maxForce;
+	float maxTorque;
+	float correctionFactor;
+
+	int indexA; // NOTE(review): indexA through angularMass look like per-step solver data - confirm against b2PrepareMotorJoint
+	int indexB;
+	b2Vec2 anchorA;
+	b2Vec2 anchorB;
+	b2Vec2 deltaCenter;
+	float deltaAngle;
+	b2Mat22 linearMass;
+	float angularMass;
+} b2MotorJoint;
+
+typedef struct b2MouseJoint // stored in the b2JointSim union as 'mouseJoint'
+{
+	b2Vec2 targetA; // b2DrawJoint draws this point and a segment from it to body B's anchor
+	float hertz;
+	float dampingRatio;
+	float maxForce;
+
+	b2Vec2 linearImpulse; // NOTE(review): impulse fields are presumably accumulated solver impulses - confirm in the mouse joint source
+	float angularImpulse;
+
+	b2Softness linearSoftness;
+	b2Softness angularSoftness;
+	int indexB; // only body B has an index and anchor here; there are no indexA/anchorA fields
+	b2Vec2 anchorB;
+	b2Vec2 deltaCenter;
+	b2Mat22 linearMass;
+} b2MouseJoint;
+
+typedef struct b2PrismaticJoint // stored in the b2JointSim union as 'prismaticJoint'
+{
+	b2Vec2 localAxisA;
+	b2Vec2 impulse; // NOTE(review): the impulse fields are presumably accumulated solver impulses - confirm in the prismatic joint source
+	float springImpulse;
+	float motorImpulse;
+	float lowerImpulse;
+	float upperImpulse;
+	float hertz;
+	float dampingRatio;
+	float maxMotorForce;
+	float motorSpeed;
+	float referenceAngle;
+	float lowerTranslation;
+	float upperTranslation;
+
+	int indexA; // NOTE(review): indexA through springSoftness look like per-step solver data - confirm against b2PreparePrismaticJoint
+	int indexB;
+	b2Vec2 anchorA;
+	b2Vec2 anchorB;
+	b2Vec2 axisA;
+	b2Vec2 deltaCenter;
+	float deltaAngle;
+	float axialMass;
+	b2Softness springSoftness;
+
+	bool enableSpring;
+	bool enableLimit;
+	bool enableMotor;
+} b2PrismaticJoint;
+
+typedef struct b2RevoluteJoint // stored in the b2JointSim union as 'revoluteJoint'
+{
+	b2Vec2 linearImpulse; // NOTE(review): the impulse fields are presumably accumulated solver impulses - confirm in the revolute joint source
+	float springImpulse;
+	float motorImpulse;
+	float lowerImpulse;
+	float upperImpulse;
+	float hertz;
+	float dampingRatio;
+	float maxMotorTorque;
+	float motorSpeed;
+	float referenceAngle;
+	float lowerAngle;
+	float upperAngle;
+
+	int indexA; // NOTE(review): indexA through springSoftness look like per-step solver data - confirm against b2PrepareRevoluteJoint
+	int indexB;
+	b2Vec2 anchorA;
+	b2Vec2 anchorB;
+	b2Vec2 deltaCenter;
+	float deltaAngle;
+	float axialMass;
+	b2Softness springSoftness;
+
+	bool enableSpring;
+	bool enableMotor;
+	bool enableLimit;
+} b2RevoluteJoint;
+
+typedef struct b2WeldJoint // stored in the b2JointSim union as 'weldJoint'
+{
+	float referenceAngle;
+	float linearHertz;
+	float linearDampingRatio;
+	float angularHertz;
+	float angularDampingRatio;
+
+	b2Softness linearSoftness;
+	b2Softness angularSoftness;
+	b2Vec2 linearImpulse; // NOTE(review): impulse fields are presumably accumulated solver impulses - confirm in the weld joint source
+	float angularImpulse;
+
+	int indexA; // NOTE(review): indexA through axialMass look like per-step solver data - confirm against b2PrepareWeldJoint
+	int indexB;
+	b2Vec2 anchorA;
+	b2Vec2 anchorB;
+	b2Vec2 deltaCenter;
+	float deltaAngle;
+	float axialMass;
+} b2WeldJoint;
+
+typedef struct b2WheelJoint // stored in the b2JointSim union as 'wheelJoint'
+{
+	b2Vec2 localAxisA;
+	float perpImpulse; // NOTE(review): the impulse fields are presumably accumulated solver impulses - confirm in the wheel joint source
+	float motorImpulse;
+	float springImpulse;
+	float lowerImpulse;
+	float upperImpulse;
+	float maxMotorTorque;
+	float motorSpeed;
+	float lowerTranslation;
+	float upperTranslation;
+	float hertz;
+	float dampingRatio;
+
+	int indexA; // NOTE(review): indexA through springSoftness look like per-step solver data - confirm against b2PrepareWheelJoint
+	int indexB;
+	b2Vec2 anchorA;
+	b2Vec2 anchorB;
+	b2Vec2 axisA;
+	b2Vec2 deltaCenter;
+	float perpMass;
+	float motorMass;
+	float axialMass;
+	b2Softness springSoftness;
+
+	bool enableSpring;
+	bool enableMotor;
+	bool enableLimit;
+} b2WheelJoint;
+
+/// The base joint class. Joints are used to constrain two bodies together in
+/// various fashions. Some joints also feature limits and motors.
+typedef struct b2JointSim
+{
+	int jointId;
+
+	int bodyIdA;
+	int bodyIdB;
+
+	b2JointType type; // b2PrepareJoint, b2WarmStartJoint and b2SolveJoint switch on this
+
+	// Anchors relative to body origin
+	b2Vec2 localOriginAnchorA;
+	b2Vec2 localOriginAnchorB;
+
+	float invMassA, invMassB;
+	float invIA, invIB;
+
+	union // NOTE(review): presumably only the member matching 'type' is meaningful; there is no member for b2_filterJoint
+	{
+		b2DistanceJoint distanceJoint;
+		b2MotorJoint motorJoint;
+		b2MouseJoint mouseJoint;
+		b2RevoluteJoint revoluteJoint;
+		b2PrismaticJoint prismaticJoint;
+		b2WeldJoint weldJoint;
+		b2WheelJoint wheelJoint;
+	};
+} b2JointSim;
+
+void b2DestroyJointInternal( b2World* world, b2Joint* joint, bool wakeBodies );
+
+b2Joint* b2GetJointFullId( b2World* world, b2JointId jointId );
+b2JointSim* b2GetJointSim( b2World* world, b2Joint* joint );
+b2JointSim* b2GetJointSimCheckType( b2JointId jointId, b2JointType type );
+
+void b2PrepareJoint( b2JointSim* joint, b2StepContext* context ); // switches on joint->type and calls the matching b2Prepare*Joint
+void b2WarmStartJoint( b2JointSim* joint, b2StepContext* context ); // switches on joint->type and calls the matching b2WarmStart*Joint
+void b2SolveJoint( b2JointSim* joint, b2StepContext* context, bool useBias ); // switches on joint->type and calls the matching b2Solve*Joint
+
+void b2PrepareOverflowJoints( b2StepContext* context ); // the three overflow functions visit every joint sim in graph->colors[B2_OVERFLOW_INDEX]
+void b2WarmStartOverflowJoints( b2StepContext* context );
+void b2SolveOverflowJoints( b2StepContext* context, bool useBias );
+
+void b2DrawJoint( b2DebugDraw* draw, b2World* world, b2Joint* joint ); // returns without drawing when either attached body is in the disabled set
+
+b2Vec2 b2GetDistanceJointForce( b2World* world, b2JointSim* base ); // per-type force accessors, one per joint type that has a union member
+b2Vec2 b2GetMotorJointForce( b2World* world, b2JointSim* base );
+b2Vec2 b2GetMouseJointForce( b2World* world, b2JointSim* base );
+b2Vec2 b2GetPrismaticJointForce( b2World* world, b2JointSim* base );
+b2Vec2 b2GetRevoluteJointForce( b2World* world, b2JointSim* base );
+b2Vec2 b2GetWeldJointForce( b2World* world, b2JointSim* base );
+b2Vec2 b2GetWheelJointForce( b2World* world, b2JointSim* base );
+
+float b2GetMotorJointTorque( b2World* world, b2JointSim* base ); // no distance/filter variant: the torque switch in joint.c returns 0.0f for those types
+float b2GetMouseJointTorque( b2World* world, b2JointSim* base );
+float b2GetPrismaticJointTorque( b2World* world, b2JointSim* base );
+float b2GetRevoluteJointTorque( b2World* world, b2JointSim* base );
+float b2GetWeldJointTorque( b2World* world, b2JointSim* base );
+float b2GetWheelJointTorque( b2World* world, b2JointSim* base );
+
+void b2PrepareDistanceJoint( b2JointSim* base, b2StepContext* context ); // per-type targets of b2PrepareJoint
+void b2PrepareMotorJoint( b2JointSim* base, b2StepContext* context );
+void b2PrepareMouseJoint( b2JointSim* base, b2StepContext* context );
+void b2PreparePrismaticJoint( b2JointSim* base, b2StepContext* context );
+void b2PrepareRevoluteJoint( b2JointSim* base, b2StepContext* context );
+void b2PrepareWeldJoint( b2JointSim* base, b2StepContext* context );
+void b2PrepareWheelJoint( b2JointSim* base, b2StepContext* context );
+
+void b2WarmStartDistanceJoint( b2JointSim* base, b2StepContext* context ); // per-type targets of b2WarmStartJoint
+void b2WarmStartMotorJoint( b2JointSim* base, b2StepContext* context );
+void b2WarmStartMouseJoint( b2JointSim* base, b2StepContext* context );
+void b2WarmStartPrismaticJoint( b2JointSim* base, b2StepContext* context );
+void b2WarmStartRevoluteJoint( b2JointSim* base, b2StepContext* context );
+void b2WarmStartWeldJoint( b2JointSim* base, b2StepContext* context );
+void b2WarmStartWheelJoint( b2JointSim* base, b2StepContext* context );
+
+void b2SolveDistanceJoint( b2JointSim* base, b2StepContext* context, bool useBias ); // per-type targets of b2SolveJoint
+void b2SolveMotorJoint( b2JointSim* base, b2StepContext* context, bool useBias );
+void b2SolveMouseJoint( b2JointSim* base, b2StepContext* context ); // the only solve routine without a useBias parameter
+void b2SolvePrismaticJoint( b2JointSim* base, b2StepContext* context, bool useBias );
+void b2SolveRevoluteJoint( b2JointSim* base, b2StepContext* context, bool useBias );
+void b2SolveWeldJoint( b2JointSim* base, b2StepContext* context, bool useBias );
+void b2SolveWheelJoint( b2JointSim* base, b2StepContext* context, bool useBias );
+
+void b2DrawDistanceJoint( b2DebugDraw* draw, b2JointSim* base, b2Transform transformA, b2Transform transformB ); // per-type targets of b2DrawJoint
+void b2DrawPrismaticJoint( b2DebugDraw* draw, b2JointSim* base, b2Transform transformA, b2Transform transformB );
+void b2DrawRevoluteJoint( b2DebugDraw* draw, b2JointSim* base, b2Transform transformA, b2Transform transformB, float drawSize );
+void b2DrawWheelJoint( b2DebugDraw* draw, b2JointSim* base, b2Transform transformA, b2Transform transformB );
+
+// Define inline functions for arrays (B2_ARRAY_INLINE comes from array.h)
+B2_ARRAY_INLINE( b2Joint, b2Joint )
+B2_ARRAY_INLINE( b2JointSim, b2JointSim )
diff --git a/src/vendor/box2d/manifold.c b/src/vendor/box2d/manifold.c
new file mode 100644
index 0000000..6bfec83
--- /dev/null
+++ b/src/vendor/box2d/manifold.c
@@ -0,0 +1,1726 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "constants.h"
+#include "core.h"
+
+#include "box2d/collision.h"
+#include "box2d/math_functions.h"
+
+#include <float.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#define B2_MAKE_ID( A, B ) ( (uint8_t)( A ) << 8 | (uint8_t)( B ) ) // contact id: A truncated to 8 bits in bits 8-15, B truncated to 8 bits in bits 0-7
+
+static b2Polygon b2MakeCapsule( b2Vec2 p1, b2Vec2 p2, float radius )
+{
+	// Builds a two-vertex polygon with the given radius; the two normals are opposite perpendiculars of p1->p2
+	b2Vec2 segment = b2Sub( p2, p1 );
+	B2_ASSERT( b2LengthSquared( segment ) > FLT_EPSILON );
+	b2Vec2 edgeNormal = b2RightPerp( b2Normalize( segment ) );
+
+	b2Polygon capsule = { 0 };
+	capsule.count = 2;
+	capsule.radius = radius;
+	capsule.centroid = b2Lerp( p1, p2, 0.5f );
+
+	capsule.vertices[0] = p1;
+	capsule.vertices[1] = p2;
+
+	capsule.normals[0] = edgeNormal;
+	capsule.normals[1] = b2Neg( edgeNormal );
+	return capsule;
+}
+
+// point = qA * localAnchorA + pA
+// localAnchorB = qBc * (point - pB)
+// anchorB = point - pB = qA * localAnchorA + pA - pB
+//         = anchorA + (pA - pB)
+b2Manifold b2CollideCircles( const b2Circle* circleA, b2Transform xfA, const b2Circle* circleB, b2Transform xfB )
+{
+	// Work in the local frame of circle A
+	b2Transform relative = b2InvMulTransforms( xfA, xfB );
+	b2Vec2 centerA = circleA->center;
+	b2Vec2 centerB = b2TransformPoint( relative, circleB->center );
+	float rA = circleA->radius;
+	float rB = circleB->radius;
+
+	float centerDistance;
+	b2Vec2 localNormal = b2GetLengthAndNormalize( &centerDistance, b2Sub( centerB, centerA ) );
+	float gap = centerDistance - rA - rB;
+
+	b2Manifold result = { 0 };
+	if ( gap > B2_SPECULATIVE_DISTANCE )
+	{
+		return result;
+	}
+
+	// The contact point is the midpoint between the two surface points along the normal
+	b2Vec2 surfaceA = b2MulAdd( centerA, rA, localNormal );
+	b2Vec2 surfaceB = b2MulAdd( centerB, -rB, localNormal );
+	b2Vec2 localContact = b2Lerp( surfaceA, surfaceB, 0.5f );
+
+	// Rotate by xfA.q; anchorB follows from anchorA as derived above the function
+	b2ManifoldPoint* point = &result.points[0];
+	point->id = 0;
+	point->separation = gap;
+	point->anchorA = b2RotateVector( xfA.q, localContact );
+	point->anchorB = b2Add( point->anchorA, b2Sub( xfA.p, xfB.p ) );
+	point->point = b2Add( point->anchorA, xfA.p );
+	result.normal = b2RotateVector( xfA.q, localNormal );
+	result.pointCount = 1;
+	return result;
+}
+
+/// Compute the collision manifold between a capsule and circle
+b2Manifold b2CollideCapsuleAndCircle( const b2Capsule* capsuleA, b2Transform xfA, const b2Circle* circleB, b2Transform xfB )
+{
+	// Circle center expressed in the frame of the capsule
+	b2Transform relative = b2InvMulTransforms( xfA, xfB );
+	b2Vec2 center = b2TransformPoint( relative, circleB->center );
+
+	// Capsule core segment
+	b2Vec2 start = capsuleA->center1;
+	b2Vec2 end = capsuleA->center2;
+	b2Vec2 edge = b2Sub( end, start );
+
+	// Closest point on the segment to the circle center. The two projections
+	// tell whether the center lies before the start, past the end, or in between.
+	float pastStart = b2Dot( b2Sub( center, start ), edge );
+	float beforeEnd = b2Dot( b2Sub( end, center ), edge );
+	b2Vec2 closest;
+	if ( pastStart < 0.0f )
+	{
+		// center projects before the first endpoint
+		closest = start;
+	}
+	else if ( beforeEnd < 0.0f )
+	{
+		// center projects beyond the second endpoint
+		closest = end;
+	}
+	else
+	{
+		// segment interior: start + (pastStart / dot(edge, edge)) * edge
+		closest = b2MulAdd( start, pastStart / b2Dot( edge, edge ), edge );
+	}
+
+	float centerDistance;
+	b2Vec2 localNormal = b2GetLengthAndNormalize( &centerDistance, b2Sub( center, closest ) );
+
+	float rA = capsuleA->radius;
+	float rB = circleB->radius;
+	float gap = centerDistance - rA - rB;
+
+	b2Manifold result = { 0 };
+	if ( gap > B2_SPECULATIVE_DISTANCE )
+	{
+		// beyond the speculative distance: empty manifold
+		return result;
+	}
+
+	// The contact point is the midpoint between the two surface points along the normal
+	b2Vec2 surfaceA = b2MulAdd( closest, rA, localNormal );
+	b2Vec2 surfaceB = b2MulAdd( center, -rB, localNormal );
+	b2Vec2 localContact = b2Lerp( surfaceA, surfaceB, 0.5f );
+
+	// Rotate the result by xfA.q; anchorB is anchorA shifted by (xfA.p - xfB.p)
+	b2ManifoldPoint* point = &result.points[0];
+	point->id = 0;
+	point->separation = gap;
+	point->anchorA = b2RotateVector( xfA.q, localContact );
+	point->anchorB = b2Add( point->anchorA, b2Sub( xfA.p, xfB.p ) );
+	point->point = b2Add( xfA.p, point->anchorA );
+
+	result.normal = b2RotateVector( xfA.q, localNormal );
+	result.pointCount = 1;
+	return result;
+}
+
+b2Manifold b2CollidePolygonAndCircle( const b2Polygon* polygonA, b2Transform xfA, const b2Circle* circleB, b2Transform xfB )
+{
+	b2Manifold manifold = { 0 }; // returned empty (pointCount == 0) when the shapes are too far apart
+	const float speculativeDistance = B2_SPECULATIVE_DISTANCE;
+	// Computes a one-point manifold between polygon A (with its own radius) and circle B, working in the polygon's frame
+	b2Transform xf = b2InvMulTransforms( xfA, xfB );
+
+	// Compute circle position in the frame of the polygon.
+	b2Vec2 center = b2TransformPoint( xf, circleB->center );
+	float radiusA = polygonA->radius;
+	float radiusB = circleB->radius;
+	float radius = radiusA + radiusB; // combined radius of both shapes
+
+	// Find the min separating edge.
+	int normalIndex = 0;
+	float separation = -FLT_MAX;
+	int vertexCount = polygonA->count;
+	const b2Vec2* vertices = polygonA->vertices;
+	const b2Vec2* normals = polygonA->normals;
+
+	for ( int i = 0; i < vertexCount; ++i )
+	{
+		float s = b2Dot( normals[i], b2Sub( center, vertices[i] ) ); // signed distance of the center along edge normal i
+		if ( s > separation )
+		{
+			separation = s;
+			normalIndex = i;
+		}
+	}
+
+	if ( separation > radius + speculativeDistance )
+	{
+		return manifold;
+	}
+
+	// Vertices of the reference edge.
+	int vertIndex1 = normalIndex;
+	int vertIndex2 = vertIndex1 + 1 < vertexCount ? vertIndex1 + 1 : 0; // wraps to vertex 0 after the last vertex
+	b2Vec2 v1 = vertices[vertIndex1];
+	b2Vec2 v2 = vertices[vertIndex2];
+
+	// Compute barycentric coordinates
+	float u1 = b2Dot( b2Sub( center, v1 ), b2Sub( v2, v1 ) );
+	float u2 = b2Dot( b2Sub( center, v2 ), b2Sub( v1, v2 ) );
+
+	if ( u1 < 0.0f && separation > FLT_EPSILON )
+	{
+		// Circle center is closest to v1 and safely outside the polygon
+		b2Vec2 normal = b2Normalize( b2Sub( center, v1 ) );
+		separation = b2Dot( b2Sub( center, v1 ), normal );
+		if ( separation > radius + speculativeDistance )
+		{
+			return manifold;
+		}
+
+		b2Vec2 cA = b2MulAdd( v1, radiusA, normal );
+		b2Vec2 cB = b2MulSub( center, radiusB, normal );
+		b2Vec2 contactPointA = b2Lerp( cA, cB, 0.5f );
+
+		manifold.normal = b2RotateVector( xfA.q, normal );
+		b2ManifoldPoint* mp = manifold.points + 0;
+		mp->anchorA = b2RotateVector( xfA.q, contactPointA );
+		mp->anchorB = b2Add( mp->anchorA, b2Sub( xfA.p, xfB.p ) );
+		mp->point = b2Add( xfA.p, mp->anchorA );
+		mp->separation = b2Dot( b2Sub( cB, cA ), normal ); // gap between the two surface points measured along the normal
+		mp->id = 0;
+		manifold.pointCount = 1;
+	}
+	else if ( u2 < 0.0f && separation > FLT_EPSILON )
+	{
+		// Circle center is closest to v2 and safely outside the polygon
+		b2Vec2 normal = b2Normalize( b2Sub( center, v2 ) );
+		separation = b2Dot( b2Sub( center, v2 ), normal );
+		if ( separation > radius + speculativeDistance )
+		{
+			return manifold;
+		}
+
+		b2Vec2 cA = b2MulAdd( v2, radiusA, normal );
+		b2Vec2 cB = b2MulSub( center, radiusB, normal );
+		b2Vec2 contactPointA = b2Lerp( cA, cB, 0.5f );
+
+		manifold.normal = b2RotateVector( xfA.q, normal );
+		b2ManifoldPoint* mp = manifold.points + 0;
+		mp->anchorA = b2RotateVector( xfA.q, contactPointA );
+		mp->anchorB = b2Add( mp->anchorA, b2Sub( xfA.p, xfB.p ) );
+		mp->point = b2Add( xfA.p, mp->anchorA );
+		mp->separation = b2Dot( b2Sub( cB, cA ), normal ); // gap between the two surface points measured along the normal
+		mp->id = 0;
+		manifold.pointCount = 1;
+	}
+	else
+	{
+		// Circle center is between v1 and v2. Center may be inside polygon
+		b2Vec2 normal = normals[normalIndex];
+		manifold.normal = b2RotateVector( xfA.q, normal );
+
+		// cA is the projection of the circle center onto the reference edge
+		b2Vec2 cA = b2MulAdd( center, radiusA - b2Dot( b2Sub( center, v1 ), normal ), normal );
+
+		// cB is the deepest point on the circle with respect to the reference edge
+		b2Vec2 cB = b2MulSub( center, radiusB, normal );
+
+		b2Vec2 contactPointA = b2Lerp( cA, cB, 0.5f );
+
+		// The contact point is the midpoint in world space
+		b2ManifoldPoint* mp = manifold.points + 0;
+		mp->anchorA = b2RotateVector( xfA.q, contactPointA );
+		mp->anchorB = b2Add( mp->anchorA, b2Sub( xfA.p, xfB.p ) );
+		mp->point = b2Add( xfA.p, mp->anchorA );
+		mp->separation = separation - radius; // edge separation found by the loop above minus the combined radius
+		mp->id = 0;
+		manifold.pointCount = 1;
+	}
+
+	return manifold;
+}
+
+// Follows Ericson 5.1.9 Closest Points of Two Line Segments
+// Adds some logic to support clipping to get two contact points; uses the single closest-point contact when clipping yields none
+b2Manifold b2CollideCapsules( const b2Capsule* capsuleA, b2Transform xfA, const b2Capsule* capsuleB, b2Transform xfB )
+{
+	b2Vec2 origin = capsuleA->center1;
+
+	// Shift capsuleA so that center1 is at the origin
+	// pw = q * pb + p
+	// pw = q * (pbs + origin) + p
+	// pw = q * pbs + (p + q * origin)
+	b2Transform sfA = { b2Add( xfA.p, b2RotateVector( xfA.q, origin ) ), xfA.q };
+	b2Transform xf = b2InvMulTransforms( sfA, xfB );
+
+	b2Vec2 p1 = b2Vec2_zero; // segment 1 (capsule A) runs p1 -> q1 in the shifted frame
+	b2Vec2 q1 = b2Sub( capsuleA->center2, origin );
+
+	b2Vec2 p2 = b2TransformPoint( xf, capsuleB->center1 ); // segment 2 (capsule B) runs p2 -> q2 in the same frame
+	b2Vec2 q2 = b2TransformPoint( xf, capsuleB->center2 );
+
+	b2Vec2 d1 = b2Sub( q1, p1 );
+	b2Vec2 d2 = b2Sub( q2, p2 );
+
+	float dd1 = b2Dot( d1, d1 );
+	float dd2 = b2Dot( d2, d2 );
+
+	const float epsSqr = FLT_EPSILON * FLT_EPSILON;
+	B2_ASSERT( dd1 > epsSqr && dd2 > epsSqr ); // both segments must have non-zero length; dd1 and dd2 are divisors below
+
+	b2Vec2 r = b2Sub( p1, p2 );
+	float rd1 = b2Dot( r, d1 );
+	float rd2 = b2Dot( r, d2 );
+
+	float d12 = b2Dot( d1, d2 );
+
+	float denom = dd1 * dd2 - d12 * d12; // zero is treated as the parallel case below
+
+	// Fraction on segment 1
+	float f1 = 0.0f;
+	if ( denom != 0.0f )
+	{
+		// not parallel
+		f1 = b2ClampFloat( ( d12 * rd2 - rd1 * dd2 ) / denom, 0.0f, 1.0f );
+	}
+
+	// Compute point on segment 2 closest to p1 + f1 * d1
+	float f2 = ( d12 * f1 + rd2 ) / dd2;
+
+	// Clamping of segment 2 requires a do over on segment 1
+	if ( f2 < 0.0f )
+	{
+		f2 = 0.0f;
+		f1 = b2ClampFloat( -rd1 / dd1, 0.0f, 1.0f );
+	}
+	else if ( f2 > 1.0f )
+	{
+		f2 = 1.0f;
+		f1 = b2ClampFloat( ( d12 - rd1 ) / dd1, 0.0f, 1.0f );
+	}
+
+	b2Vec2 closest1 = b2MulAdd( p1, f1, d1 );
+	b2Vec2 closest2 = b2MulAdd( p2, f2, d2 );
+	float distanceSquared = b2DistanceSquared( closest1, closest2 );
+
+	b2Manifold manifold = { 0 };
+	float radiusA = capsuleA->radius;
+	float radiusB = capsuleB->radius;
+	float radius = radiusA + radiusB;
+	float maxDistance = radius + B2_SPECULATIVE_DISTANCE; // largest segment distance that still produces a contact
+
+	if ( distanceSquared > maxDistance * maxDistance )
+	{
+		return manifold;
+	}
+
+	float distance = sqrtf( distanceSquared );
+
+	float length1, length2;
+	b2Vec2 u1 = b2GetLengthAndNormalize( &length1, d1 );
+	b2Vec2 u2 = b2GetLengthAndNormalize( &length2, d2 );
+
+	// Does segment B project outside segment A?
+	float fp2 = b2Dot( b2Sub( p2, p1 ), u1 );
+	float fq2 = b2Dot( b2Sub( q2, p1 ), u1 );
+	bool outsideA = ( fp2 <= 0.0f && fq2 <= 0.0f ) || ( fp2 >= length1 && fq2 >= length1 );
+
+	// Does segment A project outside segment B?
+	float fp1 = b2Dot( b2Sub( p1, p2 ), u2 );
+	float fq1 = b2Dot( b2Sub( q1, p2 ), u2 );
+	bool outsideB = ( fp1 <= 0.0f && fq1 <= 0.0f ) || ( fp1 >= length2 && fq1 >= length2 );
+
+	if ( outsideA == false && outsideB == false )
+	{
+		// attempt to clip
+		// this may yield contact points with excessive separation
+		// in that case the algorithm falls back to single point collision
+
+		// find reference edge using SAT
+		b2Vec2 normalA;
+		float separationA;
+
+		{
+			normalA = b2LeftPerp( u1 );
+			float ss1 = b2Dot( b2Sub( p2, p1 ), normalA );
+			float ss2 = b2Dot( b2Sub( q2, p1 ), normalA );
+			float s1p = ss1 < ss2 ? ss1 : ss2; // min separation of segment 2's endpoints along +normalA
+			float s1n = -ss1 < -ss2 ? -ss1 : -ss2; // min separation of segment 2's endpoints along -normalA
+
+			if ( s1p > s1n )
+			{
+				separationA = s1p;
+			}
+			else
+			{
+				separationA = s1n;
+				normalA = b2Neg( normalA );
+			}
+		}
+
+		b2Vec2 normalB;
+		float separationB;
+		{
+			normalB = b2LeftPerp( u2 );
+			float ss1 = b2Dot( b2Sub( p1, p2 ), normalB );
+			float ss2 = b2Dot( b2Sub( q1, p2 ), normalB );
+			float s1p = ss1 < ss2 ? ss1 : ss2; // min separation of segment 1's endpoints along +normalB
+			float s1n = -ss1 < -ss2 ? -ss1 : -ss2; // min separation of segment 1's endpoints along -normalB
+
+			if ( s1p > s1n )
+			{
+				separationB = s1p;
+			}
+			else
+			{
+				separationB = s1n;
+				normalB = b2Neg( normalB );
+			}
+		}
+
+		// biased to avoid feature flip-flop
+		// todo more testing?
+		if ( separationA + 0.1f * B2_LINEAR_SLOP >= separationB )
+		{
+			manifold.normal = normalA;
+
+			b2Vec2 cp = p2;
+			b2Vec2 cq = q2;
+
+			// clip to p1
+			if ( fp2 < 0.0f && fq2 > 0.0f )
+			{
+				cp = b2Lerp( p2, q2, ( 0.0f - fp2 ) / ( fq2 - fp2 ) );
+			}
+			else if ( fq2 < 0.0f && fp2 > 0.0f )
+			{
+				cq = b2Lerp( q2, p2, ( 0.0f - fq2 ) / ( fp2 - fq2 ) );
+			}
+
+			// clip to q1
+			if ( fp2 > length1 && fq2 < length1 )
+			{
+				cp = b2Lerp( p2, q2, ( fp2 - length1 ) / ( fp2 - fq2 ) );
+			}
+			else if ( fq2 > length1 && fp2 < length1 )
+			{
+				cq = b2Lerp( q2, p2, ( fq2 - length1 ) / ( fq2 - fp2 ) );
+			}
+
+			float sp = b2Dot( b2Sub( cp, p1 ), normalA );
+			float sq = b2Dot( b2Sub( cq, p1 ), normalA );
+
+			if ( sp <= distance + B2_LINEAR_SLOP || sq <= distance + B2_LINEAR_SLOP ) // keep the pair only if a clipped point is within slop of the closest distance
+			{
+				b2ManifoldPoint* mp;
+				mp = manifold.points + 0;
+				mp->anchorA = b2MulAdd( cp, 0.5f * ( radiusA - radiusB - sp ), normalA );
+				mp->separation = sp - radius;
+				mp->id = B2_MAKE_ID( 0, 0 );
+
+				mp = manifold.points + 1;
+				mp->anchorA = b2MulAdd( cq, 0.5f * ( radiusA - radiusB - sq ), normalA );
+				mp->separation = sq - radius;
+				mp->id = B2_MAKE_ID( 0, 1 );
+				manifold.pointCount = 2;
+			}
+		}
+		else
+		{
+			// normal always points from A to B
+			manifold.normal = b2Neg( normalB );
+
+			b2Vec2 cp = p1;
+			b2Vec2 cq = q1;
+
+			// clip to p2
+			if ( fp1 < 0.0f && fq1 > 0.0f )
+			{
+				cp = b2Lerp( p1, q1, ( 0.0f - fp1 ) / ( fq1 - fp1 ) );
+			}
+			else if ( fq1 < 0.0f && fp1 > 0.0f )
+			{
+				cq = b2Lerp( q1, p1, ( 0.0f - fq1 ) / ( fp1 - fq1 ) );
+			}
+
+			// clip to q2
+			if ( fp1 > length2 && fq1 < length2 )
+			{
+				cp = b2Lerp( p1, q1, ( fp1 - length2 ) / ( fp1 - fq1 ) );
+			}
+			else if ( fq1 > length2 && fp1 < length2 )
+			{
+				cq = b2Lerp( q1, p1, ( fq1 - length2 ) / ( fq1 - fp1 ) );
+			}
+
+			float sp = b2Dot( b2Sub( cp, p2 ), normalB );
+			float sq = b2Dot( b2Sub( cq, p2 ), normalB );
+
+			if ( sp <= distance + B2_LINEAR_SLOP || sq <= distance + B2_LINEAR_SLOP ) // keep the pair only if a clipped point is within slop of the closest distance
+			{
+				b2ManifoldPoint* mp;
+				mp = manifold.points + 0;
+				mp->anchorA = b2MulAdd( cp, 0.5f * ( radiusB - radiusA - sp ), normalB );
+				mp->separation = sp - radius;
+				mp->id = B2_MAKE_ID( 0, 0 );
+				mp = manifold.points + 1;
+				mp->anchorA = b2MulAdd( cq, 0.5f * ( radiusB - radiusA - sq ), normalB );
+				mp->separation = sq - radius;
+				mp->id = B2_MAKE_ID( 1, 0 );
+				manifold.pointCount = 2;
+			}
+		}
+	}
+
+	if ( manifold.pointCount == 0 )
+	{
+		// single point collision
+		b2Vec2 normal = b2Sub( closest2, closest1 );
+		if ( b2Dot( normal, normal ) > epsSqr )
+		{
+			normal = b2Normalize( normal );
+		}
+		else
+		{
+			normal = b2LeftPerp( u1 ); // closest points coincide, so use the perpendicular of segment 1 instead
+		}
+
+		b2Vec2 c1 = b2MulAdd( closest1, radiusA, normal );
+		b2Vec2 c2 = b2MulAdd( closest2, -radiusB, normal );
+
+		int i1 = f1 == 0.0f ? 0 : 1; // id component: 0 when the closest point on segment 1 is p1, otherwise 1
+		int i2 = f2 == 0.0f ? 0 : 1; // id component: 0 when the closest point on segment 2 is p2, otherwise 1
+
+		manifold.normal = normal;
+		manifold.points[0].anchorA = b2Lerp( c1, c2, 0.5f );
+		manifold.points[0].separation = sqrtf( distanceSquared ) - radius; // sqrtf( distanceSquared ) is the same value as distance above
+		manifold.points[0].id = B2_MAKE_ID( i1, i2 );
+		manifold.pointCount = 1;
+	}
+
+	// Convert manifold to world space
+	manifold.normal = b2RotateVector( xfA.q, manifold.normal );
+	for ( int i = 0; i < manifold.pointCount; ++i )
+	{
+		b2ManifoldPoint* mp = manifold.points + i;
+
+		// anchor points relative to shape origin in world space
+		mp->anchorA = b2RotateVector( xfA.q, b2Add( mp->anchorA, origin ) ); // adding origin undoes the shift applied at the top
+		mp->anchorB = b2Add( mp->anchorA, b2Sub( xfA.p, xfB.p ) );
+		mp->point = b2Add( xfA.p, mp->anchorA );
+	}
+
+	return manifold;
+}
+
+b2Manifold b2CollideSegmentAndCapsule( const b2Segment* segmentA, b2Transform xfA, const b2Capsule* capsuleB, b2Transform xfB )
+{
+	b2Capsule zeroRadiusA = { .center1 = segmentA->point1, .center2 = segmentA->point2, .radius = 0.0f }; // segment as a capsule of radius zero
+	return b2CollideCapsules( &zeroRadiusA, xfA, capsuleB, xfB );
+}
+
+b2Manifold b2CollidePolygonAndCapsule( const b2Polygon* polygonA, b2Transform xfA, const b2Capsule* capsuleB, b2Transform xfB )
+{
+	b2Polygon capsulePolygon = b2MakeCapsule( capsuleB->center1, capsuleB->center2, capsuleB->radius ); // capsule as a two-vertex polygon
+	return b2CollidePolygons( polygonA, xfA, &capsulePolygon, xfB );
+}
+
+// Polygon clipper used to compute contact points when there are potentially two contact points. polyB supplies the reference edge when flip is true.
+static b2Manifold b2ClipPolygons( const b2Polygon* polyA, const b2Polygon* polyB, int edgeA, int edgeB, bool flip )
+{
+	b2Manifold manifold = { 0 };
+
+	// reference polygon
+	const b2Polygon* poly1;
+	int i11, i12;
+
+	// incident polygon
+	const b2Polygon* poly2;
+	int i21, i22;
+
+	if ( flip )
+	{
+		poly1 = polyB;
+		poly2 = polyA;
+		i11 = edgeB;
+		i12 = edgeB + 1 < polyB->count ? edgeB + 1 : 0; // next vertex index, wrapping to 0
+		i21 = edgeA;
+		i22 = edgeA + 1 < polyA->count ? edgeA + 1 : 0;
+	}
+	else
+	{
+		poly1 = polyA;
+		poly2 = polyB;
+		i11 = edgeA;
+		i12 = edgeA + 1 < polyA->count ? edgeA + 1 : 0; // next vertex index, wrapping to 0
+		i21 = edgeB;
+		i22 = edgeB + 1 < polyB->count ? edgeB + 1 : 0;
+	}
+
+	b2Vec2 normal = poly1->normals[i11];
+
+	// Reference edge vertices
+	b2Vec2 v11 = poly1->vertices[i11];
+	b2Vec2 v12 = poly1->vertices[i12];
+
+	// Incident edge vertices
+	b2Vec2 v21 = poly2->vertices[i21];
+	b2Vec2 v22 = poly2->vertices[i22];
+
+	b2Vec2 tangent = b2CrossSV( 1.0f, normal ); // NOTE(review): presumably the direction along the reference edge from v11 to v12 - confirm b2CrossSV
+
+	float lower1 = 0.0f; // v11 is the origin of the tangent coordinate
+	float upper1 = b2Dot( b2Sub( v12, v11 ), tangent );
+
+	// Incident edge points opposite of tangent due to CCW winding
+	float upper2 = b2Dot( b2Sub( v21, v11 ), tangent );
+	float lower2 = b2Dot( b2Sub( v22, v11 ), tangent );
+
+	// Are the segments disjoint?
+	if ( upper2 < lower1 || upper1 < lower2 )
+	{
+		return manifold;
+	}
+
+	b2Vec2 vLower;
+	if ( lower2 < lower1 && upper2 - lower2 > FLT_EPSILON ) // second test keeps the divisor below from being near zero
+	{
+		vLower = b2Lerp( v22, v21, ( lower1 - lower2 ) / ( upper2 - lower2 ) );
+	}
+	else
+	{
+		vLower = v22;
+	}
+
+	b2Vec2 vUpper;
+	if ( upper2 > upper1 && upper2 - lower2 > FLT_EPSILON ) // second test keeps the divisor below from being near zero
+	{
+		vUpper = b2Lerp( v22, v21, ( upper1 - lower2 ) / ( upper2 - lower2 ) );
+	}
+	else
+	{
+		vUpper = v21;
+	}
+
+	// todo vLower can be very close to vUpper, reduce to one point?
+
+	float separationLower = b2Dot( b2Sub( vLower, v11 ), normal );
+	float separationUpper = b2Dot( b2Sub( vUpper, v11 ), normal );
+
+	float r1 = poly1->radius;
+	float r2 = poly2->radius;
+
+	// Put contact points at midpoint, accounting for radii
+	vLower = b2MulAdd( vLower, 0.5f * ( r1 - r2 - separationLower ), normal );
+	vUpper = b2MulAdd( vUpper, 0.5f * ( r1 - r2 - separationUpper ), normal );
+
+	float radius = r1 + r2;
+
+	if ( flip == false ) // both branches put polyA's vertex index in the high byte of the id and polyB's in the low byte
+	{
+		manifold.normal = normal;
+		b2ManifoldPoint* cp = manifold.points + 0;
+
+		{
+			cp->anchorA = vLower;
+			cp->separation = separationLower - radius;
+			cp->id = B2_MAKE_ID( i11, i22 );
+			manifold.pointCount += 1;
+			cp += 1;
+		}
+
+		{
+			cp->anchorA = vUpper;
+			cp->separation = separationUpper - radius;
+			cp->id = B2_MAKE_ID( i12, i21 );
+			manifold.pointCount += 1;
+		}
+	}
+	else
+	{
+		manifold.normal = b2Neg( normal ); // the reference normal came from polyB, so it is negated here
+		b2ManifoldPoint* cp = manifold.points + 0;
+
+		{
+			cp->anchorA = vUpper;
+			cp->separation = separationUpper - radius;
+			cp->id = B2_MAKE_ID( i21, i12 );
+			manifold.pointCount += 1;
+			cp += 1;
+		}
+
+		{
+			cp->anchorA = vLower;
+			cp->separation = separationLower - radius;
+			cp->id = B2_MAKE_ID( i22, i11 );
+			manifold.pointCount += 1;
+		}
+	}
+
+	return manifold;
+}
+
+// Find the max separation between poly1 and poly2 using edge normals from poly1. The winning edge index is written to *edgeIndex.
+static float b2FindMaxSeparation( int* edgeIndex, const b2Polygon* poly1, const b2Polygon* poly2 )
+{
+	int count1 = poly1->count;
+	int count2 = poly2->count;
+	const b2Vec2* n1s = poly1->normals;
+	const b2Vec2* v1s = poly1->vertices;
+	const b2Vec2* v2s = poly2->vertices;
+
+	int bestIndex = 0;
+	float maxSeparation = -FLT_MAX;
+	for ( int i = 0; i < count1; ++i )
+	{
+		// Normal and first vertex of edge i on poly1. No transform is applied, so both polygons must already share a frame.
+		b2Vec2 n = n1s[i];
+		b2Vec2 v1 = v1s[i];
+
+		// Find the deepest point of poly2 for normal i (smallest signed distance along n).
+		float si = FLT_MAX;
+		for ( int j = 0; j < count2; ++j )
+		{
+			float sij = b2Dot( n, b2Sub( v2s[j], v1 ) );
+			if ( sij < si )
+			{
+				si = sij;
+			}
+		}
+
+		if ( si > maxSeparation ) // strict comparison: the lowest edge index wins ties
+		{
+			maxSeparation = si;
+			bestIndex = i;
+		}
+	}
+
+	*edgeIndex = bestIndex;
+	return maxSeparation;
+}
+
+// Due to speculation, every polygon is rounded
+// Algorithm:
+//
+// compute edge separation using the separating axis test (SAT)
+// if (separation > speculation_distance + radius)
+//   return
+// find reference and incident edge
+// if separation > 0.1f * B2_LINEAR_SLOP
+//   compute closest points between reference and incident edge
+//   if closest distance - radius > speculation_distance, return
+//   clip edges
+//   if closest separation + 0.1f * B2_LINEAR_SLOP < smallest clipped separation and both closest points are vertices
+//      replace the manifold with a single vertex-vertex contact
+//   end
+// else
+//   clip edges
+// end
+
+b2Manifold b2CollidePolygons( const b2Polygon* polygonA, b2Transform xfA, const b2Polygon* polygonB, b2Transform xfB )
+{
+	b2Vec2 origin = polygonA->vertices[0];
+	float linearSlop = B2_LINEAR_SLOP;
+	float speculativeDistance = B2_SPECULATIVE_DISTANCE;
+
+	// Shift polyA to origin
+	// pw = q * pb + p
+	// pw = q * (pbs + origin) + p
+	// pw = q * pbs + (p + q * origin)
+	b2Transform sfA = { b2Add( xfA.p, b2RotateVector( xfA.q, origin ) ), xfA.q };
+	b2Transform xf = b2InvMulTransforms( sfA, xfB );
+
+	b2Polygon localPolyA; // only count, radius and the first count vertices/normals are filled in
+	localPolyA.count = polygonA->count;
+	localPolyA.radius = polygonA->radius;
+	localPolyA.vertices[0] = b2Vec2_zero;
+	localPolyA.normals[0] = polygonA->normals[0];
+	for ( int i = 1; i < localPolyA.count; ++i )
+	{
+		localPolyA.vertices[i] = b2Sub( polygonA->vertices[i], origin );
+		localPolyA.normals[i] = polygonA->normals[i];
+	}
+
+	// Put polyB in polyA's frame to reduce round-off error
+	b2Polygon localPolyB; // only count, radius and the first count vertices/normals are filled in
+	localPolyB.count = polygonB->count;
+	localPolyB.radius = polygonB->radius;
+	for ( int i = 0; i < localPolyB.count; ++i )
+	{
+		localPolyB.vertices[i] = b2TransformPoint( xf, polygonB->vertices[i] );
+		localPolyB.normals[i] = b2RotateVector( xf.q, polygonB->normals[i] );
+	}
+
+	int edgeA = 0;
+	float separationA = b2FindMaxSeparation( &edgeA, &localPolyA, &localPolyB );
+
+	int edgeB = 0;
+	float separationB = b2FindMaxSeparation( &edgeB, &localPolyB, &localPolyA );
+
+	float radius = localPolyA.radius + localPolyB.radius;
+
+	if ( separationA > speculativeDistance + radius || separationB > speculativeDistance + radius )
+	{
+		return (b2Manifold){ 0 };
+	}
+
+	// Find incident edge
+	bool flip;
+	if ( separationA >= separationB )
+	{
+		flip = false;
+
+		b2Vec2 searchDirection = localPolyA.normals[edgeA];
+
+		// Find the incident edge on polyB: the edge whose normal is most anti-parallel to the reference normal
+		int count = localPolyB.count;
+		const b2Vec2* normals = localPolyB.normals;
+		edgeB = 0;
+		float minDot = FLT_MAX;
+		for ( int i = 0; i < count; ++i )
+		{
+			float dot = b2Dot( searchDirection, normals[i] );
+			if ( dot < minDot )
+			{
+				minDot = dot;
+				edgeB = i;
+			}
+		}
+	}
+	else
+	{
+		flip = true;
+
+		b2Vec2 searchDirection = localPolyB.normals[edgeB];
+
+		// Find the incident edge on polyA: the edge whose normal is most anti-parallel to the reference normal
+		int count = localPolyA.count;
+		const b2Vec2* normals = localPolyA.normals;
+		edgeA = 0;
+		float minDot = FLT_MAX;
+		for ( int i = 0; i < count; ++i )
+		{
+			float dot = b2Dot( searchDirection, normals[i] );
+			if ( dot < minDot )
+			{
+				minDot = dot;
+				edgeA = i;
+			}
+		}
+	}
+
+	b2Manifold manifold = { 0 };
+
+	// Using slop here to ensure vertex-vertex normal vectors can be safely normalized
+	// todo this means edge clipping needs to handle slightly non-overlapping edges.
+	if ( separationA > 0.1f * linearSlop || separationB > 0.1f * linearSlop )
+	{
+#if 1 // active implementation; the #else branch below is compiled out
+		// Edges are disjoint. Find closest points between reference edge and incident edge
+		// Reference edge on polygon A
+		int i11 = edgeA;
+		int i12 = edgeA + 1 < localPolyA.count ? edgeA + 1 : 0;
+		int i21 = edgeB;
+		int i22 = edgeB + 1 < localPolyB.count ? edgeB + 1 : 0;
+
+		b2Vec2 v11 = localPolyA.vertices[i11];
+		b2Vec2 v12 = localPolyA.vertices[i12];
+		b2Vec2 v21 = localPolyB.vertices[i21];
+		b2Vec2 v22 = localPolyB.vertices[i22];
+
+		b2SegmentDistanceResult result = b2SegmentDistance( v11, v12, v21, v22 );
+		B2_ASSERT( result.distanceSquared > 0.0f );
+		float distance = sqrtf( result.distanceSquared );
+		float separation = distance - radius;
+
+		if ( distance - radius > speculativeDistance )
+		{
+			// This can happen in the vertex-vertex case
+			return manifold;
+		}
+
+		// Attempt to clip edges
+		manifold = b2ClipPolygons( &localPolyA, &localPolyB, edgeA, edgeB, flip );
+
+		float minSeparation = FLT_MAX; // stays FLT_MAX when clipping produced no points
+		for ( int i = 0; i < manifold.pointCount; ++i )
+		{
+			minSeparation = b2MinFloat( minSeparation, manifold.points[i].separation );
+		}
+
+		// Is the closest-point separation below every clipped separation by more than the tolerance? Always true when clipping produced no points.
+		if ( separation + 0.1f * linearSlop < minSeparation )
+		{
+			if ( result.fraction1 == 0.0f && result.fraction2 == 0.0f ) // NOTE(review): exact 0/1 tests presumably rely on b2SegmentDistance clamping fractions to endpoints - confirm
+			{
+				// v11 - v21
+				b2Vec2 normal = b2Sub( v21, v11 );
+				float invDistance = 1.0f / distance;
+				normal.x *= invDistance;
+				normal.y *= invDistance;
+
+				b2Vec2 c1 = b2MulAdd( v11, localPolyA.radius, normal );
+				b2Vec2 c2 = b2MulAdd( v21, -localPolyB.radius, normal );
+
+				manifold.normal = normal;
+				manifold.points[0].anchorA = b2Lerp( c1, c2, 0.5f );
+				manifold.points[0].separation = distance - radius;
+				manifold.points[0].id = B2_MAKE_ID( i11, i21 );
+				manifold.pointCount = 1;
+			}
+			else if ( result.fraction1 == 0.0f && result.fraction2 == 1.0f )
+			{
+				// v11 - v22
+				b2Vec2 normal = b2Sub( v22, v11 );
+				float invDistance = 1.0f / distance;
+				normal.x *= invDistance;
+				normal.y *= invDistance;
+
+				b2Vec2 c1 = b2MulAdd( v11, localPolyA.radius, normal );
+				b2Vec2 c2 = b2MulAdd( v22, -localPolyB.radius, normal );
+
+				manifold.normal = normal;
+				manifold.points[0].anchorA = b2Lerp( c1, c2, 0.5f );
+				manifold.points[0].separation = distance - radius;
+				manifold.points[0].id = B2_MAKE_ID( i11, i22 );
+				manifold.pointCount = 1;
+			}
+			else if ( result.fraction1 == 1.0f && result.fraction2 == 0.0f )
+			{
+				// v12 - v21
+				b2Vec2 normal = b2Sub( v21, v12 );
+				float invDistance = 1.0f / distance;
+				normal.x *= invDistance;
+				normal.y *= invDistance;
+
+				b2Vec2 c1 = b2MulAdd( v12, localPolyA.radius, normal );
+				b2Vec2 c2 = b2MulAdd( v21, -localPolyB.radius, normal );
+
+				manifold.normal = normal;
+				manifold.points[0].anchorA = b2Lerp( c1, c2, 0.5f );
+				manifold.points[0].separation = distance - radius;
+				manifold.points[0].id = B2_MAKE_ID( i12, i21 );
+				manifold.pointCount = 1;
+			}
+			else if ( result.fraction1 == 1.0f && result.fraction2 == 1.0f )
+			{
+				// v12 - v22
+				b2Vec2 normal = b2Sub( v22, v12 );
+				float invDistance = 1.0f / distance;
+				normal.x *= invDistance;
+				normal.y *= invDistance;
+
+				b2Vec2 c1 = b2MulAdd( v12, localPolyA.radius, normal );
+				b2Vec2 c2 = b2MulAdd( v22, -localPolyB.radius, normal );
+
+				manifold.normal = normal;
+				manifold.points[0].anchorA = b2Lerp( c1, c2, 0.5f );
+				manifold.points[0].separation = distance - radius;
+				manifold.points[0].id = B2_MAKE_ID( i12, i22 );
+				manifold.pointCount = 1;
+			}
+		}
+#else
+		// Polygons are disjoint. Find closest points between reference edge and incident edge
+		// Reference edge on polygon A
+		int i11 = edgeA;
+		int i12 = edgeA + 1 < localPolyA.count ? edgeA + 1 : 0;
+		int i21 = edgeB;
+		int i22 = edgeB + 1 < localPolyB.count ? edgeB + 1 : 0;
+
+		b2Vec2 v11 = localPolyA.vertices[i11];
+		b2Vec2 v12 = localPolyA.vertices[i12];
+		b2Vec2 v21 = localPolyB.vertices[i21];
+		b2Vec2 v22 = localPolyB.vertices[i22];
+
+		b2SegmentDistanceResult result = b2SegmentDistance( v11, v12, v21, v22 );
+
+		if ( result.fraction1 == 0.0f && result.fraction2 == 0.0f )
+		{
+			// v11 - v21
+			b2Vec2 normal = b2Sub( v21, v11 );
+			B2_ASSERT( result.distanceSquared > 0.0f );
+			float distance = sqrtf( result.distanceSquared );
+			if ( distance > B2_SPECULATIVE_DISTANCE + radius )
+			{
+				return manifold;
+			}
+			float invDistance = 1.0f / distance;
+			normal.x *= invDistance;
+			normal.y *= invDistance;
+
+			b2Vec2 c1 = b2MulAdd( v11, localPolyA.radius, normal );
+			b2Vec2 c2 = b2MulAdd( v21, -localPolyB.radius, normal );
+
+			manifold.normal = normal;
+			manifold.points[0].anchorA = b2Lerp( c1, c2, 0.5f );
+			manifold.points[0].separation = distance - radius;
+			manifold.points[0].id = B2_MAKE_ID( i11, i21 );
+			manifold.pointCount = 1;
+		}
+		else if ( result.fraction1 == 0.0f && result.fraction2 == 1.0f )
+		{
+			// v11 - v22
+			b2Vec2 normal = b2Sub( v22, v11 );
+			B2_ASSERT( result.distanceSquared > 0.0f );
+			float distance = sqrtf( result.distanceSquared );
+			if ( distance > B2_SPECULATIVE_DISTANCE + radius )
+			{
+				return manifold;
+			}
+			float invDistance = 1.0f / distance;
+			normal.x *= invDistance;
+			normal.y *= invDistance;
+
+			b2Vec2 c1 = b2MulAdd( v11, localPolyA.radius, normal );
+			b2Vec2 c2 = b2MulAdd( v22, -localPolyB.radius, normal );
+
+			manifold.normal = normal;
+			manifold.points[0].anchorA = b2Lerp( c1, c2, 0.5f );
+			manifold.points[0].separation = distance - radius;
+			manifold.points[0].id = B2_MAKE_ID( i11, i22 );
+			manifold.pointCount = 1;
+		}
+		else if ( result.fraction1 == 1.0f && result.fraction2 == 0.0f )
+		{
+			// v12 - v21
+			b2Vec2 normal = b2Sub( v21, v12 );
+			B2_ASSERT( result.distanceSquared > 0.0f );
+			float distance = sqrtf( result.distanceSquared );
+			if ( distance > B2_SPECULATIVE_DISTANCE + radius )
+			{
+				return manifold;
+			}
+			float invDistance = 1.0f / distance;
+			normal.x *= invDistance;
+			normal.y *= invDistance;
+
+			b2Vec2 c1 = b2MulAdd( v12, localPolyA.radius, normal );
+			b2Vec2 c2 = b2MulAdd( v21, -localPolyB.radius, normal );
+
+			manifold.normal = normal;
+			manifold.points[0].anchorA = b2Lerp( c1, c2, 0.5f );
+			manifold.points[0].separation = distance - radius;
+			manifold.points[0].id = B2_MAKE_ID( i12, i21 );
+			manifold.pointCount = 1;
+		}
+		else if ( result.fraction1 == 1.0f && result.fraction2 == 1.0f )
+		{
+			// v12 - v22
+			b2Vec2 normal = b2Sub( v22, v12 );
+			B2_ASSERT( result.distanceSquared > 0.0f );
+			float distance = sqrtf( result.distanceSquared );
+			if ( distance > B2_SPECULATIVE_DISTANCE + radius )
+			{
+				return manifold;
+			}
+			float invDistance = 1.0f / distance;
+			normal.x *= invDistance;
+			normal.y *= invDistance;
+
+			b2Vec2 c1 = b2MulAdd( v12, localPolyA.radius, normal );
+			b2Vec2 c2 = b2MulAdd( v22, -localPolyB.radius, normal );
+
+			manifold.normal = normal;
+			manifold.points[0].anchorA = b2Lerp( c1, c2, 0.5f );
+			manifold.points[0].separation = distance - radius;
+			manifold.points[0].id = B2_MAKE_ID( i12, i22 );
+			manifold.pointCount = 1;
+		}
+		else
+		{
+			// Edge region
+			manifold = b2ClipPolygons( &localPolyA, &localPolyB, edgeA, edgeB, flip );
+		}
+#endif
+	}
+	else
+	{
+		// Polygons overlap
+		manifold = b2ClipPolygons( &localPolyA, &localPolyB, edgeA, edgeB, flip );
+	}
+
+	// Convert manifold to world space
+	if ( manifold.pointCount > 0 )
+	{
+		manifold.normal = b2RotateVector( xfA.q, manifold.normal );
+		for ( int i = 0; i < manifold.pointCount; ++i )
+		{
+			b2ManifoldPoint* mp = manifold.points + i;
+
+			// Undo the shift to polyA's first vertex, then rotate: anchorA is relative to xfA.p and anchorB to xfB.p, both in world orientation
+			mp->anchorA = b2RotateVector( xfA.q, b2Add( mp->anchorA, origin ) );
+			mp->anchorB = b2Add( mp->anchorA, b2Sub( xfA.p, xfB.p ) );
+			mp->point = b2Add( xfA.p, mp->anchorA );
+		}
+	}
+
+	return manifold;
+}
+
+b2Manifold b2CollideSegmentAndCircle( const b2Segment* segmentA, b2Transform xfA, const b2Circle* circleB, b2Transform xfB )
+{
+	b2Capsule capsuleA = { segmentA->point1, segmentA->point2, 0.0f }; // treat the segment as a capsule with zero radius
+	return b2CollideCapsuleAndCircle( &capsuleA, xfA, circleB, xfB );
+}
+
+b2Manifold b2CollideSegmentAndPolygon( const b2Segment* segmentA, b2Transform xfA, const b2Polygon* polygonB, b2Transform xfB )
+{
+	b2Polygon polygonA = b2MakeCapsule( segmentA->point1, segmentA->point2, 0.0f ); // express the segment as a zero-radius b2Polygon so the polygon-vs-polygon routine can be reused
+	return b2CollidePolygons( &polygonA, xfA, polygonB, xfB );
+}
+
+b2Manifold b2CollideChainSegmentAndCircle( const b2ChainSegment* segmentA, b2Transform xfA, const b2Circle* circleB,
+										   b2Transform xfB )
+{
+	b2Manifold manifold = { 0 }; // every early return below reports zero contact points
+
+	b2Transform xf = b2InvMulTransforms( xfA, xfB );
+
+	// Compute circle in frame of segment
+	b2Vec2 pB = b2TransformPoint( xf, circleB->center );
+
+	b2Vec2 p1 = segmentA->segment.point1;
+	b2Vec2 p2 = segmentA->segment.point2;
+	b2Vec2 e = b2Sub( p2, p1 );
+
+	// Normal points to the right
+	float offset = b2Dot( b2RightPerp( e ), b2Sub( pB, p1 ) );
+	if ( offset < 0.0f )
+	{
+		// collision is one-sided
+		return manifold;
+	}
+
+	// Unnormalized barycentric coordinates (u + v == dot(e, e); the division happens in the interior case below)
+	float u = b2Dot( e, b2Sub( p2, pB ) );
+	float v = b2Dot( e, b2Sub( pB, p1 ) );
+
+	b2Vec2 pA;
+
+	if ( v <= 0.0f )
+	{
+		// Behind point1?
+		// Is pB in the Voronoi region of the previous edge?
+		b2Vec2 prevEdge = b2Sub( p1, segmentA->ghost1 );
+		float uPrev = b2Dot( prevEdge, b2Sub( pB, p1 ) );
+		if ( uPrev <= 0.0f )
+		{
+			return manifold;
+		}
+
+		pA = p1;
+	}
+	else if ( u <= 0.0f )
+	{
+		// Ahead of point2?
+		b2Vec2 nextEdge = b2Sub( segmentA->ghost2, p2 );
+		float vNext = b2Dot( nextEdge, b2Sub( pB, p2 ) );
+
+		// Is pB in the Voronoi region of the next edge?
+		if ( vNext > 0.0f )
+		{
+			return manifold;
+		}
+
+		pA = p2;
+	}
+	else
+	{
+		float ee = b2Dot( e, e );
+		pA = (b2Vec2){ u * p1.x + v * p2.x, u * p1.y + v * p2.y };
+		pA = ee > 0.0f ? b2MulSV( 1.0f / ee, pA ) : p1; // normalize the weights; fall back to p1 for a zero-length segment
+	}
+
+	float distance;
+	b2Vec2 normal = b2GetLengthAndNormalize( &distance, b2Sub( pB, pA ) );
+
+	float radius = circleB->radius;
+	float separation = distance - radius;
+	if ( separation > B2_SPECULATIVE_DISTANCE )
+	{
+		return manifold;
+	}
+
+	b2Vec2 cA = pA;
+	b2Vec2 cB = b2MulAdd( pB, -radius, normal );
+	b2Vec2 contactPointA = b2Lerp( cA, cB, 0.5f ); // midpoint between the segment point and the circle surface point
+
+	manifold.normal = b2RotateVector( xfA.q, normal );
+
+	b2ManifoldPoint* mp = manifold.points + 0;
+	mp->anchorA = b2RotateVector( xfA.q, contactPointA );
+	mp->anchorB = b2Add( mp->anchorA, b2Sub( xfA.p, xfB.p ) );
+	mp->point = b2Add( xfA.p, mp->anchorA );
+	mp->separation = separation;
+	mp->id = 0; // this function never produces more than one point
+	manifold.pointCount = 1;
+	return manifold;
+}
+
+b2Manifold b2CollideChainSegmentAndCapsule( const b2ChainSegment* segmentA, b2Transform xfA, const b2Capsule* capsuleB,
+											b2Transform xfB, b2SimplexCache* cache )
+{
+	b2Polygon polyB = b2MakeCapsule( capsuleB->center1, capsuleB->center2, capsuleB->radius ); // express the capsule as a b2Polygon and defer to the polygon routine
+	return b2CollideChainSegmentAndPolygon( segmentA, xfA, &polyB, xfB, cache );
+}
+
+static b2Manifold b2ClipSegments( b2Vec2 a1, b2Vec2 a2, b2Vec2 b1, b2Vec2 b2, b2Vec2 normal, float ra, float rb, uint16_t id1,
+								  uint16_t id2 )
+{ // (a1, a2) is the reference segment with the given normal and (b1, b2) the incident segment; returns zero or two points
+	b2Manifold manifold = { 0 };
+
+	b2Vec2 tangent = b2LeftPerp( normal );
+
+	// Extent of the reference segment along the tangent, measured from a1
+	float lower1 = 0.0f;
+	float upper1 = b2Dot( b2Sub( a2, a1 ), tangent );
+
+	// Incident edge points opposite of tangent due to CCW winding
+	float upper2 = b2Dot( b2Sub( b1, a1 ), tangent );
+	float lower2 = b2Dot( b2Sub( b2, a1 ), tangent );
+
+	// Are the segments disjoint along the tangent? If so there are no contact points.
+	if ( upper2 < lower1 || upper1 < lower2 )
+	{
+		return manifold;
+	}
+
+	b2Vec2 vLower;
+	if ( lower2 < lower1 && upper2 - lower2 > FLT_EPSILON ) // the length test guards the division below
+	{
+		vLower = b2Lerp( b2, b1, ( lower1 - lower2 ) / ( upper2 - lower2 ) );
+	}
+	else
+	{
+		vLower = b2;
+	}
+
+	b2Vec2 vUpper;
+	if ( upper2 > upper1 && upper2 - lower2 > FLT_EPSILON ) // the length test guards the division below
+	{
+		vUpper = b2Lerp( b2, b1, ( upper1 - lower2 ) / ( upper2 - lower2 ) );
+	}
+	else
+	{
+		vUpper = b1;
+	}
+
+	// todo vLower can be very close to vUpper, reduce to one point?
+
+	float separationLower = b2Dot( b2Sub( vLower, a1 ), normal );
+	float separationUpper = b2Dot( b2Sub( vUpper, a1 ), normal );
+
+	// Put contact points at midpoint, accounting for radii
+	vLower = b2MulAdd( vLower, 0.5f * ( ra - rb - separationLower ), normal );
+	vUpper = b2MulAdd( vUpper, 0.5f * ( ra - rb - separationUpper ), normal );
+
+	float radius = ra + rb;
+
+	manifold.normal = normal; // callers overwrite or rotate this as needed; anchorB and point are left for the caller to fill in
+	{
+		b2ManifoldPoint* cp = manifold.points + 0;
+		cp->anchorA = vLower;
+		cp->separation = separationLower - radius;
+		cp->id = id1;
+	}
+
+	{
+		b2ManifoldPoint* cp = manifold.points + 1;
+		cp->anchorA = vUpper;
+		cp->separation = separationUpper - radius;
+		cp->id = id2;
+	}
+
+	manifold.pointCount = 2;
+
+	return manifold;
+}
+
+enum b2NormalType // result of b2ClassifyNormal for a candidate contact normal on a chain segment
+{
+	// The normal points in a direction that is non-smooth relative to a convex vertex and should be skipped
+	b2_normalSkip,
+
+	// The normal points in a direction that is smooth relative to a convex vertex and should be used for collision
+	b2_normalAdmit,
+
+	// The normal is in a region of a concave vertex and should be snapped to the segment normal
+	b2_normalSnap
+};
+
+struct b2ChainSegmentParams // inputs to b2ClassifyNormal, filled in by b2CollideChainSegmentAndPolygon
+{
+	b2Vec2 edge1; // normalized direction of the segment, point1 -> point2
+	b2Vec2 normal0; // right perpendicular of the normalized previous edge, ghost1 -> point1
+	b2Vec2 normal2; // right perpendicular of the normalized next edge, point2 -> ghost2
+	bool convex1; // cross(previous edge, edge1) >= convexity tolerance, i.e. the vertex at point1 is treated as convex
+	bool convex2; // cross(edge1, next edge) >= convexity tolerance, i.e. the vertex at point2 is treated as convex
+};
+
+// Evaluate Gauss map: decide whether a candidate contact normal should be skipped, admitted, or snapped to the segment normal
+// See https://box2d.org/posts/2020/06/ghost-collisions/
+static enum b2NormalType b2ClassifyNormal( struct b2ChainSegmentParams params, b2Vec2 normal )
+{
+	const float sinTol = 0.01f; // tolerance on the cross products below (the sine of the angle when both vectors are unit length)
+
+	if ( b2Dot( normal, params.edge1 ) <= 0.0f )
+	{
+		// Normal points towards the segment tail
+		if ( params.convex1 )
+		{
+			if ( b2Cross( normal, params.normal0 ) > sinTol )
+			{
+				return b2_normalSkip;
+			}
+
+			return b2_normalAdmit;
+		}
+		else
+		{
+			return b2_normalSnap;
+		}
+	}
+	else
+	{
+		// Normal points towards segment head
+		if ( params.convex2 )
+		{
+			if ( b2Cross( params.normal2, normal ) > sinTol )
+			{
+				return b2_normalSkip;
+			}
+
+			return b2_normalAdmit;
+		}
+		else
+		{
+			return b2_normalSnap;
+		}
+	}
+}
+
+b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segmentA, b2Transform xfA, const b2Polygon* polygonB,
+											b2Transform xfB, b2SimplexCache* cache )
+{ // cache is handed to b2ShapeDistance; its count/indexA/indexB are read afterwards to identify the closest features
+	b2Manifold manifold = { 0 };
+
+	b2Transform xf = b2InvMulTransforms( xfA, xfB );
+
+	b2Vec2 centroidB = b2TransformPoint( xf, polygonB->centroid );
+	float radiusB = polygonB->radius;
+
+	b2Vec2 p1 = segmentA->segment.point1;
+	b2Vec2 p2 = segmentA->segment.point2;
+
+	b2Vec2 edge1 = b2Normalize( b2Sub( p2, p1 ) );
+
+	struct b2ChainSegmentParams smoothParams = { 0 };
+	smoothParams.edge1 = edge1;
+
+	const float convexTol = 0.01f; // minimum cross product of adjacent edge directions for a vertex to count as convex
+	b2Vec2 edge0 = b2Normalize( b2Sub( p1, segmentA->ghost1 ) );
+	smoothParams.normal0 = b2RightPerp( edge0 );
+	smoothParams.convex1 = b2Cross( edge0, edge1 ) >= convexTol;
+
+	b2Vec2 edge2 = b2Normalize( b2Sub( segmentA->ghost2, p2 ) );
+	smoothParams.normal2 = b2RightPerp( edge2 );
+	smoothParams.convex2 = b2Cross( edge1, edge2 ) >= convexTol;
+
+	// Normal points to the right
+	b2Vec2 normal1 = b2RightPerp( edge1 );
+	bool behind1 = b2Dot( normal1, b2Sub( centroidB, p1 ) ) < 0.0f;
+	bool behind0 = true; // non-convex neighbors are treated as "behind" so they never block the one-sided rejection
+	bool behind2 = true;
+	if ( smoothParams.convex1 )
+	{
+		behind0 = b2Dot( smoothParams.normal0, b2Sub( centroidB, p1 ) ) < 0.0f;
+	}
+
+	if ( smoothParams.convex2 )
+	{
+		behind2 = b2Dot( smoothParams.normal2, b2Sub( centroidB, p2 ) ) < 0.0f;
+	}
+
+	if ( behind1 && behind0 && behind2 )
+	{
+		// one-sided collision
+		return manifold;
+	}
+
+	// Get polygonB in frameA
+	int count = polygonB->count;
+	b2Vec2 vertices[B2_MAX_POLYGON_VERTICES];
+	b2Vec2 normals[B2_MAX_POLYGON_VERTICES];
+	for ( int i = 0; i < count; ++i )
+	{
+		vertices[i] = b2TransformPoint( xf, polygonB->vertices[i] );
+		normals[i] = b2RotateVector( xf.q, polygonB->normals[i] );
+	}
+
+	// Distance doesn't work correctly with partial polygons
+	b2DistanceInput input;
+	input.proxyA = b2MakeProxy( &segmentA->segment.point1, 2, 0.0f ); // NOTE(review): presumably relies on point2 immediately following point1 in b2Segment - confirm
+	input.proxyB = b2MakeProxy( vertices, count, 0.0f );
+	input.transformA = b2Transform_identity;
+	input.transformB = b2Transform_identity;
+	input.useRadii = false;
+
+	b2DistanceOutput output = b2ShapeDistance( &input, cache, NULL, 0 );
+
+	if ( output.distance > radiusB + B2_SPECULATIVE_DISTANCE )
+	{
+		return manifold;
+	}
+
+	// Snap concave normals for partial polygon
+	b2Vec2 n0 = smoothParams.convex1 ? smoothParams.normal0 : normal1;
+	b2Vec2 n2 = smoothParams.convex2 ? smoothParams.normal2 : normal1;
+
+	// Index of incident vertex on polygon (incidentIndex) or of the incident polygon normal (incidentNormal); -1 means not set
+	int incidentIndex = -1;
+	int incidentNormal = -1;
+
+	if ( behind1 == false && output.distance > 0.1f * B2_LINEAR_SLOP )
+	{
+		// The closest features may be two vertices or an edge and a vertex even when there should
+		// be face contact
+
+		if ( cache->count == 1 )
+		{
+			// vertex-vertex collision
+			b2Vec2 pA = output.pointA;
+			b2Vec2 pB = output.pointB;
+
+			b2Vec2 normal = b2Normalize( b2Sub( pB, pA ) );
+
+			enum b2NormalType type = b2ClassifyNormal( smoothParams, normal );
+			if ( type == b2_normalSkip )
+			{
+				return manifold;
+			}
+
+			if ( type == b2_normalAdmit )
+			{
+				manifold.normal = b2RotateVector( xfA.q, normal );
+				b2ManifoldPoint* cp = manifold.points + 0;
+				cp->anchorA = b2RotateVector( xfA.q, pA );
+				cp->anchorB = b2Add( cp->anchorA, b2Sub( xfA.p, xfB.p ) );
+				cp->point = b2Add( xfA.p, cp->anchorA );
+				cp->separation = output.distance - radiusB;
+				cp->id = B2_MAKE_ID( cache->indexA[0], cache->indexB[0] );
+				manifold.pointCount = 1;
+				return manifold;
+			}
+
+			// fall through b2_normalSnap
+			incidentIndex = cache->indexB[0];
+		}
+		else
+		{
+			// vertex-edge collision
+			B2_ASSERT( cache->count == 2 );
+
+			int ia1 = cache->indexA[0];
+			int ia2 = cache->indexA[1];
+			int ib1 = cache->indexB[0];
+			int ib2 = cache->indexB[1];
+
+			if ( ia1 == ia2 )
+			{
+				// 1 point on A, expect 2 points on B
+				B2_ASSERT( ib1 != ib2 );
+
+				// Find polygon normal most aligned with vector between closest points.
+				// This effectively sorts ib1 and ib2
+				b2Vec2 normalB = b2Sub( output.pointA, output.pointB );
+				float dot1 = b2Dot( normalB, normals[ib1] );
+				float dot2 = b2Dot( normalB, normals[ib2] );
+				int ib = dot1 > dot2 ? ib1 : ib2;
+
+				// Use accurate normal
+				normalB = normals[ib];
+
+				enum b2NormalType type = b2ClassifyNormal( smoothParams, b2Neg( normalB ) );
+				if ( type == b2_normalSkip )
+				{
+					return manifold;
+				}
+
+				if ( type == b2_normalAdmit )
+				{
+					// Get polygon edge associated with normal
+					ib1 = ib;
+					ib2 = ib < count - 1 ? ib + 1 : 0;
+
+					b2Vec2 b1 = vertices[ib1];
+					b2Vec2 b2 = vertices[ib2];
+
+					// Find incident segment vertex
+					dot1 = b2Dot( normalB, b2Sub( p1, b1 ) );
+					dot2 = b2Dot( normalB, b2Sub( p2, b1 ) );
+
+					if ( dot1 < dot2 )
+					{
+						if ( b2Dot( n0, normalB ) < b2Dot( normal1, normalB ) )
+						{
+							// Neighbor is incident
+							return manifold;
+						}
+					}
+					else
+					{
+						if ( b2Dot( n2, normalB ) < b2Dot( normal1, normalB ) )
+						{
+							// Neighbor is incident
+							return manifold;
+						}
+					}
+
+					manifold =
+						b2ClipSegments( b1, b2, p1, p2, normalB, radiusB, 0.0f, B2_MAKE_ID( ib1, 1 ), B2_MAKE_ID( ib2, 0 ) );
+
+					B2_ASSERT( manifold.pointCount == 0 || manifold.pointCount == 2 );
+					if ( manifold.pointCount == 2 )
+					{
+						manifold.normal = b2RotateVector( xfA.q, b2Neg( normalB ) ); // the polygon was the reference, so report the opposite of its normal
+						manifold.points[0].anchorA = b2RotateVector( xfA.q, manifold.points[0].anchorA );
+						manifold.points[1].anchorA = b2RotateVector( xfA.q, manifold.points[1].anchorA );
+						b2Vec2 pAB = b2Sub( xfA.p, xfB.p );
+						manifold.points[0].anchorB = b2Add( manifold.points[0].anchorA, pAB );
+						manifold.points[1].anchorB = b2Add( manifold.points[1].anchorA, pAB );
+						manifold.points[0].point = b2Add( xfA.p, manifold.points[0].anchorA );
+						manifold.points[1].point = b2Add( xfA.p, manifold.points[1].anchorA );
+					}
+					return manifold;
+				}
+
+				// fall through b2_normalSnap
+				incidentNormal = ib;
+			}
+			else
+			{
+				// Get index of incident polygonB vertex
+				float dot1 = b2Dot( normal1, b2Sub( vertices[ib1], p1 ) );
+				float dot2 = b2Dot( normal1, b2Sub( vertices[ib2], p2 ) ); // p1 and p2 give the same offset along normal1 because normal1 is perpendicular to the segment
+				incidentIndex = dot1 < dot2 ? ib1 : ib2;
+			}
+		}
+	}
+	else
+	{
+		// SAT edge normal
+		float edgeSeparation = FLT_MAX;
+
+		for ( int i = 0; i < count; ++i )
+		{
+			float s = b2Dot( normal1, b2Sub( vertices[i], p1 ) );
+			if ( s < edgeSeparation )
+			{
+				edgeSeparation = s;
+				incidentIndex = i;
+			}
+		}
+
+		// Check convex neighbor for edge separation
+		if ( smoothParams.convex1 )
+		{
+			float s0 = FLT_MAX;
+
+			for ( int i = 0; i < count; ++i )
+			{
+				float s = b2Dot( smoothParams.normal0, b2Sub( vertices[i], p1 ) );
+				if ( s < s0 )
+				{
+					s0 = s;
+				}
+			}
+
+			if ( s0 > edgeSeparation )
+			{
+				edgeSeparation = s0;
+
+				// Indicate neighbor owns edge separation
+				incidentIndex = -1;
+			}
+		}
+
+		// Check convex neighbor for edge separation
+		if ( smoothParams.convex2 )
+		{
+			float s2 = FLT_MAX;
+
+			for ( int i = 0; i < count; ++i )
+			{
+				float s = b2Dot( smoothParams.normal2, b2Sub( vertices[i], p2 ) );
+				if ( s < s2 )
+				{
+					s2 = s;
+				}
+			}
+
+			if ( s2 > edgeSeparation )
+			{
+				edgeSeparation = s2;
+
+				// Indicate neighbor owns edge separation
+				incidentIndex = -1;
+			}
+		}
+
+		// SAT polygon normals
+		float polygonSeparation = -FLT_MAX;
+		int referenceIndex = -1;
+
+		for ( int i = 0; i < count; ++i )
+		{
+			b2Vec2 n = normals[i];
+
+			enum b2NormalType type = b2ClassifyNormal( smoothParams, b2Neg( n ) );
+			if ( type != b2_normalAdmit )
+			{
+				continue;
+			}
+
+			// Check the infinite sides of the partial polygon
+			// if ((smoothParams.convex1 && b2Cross(n0, n) > 0.0f) || (smoothParams.convex2 && b2Cross(n, n2) > 0.0f))
+			//{
+			//	continue;
+			//}
+
+			b2Vec2 p = vertices[i];
+			float s = b2MinFloat( b2Dot( n, b2Sub( p2, p ) ), b2Dot( n, b2Sub( p1, p ) ) );
+
+			if ( s > polygonSeparation )
+			{
+				polygonSeparation = s;
+				referenceIndex = i;
+			}
+		}
+
+		if ( polygonSeparation > edgeSeparation ) // only possible when some polygon normal was admitted, so referenceIndex is valid here
+		{
+			int ia1 = referenceIndex;
+			int ia2 = ia1 < count - 1 ? ia1 + 1 : 0;
+			b2Vec2 a1 = vertices[ia1];
+			b2Vec2 a2 = vertices[ia2];
+
+			b2Vec2 n = normals[ia1];
+
+			float dot1 = b2Dot( n, b2Sub( p1, a1 ) );
+			float dot2 = b2Dot( n, b2Sub( p2, a1 ) );
+
+			if ( dot1 < dot2 )
+			{
+				if ( b2Dot( n0, n ) < b2Dot( normal1, n ) )
+				{
+					// Neighbor is incident
+					return manifold;
+				}
+			}
+			else
+			{
+				if ( b2Dot( n2, n ) < b2Dot( normal1, n ) )
+				{
+					// Neighbor is incident
+					return manifold;
+				}
+			}
+
+			manifold = b2ClipSegments( a1, a2, p1, p2, normals[ia1], radiusB, 0.0f, B2_MAKE_ID( ia1, 1 ), B2_MAKE_ID( ia2, 0 ) );
+
+			B2_ASSERT( manifold.pointCount == 0 || manifold.pointCount == 2 );
+			if ( manifold.pointCount == 2 )
+			{
+
+				manifold.normal = b2RotateVector( xfA.q, b2Neg( normals[ia1] ) ); // the polygon was the reference, so report the opposite of its normal
+				manifold.points[0].anchorA = b2RotateVector( xfA.q, manifold.points[0].anchorA );
+				manifold.points[1].anchorA = b2RotateVector( xfA.q, manifold.points[1].anchorA );
+				b2Vec2 pAB = b2Sub( xfA.p, xfB.p );
+				manifold.points[0].anchorB = b2Add( manifold.points[0].anchorA, pAB );
+				manifold.points[1].anchorB = b2Add( manifold.points[1].anchorA, pAB );
+				manifold.points[0].point = b2Add( xfA.p, manifold.points[0].anchorA );
+				manifold.points[1].point = b2Add( xfA.p, manifold.points[1].anchorA );
+			}
+
+			return manifold;
+		}
+
+		if ( incidentIndex == -1 )
+		{
+			// neighboring segment is the separating axis
+			return manifold;
+		}
+
+		// fall through segment normal axis
+	}
+
+	B2_ASSERT( incidentNormal != -1 || incidentIndex != -1 );
+
+	// Segment normal
+
+	// Find incident polygon normal: normal adjacent to deepest vertex that is most anti-parallel to segment normal
+	b2Vec2 b1, b2;
+	int ib1, ib2;
+
+	if ( incidentNormal != -1 )
+	{
+		ib1 = incidentNormal;
+		ib2 = ib1 < count - 1 ? ib1 + 1 : 0;
+		b1 = vertices[ib1];
+		b2 = vertices[ib2];
+	}
+	else
+	{
+		int i2 = incidentIndex;
+		int i1 = i2 > 0 ? i2 - 1 : count - 1; // previous vertex index, wrapping to count - 1
+		float d1 = b2Dot( normal1, normals[i1] );
+		float d2 = b2Dot( normal1, normals[i2] );
+		if ( d1 < d2 )
+		{
+			ib1 = i1, ib2 = i2;
+			b1 = vertices[ib1];
+			b2 = vertices[ib2];
+		}
+		else
+		{
+			ib1 = i2, ib2 = i2 < count - 1 ? i2 + 1 : 0;
+			b1 = vertices[ib1];
+			b2 = vertices[ib2];
+		}
+	}
+
+	manifold = b2ClipSegments( p1, p2, b1, b2, normal1, 0.0f, radiusB, B2_MAKE_ID( 0, ib2 ), B2_MAKE_ID( 1, ib1 ) );
+
+	B2_ASSERT( manifold.pointCount == 0 || manifold.pointCount == 2 );
+	if ( manifold.pointCount == 2 )
+	{
+		// Clipping may have produced no points, so only convert to world space when there are two
+		manifold.normal = b2RotateVector( xfA.q, manifold.normal );
+		manifold.points[0].anchorA = b2RotateVector( xfA.q, manifold.points[0].anchorA );
+		manifold.points[1].anchorA = b2RotateVector( xfA.q, manifold.points[1].anchorA );
+		b2Vec2 pAB = b2Sub( xfA.p, xfB.p );
+		manifold.points[0].anchorB = b2Add( manifold.points[0].anchorA, pAB );
+		manifold.points[1].anchorB = b2Add( manifold.points[1].anchorA, pAB );
+		manifold.points[0].point = b2Add( xfA.p, manifold.points[0].anchorA );
+		manifold.points[1].point = b2Add( xfA.p, manifold.points[1].anchorA );
+	}
+
+	return manifold;
+}
diff --git a/src/vendor/box2d/math_functions.c b/src/vendor/box2d/math_functions.c
new file mode 100644
index 0000000..5b1e949
--- /dev/null
+++ b/src/vendor/box2d/math_functions.c
@@ -0,0 +1,159 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "box2d/math_functions.h"
+
+#include <float.h>
+
+_Static_assert( sizeof( int32_t ) == sizeof( int ), "Box2D expects int32_t and int to be the same" );
+
+bool b2IsValidFloat( float a )
+{
+	if ( isnan( a ) )
+	{
+		return false;
+	}
+
+	if ( isinf( a ) )
+	{
+		return false;
+	}
+
+	return true;
+}
+
+// Validity check for a 2D vector.
+// @param v vector to inspect
+// @return true only when neither coordinate is NaN or infinite
+bool b2IsValidVec2( b2Vec2 v )
+{
+	// x coordinate: reject NaN and +/- infinity
+	bool xFinite = !isnan( v.x ) && !isinf( v.x );
+
+	// y coordinate: same screening
+	bool yFinite = !isnan( v.y ) && !isinf( v.y );
+
+	// the vector is usable only when both coordinates passed
+	return xFinite && yFinite;
+}
+
+// Validity check for a rotation.
+// @param q rotation to inspect
+// @return true only when both components are finite and b2IsNormalizedRot accepts q
+bool b2IsValidRotation( b2Rot q )
+{
+	bool hasNaN = isnan( q.s ) || isnan( q.c );
+	bool hasInf = isinf( q.s ) || isinf( q.c );
+	if ( hasNaN || hasInf )
+	{
+		return false;
+	}
+
+	return b2IsNormalizedRot( q );
+}
+
+bool b2IsValidPlane( b2Plane a )
+{
+	return b2IsValidVec2( a.normal ) && b2IsNormalized( a.normal ) && b2IsValidFloat( a.offset );
+}
+
+// https://stackoverflow.com/questions/46210708/atan2-approximation-with-11bits-in-mantissa-on-x86with-sse2-and-armwith-vfpv4
+float b2Atan2( float y, float x )
+{
+	// Added check for (0,0) to match atan2f and avoid NaN
+	if (x == 0.0f && y == 0.0f)
+	{
+		return 0.0f;
+	}
+
+	float ax = b2AbsFloat( x );
+	float ay = b2AbsFloat( y );
+	float mx = b2MaxFloat( ay, ax );
+	float mn = b2MinFloat( ay, ax );
+	float a = mn / mx;
+
+	// Minimax polynomial approximation to atan(a) on [0,1]
+	float s = a * a;
+	float c = s * a;
+	float q = s * s;
+	float r = 0.024840285f * q + 0.18681418f;
+	float t = -0.094097948f * q - 0.33213072f;
+	r = r * s + t;
+	r = r * c + a;
+
+	// Map to full circle
+	if ( ay > ax )
+	{
+		r = 1.57079637f - r;
+	}
+
+	if ( x < 0 )
+	{
+		r = 3.14159274f - r;
+	}
+
+	if ( y < 0 )
+	{
+		r = -r;
+	}
+
+	return r;
+}
+
+// Approximate cosine and sine for determinism. In my testing cosf and sinf produced
+// the same results on x64 and ARM using MSVC, GCC, and Clang. However, I don't trust
+// this result.
+// https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula
+b2CosSin b2ComputeCosSin( float radians )
+{
+	float x = b2UnwindLargeAngle( radians );
+	float pi2 = B2_PI * B2_PI;
+
+	// cosine needs angle in [-pi/2, pi/2]
+	float c;
+	if ( x < -0.5f * B2_PI )
+	{
+		float y = x + B2_PI;
+		float y2 = y * y;
+		c = -( pi2 - 4.0f * y2 ) / ( pi2 + y2 );
+	}
+	else if ( x > 0.5f * B2_PI )
+	{
+		float y = x - B2_PI;
+		float y2 = y * y;
+		c = -( pi2 - 4.0f * y2 ) / ( pi2 + y2 );
+	}
+	else
+	{
+		float y2 = x * x;
+		c = ( pi2 - 4.0f * y2 ) / ( pi2 + y2 );
+	}
+
+	// sine needs angle in [0, pi]
+	float s;
+	if ( x < 0.0f )
+	{
+		float y = x + B2_PI;
+		s = -16.0f * y * ( B2_PI - y ) / ( 5.0f * pi2 - 4.0f * y * ( B2_PI - y ) );
+	}
+	else
+	{
+		s = 16.0f * x * ( B2_PI - x ) / ( 5.0f * pi2 - 4.0f * x * ( B2_PI - x ) );
+	}
+
+	float mag = sqrtf( s * s + c * c );
+	float invMag = mag > 0.0 ? 1.0f / mag : 0.0f;
+	b2CosSin cs = { c * invMag, s * invMag };
+	return cs;
+}
+
+b2Rot b2ComputeRotationBetweenUnitVectors( b2Vec2 v1, b2Vec2 v2 )
+{
+	// both inputs must already be unit length, within 100 float epsilons
+	B2_ASSERT( b2AbsFloat( 1.0f - b2Length( v1 ) ) < 100.0f * FLT_EPSILON );
+	B2_ASSERT( b2AbsFloat( 1.0f - b2Length( v2 ) ) < 100.0f * FLT_EPSILON );
+
+	// cosine comes from the dot product and sine from the cross product; renormalize the pair
+	b2Rot unnormalized = { b2Dot( v1, v2 ), b2Cross( v1, v2 ) };
+	return b2NormalizeRot( unnormalized );
+}
diff --git a/src/vendor/box2d/math_functions.h b/src/vendor/box2d/math_functions.h
new file mode 100644
index 0000000..88597d8
--- /dev/null
+++ b/src/vendor/box2d/math_functions.h
@@ -0,0 +1,761 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "base.h"
+
+#include <float.h>
+#include <math.h>
+#include <stdbool.h>
+
+/**
+ * @defgroup math Math
+ * @brief Vector math types and functions
+ * @{
+ */
+
+/// 2D vector
+/// This can be used to represent a point or free vector
+typedef struct b2Vec2
+{
+	/// coordinates
+	float x, y;
+} b2Vec2;
+
+/// Cosine and sine pair
+/// This uses a custom implementation designed for cross-platform determinism
+typedef struct b2CosSin
+{
+	/// cosine and sine
+	float cosine;
+	float sine;
+} b2CosSin;
+
+/// 2D rotation
+/// This is similar to using a complex number for rotation
+typedef struct b2Rot
+{
+	/// cosine and sine
+	float c, s;
+} b2Rot;
+
+/// A 2D rigid transform
+typedef struct b2Transform
+{
+	b2Vec2 p;
+	b2Rot q;
+} b2Transform;
+
+/// A 2-by-2 Matrix
+typedef struct b2Mat22
+{
+	/// columns
+	b2Vec2 cx, cy;
+} b2Mat22;
+
+/// Axis-aligned bounding box
+typedef struct b2AABB
+{
+	b2Vec2 lowerBound;
+	b2Vec2 upperBound;
+} b2AABB;
+
+/// separation = dot(normal, point) - offset
+typedef struct b2Plane
+{
+	b2Vec2 normal;
+	float offset;
+} b2Plane;
+
+/**@}*/
+
+/**
+ * @addtogroup math
+ * @{
+ */
+
+/// https://en.wikipedia.org/wiki/Pi
+#define B2_PI 3.14159265359f
+
+static const b2Vec2 b2Vec2_zero = { 0.0f, 0.0f };
+static const b2Rot b2Rot_identity = { 1.0f, 0.0f };
+static const b2Transform b2Transform_identity = { { 0.0f, 0.0f }, { 1.0f, 0.0f } };
+static const b2Mat22 b2Mat22_zero = { { 0.0f, 0.0f }, { 0.0f, 0.0f } };
+
+/// @return the minimum of two integers
+B2_INLINE int b2MinInt( int a, int b )
+{
+	return a < b ? a : b;
+}
+
+/// @return the maximum of two integers
+B2_INLINE int b2MaxInt( int a, int b )
+{
+	return a > b ? a : b;
+}
+
+/// @return the absolute value of an integer
+B2_INLINE int b2AbsInt( int a )
+{
+	return a < 0 ? -a : a;
+}
+
+/// @return an integer clamped between a lower and upper bound
+B2_INLINE int b2ClampInt( int a, int lower, int upper )
+{
+	return a < lower ? lower : ( a > upper ? upper : a );
+}
+
+/// @return the minimum of two floats
+B2_INLINE float b2MinFloat( float a, float b )
+{
+	return a < b ? a : b;
+}
+
+/// @return the maximum of two floats
+B2_INLINE float b2MaxFloat( float a, float b )
+{
+	return a > b ? a : b;
+}
+
+/// @return the absolute value of a float
+B2_INLINE float b2AbsFloat( float a )
+{
+	return a < 0 ? -a : a;
+}
+
+/// @return a float clamped between a lower and upper bound
+B2_INLINE float b2ClampFloat( float a, float lower, float upper )
+{
+	return a < lower ? lower : ( a > upper ? upper : a );
+}
+
+/// Compute an approximate arctangent in the range [-pi, pi]
+/// This is hand coded for cross-platform determinism. The atan2f
+/// function in the standard library is not cross-platform deterministic.
+///	Accurate to around 0.0023 degrees
+B2_API float b2Atan2( float y, float x );
+
+/// Compute the cosine and sine of an angle in radians. Implemented
+/// for cross-platform determinism.
+B2_API b2CosSin b2ComputeCosSin( float radians );
+
+/// Vector dot product
+B2_INLINE float b2Dot( b2Vec2 a, b2Vec2 b )
+{
+	return a.x * b.x + a.y * b.y;
+}
+
+/// Vector cross product. In 2D this yields a scalar.
+B2_INLINE float b2Cross( b2Vec2 a, b2Vec2 b )
+{
+	return a.x * b.y - a.y * b.x;
+}
+
+/// Perform the cross product on a vector and a scalar. In 2D this produces a vector.
+B2_INLINE b2Vec2 b2CrossVS( b2Vec2 v, float s )
+{
+	return B2_LITERAL( b2Vec2 ){ s * v.y, -s * v.x };
+}
+
+/// Perform the cross product on a scalar and a vector. In 2D this produces a vector.
+B2_INLINE b2Vec2 b2CrossSV( float s, b2Vec2 v )
+{
+	return B2_LITERAL( b2Vec2 ){ -s * v.y, s * v.x };
+}
+
+/// Get a left pointing perpendicular vector. Equivalent to b2CrossSV(1.0f, v)
+B2_INLINE b2Vec2 b2LeftPerp( b2Vec2 v )
+{
+	return B2_LITERAL( b2Vec2 ){ -v.y, v.x };
+}
+
+/// Get a right pointing perpendicular vector. Equivalent to b2CrossVS(v, 1.0f)
+B2_INLINE b2Vec2 b2RightPerp( b2Vec2 v )
+{
+	return B2_LITERAL( b2Vec2 ){ v.y, -v.x };
+}
+
+/// Vector addition
+B2_INLINE b2Vec2 b2Add( b2Vec2 a, b2Vec2 b )
+{
+	return B2_LITERAL( b2Vec2 ){ a.x + b.x, a.y + b.y };
+}
+
+/// Vector subtraction
+B2_INLINE b2Vec2 b2Sub( b2Vec2 a, b2Vec2 b )
+{
+	return B2_LITERAL( b2Vec2 ){ a.x - b.x, a.y - b.y };
+}
+
+/// Vector negation
+B2_INLINE b2Vec2 b2Neg( b2Vec2 a )
+{
+	return B2_LITERAL( b2Vec2 ){ -a.x, -a.y };
+}
+
+/// Vector linear interpolation
+/// https://fgiesen.wordpress.com/2012/08/15/linear-interpolation-past-present-and-future/
+B2_INLINE b2Vec2 b2Lerp( b2Vec2 a, b2Vec2 b, float t )
+{
+	return B2_LITERAL( b2Vec2 ){ ( 1.0f - t ) * a.x + t * b.x, ( 1.0f - t ) * a.y + t * b.y };
+}
+
+/// Component-wise multiplication
+B2_INLINE b2Vec2 b2Mul( b2Vec2 a, b2Vec2 b )
+{
+	return B2_LITERAL( b2Vec2 ){ a.x * b.x, a.y * b.y };
+}
+
+/// Multiply a scalar and vector
+B2_INLINE b2Vec2 b2MulSV( float s, b2Vec2 v )
+{
+	return B2_LITERAL( b2Vec2 ){ s * v.x, s * v.y };
+}
+
+/// a + s * b
+B2_INLINE b2Vec2 b2MulAdd( b2Vec2 a, float s, b2Vec2 b )
+{
+	return B2_LITERAL( b2Vec2 ){ a.x + s * b.x, a.y + s * b.y };
+}
+
+/// a - s * b
+B2_INLINE b2Vec2 b2MulSub( b2Vec2 a, float s, b2Vec2 b )
+{
+	return B2_LITERAL( b2Vec2 ){ a.x - s * b.x, a.y - s * b.y };
+}
+
+/// Take the absolute value of each component of a vector.
+/// @param a input vector
+/// @return vector whose x and y are b2AbsFloat of a.x and a.y
+B2_INLINE b2Vec2 b2Abs( b2Vec2 a )
+{
+	// built in a single expression, like the neighboring vector helpers
+	return B2_LITERAL( b2Vec2 ){ b2AbsFloat( a.x ), b2AbsFloat( a.y ) };
+}
+
+/// Take the smaller of each pair of components.
+/// @param a first vector
+/// @param b second vector
+/// @return vector holding b2MinFloat of the x components and of the y components
+B2_INLINE b2Vec2 b2Min( b2Vec2 a, b2Vec2 b )
+{
+	return B2_LITERAL( b2Vec2 ){ b2MinFloat( a.x, b.x ), b2MinFloat( a.y, b.y ) };
+}
+
+/// Take the larger of each pair of components.
+/// @param a first vector
+/// @param b second vector
+/// @return vector holding b2MaxFloat of the x components and of the y components
+B2_INLINE b2Vec2 b2Max( b2Vec2 a, b2Vec2 b )
+{
+	return B2_LITERAL( b2Vec2 ){ b2MaxFloat( a.x, b.x ), b2MaxFloat( a.y, b.y ) };
+}
+
+/// Clamp each component of v into the range given by the matching components of a and b.
+/// @param v vector to clamp
+/// @param a lower bounds, one per component
+/// @param b upper bounds, one per component
+B2_INLINE b2Vec2 b2Clamp( b2Vec2 v, b2Vec2 a, b2Vec2 b )
+{
+	return B2_LITERAL( b2Vec2 ){ b2ClampFloat( v.x, a.x, b.x ), b2ClampFloat( v.y, a.y, b.y ) };
+}
+
+/// Get the length of this vector (the norm)
+B2_INLINE float b2Length( b2Vec2 v )
+{
+	return sqrtf( v.x * v.x + v.y * v.y );
+}
+
+/// Get the distance between two points
+B2_INLINE float b2Distance( b2Vec2 a, b2Vec2 b )
+{
+	float dx = b.x - a.x;
+	float dy = b.y - a.y;
+	return sqrtf( dx * dx + dy * dy );
+}
+
+/// Convert a vector into a unit vector if possible, otherwise returns the zero vector.
+/// todo MSVC is not inlining this function in several places per warning 4710
+B2_INLINE b2Vec2 b2Normalize( b2Vec2 v )
+{
+	float length = sqrtf( v.x * v.x + v.y * v.y );
+	if ( length < FLT_EPSILON )
+	{
+		return B2_LITERAL( b2Vec2 ){ 0.0f, 0.0f };
+	}
+
+	float invLength = 1.0f / length;
+	b2Vec2 n = { invLength * v.x, invLength * v.y };
+	return n;
+}
+
+/// Determines if the provided vector is normalized (norm(a) == 1).
+B2_INLINE bool b2IsNormalized( b2Vec2 a )
+{
+	float aa = b2Dot( a, a );
+	return b2AbsFloat( 1.0f - aa ) < 10.0f * FLT_EPSILON;
+}
+
+/// Convert a vector into a unit vector if possible, otherwise returns the zero vector. Also
+/// outputs the length.
+B2_INLINE b2Vec2 b2GetLengthAndNormalize( float* length, b2Vec2 v )
+{
+	*length = sqrtf( v.x * v.x + v.y * v.y );
+	if ( *length < FLT_EPSILON )
+	{
+		return B2_LITERAL( b2Vec2 ){ 0.0f, 0.0f };
+	}
+
+	float invLength = 1.0f / *length;
+	b2Vec2 n = { invLength * v.x, invLength * v.y };
+	return n;
+}
+
+/// Normalize rotation
+B2_INLINE b2Rot b2NormalizeRot( b2Rot q )
+{
+	float mag = sqrtf( q.s * q.s + q.c * q.c );
+	float invMag = mag > 0.0 ? 1.0f / mag : 0.0f;
+	b2Rot qn = { q.c * invMag, q.s * invMag };
+	return qn;
+}
+
+/// Integrate rotation from angular velocity
+/// @param q1 initial rotation
+/// @param deltaAngle the angular displacement in radians
+/// @return the advanced rotation after renormalization by b2NormalizeRot
+B2_INLINE b2Rot b2IntegrateRotation( b2Rot q1, float deltaAngle )
+{
+	// First-order step of the rotation derivative, with deltaAngle = omega * h:
+	//   dc/dt = -omega * sin(t)  =>  c2 = c1 - omega * h * s1
+	//   ds/dt =  omega * cos(t)  =>  s2 = s1 + omega * h * c1
+	// The stepped pair is no longer unit length, so it is rescaled afterwards.
+	b2Rot stepped;
+	stepped.c = q1.c - deltaAngle * q1.s;
+	stepped.s = q1.s + deltaAngle * q1.c;
+	return b2NormalizeRot( stepped );
+}
+
+/// Get the length squared of this vector
+B2_INLINE float b2LengthSquared( b2Vec2 v )
+{
+	return v.x * v.x + v.y * v.y;
+}
+
+/// Get the distance squared between points
+B2_INLINE float b2DistanceSquared( b2Vec2 a, b2Vec2 b )
+{
+	b2Vec2 c = { b.x - a.x, b.y - a.y };
+	return c.x * c.x + c.y * c.y;
+}
+
+/// Make a rotation using an angle in radians
+B2_INLINE b2Rot b2MakeRot( float radians )
+{
+	b2CosSin cs = b2ComputeCosSin( radians );
+	return B2_LITERAL( b2Rot ){ cs.cosine, cs.sine };
+}
+
+/// Compute the rotation between two unit vectors
+B2_API b2Rot b2ComputeRotationBetweenUnitVectors( b2Vec2 v1, b2Vec2 v2 );
+
+/// Is this rotation normalized?
+B2_INLINE bool b2IsNormalizedRot( b2Rot q )
+{
+	// larger tolerance due to failure on mingw 32-bit
+	float qq = q.s * q.s + q.c * q.c;
+	return 1.0f - 0.0006f < qq && qq < 1.0f + 0.0006f;
+}
+
+/// Normalized linear interpolation between two rotations
+/// https://fgiesen.wordpress.com/2012/08/15/linear-interpolation-past-present-and-future/
+///	https://web.archive.org/web/20170825184056/http://number-none.com/product/Understanding%20Slerp,%20Then%20Not%20Using%20It/
+/// @param q1 rotation weighted by ( 1 - t )
+/// @param q2 rotation weighted by t
+/// @param t interpolation weight
+B2_INLINE b2Rot b2NLerp( b2Rot q1, b2Rot q2, float t )
+{
+	float weight1 = 1.0f - t;
+
+	// blend cosine and sine separately, then rescale the blend with b2NormalizeRot
+	b2Rot blend;
+	blend.c = weight1 * q1.c + t * q2.c;
+	blend.s = weight1 * q1.s + t * q2.s;
+	return b2NormalizeRot( blend );
+}
+
+/// Compute the angular velocity necessary to rotate between two rotations over a given time
+/// @param q1 initial rotation
+/// @param q2 final rotation
+/// @param inv_h inverse time step
+B2_INLINE float b2ComputeAngularVelocity( b2Rot q1, b2Rot q2, float inv_h )
+{
+	// ds/dt = omega * cos(t)
+	// dc/dt = -omega * sin(t)
+	// s2 = s1 + omega * h * c1
+	// c2 = c1 - omega * h * s1
+
+	// omega * h * s1 = c1 - c2
+	// omega * h * c1 = s2 - s1
+	// omega * h = (c1 - c2) * s1 + (s2 - s1) * c1;
+	// omega * h = s1 * c1 - c2 * s1 + s2 * c1 - s1 * c1
+	// omega * h = s2 * c1 - c2 * s1 = sin(a2 - a1) ~= a2 - a1 for small delta
+	float omega = inv_h * ( q2.s * q1.c - q2.c * q1.s );
+	return omega;
+}
+
+/// Get the angle in radians in the range [-pi, pi]
+B2_INLINE float b2Rot_GetAngle( b2Rot q )
+{
+	return b2Atan2( q.s, q.c );
+}
+
+/// Get the x-axis
+B2_INLINE b2Vec2 b2Rot_GetXAxis( b2Rot q )
+{
+	b2Vec2 v = { q.c, q.s };
+	return v;
+}
+
+/// Get the y-axis
+B2_INLINE b2Vec2 b2Rot_GetYAxis( b2Rot q )
+{
+	b2Vec2 v = { -q.s, q.c };
+	return v;
+}
+
+/// Multiply two rotations: q * r
+B2_INLINE b2Rot b2MulRot( b2Rot q, b2Rot r )
+{
+	// [qc -qs] * [rc -rs] = [qc*rc-qs*rs -qc*rs-qs*rc]
+	// [qs  qc]   [rs  rc]   [qs*rc+qc*rs -qs*rs+qc*rc]
+	// s(q + r) = qs * rc + qc * rs
+	// c(q + r) = qc * rc - qs * rs
+	b2Rot qr;
+	qr.s = q.s * r.c + q.c * r.s;
+	qr.c = q.c * r.c - q.s * r.s;
+	return qr;
+}
+
+/// Transpose multiply two rotations: qT * r
+B2_INLINE b2Rot b2InvMulRot( b2Rot q, b2Rot r )
+{
+	// [ qc qs] * [rc -rs] = [qc*rc+qs*rs -qc*rs+qs*rc]
+	// [-qs qc]   [rs  rc]   [-qs*rc+qc*rs qs*rs+qc*rc]
+	// s(q - r) = qc * rs - qs * rc
+	// c(q - r) = qc * rc + qs * rs
+	b2Rot qr;
+	qr.s = q.c * r.s - q.s * r.c;
+	qr.c = q.c * r.c + q.s * r.s;
+	return qr;
+}
+
+/// relative angle between b and a (rot_b * inv(rot_a))
+B2_INLINE float b2RelativeAngle( b2Rot b, b2Rot a )
+{
+	// sin(b - a) = bs * ac - bc * as
+	// cos(b - a) = bc * ac + bs * as
+	float s = b.s * a.c - b.c * a.s;
+	float c = b.c * a.c + b.s * a.s;
+	return b2Atan2( s, c );
+}
+
+/// Convert an angle in the range [-2*pi, 2*pi] into the range [-pi, pi]
+B2_INLINE float b2UnwindAngle( float radians )
+{
+	if ( radians < -B2_PI )
+	{
+		return radians + 2.0f * B2_PI;
+	}
+	else if ( radians > B2_PI )
+	{
+		return radians - 2.0f * B2_PI;
+	}
+
+	return radians;
+}
+
+/// Convert any finite angle into the range [-pi, pi]. Unlike b2UnwindAngle the input may
+/// lie outside [-2*pi, 2*pi].
+/// @param radians angle in radians; NaN or infinity yields NaN
+/// @return the equivalent angle in [-pi, pi]
+B2_INLINE float b2UnwindLargeAngle( float radians )
+{
+	// Repeatedly adding or subtracting 2*pi never terminates once the magnitude is so large
+	// that the float no longer changes (e.g. 1e10f or infinity). fmodf is computed exactly,
+	// keeps the sign of its first argument and brings the angle into (-2*pi, 2*pi) in one
+	// step. Angles already in that interval pass through fmodf unchanged, so they unwind
+	// exactly as before.
+	float reduced = fmodf( radians, 2.0f * B2_PI );
+
+	return b2UnwindAngle( reduced );
+}
+
+/// Rotate a vector
+B2_INLINE b2Vec2 b2RotateVector( b2Rot q, b2Vec2 v )
+{
+	return B2_LITERAL( b2Vec2 ){ q.c * v.x - q.s * v.y, q.s * v.x + q.c * v.y };
+}
+
+/// Inverse rotate a vector
+B2_INLINE b2Vec2 b2InvRotateVector( b2Rot q, b2Vec2 v )
+{
+	return B2_LITERAL( b2Vec2 ){ q.c * v.x + q.s * v.y, -q.s * v.x + q.c * v.y };
+}
+
+/// Transform a point (e.g. local space to world space)
+B2_INLINE b2Vec2 b2TransformPoint( b2Transform t, const b2Vec2 p )
+{
+	float x = ( t.q.c * p.x - t.q.s * p.y ) + t.p.x;
+	float y = ( t.q.s * p.x + t.q.c * p.y ) + t.p.y;
+
+	return B2_LITERAL( b2Vec2 ){ x, y };
+}
+
+/// Inverse transform a point (e.g. world space to local space)
+B2_INLINE b2Vec2 b2InvTransformPoint( b2Transform t, const b2Vec2 p )
+{
+	float vx = p.x - t.p.x;
+	float vy = p.y - t.p.y;
+	return B2_LITERAL( b2Vec2 ){ t.q.c * vx + t.q.s * vy, -t.q.s * vx + t.q.c * vy };
+}
+
+/// Multiply two transforms. If the result is applied to a point p local to frame B,
+/// the transform would first convert p to a point local to frame A, then into a point
+/// in the world frame.
+/// v2 = A.q.Rot(B.q.Rot(v1) + B.p) + A.p
+///    = (A.q * B.q).Rot(v1) + A.q.Rot(B.p) + A.p
+B2_INLINE b2Transform b2MulTransforms( b2Transform A, b2Transform B )
+{
+	b2Transform C;
+	C.q = b2MulRot( A.q, B.q );
+	C.p = b2Add( b2RotateVector( A.q, B.p ), A.p );
+	return C;
+}
+
+/// Creates a transform that converts a local point in frame B to a local point in frame A.
+/// v2 = A.q' * (B.q * v1 + B.p - A.p)
+///    = A.q' * B.q * v1 + A.q' * (B.p - A.p)
+B2_INLINE b2Transform b2InvMulTransforms( b2Transform A, b2Transform B )
+{
+	b2Transform C;
+	C.q = b2InvMulRot( A.q, B.q );
+	C.p = b2InvRotateVector( A.q, b2Sub( B.p, A.p ) );
+	return C;
+}
+
+/// Multiply a 2-by-2 matrix times a 2D vector
+B2_INLINE b2Vec2 b2MulMV( b2Mat22 A, b2Vec2 v )
+{
+	b2Vec2 u = {
+		A.cx.x * v.x + A.cy.x * v.y,
+		A.cx.y * v.x + A.cy.y * v.y,
+	};
+	return u;
+}
+
+/// Get the inverse of a 2-by-2 matrix
+B2_INLINE b2Mat22 b2GetInverse22( b2Mat22 A )
+{
+	float a = A.cx.x, b = A.cy.x, c = A.cx.y, d = A.cy.y;
+	float det = a * d - b * c;
+	if ( det != 0.0f )
+	{
+		det = 1.0f / det;
+	}
+
+	b2Mat22 B = {
+		{ det * d, -det * c },
+		{ -det * b, det * a },
+	};
+	return B;
+}
+
+/// Solve A * x = b, where b is a column vector. This is more efficient
+/// than computing the inverse in one-shot cases.
+B2_INLINE b2Vec2 b2Solve22( b2Mat22 A, b2Vec2 b )
+{
+	float a11 = A.cx.x, a12 = A.cy.x, a21 = A.cx.y, a22 = A.cy.y;
+	float det = a11 * a22 - a12 * a21;
+	if ( det != 0.0f )
+	{
+		det = 1.0f / det;
+	}
+	b2Vec2 x = { det * ( a22 * b.x - a12 * b.y ), det * ( a11 * b.y - a21 * b.x ) };
+	return x;
+}
+
+/// Test whether box a fully encloses box b. Coincident bounds count as contained.
+/// @return true when every bound of b lies within the matching bound of a
+B2_INLINE bool b2AABB_Contains( b2AABB a, b2AABB b )
+{
+	// b may not start before a on either axis ...
+	bool lowerInside = a.lowerBound.x <= b.lowerBound.x && a.lowerBound.y <= b.lowerBound.y;
+	// ... nor end after it
+	bool upperInside = b.upperBound.x <= a.upperBound.x && b.upperBound.y <= a.upperBound.y;
+	return lowerInside && upperInside;
+}
+
+/// Get the center of the AABB.
+B2_INLINE b2Vec2 b2AABB_Center( b2AABB a )
+{
+	b2Vec2 b = { 0.5f * ( a.lowerBound.x + a.upperBound.x ), 0.5f * ( a.lowerBound.y + a.upperBound.y ) };
+	return b;
+}
+
+/// Get the extents of the AABB (half-widths).
+B2_INLINE b2Vec2 b2AABB_Extents( b2AABB a )
+{
+	b2Vec2 b = { 0.5f * ( a.upperBound.x - a.lowerBound.x ), 0.5f * ( a.upperBound.y - a.lowerBound.y ) };
+	return b;
+}
+
+/// Union of two AABBs
+B2_INLINE b2AABB b2AABB_Union( b2AABB a, b2AABB b )
+{
+	b2AABB c;
+	c.lowerBound.x = b2MinFloat( a.lowerBound.x, b.lowerBound.x );
+	c.lowerBound.y = b2MinFloat( a.lowerBound.y, b.lowerBound.y );
+	c.upperBound.x = b2MaxFloat( a.upperBound.x, b.upperBound.x );
+	c.upperBound.y = b2MaxFloat( a.upperBound.y, b.upperBound.y );
+	return c;
+}
+
+/// Compute the bounding box of an array of circles
+B2_INLINE b2AABB b2MakeAABB( const b2Vec2* points, int count, float radius )
+{
+	B2_ASSERT( count > 0 );
+	b2AABB a = { points[0], points[0] };
+	for ( int i = 1; i < count; ++i )
+	{
+		a.lowerBound = b2Min( a.lowerBound, points[i] );
+		a.upperBound = b2Max( a.upperBound, points[i] );
+	}
+
+	b2Vec2 r = { radius, radius };
+	a.lowerBound = b2Sub( a.lowerBound, r );
+	a.upperBound = b2Add( a.upperBound, r );
+
+	return a;
+}
+
+/// Signed separation of a point from a plane
+B2_INLINE float b2PlaneSeparation( b2Plane plane, b2Vec2 point )
+{
+	return b2Dot( plane.normal, point ) - plane.offset;
+}
+
+/// Is this a valid number? Not NaN or infinity.
+B2_API bool b2IsValidFloat( float a );
+
+/// Is this a valid vector? Not NaN or infinity.
+B2_API bool b2IsValidVec2( b2Vec2 v );
+
+/// Is this a valid rotation? Not NaN or infinity. Is normalized.
+B2_API bool b2IsValidRotation( b2Rot q );
+
+/// Is this a valid bounding box? Not NaN or infinity. Upper bound greater than or equal to lower bound.
+B2_API bool b2IsValidAABB( b2AABB aabb );
+
+/// Is this a valid plane? Normal is a unit vector. Not NaN or infinity.
+B2_API bool b2IsValidPlane( b2Plane a );
+
+/// Box2D bases all length units on meters, but you may need different units for your game.
+/// You can set this value to use different units. This should be done at application startup
+/// and only modified once. Default value is 1.
+/// For example, if your game uses pixels for units you can use pixels for all length values
+/// sent to Box2D. There should be no extra cost. However, Box2D has some internal tolerances
+/// and thresholds that have been tuned for meters. By calling this function, Box2D is able
+/// to adjust those tolerances and thresholds to improve accuracy.
+/// A good rule of thumb is to pass the height of your player character to this function. So
+/// if your player character is 32 pixels high, then pass 32 to this function. Then you may
+/// confidently use pixels for all the length values sent to Box2D. All length values returned
+/// from Box2D will also be pixels because Box2D does not do any scaling internally.
+/// However, you are now on the hook for coming up with good values for gravity, density, and
+/// forces.
+/// @warning This must be modified before any calls to Box2D
+B2_API void b2SetLengthUnitsPerMeter( float lengthUnits );
+
+/// Get the current length units per meter.
+B2_API float b2GetLengthUnitsPerMeter( void );
+
+/**@}*/
+
+/**
+ * @defgroup math_cpp C++ Math
+ * @brief Math operator overloads for C++
+ *
+ * See math_functions.h for details.
+ * @{
+ */
+
+#ifdef __cplusplus
+
+/// Add one vector to another in place
+inline void operator+=( b2Vec2& a, b2Vec2 b )
+{
+	a.x += b.x;
+	a.y += b.y;
+}
+
+/// Subtract one vector from another in place
+inline void operator-=( b2Vec2& a, b2Vec2 b )
+{
+	a.x -= b.x;
+	a.y -= b.y;
+}
+
+/// Multiply a vector by a scalar in place
+inline void operator*=( b2Vec2& a, float b )
+{
+	a.x *= b;
+	a.y *= b;
+}
+
+/// Unary negate a vector
+inline b2Vec2 operator-( b2Vec2 a )
+{
+	return { -a.x, -a.y };
+}
+
+/// Binary vector addition
+inline b2Vec2 operator+( b2Vec2 a, b2Vec2 b )
+{
+	return { a.x + b.x, a.y + b.y };
+}
+
+/// Binary vector subtraction
+inline b2Vec2 operator-( b2Vec2 a, b2Vec2 b )
+{
+	return { a.x - b.x, a.y - b.y };
+}
+
+/// Binary scalar and vector multiplication
+inline b2Vec2 operator*( float a, b2Vec2 b )
+{
+	return { a * b.x, a * b.y };
+}
+
+/// Binary scalar and vector multiplication
+inline b2Vec2 operator*( b2Vec2 a, float b )
+{
+	return { a.x * b, a.y * b };
+}
+
+/// Binary vector equality
+inline bool operator==( b2Vec2 a, b2Vec2 b )
+{
+	return a.x == b.x && a.y == b.y;
+}
+
+/// Binary vector inequality
+inline bool operator!=( b2Vec2 a, b2Vec2 b )
+{
+	return a.x != b.x || a.y != b.y;
+}
+
+#endif
+
+/**@}*/
diff --git a/src/vendor/box2d/motor_joint.c b/src/vendor/box2d/motor_joint.c
new file mode 100644
index 0000000..d6610d6
--- /dev/null
+++ b/src/vendor/box2d/motor_joint.c
@@ -0,0 +1,283 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "body.h"
+#include "core.h"
+#include "joint.h"
+#include "solver.h"
+#include "solver_set.h"
+#include "world.h"
+
+// needed for dll export
+#include "box2d/box2d.h"
+
+void b2MotorJoint_SetLinearOffset( b2JointId jointId, b2Vec2 linearOffset )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	joint->motorJoint.linearOffset = linearOffset;
+}
+
+b2Vec2 b2MotorJoint_GetLinearOffset( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	return joint->motorJoint.linearOffset;
+}
+
+void b2MotorJoint_SetAngularOffset( b2JointId jointId, float angularOffset )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	joint->motorJoint.angularOffset = b2ClampFloat( angularOffset, -B2_PI, B2_PI );
+}
+
+float b2MotorJoint_GetAngularOffset( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	return joint->motorJoint.angularOffset;
+}
+
+void b2MotorJoint_SetMaxForce( b2JointId jointId, float maxForce )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	joint->motorJoint.maxForce = b2MaxFloat( 0.0f, maxForce );
+}
+
+float b2MotorJoint_GetMaxForce( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	return joint->motorJoint.maxForce;
+}
+
+void b2MotorJoint_SetMaxTorque( b2JointId jointId, float maxTorque )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	joint->motorJoint.maxTorque = b2MaxFloat( 0.0f, maxTorque );
+}
+
+float b2MotorJoint_GetMaxTorque( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	return joint->motorJoint.maxTorque;
+}
+
+void b2MotorJoint_SetCorrectionFactor( b2JointId jointId, float correctionFactor )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	joint->motorJoint.correctionFactor = b2ClampFloat( correctionFactor, 0.0f, 1.0f );
+}
+
+float b2MotorJoint_GetCorrectionFactor( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_motorJoint );
+	return joint->motorJoint.correctionFactor;
+}
+
+b2Vec2 b2GetMotorJointForce( b2World* world, b2JointSim* base )
+{
+	b2Vec2 force = b2MulSV( world->inv_h, base->motorJoint.linearImpulse );
+	return force;
+}
+
+float b2GetMotorJointTorque( b2World* world, b2JointSim* base )
+{
+	return world->inv_h * base->motorJoint.angularImpulse;
+}
+
+// Point-to-point constraint
+// C = p2 - p1
+// Cdot = v2 - v1
+//      = v2 + cross(w2, r2) - v1 - cross(w1, r1)
+// J = [-I -r1_skew I r2_skew ]
+// Identity used:
+// w k % (rx i + ry j) = w * (-ry i + rx j)
+
+// Angle constraint
+// C = angle2 - angle1 - referenceAngle
+// Cdot = w2 - w1
+// J = [0 0 -1 0 0 1]
+// K = invI1 + invI2
+
+void b2PrepareMotorJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_motorJoint );
+
+	// chase body id to the solver set where the body lives
+	int idA = base->bodyIdA;
+	int idB = base->bodyIdB;
+
+	b2World* world = context->world;
+
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, idA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, idB );
+
+	B2_ASSERT( bodyA->setIndex == b2_awakeSet || bodyB->setIndex == b2_awakeSet );
+
+	b2SolverSet* setA = b2SolverSetArray_Get( &world->solverSets, bodyA->setIndex );
+	b2SolverSet* setB = b2SolverSetArray_Get( &world->solverSets, bodyB->setIndex );
+
+	int localIndexA = bodyA->localIndex;
+	int localIndexB = bodyB->localIndex;
+
+	b2BodySim* bodySimA = b2BodySimArray_Get( &setA->bodySims, localIndexA );
+	b2BodySim* bodySimB = b2BodySimArray_Get( &setB->bodySims, localIndexB );
+
+	float mA = bodySimA->invMass;
+	float iA = bodySimA->invInertia;
+	float mB = bodySimB->invMass;
+	float iB = bodySimB->invInertia;
+
+	base->invMassA = mA;
+	base->invMassB = mB;
+	base->invIA = iA;
+	base->invIB = iB;
+
+	b2MotorJoint* joint = &base->motorJoint;
+	joint->indexA = bodyA->setIndex == b2_awakeSet ? localIndexA : B2_NULL_INDEX;
+	joint->indexB = bodyB->setIndex == b2_awakeSet ? localIndexB : B2_NULL_INDEX;
+
+	joint->anchorA = b2RotateVector( bodySimA->transform.q, b2Sub( base->localOriginAnchorA, bodySimA->localCenter ) );
+	joint->anchorB = b2RotateVector( bodySimB->transform.q, b2Sub( base->localOriginAnchorB, bodySimB->localCenter ) );
+	joint->deltaCenter = b2Sub( b2Sub( bodySimB->center, bodySimA->center ), joint->linearOffset );
+	joint->deltaAngle = b2RelativeAngle( bodySimB->transform.q, bodySimA->transform.q ) - joint->angularOffset;
+	joint->deltaAngle = b2UnwindAngle( joint->deltaAngle );
+
+	b2Vec2 rA = joint->anchorA;
+	b2Vec2 rB = joint->anchorB;
+
+	b2Mat22 K;
+	K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB;
+	K.cx.y = -rA.y * rA.x * iA - rB.y * rB.x * iB;
+	K.cy.x = K.cx.y;
+	K.cy.y = mA + mB + rA.x * rA.x * iA + rB.x * rB.x * iB;
+	joint->linearMass = b2GetInverse22( K );
+
+	float ka = iA + iB;
+	joint->angularMass = ka > 0.0f ? 1.0f / ka : 0.0f;
+
+	if ( context->enableWarmStarting == false )
+	{
+		joint->linearImpulse = b2Vec2_zero;
+		joint->angularImpulse = 0.0f;
+	}
+}
+
+void b2WarmStartMotorJoint( b2JointSim* base, b2StepContext* context )
+{
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	b2MotorJoint* joint = &base->motorJoint;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2BodyState* bodyA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* bodyB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 rA = b2RotateVector( bodyA->deltaRotation, joint->anchorA );
+	b2Vec2 rB = b2RotateVector( bodyB->deltaRotation, joint->anchorB );
+
+	bodyA->linearVelocity = b2MulSub( bodyA->linearVelocity, mA, joint->linearImpulse );
+	bodyA->angularVelocity -= iA * ( b2Cross( rA, joint->linearImpulse ) + joint->angularImpulse );
+	bodyB->linearVelocity = b2MulAdd( bodyB->linearVelocity, mB, joint->linearImpulse );
+	bodyB->angularVelocity += iB * ( b2Cross( rB, joint->linearImpulse ) + joint->angularImpulse );
+}
+
+void b2SolveMotorJoint( b2JointSim* base, b2StepContext* context, bool useBias )
+{
+	B2_UNUSED( useBias );
+	B2_ASSERT( base->type == b2_motorJoint );
+
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2MotorJoint* joint = &base->motorJoint;
+	b2BodyState* bodyA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* bodyB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 vA = bodyA->linearVelocity;
+	float wA = bodyA->angularVelocity;
+	b2Vec2 vB = bodyB->linearVelocity;
+	float wB = bodyB->angularVelocity;
+
+	// angular constraint
+	{
+		float angularSeperation = b2RelativeAngle( bodyB->deltaRotation, bodyA->deltaRotation ) + joint->deltaAngle;
+		angularSeperation = b2UnwindAngle( angularSeperation );
+
+		float angularBias = context->inv_h * joint->correctionFactor * angularSeperation;
+
+		float Cdot = wB - wA;
+		float impulse = -joint->angularMass * ( Cdot + angularBias );
+
+		float oldImpulse = joint->angularImpulse;
+		float maxImpulse = context->h * joint->maxTorque;
+		joint->angularImpulse = b2ClampFloat( joint->angularImpulse + impulse, -maxImpulse, maxImpulse );
+		impulse = joint->angularImpulse - oldImpulse;
+
+		wA -= iA * impulse;
+		wB += iB * impulse;
+	}
+
+	// linear constraint
+	{
+		b2Vec2 rA = b2RotateVector( bodyA->deltaRotation, joint->anchorA );
+		b2Vec2 rB = b2RotateVector( bodyB->deltaRotation, joint->anchorB );
+
+		b2Vec2 ds = b2Add( b2Sub( bodyB->deltaPosition, bodyA->deltaPosition ), b2Sub( rB, rA ) );
+		b2Vec2 linearSeparation = b2Add( joint->deltaCenter, ds );
+		b2Vec2 linearBias = b2MulSV( context->inv_h * joint->correctionFactor, linearSeparation );
+
+		b2Vec2 Cdot = b2Sub( b2Add( vB, b2CrossSV( wB, rB ) ), b2Add( vA, b2CrossSV( wA, rA ) ) );
+		b2Vec2 b = b2MulMV( joint->linearMass, b2Add( Cdot, linearBias ) );
+		b2Vec2 impulse = { -b.x, -b.y };
+
+		b2Vec2 oldImpulse = joint->linearImpulse;
+		float maxImpulse = context->h * joint->maxForce;
+		joint->linearImpulse = b2Add( joint->linearImpulse, impulse );
+
+		if ( b2LengthSquared( joint->linearImpulse ) > maxImpulse * maxImpulse )
+		{
+			joint->linearImpulse = b2Normalize( joint->linearImpulse );
+			joint->linearImpulse.x *= maxImpulse;
+			joint->linearImpulse.y *= maxImpulse;
+		}
+
+		impulse = b2Sub( joint->linearImpulse, oldImpulse );
+
+		vA = b2MulSub( vA, mA, impulse );
+		wA -= iA * b2Cross( rA, impulse );
+		vB = b2MulAdd( vB, mB, impulse );
+		wB += iB * b2Cross( rB, impulse );
+	}
+
+	bodyA->linearVelocity = vA;
+	bodyA->angularVelocity = wA;
+	bodyB->linearVelocity = vB;
+	bodyB->angularVelocity = wB;
+}
+
+#if 0
+void b2DumpMotorJoint()
+{
+	int32 indexA = m_bodyA->m_islandIndex;
+	int32 indexB = m_bodyB->m_islandIndex;
+
+	b2Dump("  b2MotorJointDef jd;\n");
+	b2Dump("  jd.bodyA = sims[%d];\n", indexA);
+	b2Dump("  jd.bodyB = sims[%d];\n", indexB);
+	b2Dump("  jd.collideConnected = bool(%d);\n", m_collideConnected);
+	b2Dump("  jd.localAnchorA.Set(%.9g, %.9g);\n", m_localAnchorA.x, m_localAnchorA.y);
+	b2Dump("  jd.localAnchorB.Set(%.9g, %.9g);\n", m_localAnchorB.x, m_localAnchorB.y);
+	b2Dump("  jd.referenceAngle = %.9g;\n", m_referenceAngle);
+	b2Dump("  jd.stiffness = %.9g;\n", m_stiffness);
+	b2Dump("  jd.damping = %.9g;\n", m_damping);
+	b2Dump("  joints[%d] = m_world->CreateJoint(&jd);\n", m_index);
+}
+#endif
diff --git a/src/vendor/box2d/mouse_joint.c b/src/vendor/box2d/mouse_joint.c
new file mode 100644
index 0000000..6042fd3
--- /dev/null
+++ b/src/vendor/box2d/mouse_joint.c
@@ -0,0 +1,214 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "body.h"
+#include "core.h"
+#include "joint.h"
+#include "solver.h"
+#include "solver_set.h"
+#include "world.h"
+
+// needed for dll export
+#include "box2d/box2d.h"
+
+void b2MouseJoint_SetTarget( b2JointId jointId, b2Vec2 target )
+{
+	// Check the target with b2IsValidVec2, then store it on the type-checked mouse joint.
+	B2_ASSERT( b2IsValidVec2( target ) );
+	b2GetJointSimCheckType( jointId, b2_mouseJoint )->mouseJoint.targetA = target;
+}
+
+b2Vec2 b2MouseJoint_GetTarget( b2JointId jointId )
+{
+	// Return the target currently stored on the type-checked mouse joint.
+	return b2GetJointSimCheckType( jointId, b2_mouseJoint )->mouseJoint.targetA;
+}
+
+void b2MouseJoint_SetSpringHertz( b2JointId jointId, float hertz )
+{
+	// hertz must pass b2IsValidFloat and be non-negative.
+	B2_ASSERT( b2IsValidFloat( hertz ) && hertz >= 0.0f );
+	b2GetJointSimCheckType( jointId, b2_mouseJoint )->mouseJoint.hertz = hertz;
+}
+
+float b2MouseJoint_GetSpringHertz( b2JointId jointId )
+{
+	// Return the spring hertz stored on the type-checked mouse joint.
+	return b2GetJointSimCheckType( jointId, b2_mouseJoint )->mouseJoint.hertz;
+}
+
+void b2MouseJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio )
+{
+	// dampingRatio must pass b2IsValidFloat and be non-negative.
+	B2_ASSERT( b2IsValidFloat( dampingRatio ) && dampingRatio >= 0.0f );
+	b2GetJointSimCheckType( jointId, b2_mouseJoint )->mouseJoint.dampingRatio = dampingRatio;
+}
+
+float b2MouseJoint_GetSpringDampingRatio( b2JointId jointId )
+{
+	// Return the spring damping ratio stored on the type-checked mouse joint.
+	return b2GetJointSimCheckType( jointId, b2_mouseJoint )->mouseJoint.dampingRatio;
+}
+
+void b2MouseJoint_SetMaxForce( b2JointId jointId, float maxForce )
+{
+	// maxForce must pass b2IsValidFloat and be non-negative.
+	B2_ASSERT( b2IsValidFloat( maxForce ) && maxForce >= 0.0f );
+	b2GetJointSimCheckType( jointId, b2_mouseJoint )->mouseJoint.maxForce = maxForce;
+}
+
+float b2MouseJoint_GetMaxForce( b2JointId jointId )
+{
+	// Return the maximum force stored on the type-checked mouse joint.
+	return b2GetJointSimCheckType( jointId, b2_mouseJoint )->mouseJoint.maxForce;
+}
+
+b2Vec2 b2GetMouseJointForce( b2World* world, b2JointSim* base )
+{
+	// Scale the accumulated linear impulse by the world's inv_h to express it as a force.
+	return b2MulSV( world->inv_h, base->mouseJoint.linearImpulse );
+}
+
+float b2GetMouseJointTorque( b2World* world, b2JointSim* base )
+{
+	return base->mouseJoint.angularImpulse * world->inv_h; // accumulated angular impulse scaled by inv_h
+}
+
+void b2PrepareMouseJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_mouseJoint );
+
+	// Per-step setup. Chase the body id to the solver set where the body lives.
+	int idB = base->bodyIdB;
+
+	b2World* world = context->world;
+
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, idB );
+
+	B2_ASSERT( bodyB->setIndex == b2_awakeSet );
+	b2SolverSet* setB = b2SolverSetArray_Get( &world->solverSets, bodyB->setIndex );
+
+	int localIndexB = bodyB->localIndex;
+	b2BodySim* bodySimB = b2BodySimArray_Get( &setB->bodySims, localIndexB );
+
+	base->invMassB = bodySimB->invMass;
+	base->invIB = bodySimB->invInertia;
+
+	b2MouseJoint* joint = &base->mouseJoint;
+	joint->indexB = bodyB->setIndex == b2_awakeSet ? localIndexB : B2_NULL_INDEX; // later used unchecked to index context->states
+	joint->anchorB = b2RotateVector( bodySimB->transform.q, b2Sub( base->localOriginAnchorB, bodySimB->localCenter ) );
+
+	joint->linearSoftness = b2MakeSoft( joint->hertz, joint->dampingRatio, context->h );
+
+	float angularHertz = 0.5f; // angular softness uses these fixed values, not the user-set hertz/dampingRatio
+	float angularDampingRatio = 0.1f;
+	joint->angularSoftness = b2MakeSoft( angularHertz, angularDampingRatio, context->h );
+
+	b2Vec2 rB = joint->anchorB;
+	float mB = bodySimB->invMass;
+	float iB = bodySimB->invInertia;
+
+	// K = [(1/m1 + 1/m2) * eye(2) - skew(r1) * invI1 * skew(r1) - skew(r2) * invI2 * skew(r2)]
+	//   = [1/m1+1/m2     0    ] + invI1 * [r1.y*r1.y -r1.x*r1.y] + invI2 * [r2.y*r2.y -r2.x*r2.y]
+	//     [    0     1/m1+1/m2]           [-r1.x*r1.y r1.x*r1.x]           [-r2.x*r2.y r2.x*r2.x]
+	b2Mat22 K; // only body B's terms (mB, iB, rB) are filled in below
+	K.cx.x = mB + iB * rB.y * rB.y;
+	K.cx.y = -iB * rB.x * rB.y;
+	K.cy.x = K.cx.y;
+	K.cy.y = mB + iB * rB.x * rB.x;
+
+	joint->linearMass = b2GetInverse22( K );
+	joint->deltaCenter = b2Sub( bodySimB->center, joint->targetA ); // body center relative to the target
+
+	if ( context->enableWarmStarting == false )
+	{
+		joint->linearImpulse = b2Vec2_zero; // without warm starting the stored impulses are discarded
+		joint->angularImpulse = 0.0f;
+	}
+}
+
+void b2WarmStartMouseJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_mouseJoint );
+
+	// Apply the joint's stored linear and angular impulses to body B's solver state.
+	// The state is looked up in context->states with the index cached on the joint.
+	b2MouseJoint* mouse = &base->mouseJoint;
+	b2BodyState* state = context->states + mouse->indexB;
+
+	float invMass = base->invMassB;
+	float invInertia = base->invIB;
+
+	// Anchor arm, rotated by the delta rotation held in the body state
+	b2Vec2 arm = b2RotateVector( state->deltaRotation, mouse->anchorB );
+
+	// Cross product of the arm with the linear impulse, plus the stored angular impulse
+	float angularImpulse = b2Cross( arm, mouse->linearImpulse ) + mouse->angularImpulse;
+
+	// Write the updated velocities back to the body state
+	state->linearVelocity = b2MulAdd( state->linearVelocity, invMass, mouse->linearImpulse );
+	state->angularVelocity += invInertia * angularImpulse;
+}
+
+void b2SolveMouseJoint( b2JointSim* base, b2StepContext* context )
+{
+	float mB = base->invMassB;
+	float iB = base->invIB;
+
+	b2MouseJoint* joint = &base->mouseJoint;
+	b2BodyState* stateB = context->states + joint->indexB; // indexB was cached by b2PrepareMouseJoint
+
+	b2Vec2 vB = stateB->linearVelocity;
+	float wB = stateB->angularVelocity;
+
+	// Softness with no bias to reduce rotation speed
+	{
+		float massScale = joint->angularSoftness.massScale;
+		float impulseScale = joint->angularSoftness.impulseScale;
+
+		float impulse = iB > 0.0f ? -wB / iB : 0.0f; // impulse that would cancel wB; zero when iB is not positive
+		impulse = massScale * impulse - impulseScale * joint->angularImpulse;
+		joint->angularImpulse += impulse;
+
+		wB += iB * impulse;
+	}
+
+	float maxImpulse = joint->maxForce * context->h; // cap on the magnitude of the accumulated linear impulse
+
+	{
+		b2Rot dqB = stateB->deltaRotation;
+		b2Vec2 rB = b2RotateVector( dqB, joint->anchorB );
+		b2Vec2 Cdot = b2Add( vB, b2CrossSV( wB, rB ) ); // velocity of the anchor point
+
+		b2Vec2 separation = b2Add( b2Add( stateB->deltaPosition, rB ), joint->deltaCenter ); // deltaCenter = center - targetA from prepare
+		b2Vec2 bias = b2MulSV( joint->linearSoftness.biasRate, separation );
+
+		float massScale = joint->linearSoftness.massScale;
+		float impulseScale = joint->linearSoftness.impulseScale;
+
+		b2Vec2 b = b2MulMV( joint->linearMass, b2Add( Cdot, bias ) );
+
+		b2Vec2 impulse;
+		impulse.x = -massScale * b.x - impulseScale * joint->linearImpulse.x;
+		impulse.y = -massScale * b.y - impulseScale * joint->linearImpulse.y;
+
+		b2Vec2 oldImpulse = joint->linearImpulse;
+		joint->linearImpulse.x += impulse.x;
+		joint->linearImpulse.y += impulse.y;
+
+		float mag = b2Length( joint->linearImpulse );
+		if ( mag > maxImpulse )
+		{
+			joint->linearImpulse = b2MulSV( maxImpulse, b2Normalize( joint->linearImpulse ) ); // rescale to length maxImpulse
+		}
+
+		impulse.x = joint->linearImpulse.x - oldImpulse.x; // apply only the change in the clamped accumulated impulse
+		impulse.y = joint->linearImpulse.y - oldImpulse.y;
+
+		vB = b2MulAdd( vB, mB, impulse );
+		wB += iB * b2Cross( rB, impulse );
+	}
+
+	stateB->linearVelocity = vB;
+	stateB->angularVelocity = wB;
+}
diff --git a/src/vendor/box2d/mover.c b/src/vendor/box2d/mover.c
new file mode 100644
index 0000000..10644b0
--- /dev/null
+++ b/src/vendor/box2d/mover.c
@@ -0,0 +1,73 @@
+// SPDX-FileCopyrightText: 2025 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "constants.h"
+
+#include "box2d/collision.h"
+
+b2PlaneSolverResult b2SolvePlanes( b2Vec2 position, b2CollisionPlane* planes, int count )
+{
+	for ( int i = 0; i < count; ++i )
+	{
+		planes[i].push = 0.0f; // every solve starts with no accumulated push
+	}
+
+	b2Vec2 delta = b2Vec2_zero; // accumulated displacement; added to position in the result
+	float tolerance = B2_LINEAR_SLOP;
+
+	int iteration;
+	for ( iteration = 0; iteration < 20; ++iteration ) // at most 20 passes over all planes
+	{
+		float totalPush = 0.0f;
+		for ( int planeIndex = 0; planeIndex < count; ++planeIndex )
+		{
+			b2CollisionPlane* plane = planes + planeIndex;
+
+			// Add slop to prevent jitter
+			float separation = b2PlaneSeparation( plane->plane, delta ) + B2_LINEAR_SLOP;
+			// Planes with positive separation are not skipped: their push is then
+			// negative, and the clamp below uses it to take back push accumulated
+			// earlier on this plane (never below zero, never above pushLimit).
+			// An early-out on separation > 0 would prevent that.
+
+			float push = -separation;
+
+			// Clamp accumulated push
+			float accumulatedPush = plane->push;
+			plane->push = b2ClampFloat( plane->push + push, 0.0f, plane->pushLimit );
+			push = plane->push - accumulatedPush; // only the change in accumulated push moves delta
+			delta = b2MulAdd( delta, push, plane->plane.normal );
+
+			// Sum the magnitude of the pushes applied in this pass for the convergence test
+			totalPush += b2AbsFloat( push );
+		}
+
+		if ( totalPush < tolerance )
+		{
+			break;
+		}
+	}
+
+	return (b2PlaneSolverResult){
+		.position = b2Add( delta, position ),
+		.iterationCount = iteration, // index of the pass that converged, or 20 if none did
+	};
+}
+
+b2Vec2 b2ClipVector( b2Vec2 vector, const b2CollisionPlane* planes, int count )
+{
+	// Planes with zero push or with clipVelocity disabled leave the vector untouched.
+	b2Vec2 clipped = vector;
+	for ( int i = 0; i < count; ++i )
+	{
+		const b2CollisionPlane* plane = &planes[i];
+		if ( plane->push != 0.0f && plane->clipVelocity != false )
+		{
+			// Remove only a negative component along the normal; a positive one is kept.
+			b2Vec2 normal = plane->plane.normal;
+			float intoPlane = b2MinFloat( 0.0f, b2Dot( clipped, normal ) );
+			clipped = b2MulSub( clipped, intoPlane, normal );
+		}
+	}
+	return clipped;
+}
diff --git a/src/vendor/box2d/prismatic_joint.c b/src/vendor/box2d/prismatic_joint.c
new file mode 100644
index 0000000..43cb45b
--- /dev/null
+++ b/src/vendor/box2d/prismatic_joint.c
@@ -0,0 +1,654 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "body.h"
+#include "core.h"
+#include "joint.h"
+#include "solver.h"
+#include "solver_set.h"
+#include "world.h"
+
+// needed for dll export
+#include "box2d/box2d.h"
+
+#include <stdio.h>
+
+void b2PrismaticJoint_EnableSpring( b2JointId jointId, bool enableSpring )
+{
+	b2PrismaticJoint* prismatic = &b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint;
+	if ( prismatic->enableSpring != enableSpring )
+	{
+		prismatic->enableSpring = enableSpring;
+		prismatic->springImpulse = 0.0f; // a state change discards the accumulated spring impulse
+	}
+}
+
+bool b2PrismaticJoint_IsSpringEnabled( b2JointId jointId )
+{
+	// Report the spring flag stored on the type-checked prismatic joint.
+	return b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.enableSpring;
+}
+
+void b2PrismaticJoint_SetSpringHertz( b2JointId jointId, float hertz )
+{
+	B2_ASSERT( b2IsValidFloat( hertz ) && hertz >= 0.0f ); // same validation as b2MouseJoint_SetSpringHertz
+	b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.hertz = hertz;
+}
+
+float b2PrismaticJoint_GetSpringHertz( b2JointId jointId )
+{
+	// Return the spring hertz stored on the type-checked prismatic joint.
+	return b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.hertz;
+}
+
+void b2PrismaticJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio )
+{
+	B2_ASSERT( b2IsValidFloat( dampingRatio ) && dampingRatio >= 0.0f ); // same validation as the mouse joint setter
+	b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.dampingRatio = dampingRatio;
+}
+
+float b2PrismaticJoint_GetSpringDampingRatio( b2JointId jointId )
+{
+	// Return the spring damping ratio stored on the type-checked prismatic joint.
+	return b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.dampingRatio;
+}
+
+void b2PrismaticJoint_EnableLimit( b2JointId jointId, bool enableLimit )
+{
+	b2PrismaticJoint* prismatic = &b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint;
+	if ( prismatic->enableLimit != enableLimit )
+	{
+		prismatic->enableLimit = enableLimit;
+		prismatic->lowerImpulse = 0.0f; // a state change discards both accumulated limit impulses
+		prismatic->upperImpulse = 0.0f;
+	}
+}
+
+bool b2PrismaticJoint_IsLimitEnabled( b2JointId jointId )
+{
+	// Report the limit flag stored on the type-checked prismatic joint.
+	return b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.enableLimit;
+}
+
+float b2PrismaticJoint_GetLowerLimit( b2JointId jointId )
+{
+	// Return the stored lower translation limit of the type-checked prismatic joint.
+	return b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.lowerTranslation;
+}
+
+float b2PrismaticJoint_GetUpperLimit( b2JointId jointId )
+{
+	// Return the stored upper translation limit of the type-checked prismatic joint.
+	return b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.upperTranslation;
+}
+
+void b2PrismaticJoint_SetLimits( b2JointId jointId, float lower, float upper )
+{
+	b2PrismaticJoint* p = &b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint;
+	if ( b2MinFloat( lower, upper ) != p->lowerTranslation || b2MaxFloat( lower, upper ) != p->upperTranslation )
+	{
+		p->lowerTranslation = b2MinFloat( lower, upper ); // swapped arguments are accepted and stored in order
+		p->upperTranslation = b2MaxFloat( lower, upper );
+		p->lowerImpulse = 0.0f; // reset only when the ordered range really changed, so repeated calls keep the impulses
+		p->upperImpulse = 0.0f;
+	}
+}
+
+void b2PrismaticJoint_EnableMotor( b2JointId jointId, bool enableMotor )
+{
+	b2PrismaticJoint* prismatic = &b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint;
+	if ( prismatic->enableMotor != enableMotor )
+	{
+		prismatic->enableMotor = enableMotor;
+		prismatic->motorImpulse = 0.0f; // a state change discards the accumulated motor impulse
+	}
+}
+
+bool b2PrismaticJoint_IsMotorEnabled( b2JointId jointId )
+{
+	// Report the motor flag stored on the type-checked prismatic joint.
+	return b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.enableMotor;
+}
+
+void b2PrismaticJoint_SetMotorSpeed( b2JointId jointId, float motorSpeed )
+{
+	// Store the motor speed on the type-checked prismatic joint; the value is not validated here.
+	b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.motorSpeed = motorSpeed;
+}
+
+float b2PrismaticJoint_GetMotorSpeed( b2JointId jointId )
+{
+	// Return the motor speed stored on the type-checked prismatic joint.
+	return b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.motorSpeed;
+}
+
+float b2PrismaticJoint_GetMotorForce( b2JointId jointId )
+{
+	float inv_h = b2GetWorld( jointId.world0 )->inv_h;
+	float motorImpulse = b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.motorImpulse;
+	return inv_h * motorImpulse; // force = inv_h * accumulated motor impulse
+}
+
+void b2PrismaticJoint_SetMaxMotorForce( b2JointId jointId, float force )
+{
+	B2_ASSERT( b2IsValidFloat( force ) && force >= 0.0f ); // a negative value would invert the solver's +/- clamp range
+	b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.maxMotorForce = force;
+}
+
+float b2PrismaticJoint_GetMaxMotorForce( b2JointId jointId )
+{
+	// Return the maximum motor force stored on the type-checked prismatic joint.
+	return b2GetJointSimCheckType( jointId, b2_prismaticJoint )->prismaticJoint.maxMotorForce;
+}
+
+float b2PrismaticJoint_GetTranslation( b2JointId jointId )
+{
+	// Offset from anchor A to anchor B, projected on the joint axis rotated with body A.
+	b2World* world = b2GetWorld( jointId.world0 );
+	b2JointSim* sim = b2GetJointSimCheckType( jointId, b2_prismaticJoint );
+	b2Transform xfA = b2GetBodyTransform( world, sim->bodyIdA );
+	b2Transform xfB = b2GetBodyTransform( world, sim->bodyIdB );
+
+	// Anchors transformed by their body transforms, axis rotated by body A's rotation
+	b2Vec2 anchorA = b2TransformPoint( xfA, sim->localOriginAnchorA );
+	b2Vec2 anchorB = b2TransformPoint( xfB, sim->localOriginAnchorB );
+	b2Vec2 axis = b2RotateVector( xfA.q, sim->prismaticJoint.localAxisA );
+
+	return b2Dot( b2Sub( anchorB, anchorA ), axis );
+}
+
+float b2PrismaticJoint_GetSpeed(b2JointId jointId)
+{
+	b2World* world = b2GetWorld( jointId.world0 );
+	b2Joint* joint = b2GetJointFullId( world, jointId );
+	B2_ASSERT( joint->type == b2_prismaticJoint );
+	b2JointSim* jointSim = b2GetJointSim( world, joint );
+	B2_ASSERT( jointSim->type == b2_prismaticJoint );
+
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, jointSim->bodyIdA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, jointSim->bodyIdB );
+	b2BodySim* bodySimA = b2GetBodySim( world, bodyA );
+	b2BodySim* bodySimB = b2GetBodySim( world, bodyB );
+	b2BodyState* bodyStateA = b2GetBodyState( world, bodyA ); // may be NULL; checked where the velocities are read
+	b2BodyState* bodyStateB = b2GetBodyState( world, bodyB );
+
+	b2Transform transformA = bodySimA->transform;
+	b2Transform transformB = bodySimB->transform;
+
+	b2PrismaticJoint* prismatic = &jointSim->prismaticJoint;
+	b2Vec2 axisA = b2RotateVector( transformA.q, prismatic->localAxisA );
+	b2Vec2 cA = bodySimA->center;
+	b2Vec2 cB = bodySimB->center;
+	b2Vec2 rA = b2RotateVector( transformA.q, b2Sub( jointSim->localOriginAnchorA, bodySimA->localCenter ) ); // anchor arm from body center
+	b2Vec2 rB = b2RotateVector( transformB.q, b2Sub( jointSim->localOriginAnchorB, bodySimB->localCenter ) );
+
+	b2Vec2 d = b2Add(b2Sub(cB, cA), b2Sub( rB, rA )); // (cB + rB) - (cA + rA): anchor B minus anchor A
+
+	b2Vec2 vA = bodyStateA ? bodyStateA->linearVelocity : b2Vec2_zero; // a NULL body state counts as zero velocity
+	b2Vec2 vB = bodyStateB ? bodyStateB->linearVelocity : b2Vec2_zero;
+	float wA = bodyStateA ? bodyStateA->angularVelocity : 0.0f;
+	float wB = bodyStateB ? bodyStateB->angularVelocity : 0.0f;
+
+	b2Vec2 vRel = b2Sub( b2Add( vB, b2CrossSV( wB, rB ) ), b2Add( vA, b2CrossSV( wA, rA ) ) ); // anchor B velocity minus anchor A velocity
+	float speed = b2Dot( d, b2CrossSV( wA, axisA ) ) + b2Dot( axisA, vRel ); // axis-rotation term plus relative anchor velocity along the axis
+	return speed;
+}
+
+b2Vec2 b2GetPrismaticJointForce( b2World* world, b2JointSim* base )
+{
+	int idA = base->bodyIdA;
+	b2Transform transformA = b2GetBodyTransform( world, idA );
+
+	b2PrismaticJoint* joint = &base->prismaticJoint;
+
+	b2Vec2 axisA = b2RotateVector( transformA.q, joint->localAxisA ); // joint axis rotated by body A's current rotation
+	b2Vec2 perpA = b2LeftPerp( axisA );
+
+	float inv_h = world->inv_h;
+	float perpForce = inv_h * joint->impulse.x; // impulse.x is the perpendicular component of the block impulse
+	float axialForce = inv_h * ( joint->motorImpulse + joint->lowerImpulse - joint->upperImpulse ); // NOTE(review): springImpulse is left out, unlike the axial impulse in b2WarmStartPrismaticJoint - confirm intended
+
+	b2Vec2 force = b2Add( b2MulSV( perpForce, perpA ), b2MulSV( axialForce, axisA ) );
+	return force;
+}
+
+float b2GetPrismaticJointTorque( b2World* world, b2JointSim* base )
+{
+	return base->prismaticJoint.impulse.y * world->inv_h; // impulse.y is the angular component of the block impulse
+}
+
+// Linear constraint (point-to-line)
+// d = p2 - p1 = x2 + r2 - x1 - r1
+// C = dot(perp, d)
+// Cdot = dot(d, cross(w1, perp)) + dot(perp, v2 + cross(w2, r2) - v1 - cross(w1, r1))
+//      = -dot(perp, v1) - dot(cross(d + r1, perp), w1) + dot(perp, v2) + dot(cross(r2, perp), w2)
+// J = [-perp, -cross(d + r1, perp), perp, cross(r2,perp)]
+//
+// Angular constraint
+// C = a2 - a1 + a_initial
+// Cdot = w2 - w1
+// J = [0 0 -1 0 0 1]
+//
+// K = J * invM * JT
+//
+// J = [-a -s1 a s2]
+//     [0  -1  0  1]
+// a = perp
+// s1 = cross(d + r1, a) = cross(p2 - x1, a)
+// s2 = cross(r2, a) = cross(p2 - x2, a)
+
+// Motor/Limit linear constraint
+// C = dot(ax1, d)
+// Cdot = -dot(ax1, v1) - dot(cross(d + r1, ax1), w1) + dot(ax1, v2) + dot(cross(r2, ax1), w2)
+// J = [-ax1 -cross(d+r1,ax1) ax1 cross(r2,ax1)]
+
+// Predictive limit is applied even when the limit is not active.
+// Prevents a constraint speed that can lead to a constraint error in one time step.
+// Want C2 = C1 + h * Cdot >= 0
+// Or:
+// Cdot + C1/h >= 0
+// I do not apply a negative constraint error because that is handled in position correction.
+// So:
+// Cdot + max(C1, 0)/h >= 0
+
+// Block Solver
+// We develop a block solver that includes the angular and linear constraints. This makes the limit stiffer.
+//
+// The Jacobian has 2 rows:
+// J = [-uT -s1 uT s2] // linear
+//     [0   -1   0  1] // angular
+//
+// u = perp
+// s1 = cross(d + r1, u), s2 = cross(r2, u)
+// a1 = cross(d + r1, v), a2 = cross(r2, v)
+
+void b2PreparePrismaticJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_prismaticJoint );
+
+	// Per-step setup. Chase the body ids to the solver sets where the bodies live.
+	int idA = base->bodyIdA;
+	int idB = base->bodyIdB;
+
+	b2World* world = context->world;
+
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, idA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, idB );
+
+	B2_ASSERT( bodyA->setIndex == b2_awakeSet || bodyB->setIndex == b2_awakeSet ); // at least one body must be in the awake set
+	b2SolverSet* setA = b2SolverSetArray_Get( &world->solverSets, bodyA->setIndex );
+	b2SolverSet* setB = b2SolverSetArray_Get( &world->solverSets, bodyB->setIndex );
+
+	int localIndexA = bodyA->localIndex;
+	int localIndexB = bodyB->localIndex;
+
+	b2BodySim* bodySimA = b2BodySimArray_Get( &setA->bodySims, localIndexA );
+	b2BodySim* bodySimB = b2BodySimArray_Get( &setB->bodySims, localIndexB );
+
+	float mA = bodySimA->invMass;
+	float iA = bodySimA->invInertia;
+	float mB = bodySimB->invMass;
+	float iB = bodySimB->invInertia;
+
+	base->invMassA = mA; // cached for the warm start and solve stages
+	base->invMassB = mB;
+	base->invIA = iA;
+	base->invIB = iB;
+
+	b2PrismaticJoint* joint = &base->prismaticJoint;
+	joint->indexA = bodyA->setIndex == b2_awakeSet ? localIndexA : B2_NULL_INDEX; // B2_NULL_INDEX makes later stages use a dummy state
+	joint->indexB = bodyB->setIndex == b2_awakeSet ? localIndexB : B2_NULL_INDEX;
+
+	b2Rot qA = bodySimA->transform.q;
+	b2Rot qB = bodySimB->transform.q;
+
+	joint->anchorA = b2RotateVector( qA, b2Sub( base->localOriginAnchorA, bodySimA->localCenter ) ); // anchor arm from body center
+	joint->anchorB = b2RotateVector( qB, b2Sub( base->localOriginAnchorB, bodySimB->localCenter ) );
+	joint->axisA = b2RotateVector( qA, joint->localAxisA );
+	joint->deltaCenter = b2Sub( bodySimB->center, bodySimA->center );
+	joint->deltaAngle = b2RelativeAngle( qB, qA ) - joint->referenceAngle; // relative angle minus the reference angle
+	joint->deltaAngle = b2UnwindAngle( joint->deltaAngle );
+
+	b2Vec2 rA = joint->anchorA;
+	b2Vec2 rB = joint->anchorB;
+
+	b2Vec2 d = b2Add( joint->deltaCenter, b2Sub( rB, rA ) ); // anchor B minus anchor A
+	float a1 = b2Cross( b2Add( d, rA ), joint->axisA );
+	float a2 = b2Cross( rB, joint->axisA );
+
+	// effective mass along the axis
+	float k = mA + mB + iA * a1 * a1 + iB * a2 * a2;
+	joint->axialMass = k > 0.0f ? 1.0f / k : 0.0f; // zero when k is not positive
+
+	joint->springSoftness = b2MakeSoft( joint->hertz, joint->dampingRatio, context->h );
+
+	if ( context->enableWarmStarting == false )
+	{
+		joint->impulse = b2Vec2_zero; // without warm starting every stored impulse is discarded
+		joint->springImpulse = 0.0f;
+		joint->motorImpulse = 0.0f;
+		joint->lowerImpulse = 0.0f;
+		joint->upperImpulse = 0.0f;
+	}
+}
+
+void b2WarmStartPrismaticJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_prismaticJoint );
+
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2PrismaticJoint* joint = &base->prismaticJoint;
+
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA; // writes to the dummy are discarded
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA );
+	b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+	b2Vec2 d = b2Add( b2Add( b2Sub( stateB->deltaPosition, stateA->deltaPosition ), joint->deltaCenter ), b2Sub( rB, rA ) );
+	b2Vec2 axisA = b2RotateVector( stateA->deltaRotation, joint->axisA );
+
+	// impulse is applied at anchor point on body B
+	float a1 = b2Cross( b2Add( d, rA ), axisA );
+	float a2 = b2Cross( rB, axisA );
+	float axialImpulse = joint->springImpulse + joint->motorImpulse + joint->lowerImpulse - joint->upperImpulse; // upper limit impulse acts in the opposite direction
+
+	// perpendicular constraint
+	b2Vec2 perpA = b2LeftPerp( axisA );
+	float s1 = b2Cross( b2Add( d, rA ), perpA );
+	float s2 = b2Cross( rB, perpA );
+	float perpImpulse = joint->impulse.x;
+	float angleImpulse = joint->impulse.y;
+
+	b2Vec2 P = b2Add( b2MulSV( axialImpulse, axisA ), b2MulSV( perpImpulse, perpA ) ); // total linear impulse
+	float LA = axialImpulse * a1 + perpImpulse * s1 + angleImpulse; // total angular impulse on body A
+	float LB = axialImpulse * a2 + perpImpulse * s2 + angleImpulse; // total angular impulse on body B
+
+	stateA->linearVelocity = b2MulSub( stateA->linearVelocity, mA, P ); // body A gets the opposite impulse
+	stateA->angularVelocity -= iA * LA;
+	stateB->linearVelocity = b2MulAdd( stateB->linearVelocity, mB, P );
+	stateB->angularVelocity += iB * LB;
+}
+
+void b2SolvePrismaticJoint( b2JointSim* base, b2StepContext* context, bool useBias )
+{
+	B2_ASSERT( base->type == b2_prismaticJoint );
+
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2PrismaticJoint* joint = &base->prismaticJoint;
+
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 vA = stateA->linearVelocity; // local copies; written back to the states at the end
+	float wA = stateA->angularVelocity;
+	b2Vec2 vB = stateB->linearVelocity;
+	float wB = stateB->angularVelocity;
+
+	// current anchors
+	b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA );
+	b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+	b2Vec2 d = b2Add( b2Add( b2Sub( stateB->deltaPosition, stateA->deltaPosition ), joint->deltaCenter ), b2Sub( rB, rA ) );
+	b2Vec2 axisA = b2RotateVector( stateA->deltaRotation, joint->axisA );
+	float translation = b2Dot( axisA, d ); // joint translation along the axis
+
+	// These scalars are for torques generated by axial forces
+	float a1 = b2Cross( b2Add( d, rA ), axisA );
+	float a2 = b2Cross( rB, axisA );
+
+	// spring constraint
+	if ( joint->enableSpring )
+	{
+		// This is a real spring and should be applied even during relax
+		float C = translation;
+		float bias = joint->springSoftness.biasRate * C;
+		float massScale = joint->springSoftness.massScale;
+		float impulseScale = joint->springSoftness.impulseScale;
+
+		float Cdot = b2Dot( axisA, b2Sub( vB, vA ) ) + a2 * wB - a1 * wA;
+		float deltaImpulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * joint->springImpulse;
+		joint->springImpulse += deltaImpulse; // the spring impulse is accumulated without clamping
+
+		b2Vec2 P = b2MulSV( deltaImpulse, axisA );
+		float LA = deltaImpulse * a1;
+		float LB = deltaImpulse * a2;
+
+		vA = b2MulSub( vA, mA, P );
+		wA -= iA * LA;
+		vB = b2MulAdd( vB, mB, P );
+		wB += iB * LB;
+	}
+
+	// Solve motor constraint
+	if ( joint->enableMotor )
+	{
+		float Cdot = b2Dot( axisA, b2Sub( vB, vA ) ) + a2 * wB - a1 * wA;
+		float impulse = joint->axialMass * ( joint->motorSpeed - Cdot ); // drives the axial speed toward motorSpeed
+		float oldImpulse = joint->motorImpulse;
+		float maxImpulse = context->h * joint->maxMotorForce;
+		joint->motorImpulse = b2ClampFloat( joint->motorImpulse + impulse, -maxImpulse, maxImpulse ); // clamp the accumulated impulse
+		impulse = joint->motorImpulse - oldImpulse; // apply only the change
+
+		b2Vec2 P = b2MulSV( impulse, axisA );
+		float LA = impulse * a1;
+		float LB = impulse * a2;
+
+		vA = b2MulSub( vA, mA, P );
+		wA -= iA * LA;
+		vB = b2MulAdd( vB, mB, P );
+		wB += iB * LB;
+	}
+
+	if ( joint->enableLimit )
+	{
+		// Lower limit
+		{
+			float C = translation - joint->lowerTranslation;
+			float bias = 0.0f;
+			float massScale = 1.0f;
+			float impulseScale = 0.0f;
+
+			if ( C > 0.0f )
+			{
+				// speculation
+				bias = C * context->inv_h;
+			}
+			else if ( useBias )
+			{
+				bias = context->jointSoftness.biasRate * C;
+				massScale = context->jointSoftness.massScale;
+				impulseScale = context->jointSoftness.impulseScale;
+			}
+
+			float oldImpulse = joint->lowerImpulse;
+			float Cdot = b2Dot( axisA, b2Sub( vB, vA ) ) + a2 * wB - a1 * wA;
+			float impulse = -joint->axialMass * massScale * ( Cdot + bias ) - impulseScale * oldImpulse;
+			joint->lowerImpulse = b2MaxFloat( oldImpulse + impulse, 0.0f ); // accumulated limit impulse is kept non-negative
+			impulse = joint->lowerImpulse - oldImpulse;
+
+			b2Vec2 P = b2MulSV( impulse, axisA );
+			float LA = impulse * a1;
+			float LB = impulse * a2;
+
+			vA = b2MulSub( vA, mA, P );
+			wA -= iA * LA;
+			vB = b2MulAdd( vB, mB, P );
+			wB += iB * LB;
+		}
+
+		// Upper limit
+		// Note: signs are flipped to keep C positive when the constraint is satisfied.
+		// This also keeps the impulse positive when the limit is active.
+		{
+			// sign flipped
+			float C = joint->upperTranslation - translation;
+			float bias = 0.0f;
+			float massScale = 1.0f;
+			float impulseScale = 0.0f;
+
+			if ( C > 0.0f )
+			{
+				// speculation
+				bias = C * context->inv_h;
+			}
+			else if ( useBias )
+			{
+				bias = context->jointSoftness.biasRate * C;
+				massScale = context->jointSoftness.massScale;
+				impulseScale = context->jointSoftness.impulseScale;
+			}
+
+			float oldImpulse = joint->upperImpulse;
+			// sign flipped
+			float Cdot = b2Dot( axisA, b2Sub( vA, vB ) ) + a1 * wA - a2 * wB;
+			float impulse = -joint->axialMass * massScale * ( Cdot + bias ) - impulseScale * oldImpulse;
+			joint->upperImpulse = b2MaxFloat( oldImpulse + impulse, 0.0f ); // accumulated limit impulse is kept non-negative
+			impulse = joint->upperImpulse - oldImpulse;
+
+			b2Vec2 P = b2MulSV( impulse, axisA );
+			float LA = impulse * a1;
+			float LB = impulse * a2;
+
+			// sign flipped
+			vA = b2MulAdd( vA, mA, P );
+			wA += iA * LA;
+			vB = b2MulSub( vB, mB, P );
+			wB -= iB * LB;
+		}
+	}
+
+	// Solve the prismatic constraint in block form
+	{
+		b2Vec2 perpA = b2LeftPerp( axisA );
+
+		// These scalars are for torques generated by the perpendicular constraint force
+		float s1 = b2Cross( b2Add( d, rA ), perpA );
+		float s2 = b2Cross( rB, perpA );
+
+		b2Vec2 Cdot;
+		Cdot.x = b2Dot( perpA, b2Sub( vB, vA ) ) + s2 * wB - s1 * wA; // perpendicular (linear) row
+		Cdot.y = wB - wA; // angular row
+
+		b2Vec2 bias = b2Vec2_zero;
+		float massScale = 1.0f;
+		float impulseScale = 0.0f;
+		if ( useBias )
+		{
+			b2Vec2 C;
+			C.x = b2Dot( perpA, d );
+			C.y = b2RelativeAngle( stateB->deltaRotation, stateA->deltaRotation ) + joint->deltaAngle;
+
+			bias = b2MulSV( context->jointSoftness.biasRate, C );
+			massScale = context->jointSoftness.massScale;
+			impulseScale = context->jointSoftness.impulseScale;
+		}
+
+		float k11 = mA + mB + iA * s1 * s1 + iB * s2 * s2;
+		float k12 = iA * s1 + iB * s2;
+		float k22 = iA + iB;
+		if ( k22 == 0.0f )
+		{
+			// For bodies with fixed rotation.
+			k22 = 1.0f;
+		}
+
+		b2Mat22 K = { { k11, k12 }, { k12, k22 } };
+
+		b2Vec2 b = b2Solve22( K, b2Add( Cdot, bias ) );
+		b2Vec2 impulse;
+		impulse.x = -massScale * b.x - impulseScale * joint->impulse.x;
+		impulse.y = -massScale * b.y - impulseScale * joint->impulse.y;
+
+		joint->impulse.x += impulse.x; // block impulse is accumulated without clamping
+		joint->impulse.y += impulse.y;
+
+		b2Vec2 P = b2MulSV( impulse.x, perpA );
+		float LA = impulse.x * s1 + impulse.y;
+		float LB = impulse.x * s2 + impulse.y;
+
+		vA = b2MulSub( vA, mA, P );
+		wA -= iA * LA;
+		vB = b2MulAdd( vB, mB, P );
+		wB += iB * LB;
+	}
+
+	stateA->linearVelocity = vA;
+	stateA->angularVelocity = wA;
+	stateB->linearVelocity = vB;
+	stateB->angularVelocity = wB;
+}
+
+#if 0 // never compiled; uses C++ member syntax, and lowerAngle/upperAngle/maxMotorTorque are not the prismatic field names used above
+void b2PrismaticJoint::Dump()
+{
+	int32 indexA = joint->bodyA->joint->islandIndex;
+	int32 indexB = joint->bodyB->joint->islandIndex;
+
+	b2Dump("  b2PrismaticJointDef jd;\n");
+	b2Dump("  jd.bodyA = sims[%d];\n", indexA);
+	b2Dump("  jd.bodyB = sims[%d];\n", indexB);
+	b2Dump("  jd.collideConnected = bool(%d);\n", joint->collideConnected);
+	b2Dump("  jd.localAnchorA.Set(%.9g, %.9g);\n", joint->localAnchorA.x, joint->localAnchorA.y);
+	b2Dump("  jd.localAnchorB.Set(%.9g, %.9g);\n", joint->localAnchorB.x, joint->localAnchorB.y);
+	b2Dump("  jd.referenceAngle = %.9g;\n", joint->referenceAngle);
+	b2Dump("  jd.enableLimit = bool(%d);\n", joint->enableLimit);
+	b2Dump("  jd.lowerAngle = %.9g;\n", joint->lowerAngle);
+	b2Dump("  jd.upperAngle = %.9g;\n", joint->upperAngle);
+	b2Dump("  jd.enableMotor = bool(%d);\n", joint->enableMotor);
+	b2Dump("  jd.motorSpeed = %.9g;\n", joint->motorSpeed);
+	b2Dump("  jd.maxMotorTorque = %.9g;\n", joint->maxMotorTorque);
+	b2Dump("  joints[%d] = joint->world->CreateJoint(&jd);\n", joint->index);
+}
+#endif // 0
+
+void b2DrawPrismaticJoint( b2DebugDraw* draw, b2JointSim* base, b2Transform transformA, b2Transform transformB )
+{
+	B2_ASSERT( base->type == b2_prismaticJoint );
+
+	// Emits the joint through the draw callbacks: the anchor-to-anchor segment,
+	// then either the limit range or a fixed-length axis segment, then both anchor points.
+	b2PrismaticJoint* prismatic = &base->prismaticJoint;
+
+	// Anchors transformed by their body transforms; joint axis rotated with body A
+	b2Vec2 anchorA = b2TransformPoint( transformA, base->localOriginAnchorA );
+	b2Vec2 anchorB = b2TransformPoint( transformB, base->localOriginAnchorB );
+	b2Vec2 axis = b2RotateVector( transformA.q, prismatic->localAxisA );
+
+	// Dim gray segment joining the two anchors
+	draw->DrawSegmentFcn( anchorA, anchorB, b2_colorDimGray, draw->context );
+
+	if ( prismatic->enableLimit == false )
+	{
+		// No limit: gray segment reaching 1.0 along the axis to either side of anchor A
+		draw->DrawSegmentFcn( b2MulSub( anchorA, 1.0f, axis ), b2MulAdd( anchorA, 1.0f, axis ), b2_colorGray, draw->context );
+	}
+	else
+	{
+		// Limit enabled: gray segment between the limits, green tick at the lower, red tick at the upper
+		b2Vec2 lowerPoint = b2MulAdd( anchorA, prismatic->lowerTranslation, axis );
+		b2Vec2 upperPoint = b2MulAdd( anchorA, prismatic->upperTranslation, axis );
+		b2Vec2 perp = b2LeftPerp( axis );
+		draw->DrawSegmentFcn( lowerPoint, upperPoint, b2_colorGray, draw->context );
+		draw->DrawSegmentFcn( b2MulSub( lowerPoint, 0.1f, perp ), b2MulAdd( lowerPoint, 0.1f, perp ), b2_colorGreen, draw->context );
+		draw->DrawSegmentFcn( b2MulSub( upperPoint, 0.1f, perp ), b2MulAdd( upperPoint, 0.1f, perp ), b2_colorRed, draw->context );
+	}
+
+	// Anchor A in gray, anchor B in blue
+	draw->DrawPointFcn( anchorA, 5.0f, b2_colorGray, draw->context );
+	draw->DrawPointFcn( anchorB, 5.0f, b2_colorBlue, draw->context );
+}
diff --git a/src/vendor/box2d/revolute_joint.c b/src/vendor/box2d/revolute_joint.c
new file mode 100644
index 0000000..a8edeb0
--- /dev/null
+++ b/src/vendor/box2d/revolute_joint.c
@@ -0,0 +1,530 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#if defined( _MSC_VER ) && !defined( _CRT_SECURE_NO_WARNINGS )
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include "body.h"
+#include "core.h"
+#include "joint.h"
+#include "solver.h"
+#include "solver_set.h"
+#include "world.h"
+
+// needed for dll export
+#include "box2d/box2d.h"
+
+#include <stdio.h>
+
+void b2RevoluteJoint_EnableSpring( b2JointId jointId, bool enableSpring )
+{
+	b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	if ( revolute->enableSpring != enableSpring )
+	{
+		revolute->enableSpring = enableSpring;
+		revolute->springImpulse = 0.0f; // only an actual toggle resets the accumulated spring impulse
+	}
+}
+
+bool b2RevoluteJoint_IsSpringEnabled( b2JointId jointId )
+{
+	// Look up the joint sim for this id and report its spring flag
+	return b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint.enableSpring;
+}
+
+void b2RevoluteJoint_SetSpringHertz( b2JointId jointId, float hertz )
+{
+	// Stored as-is; b2PrepareRevoluteJoint passes it to b2MakeSoft
+	b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint.hertz = hertz;
+}
+
+float b2RevoluteJoint_GetSpringHertz( b2JointId jointId )
+{
+	const b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	return revolute->hertz; // spring frequency as stored on the joint
+}
+
+void b2RevoluteJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio )
+{
+	// Stored as-is; b2PrepareRevoluteJoint passes it to b2MakeSoft
+	b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint.dampingRatio = dampingRatio;
+}
+
+float b2RevoluteJoint_GetSpringDampingRatio( b2JointId jointId )
+{
+	const b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	return revolute->dampingRatio; // spring damping ratio as stored on the joint
+}
+
+float b2RevoluteJoint_GetAngle( b2JointId jointId )
+{
+	// Joint angle = rotation of body B relative to body A minus the reference angle,
+	// passed through b2UnwindAngle before it is returned.
+	b2World* world = b2GetWorld( jointId.world0 );
+	b2JointSim* sim = b2GetJointSimCheckType( jointId, b2_revoluteJoint );
+	b2Rot rotationA = b2GetBodyTransform( world, sim->bodyIdA ).q;
+	b2Rot rotationB = b2GetBodyTransform( world, sim->bodyIdB ).q;
+
+	return b2UnwindAngle( b2RelativeAngle( rotationB, rotationA ) - sim->revoluteJoint.referenceAngle );
+}
+
+void b2RevoluteJoint_EnableLimit( b2JointId jointId, bool enableLimit )
+{
+	b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	if ( revolute->enableLimit != enableLimit )
+	{
+		// The flag actually flipped: store it and discard both accumulated limit impulses
+		revolute->enableLimit = enableLimit;
+		revolute->lowerImpulse = revolute->upperImpulse = 0.0f;
+	}
+}
+
+bool b2RevoluteJoint_IsLimitEnabled( b2JointId jointId )
+{
+	// Look up the joint sim for this id and report whether the angle limit is on
+	return b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint.enableLimit;
+}
+
+float b2RevoluteJoint_GetLowerLimit( b2JointId jointId )
+{
+	const b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	return revolute->lowerAngle; // lower bound of the joint angle limit
+}
+
+float b2RevoluteJoint_GetUpperLimit( b2JointId jointId )
+{
+	const b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	return revolute->upperAngle; // upper bound of the joint angle limit
+}
+
+void b2RevoluteJoint_SetLimits( b2JointId jointId, float lower, float upper )
+{
+	b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	if ( !( lower == revolute->lowerAngle && upper == revolute->upperAngle ) )
+	{
+		// Store the pair in sorted order and discard both accumulated limit impulses
+		revolute->lowerAngle = b2MinFloat( lower, upper );
+		revolute->upperAngle = b2MaxFloat( lower, upper );
+		revolute->lowerImpulse = revolute->upperImpulse = 0.0f;
+	}
+}
+
+void b2RevoluteJoint_EnableMotor( b2JointId jointId, bool enableMotor )
+{
+	b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	if ( revolute->enableMotor != enableMotor )
+	{
+		revolute->enableMotor = enableMotor;
+		revolute->motorImpulse = 0.0f; // only an actual toggle resets the accumulated motor impulse
+	}
+}
+
+bool b2RevoluteJoint_IsMotorEnabled( b2JointId jointId )
+{
+	// Look up the joint sim for this id and report its motor flag
+	return b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint.enableMotor;
+}
+
+void b2RevoluteJoint_SetMotorSpeed( b2JointId jointId, float motorSpeed )
+{
+	// Target used by the motor constraint in b2SolveRevoluteJoint (Cdot = wB - wA - motorSpeed)
+	b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint.motorSpeed = motorSpeed;
+}
+
+float b2RevoluteJoint_GetMotorSpeed( b2JointId jointId )
+{
+	const b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	return revolute->motorSpeed; // motor target speed as stored on the joint
+}
+
+float b2RevoluteJoint_GetMotorTorque( b2JointId jointId )
+{
+	b2World* world = b2GetWorld( jointId.world0 );
+	float motorImpulse = b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint.motorImpulse;
+	return world->inv_h * motorImpulse; // accumulated motor impulse scaled by the world's inv_h
+}
+
+void b2RevoluteJoint_SetMaxMotorTorque( b2JointId jointId, float torque )
+{
+	// b2SolveRevoluteJoint clamps the motor impulse to +/- h * maxMotorTorque
+	b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint.maxMotorTorque = torque;
+}
+
+float b2RevoluteJoint_GetMaxMotorTorque( b2JointId jointId )
+{
+	const b2RevoluteJoint* revolute = &b2GetJointSimCheckType( jointId, b2_revoluteJoint )->revoluteJoint;
+	return revolute->maxMotorTorque; // motor torque cap as stored on the joint
+}
+
+b2Vec2 b2GetRevoluteJointForce( b2World* world, b2JointSim* base )
+{
+	// Accumulated point-to-point impulse scaled by the world's inv_h
+	return b2MulSV( world->inv_h, base->revoluteJoint.linearImpulse );
+}
+
+float b2GetRevoluteJointTorque( b2World* world, b2JointSim* base )
+{
+	// Motor and lower-limit impulses add, the upper-limit impulse subtracts (it is accumulated
+	// with flipped sign in b2SolveRevoluteJoint); the sum is scaled by the world's inv_h.
+	return world->inv_h * ( base->revoluteJoint.motorImpulse + base->revoluteJoint.lowerImpulse - base->revoluteJoint.upperImpulse );
+}
+
+// Point-to-point constraint
+// C = p2 - p1
+// Cdot = v2 - v1
+//      = v2 + cross(w2, r2) - v1 - cross(w1, r1)
+// J = [-I -r1_skew I r2_skew ]
+// Identity used:
+// w k % (rx i + ry j) = w * (-ry i + rx j)
+
+// Motor constraint
+// Cdot = w2 - w1
+// J = [0 0 -1 0 0 1]
+// K = invI1 + invI2
+
+void b2PrepareRevoluteJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_revoluteJoint );
+	// Per-step setup: caches inverse masses, solver state indices, anchors and effective mass on the joint.
+	// chase body id to the solver set where the body lives
+	int idA = base->bodyIdA;
+	int idB = base->bodyIdB;
+
+	b2World* world = context->world;
+
+	b2Body* bodyA = b2BodyArray_Get(&world->bodies, idA);
+	b2Body* bodyB = b2BodyArray_Get(&world->bodies, idB);
+
+	B2_ASSERT( bodyA->setIndex == b2_awakeSet || bodyB->setIndex == b2_awakeSet ); // at least one body is awake
+	b2SolverSet* setA = b2SolverSetArray_Get( &world->solverSets, bodyA->setIndex );
+	b2SolverSet* setB = b2SolverSetArray_Get( &world->solverSets, bodyB->setIndex );
+
+	int localIndexA = bodyA->localIndex;
+	int localIndexB = bodyB->localIndex;
+
+	b2BodySim* bodySimA = b2BodySimArray_Get( &setA->bodySims, localIndexA );
+	b2BodySim* bodySimB = b2BodySimArray_Get( &setB->bodySims, localIndexB );
+
+	float mA = bodySimA->invMass;
+	float iA = bodySimA->invInertia;
+	float mB = bodySimB->invMass;
+	float iB = bodySimB->invInertia;
+
+	base->invMassA = mA; // cached so warm start and solve do not have to look up the body sims
+	base->invMassB = mB;
+	base->invIA = iA;
+	base->invIB = iB;
+
+	b2RevoluteJoint* joint = &base->revoluteJoint;
+
+	joint->indexA = bodyA->setIndex == b2_awakeSet ? localIndexA : B2_NULL_INDEX; // null index: body is not in the awake set
+	joint->indexB = bodyB->setIndex == b2_awakeSet ? localIndexB : B2_NULL_INDEX;
+
+	// initial anchors in world space: local anchor relative to the local center, rotated by the body rotation
+	joint->anchorA = b2RotateVector( bodySimA->transform.q, b2Sub( base->localOriginAnchorA, bodySimA->localCenter ) );
+	joint->anchorB = b2RotateVector( bodySimB->transform.q, b2Sub( base->localOriginAnchorB, bodySimB->localCenter ) );
+	joint->deltaCenter = b2Sub( bodySimB->center, bodySimA->center );
+	joint->deltaAngle = b2RelativeAngle( bodySimB->transform.q, bodySimA->transform.q ) - joint->referenceAngle;
+	joint->deltaAngle = b2UnwindAngle( joint->deltaAngle );
+
+	float k = iA + iB; // K = invI1 + invI2 for the angular constraints
+	joint->axialMass = k > 0.0f ? 1.0f / k : 0.0f; // stays 0 when k is not positive
+
+	joint->springSoftness = b2MakeSoft( joint->hertz, joint->dampingRatio, context->h );
+
+	if ( context->enableWarmStarting == false ) // without warm starting every accumulated impulse is discarded
+	{
+		joint->linearImpulse = b2Vec2_zero;
+		joint->springImpulse = 0.0f;
+		joint->motorImpulse = 0.0f;
+		joint->lowerImpulse = 0.0f;
+		joint->upperImpulse = 0.0f;
+	}
+}
+
+void b2WarmStartRevoluteJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_revoluteJoint );
+	// Warm start: apply the impulses stored on the joint to the velocities of both body states.
+	float mA = base->invMassA; // inverse masses / inertias cached by b2PrepareRevoluteJoint
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies (used whenever the joint's body index is B2_NULL_INDEX)
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2RevoluteJoint* joint = &base->revoluteJoint;
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA ); // current anchors
+	b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+	float axialImpulse = joint->springImpulse + joint->motorImpulse + joint->lowerImpulse - joint->upperImpulse; // upper limit enters with flipped sign
+
+	stateA->linearVelocity = b2MulSub( stateA->linearVelocity, mA, joint->linearImpulse ); // body A gets the negative impulse
+	stateA->angularVelocity -= iA * ( b2Cross( rA, joint->linearImpulse ) + axialImpulse );
+
+	stateB->linearVelocity = b2MulAdd( stateB->linearVelocity, mB, joint->linearImpulse ); // body B gets the positive impulse
+	stateB->angularVelocity += iB * ( b2Cross( rB, joint->linearImpulse ) + axialImpulse );
+}
+
+void b2SolveRevoluteJoint( b2JointSim* base, b2StepContext* context, bool useBias )
+{
+	B2_ASSERT( base->type == b2_revoluteJoint );
+	// One solver pass over the joint: spring, motor, angle limits, then the point-to-point constraint.
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies (used whenever the joint's body index is B2_NULL_INDEX)
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2RevoluteJoint* joint = &base->revoluteJoint;
+
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 vA = stateA->linearVelocity; // work on local copies, written back at the end
+	float wA = stateA->angularVelocity;
+	b2Vec2 vB = stateB->linearVelocity;
+	float wB = stateB->angularVelocity;
+
+	bool fixedRotation = ( iA + iB == 0.0f );
+	// fixedRotation: the inverse inertias sum to zero, so the angular constraints below are skipped
+
+	// Solve spring. The angular error is measured against the reference angle (deltaAngle).
+	if ( joint->enableSpring && fixedRotation == false )
+	{
+		float C = b2RelativeAngle( stateB->deltaRotation, stateA->deltaRotation ) + joint->deltaAngle;
+		float bias = joint->springSoftness.biasRate * C;
+		float massScale = joint->springSoftness.massScale;
+		float impulseScale = joint->springSoftness.impulseScale;
+
+		float Cdot = wB - wA;
+		float impulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * joint->springImpulse;
+		joint->springImpulse += impulse;
+
+		wA -= iA * impulse;
+		wB += iB * impulse;
+	}
+
+	// Solve motor constraint. The accumulated impulse is clamped, then only the change is applied.
+	if ( joint->enableMotor && fixedRotation == false )
+	{
+		float Cdot = wB - wA - joint->motorSpeed;
+		float impulse = -joint->axialMass * Cdot;
+		float oldImpulse = joint->motorImpulse;
+		float maxImpulse = context->h * joint->maxMotorTorque; // torque cap expressed as an impulse over h
+		joint->motorImpulse = b2ClampFloat( joint->motorImpulse + impulse, -maxImpulse, maxImpulse );
+		impulse = joint->motorImpulse - oldImpulse;
+
+		wA -= iA * impulse;
+		wB += iB * impulse;
+	}
+
+	if ( joint->enableLimit && fixedRotation == false )
+	{
+		float jointAngle = b2RelativeAngle( stateB->deltaRotation, stateA->deltaRotation ) + joint->deltaAngle;
+		jointAngle = b2UnwindAngle( jointAngle );
+
+		// Lower limit
+		{
+			float C = jointAngle - joint->lowerAngle;
+			float bias = 0.0f;
+			float massScale = 1.0f;
+			float impulseScale = 0.0f;
+			if ( C > 0.0f )
+			{
+				// speculation: limit not reached yet; the bias is the gap C divided by the step (C * inv_h)
+				bias = C * context->inv_h;
+			}
+			else if ( useBias )
+			{
+				bias = context->jointSoftness.biasRate * C;
+				massScale = context->jointSoftness.massScale;
+				impulseScale = context->jointSoftness.impulseScale;
+			}
+
+			float Cdot = wB - wA;
+			float oldImpulse = joint->lowerImpulse;
+			float impulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * oldImpulse;
+			joint->lowerImpulse = b2MaxFloat( oldImpulse + impulse, 0.0f ); // accumulated impulse never goes negative
+			impulse = joint->lowerImpulse - oldImpulse;
+
+			wA -= iA * impulse;
+			wB += iB * impulse;
+		}
+
+		// Upper limit
+		// Note: signs are flipped to keep C positive when the constraint is satisfied.
+		// This also keeps the impulse positive when the limit is active.
+		{
+			float C = joint->upperAngle - jointAngle;
+			float bias = 0.0f;
+			float massScale = 1.0f;
+			float impulseScale = 0.0f;
+			if ( C > 0.0f )
+			{
+				// speculation: limit not reached yet; the bias is the gap C divided by the step (C * inv_h)
+				bias = C * context->inv_h;
+			}
+			else if ( useBias )
+			{
+				bias = context->jointSoftness.biasRate * C;
+				massScale = context->jointSoftness.massScale;
+				impulseScale = context->jointSoftness.impulseScale;
+			}
+
+			// sign flipped on Cdot
+			float Cdot = wA - wB;
+			float oldImpulse = joint->upperImpulse;
+			float impulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * oldImpulse;
+			joint->upperImpulse = b2MaxFloat( oldImpulse + impulse, 0.0f ); // accumulated impulse never goes negative
+			impulse = joint->upperImpulse - oldImpulse;
+
+			// sign flipped on applied impulse
+			wA += iA * impulse;
+			wB -= iB * impulse;
+		}
+	}
+
+	// Solve point-to-point constraint
+	{
+		// J = [-I -r1_skew I r2_skew]
+		// r_skew = [-ry; rx]
+		// K = [ mA+r1y^2*iA+mB+r2y^2*iB,  -r1y*iA*r1x-r2y*iB*r2x]
+		//     [  -r1y*iA*r1x-r2y*iB*r2x, mA+r1x^2*iA+mB+r2x^2*iB]
+
+		// current anchors
+		b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA );
+		b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+		b2Vec2 Cdot = b2Sub( b2Add( vB, b2CrossSV( wB, rB ) ), b2Add( vA, b2CrossSV( wA, rA ) ) );
+
+		b2Vec2 bias = b2Vec2_zero;
+		float massScale = 1.0f;
+		float impulseScale = 0.0f;
+		if ( useBias )
+		{
+			b2Vec2 dcA = stateA->deltaPosition;
+			b2Vec2 dcB = stateB->deltaPosition;
+
+			b2Vec2 separation = b2Add( b2Add( b2Sub( dcB, dcA ), b2Sub( rB, rA ) ), joint->deltaCenter ); // current anchor B minus anchor A
+			bias = b2MulSV( context->jointSoftness.biasRate, separation );
+			massScale = context->jointSoftness.massScale;
+			impulseScale = context->jointSoftness.impulseScale;
+		}
+
+		b2Mat22 K;
+		K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB;
+		K.cy.x = -rA.y * rA.x * iA - rB.y * rB.x * iB;
+		K.cx.y = K.cy.x;
+		K.cy.y = mA + mB + rA.x * rA.x * iA + rB.x * rB.x * iB;
+		b2Vec2 b = b2Solve22( K, b2Add( Cdot, bias ) );
+
+		b2Vec2 impulse;
+		impulse.x = -massScale * b.x - impulseScale * joint->linearImpulse.x;
+		impulse.y = -massScale * b.y - impulseScale * joint->linearImpulse.y;
+		joint->linearImpulse.x += impulse.x;
+		joint->linearImpulse.y += impulse.y;
+
+		vA = b2MulSub( vA, mA, impulse );
+		wA -= iA * b2Cross( rA, impulse );
+		vB = b2MulAdd( vB, mB, impulse );
+		wB += iB * b2Cross( rB, impulse );
+	}
+
+	stateA->linearVelocity = vA; // write the updated velocities back (possibly into the dummy state)
+	stateA->angularVelocity = wA;
+	stateB->linearVelocity = vB;
+	stateB->angularVelocity = wB;
+}
+
+#if 0
+void b2RevoluteJoint::Dump()
+{
+	int32 indexA = joint->bodyA->joint->islandIndex;
+	int32 indexB = joint->bodyB->joint->islandIndex;
+
+	b2Dump("  b2RevoluteJointDef jd;\n");
+	b2Dump("  jd.bodyA = bodies[%d];\n", indexA);
+	b2Dump("  jd.bodyB = bodies[%d];\n", indexB);
+	b2Dump("  jd.collideConnected = bool(%d);\n", joint->collideConnected);
+	b2Dump("  jd.localAnchorA.Set(%.9g, %.9g);\n", joint->localAnchorA.x, joint->localAnchorA.y);
+	b2Dump("  jd.localAnchorB.Set(%.9g, %.9g);\n", joint->localAnchorB.x, joint->localAnchorB.y);
+	b2Dump("  jd.referenceAngle = %.9g;\n", joint->referenceAngle);
+	b2Dump("  jd.enableLimit = bool(%d);\n", joint->enableLimit);
+	b2Dump("  jd.lowerAngle = %.9g;\n", joint->lowerAngle);
+	b2Dump("  jd.upperAngle = %.9g;\n", joint->upperAngle);
+	b2Dump("  jd.enableMotor = bool(%d);\n", joint->enableMotor);
+	b2Dump("  jd.motorSpeed = %.9g;\n", joint->motorSpeed);
+	b2Dump("  jd.maxMotorTorque = %.9g;\n", joint->maxMotorTorque);
+	b2Dump("  joints[%d] = joint->world->CreateJoint(&jd);\n", joint->index);
+}
+#endif
+
+void b2DrawRevoluteJoint( b2DebugDraw* draw, b2JointSim* base, b2Transform transformA, b2Transform transformB, float drawSize )
+{
+	// Debug-draw a revolute joint around anchor B: a circle of radius drawSize, a spoke at the
+	// relative angle of the two transforms, an optional angle label, limit/reference spokes
+	// (only when the limit is enabled), and gold links to both body origins.
+	B2_ASSERT( base->type == b2_revoluteJoint );
+
+	b2RevoluteJoint* revolute = &base->revoluteJoint;
+	const float radius = drawSize;
+
+	// World-space anchor points
+	b2Vec2 anchorA = b2TransformPoint( transformA, base->localOriginAnchorA );
+	b2Vec2 anchorB = b2TransformPoint( transformB, base->localOriginAnchorB );
+
+	draw->DrawCircleFcn( anchorB, radius, b2_colorGray, draw->context );
+
+	// Spoke at the rotation of transform B relative to transform A
+	float relativeAngle = b2RelativeAngle( transformB.q, transformA.q );
+	b2Rot spokeRot = b2MakeRot( relativeAngle );
+	b2Vec2 spoke = { radius * spokeRot.c, radius * spokeRot.s };
+	b2Vec2 spokeTip = b2Add( anchorB, spoke );
+	draw->DrawSegmentFcn( anchorB, spokeTip, b2_colorGray, draw->context );
+
+	if ( draw->drawJointExtras )
+	{
+		// Label the spoke tip with the unwound joint angle converted to degrees
+		char label[32];
+		float jointAngle = b2UnwindAngle( relativeAngle - revolute->referenceAngle );
+		snprintf( label, sizeof( label ), " %.1f deg", 180.0f * jointAngle / B2_PI );
+		draw->DrawStringFcn( spokeTip, label, b2_colorWhite, draw->context );
+	}
+
+	if ( revolute->enableLimit )
+	{
+		// Limit angles are offset by the reference angle before they are turned into
+		// directions around anchor B.
+		float lowerAngle = revolute->lowerAngle + revolute->referenceAngle;
+		float upperAngle = revolute->upperAngle + revolute->referenceAngle;
+
+		b2Rot lowerRot = b2MakeRot( lowerAngle );
+		b2Vec2 lowerSpoke = { radius * lowerRot.c, radius * lowerRot.s };
+
+		b2Rot upperRot = b2MakeRot( upperAngle );
+		b2Vec2 upperSpoke = { radius * upperRot.c, radius * upperRot.s };
+
+		b2Rot referenceRot = b2MakeRot( revolute->referenceAngle );
+		b2Vec2 referenceSpoke = { radius * referenceRot.c, radius * referenceRot.s };
+
+		// Lower limit in green, upper limit in red, reference direction in blue
+		draw->DrawSegmentFcn( anchorB, b2Add( anchorB, lowerSpoke ), b2_colorGreen, draw->context );
+		draw->DrawSegmentFcn( anchorB, b2Add( anchorB, upperSpoke ), b2_colorRed, draw->context );
+		draw->DrawSegmentFcn( anchorB, b2Add( anchorB, referenceSpoke ), b2_colorBlue, draw->context );
+	}
+
+	// Links: origin of transform A to anchor A, anchor A to anchor B,
+	// and origin of transform B to anchor B.
+	b2HexColor linkColor = b2_colorGold;
+	draw->DrawSegmentFcn( transformA.p, anchorA, linkColor, draw->context );
+	draw->DrawSegmentFcn( anchorA, anchorB, linkColor, draw->context );
+	draw->DrawSegmentFcn( transformB.p, anchorB, linkColor, draw->context );
+}
diff --git a/src/vendor/box2d/sensor.c b/src/vendor/box2d/sensor.c
new file mode 100644
index 0000000..3d203ed
--- /dev/null
+++ b/src/vendor/box2d/sensor.c
@@ -0,0 +1,389 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "sensor.h"
+
+#include "array.h"
+#include "body.h"
+#include "contact.h"
+#include "ctz.h"
+#include "shape.h"
+#include "world.h"
+
+#include "box2d/collision.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+B2_ARRAY_SOURCE( b2ShapeRef, b2ShapeRef )
+B2_ARRAY_SOURCE( b2Sensor, b2Sensor )
+B2_ARRAY_SOURCE( b2SensorTaskContext, b2SensorTaskContext )
+
+struct b2SensorQueryContext
+{
+	b2World* world; // world whose shape array the query callback indexes
+	b2SensorTaskContext* taskContext; // task context of the worker running the query
+	b2Sensor* sensor; // sensor whose overlaps2 array receives the hits
+	b2Shape* sensorShape; // shape of that sensor
+	b2Transform transform; // transform used for the sensor shape in the distance test
+};
+
+// Sensor shapes need to
+// - detect begin and end overlap events
+// - events must be reported in deterministic order
+// - maintain an active list of overlaps for query
+
+// Assumption
+// - sensors don't detect shapes on the same body
+
+// Algorithm
+// Query all sensors for overlaps
+// Check against previous overlaps
+
+// Data structures
+// Each sensor has a double buffered array of overlaps
+// These overlaps use a shape reference with index and generation
+
+static bool b2SensorQueryCallback( int proxyId, uint64_t userData, void* context )
+{
+	// Tree query callback used while overlapping one sensor. The shape id arrives in userData;
+	// if that shape passes the rejection tests and touches the sensor shape, it is appended to
+	// the sensor's overlaps2 array. Every path returns true.
+	B2_UNUSED( proxyId );
+
+	struct b2SensorQueryContext* query = context;
+	b2Shape* sensorShape = query->sensorShape;
+	int sensorShapeId = sensorShape->id;
+	int visitorShapeId = (int)userData;
+
+	// The sensor shape itself is not a visitor
+	if ( visitorShapeId == sensorShapeId )
+	{
+		return true;
+	}
+
+	b2World* world = query->world;
+	b2Shape* visitor = b2ShapeArray_Get( &world->shapes, visitorShapeId );
+
+	// Cheap rejections, evaluated in this order:
+	// - sensor events are disabled on the visitor
+	// - the visitor is on the same body as the sensor
+	// - the two filters say the shapes should not collide
+	bool rejected = visitor->enableSensorEvents == false || visitor->bodyId == sensorShape->bodyId ||
+					b2ShouldShapesCollide( sensorShape->filter, visitor->filter ) == false;
+	if ( rejected )
+	{
+		return true;
+	}
+
+	// Precise test: shape distance with radii included, starting from an empty simplex cache
+	b2Transform visitorTransform = b2GetBodyTransform( world, visitor->bodyId );
+
+	b2SimplexCache cache = { 0 };
+
+	b2DistanceInput input;
+	input.proxyA = b2MakeShapeDistanceProxy( sensorShape );
+	input.proxyB = b2MakeShapeDistanceProxy( visitor );
+	input.transformA = query->transform;
+	input.transformB = visitorTransform;
+	input.useRadii = true;
+	b2DistanceOutput output = b2ShapeDistance(&input, &cache, NULL, 0 );
+
+	// Shapes count as touching only when the distance is below a small
+	// tolerance of ten float epsilons.
+	bool touching = output.distance < 10.0f * FLT_EPSILON;
+	if ( touching == false )
+	{
+		return true;
+	}
+
+	// Record the overlap as an (id, generation) reference
+	b2Sensor* sensor = query->sensor;
+	b2ShapeRef* overlap = b2ShapeRefArray_Add( &sensor->overlaps2 );
+	overlap->shapeId = visitorShapeId;
+	overlap->generation = visitor->generation;
+
+	return true;
+}
+
+static int b2CompareShapeRefs( const void* a, const void* b )
+{
+	// qsort comparator for b2ShapeRef: ascending shapeId, ties broken by ascending generation.
+	// Returns -1, 0 or 1.
+	const b2ShapeRef* lhs = a;
+	const b2ShapeRef* rhs = b;
+
+	// Primary key: shape id
+	if ( lhs->shapeId < rhs->shapeId )
+	{
+		return -1;
+	}
+	if ( lhs->shapeId > rhs->shapeId )
+	{
+		return 1;
+	}
+
+	// Secondary key: generation
+	if ( lhs->generation == rhs->generation )
+	{
+		return 0;
+	}
+
+	return lhs->generation < rhs->generation ? -1 : 1;
+}
+
+static void b2SensorTask( int startIndex, int endIndex, uint32_t threadIndex, void* context )
+{
+	b2TracyCZoneNC( sensor_task, "Overlap", b2_colorBrown, true );
+	// Task body: recompute overlaps for sensors in [startIndex, endIndex) and flag the ones that changed.
+	b2World* world = context;
+	B2_ASSERT( (int)threadIndex < world->workerCount );
+	b2SensorTaskContext* taskContext = world->sensorTaskContexts.data + threadIndex; // event bits are kept per worker
+
+	B2_ASSERT( startIndex < endIndex );
+
+	b2DynamicTree* trees = world->broadPhase.trees;
+	for ( int sensorIndex = startIndex; sensorIndex < endIndex; ++sensorIndex )
+	{
+		b2Sensor* sensor = b2SensorArray_Get( &world->sensors, sensorIndex );
+		b2Shape* sensorShape = b2ShapeArray_Get( &world->shapes, sensor->shapeId );
+
+		// swap overlap arrays: overlaps1 now holds the previous result, overlaps2 is cleared and refilled below
+		b2ShapeRefArray temp = sensor->overlaps1;
+		sensor->overlaps1 = sensor->overlaps2;
+		sensor->overlaps2 = temp;
+		b2ShapeRefArray_Clear( &sensor->overlaps2 );
+
+		b2Body* body = b2BodyArray_Get( &world->bodies, sensorShape->bodyId );
+		if ( body->setIndex == b2_disabledSet || sensorShape->enableSensorEvents == false )
+		{
+			if ( sensor->overlaps1.count != 0 )
+			{
+				b2SetBit( &taskContext->eventBits, sensorIndex );
+			}
+			continue; // no query: overlaps2 stays empty, the bit is set only if there were previous overlaps
+		}
+
+		b2Transform transform = b2GetBodyTransformQuick( world, body );
+
+		struct b2SensorQueryContext queryContext = {
+			.world = world,
+			.taskContext = taskContext,
+			.sensorShape = sensorShape,
+			.sensor = sensor,
+			.transform = transform,
+		};
+
+		B2_ASSERT( sensorShape->sensorIndex == sensorIndex );
+		b2AABB queryBounds = sensorShape->aabb;
+
+		// Query all trees (three of them); b2SensorQueryCallback appends hits to sensor->overlaps2
+		b2DynamicTree_Query( trees + 0, queryBounds, sensorShape->filter.maskBits, b2SensorQueryCallback, &queryContext );
+		b2DynamicTree_Query( trees + 1, queryBounds, sensorShape->filter.maskBits, b2SensorQueryCallback, &queryContext );
+		b2DynamicTree_Query( trees + 2, queryBounds, sensorShape->filter.maskBits, b2SensorQueryCallback, &queryContext );
+
+		// Sort the overlaps to enable finding begin and end events.
+		qsort( sensor->overlaps2.data, sensor->overlaps2.count, sizeof( b2ShapeRef ), b2CompareShapeRefs );
+
+		int count1 = sensor->overlaps1.count;
+		int count2 = sensor->overlaps2.count;
+		if ( count1 != count2 )
+		{
+			// something changed
+			b2SetBit( &taskContext->eventBits, sensorIndex );
+		}
+		else
+		{
+			for ( int i = 0; i < count1; ++i ) // same count: compare previous and current entries pairwise
+			{
+				b2ShapeRef* s1 = sensor->overlaps1.data + i;
+				b2ShapeRef* s2 = sensor->overlaps2.data + i;
+
+				if ( s1->shapeId != s2->shapeId || s1->generation != s2->generation )
+				{
+					// something changed
+					b2SetBit( &taskContext->eventBits, sensorIndex );
+					break;
+				}
+			}
+		}
+	}
+
+	b2TracyCZoneEnd( sensor_task );
+}
+
+void b2OverlapSensors( b2World* world )
+{
+	int sensorCount = world->sensors.count;
+	if ( sensorCount == 0 )
+	{
+		return;
+	}
+	// Sensors exist: run the overlap tasks, merge the per-worker bits, then publish begin/end touch events.
+	B2_ASSERT( world->workerCount > 0 );
+
+	b2TracyCZoneNC( overlap_sensors, "Sensors", b2_colorMediumPurple, true );
+
+	for ( int i = 0; i < world->workerCount; ++i )
+	{
+		b2SetBitCountAndClear( &world->sensorTaskContexts.data[i].eventBits, sensorCount ); // one bit per sensor, per worker
+	}
+
+	// Parallel-for sensors overlaps
+	int minRange = 16;
+	void* userSensorTask = world->enqueueTaskFcn( &b2SensorTask, sensorCount, minRange, world, world->userTaskContext );
+	world->taskCount += 1;
+	if ( userSensorTask != NULL )
+	{
+		world->finishTaskFcn( userSensorTask, world->userTaskContext );
+	}
+
+	b2TracyCZoneNC( sensor_state, "Events", b2_colorLightSlateGray, true );
+
+	b2BitSet* bitSet = &world->sensorTaskContexts.data[0].eventBits; // worker 0's bits accumulate the union
+	for ( int i = 1; i < world->workerCount; ++i )
+	{
+		b2InPlaceUnion( bitSet, &world->sensorTaskContexts.data[i].eventBits );
+	}
+
+	// Iterate sensor bits and publish events
+	// Each set bit is the index of a sensor that b2SensorTask flagged as changed
+	uint64_t* bits = bitSet->bits;
+	uint32_t blockCount = bitSet->blockCount;
+
+	for ( uint32_t k = 0; k < blockCount; ++k )
+	{
+		uint64_t word = bits[k];
+		while ( word != 0 )
+		{
+			uint32_t ctz = b2CTZ64( word );
+			int sensorIndex = (int)( 64 * k + ctz );
+
+			b2Sensor* sensor = b2SensorArray_Get( &world->sensors, sensorIndex );
+			b2Shape* sensorShape = b2ShapeArray_Get( &world->shapes, sensor->shapeId );
+			b2ShapeId sensorId = { sensor->shapeId + 1, world->worldId, sensorShape->generation };
+
+			int count1 = sensor->overlaps1.count;
+			int count2 = sensor->overlaps2.count;
+			const b2ShapeRef* refs1 = sensor->overlaps1.data;
+			const b2ShapeRef* refs2 = sensor->overlaps2.data;
+
+			// overlaps1 can have overlaps that end
+			// overlaps2 can have overlaps that begin
+			int index1 = 0, index2 = 0;
+			while ( index1 < count1 && index2 < count2 ) // walk both arrays in step, ordered by shapeId then generation
+			{
+				const b2ShapeRef* r1 = refs1 + index1;
+				const b2ShapeRef* r2 = refs2 + index2;
+				if ( r1->shapeId == r2->shapeId )
+				{
+					if ( r1->generation < r2->generation )
+					{
+						// end
+						b2ShapeId visitorId = { r1->shapeId + 1, world->worldId, r1->generation };
+						b2SensorEndTouchEvent event = {
+							.sensorShapeId = sensorId,
+							.visitorShapeId = visitorId,
+						};
+						b2SensorEndTouchEventArray_Push( &world->sensorEndEvents[world->endEventArrayIndex], event );
+						index1 += 1;
+					}
+					else if ( r1->generation > r2->generation )
+					{
+						// begin
+						b2ShapeId visitorId = { r2->shapeId + 1, world->worldId, r2->generation };
+						b2SensorBeginTouchEvent event = { sensorId, visitorId };
+						b2SensorBeginTouchEventArray_Push( &world->sensorBeginEvents, event );
+						index2 += 1;
+					}
+					else
+					{
+						// persisted
+						index1 += 1;
+						index2 += 1;
+					}
+				}
+				else if ( r1->shapeId < r2->shapeId )
+				{
+					// end
+					b2ShapeId visitorId = { r1->shapeId + 1, world->worldId, r1->generation };
+					b2SensorEndTouchEvent event = { sensorId, visitorId };
+					b2SensorEndTouchEventArray_Push( &world->sensorEndEvents[world->endEventArrayIndex], event );
+					index1 += 1;
+				}
+				else
+				{
+					// begin
+					b2ShapeId visitorId = { r2->shapeId + 1, world->worldId, r2->generation };
+					b2SensorBeginTouchEvent event = { sensorId, visitorId };
+					b2SensorBeginTouchEventArray_Push( &world->sensorBeginEvents, event );
+					index2 += 1;
+				}
+			}
+
+			while ( index1 < count1 )
+			{
+				// end: leftover previous overlaps have no current counterpart
+				const b2ShapeRef* r1 = refs1 + index1;
+				b2ShapeId visitorId = { r1->shapeId + 1, world->worldId, r1->generation };
+				b2SensorEndTouchEvent event = { sensorId, visitorId };
+				b2SensorEndTouchEventArray_Push( &world->sensorEndEvents[world->endEventArrayIndex], event );
+				index1 += 1;
+			}
+
+			while ( index2 < count2 )
+			{
+				// begin: leftover current overlaps have no previous counterpart
+				const b2ShapeRef* r2 = refs2 + index2;
+				b2ShapeId visitorId = { r2->shapeId + 1, world->worldId, r2->generation };
+				b2SensorBeginTouchEvent event = { sensorId, visitorId };
+				b2SensorBeginTouchEventArray_Push( &world->sensorBeginEvents, event );
+				index2 += 1;
+			}
+
+			// Clear the smallest set bit
+			word = word & ( word - 1 );
+		}
+	}
+
+	b2TracyCZoneEnd( sensor_state );
+	b2TracyCZoneEnd( overlap_sensors );
+}
+
+void b2DestroySensor( b2World* world, b2Shape* sensorShape )
+{
+	// Tear down the sensor owned by sensorShape: publish an end-touch event for every overlap still
+	// recorded in overlaps2, free both overlap arrays, then swap-remove the sensor from the world.
+	int sensorIndex = sensorShape->sensorIndex;
+	b2Sensor* sensor = b2SensorArray_Get( &world->sensors, sensorIndex );
+
+	b2ShapeId sensorId = {
+		.index1 = sensorShape->id + 1,
+		.generation = sensorShape->generation,
+		.world0 = world->worldId,
+	};
+
+	// Every overlap still on record ends now
+	for ( int i = 0; i < sensor->overlaps2.count; ++i )
+	{
+		const b2ShapeRef* visitor = sensor->overlaps2.data + i;
+		b2ShapeId visitorId = {
+			.index1 = visitor->shapeId + 1,
+			.generation = visitor->generation,
+			.world0 = world->worldId,
+		};
+		b2SensorEndTouchEvent event = { .sensorShapeId = sensorId, .visitorShapeId = visitorId };
+		b2SensorEndTouchEventArray_Push( world->sensorEndEvents + world->endEventArrayIndex, event );
+	}
+
+	b2ShapeRefArray_Destroy( &sensor->overlaps1 );
+	b2ShapeRefArray_Destroy( &sensor->overlaps2 );
+
+	// Swap-remove; if another sensor was moved into the vacated slot, point its shape at the new index
+	int movedIndex = b2SensorArray_RemoveSwap( &world->sensors, sensorIndex );
+	if ( movedIndex != B2_NULL_INDEX )
+	{
+		b2Sensor* movedSensor = b2SensorArray_Get( &world->sensors, sensorIndex );
+		b2ShapeArray_Get( &world->shapes, movedSensor->shapeId )->sensorIndex = sensorIndex;
+	}
+}
diff --git a/src/vendor/box2d/sensor.h b/src/vendor/box2d/sensor.h
new file mode 100644
index 0000000..6136d9e
--- /dev/null
+++ b/src/vendor/box2d/sensor.h
@@ -0,0 +1,36 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+#include "bitset.h"
+
+typedef struct b2Shape b2Shape;
+typedef struct b2World b2World;
+
+typedef struct b2ShapeRef // reference to a shape by index and generation
+{
+	int shapeId; // index of the shape in the world's shape array
+	uint16_t generation; // generation copied from the shape when the reference was recorded
+} b2ShapeRef;
+
+typedef struct b2Sensor // per-sensor overlap state, double buffered
+{
+	b2ShapeRefArray overlaps1; // overlaps from the previous update (can produce end events)
+	b2ShapeRefArray overlaps2; // overlaps from the current update (can produce begin events)
+	int shapeId; // index of the sensor's own shape in the world's shape array
+} b2Sensor;
+
+typedef struct b2SensorTaskContext // one per worker, indexed by thread index in b2SensorTask
+{
+	b2BitSet eventBits; // bit i is set when sensor i's overlaps changed during the update
+} b2SensorTaskContext;
+
+void b2OverlapSensors( b2World* world );
+
+void b2DestroySensor( b2World* world, b2Shape* sensorShape );
+
+B2_ARRAY_INLINE( b2ShapeRef, b2ShapeRef )
+B2_ARRAY_INLINE( b2Sensor, b2Sensor )
+B2_ARRAY_INLINE( b2SensorTaskContext, b2SensorTaskContext )
diff --git a/src/vendor/box2d/shape.c b/src/vendor/box2d/shape.c
new file mode 100644
index 0000000..a6cc913
--- /dev/null
+++ b/src/vendor/box2d/shape.c
@@ -0,0 +1,1714 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "shape.h"
+
+#include "body.h"
+#include "broad_phase.h"
+#include "contact.h"
+#include "sensor.h"
+#include "world.h"
+
+// needed for dll export
+#include "box2d/box2d.h"
+
+#include <stddef.h>
+
+B2_ARRAY_SOURCE( b2ChainShape, b2ChainShape ) // NOTE(review): presumably emits the out-of-line b2ChainShapeArray functions - see array.h
+B2_ARRAY_SOURCE( b2Shape, b2Shape ) // NOTE(review): presumably emits the out-of-line b2ShapeArray functions - see array.h
+
+static b2Shape* b2GetShape( b2World* world, b2ShapeId shapeId )
+{ // Resolves a public shape id to the b2Shape stored in world->shapes
+	int id = shapeId.index1 - 1; // index1 is the array index plus one
+	b2Shape* shape = b2ShapeArray_Get( &world->shapes, id );
+	B2_ASSERT( shape->id == id && shape->generation == shapeId.generation ); // slot must still hold the shape this handle names
+	return shape;
+}
+
+static b2ChainShape* b2GetChainShape( b2World* world, b2ChainId chainId )
+{ // Resolves a public chain id to the b2ChainShape stored in world->chainShapes
+	int id = chainId.index1 - 1; // index1 is the array index plus one
+	b2ChainShape* chain = b2ChainShapeArray_Get( &world->chainShapes, id );
+	B2_ASSERT( chain->id == id && chain->generation == chainId.generation ); // slot must still hold the chain this handle names
+	return chain;
+}
+
+static void b2UpdateShapeAABBs( b2Shape* shape, b2Transform transform, b2BodyType proxyType )
+{
+	// Recomputes shape->aabb (tight bounds plus the speculative distance)
+	// and shape->fatAABB (shape->aabb plus a margin chosen by proxy type).
+	const float spec = B2_SPECULATIVE_DISTANCE;
+
+	// Tight bounds grown by the speculative distance on every side
+	b2AABB box = b2ComputeShapeAABB( shape, transform );
+	box.lowerBound.x -= spec;
+	box.lowerBound.y -= spec;
+	box.upperBound.x += spec;
+	box.upperBound.y += spec;
+	shape->aabb = box;
+
+	// Smaller margin for static bodies. Cannot be zero due to TOI tolerance.
+	const float pad = ( proxyType == b2_staticBody ) ? spec : B2_AABB_MARGIN;
+
+	shape->fatAABB.lowerBound.x = box.lowerBound.x - pad;
+	shape->fatAABB.lowerBound.y = box.lowerBound.y - pad;
+	shape->fatAABB.upperBound.x = box.upperBound.x + pad;
+	shape->fatAABB.upperBound.y = box.upperBound.y + pad;
+}
+
+static b2Shape* b2CreateShapeInternal( b2World* world, b2Body* body, b2Transform transform, const b2ShapeDef* def,
+									   const void* geometry, b2ShapeType shapeType )
+{ // Allocates a shape slot, copies geometry and definition settings into it, and links it to the body
+	int shapeId = b2AllocId( &world->shapeIdPool );
+	// An id equal to the array count is brand new and needs a pushed slot; otherwise the id was recycled
+	if ( shapeId == world->shapes.count )
+	{
+		b2ShapeArray_Push( &world->shapes, ( b2Shape ){ 0 } );
+	}
+	else
+	{
+		B2_ASSERT( world->shapes.data[shapeId].id == B2_NULL_INDEX ); // a destroyed shape has its id cleared
+	}
+
+	b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+	// Copy the caller's geometry by value into the member that matches shapeType
+	switch ( shapeType )
+	{
+		case b2_capsuleShape:
+			shape->capsule = *(const b2Capsule*)geometry;
+			break;
+
+		case b2_circleShape:
+			shape->circle = *(const b2Circle*)geometry;
+			break;
+
+		case b2_polygonShape:
+			shape->polygon = *(const b2Polygon*)geometry;
+			break;
+
+		case b2_segmentShape:
+			shape->segment = *(const b2Segment*)geometry;
+			break;
+
+		case b2_chainSegmentShape:
+			shape->chainSegment = *(const b2ChainSegment*)geometry;
+			break;
+
+		default:
+			B2_ASSERT( false );
+			break;
+	}
+
+	shape->id = shapeId;
+	shape->bodyId = body->id;
+	shape->type = shapeType;
+	shape->density = def->density;
+	shape->friction = def->material.friction;
+	shape->restitution = def->material.restitution;
+	shape->rollingResistance = def->material.rollingResistance;
+	shape->tangentSpeed = def->material.tangentSpeed;
+	shape->userMaterialId = def->material.userMaterialId;
+	shape->filter = def->filter;
+	shape->userData = def->userData;
+	shape->customColor = def->material.customColor;
+	shape->enlargedAABB = false;
+	shape->enableSensorEvents = def->enableSensorEvents;
+	shape->enableContactEvents = def->enableContactEvents;
+	shape->enableHitEvents = def->enableHitEvents;
+	shape->enablePreSolveEvents = def->enablePreSolveEvents;
+	shape->proxyKey = B2_NULL_INDEX;
+	shape->localCentroid = b2GetShapeCentroid( shape );
+	shape->aabb = ( b2AABB ){ b2Vec2_zero, b2Vec2_zero };
+	shape->fatAABB = ( b2AABB ){ b2Vec2_zero, b2Vec2_zero };
+	shape->generation += 1; // b2GetShape asserts that a handle's generation equals this value
+	// No broad-phase proxy is created while the body is in the disabled set
+	if ( body->setIndex != b2_disabledSet )
+	{
+		b2BodyType proxyType = body->type;
+		b2CreateShapeProxy( shape, &world->broadPhase, proxyType, transform, def->invokeContactCreation || def->isSensor );
+	}
+
+	// Push onto the front of the body's doubly linked shape list
+	if ( body->headShapeId != B2_NULL_INDEX )
+	{
+		b2Shape* headShape = b2ShapeArray_Get( &world->shapes, body->headShapeId );
+		headShape->prevShapeId = shapeId;
+	}
+
+	shape->prevShapeId = B2_NULL_INDEX;
+	shape->nextShapeId = body->headShapeId;
+	body->headShapeId = shapeId;
+	body->shapeCount += 1;
+	// Sensor shapes get a b2Sensor appended to world->sensors and remember its index
+	if ( def->isSensor )
+	{
+		shape->sensorIndex = world->sensors.count;
+		b2Sensor sensor = {
+			.overlaps1 = b2ShapeRefArray_Create( 16 ),
+			.overlaps2 = b2ShapeRefArray_Create( 16 ),
+			.shapeId = shapeId,
+		};
+		b2SensorArray_Push( &world->sensors, sensor );
+	}
+	else
+	{
+		shape->sensorIndex = B2_NULL_INDEX;
+	}
+
+	b2ValidateSolverSets( world );
+
+	return shape;
+}
+
+// Shared implementation behind the public b2Create*Shape functions: checks the definition, creates the
+// shape on the body and returns its public id (a zeroed id when b2GetWorldLocked returns NULL).
+static b2ShapeId b2CreateShape( b2BodyId bodyId, const b2ShapeDef* def, const void* geometry, b2ShapeType shapeType )
+{
+	B2_CHECK_DEF( def );
+	B2_ASSERT( b2IsValidFloat( def->density ) && def->density >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( def->material.friction ) && def->material.friction >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( def->material.restitution ) && def->material.restitution >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( def->material.rollingResistance ) && def->material.rollingResistance >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( def->material.tangentSpeed ) );
+
+	b2World* world = b2GetWorldLocked( bodyId.world0 );
+	if ( world == NULL )
+	{
+		return ( b2ShapeId ){ 0 };
+	}
+
+	b2Body* owner = b2GetBodyFullId( world, bodyId );
+	b2Shape* created =
+		b2CreateShapeInternal( world, owner, b2GetBodyTransformQuick( world, owner ), def, geometry, shapeType );
+
+	// Mass data is recomputed only when the definition asks for it
+	if ( def->updateBodyMass == true )
+	{
+		b2UpdateBodyMassData( world, owner );
+	}
+
+	b2ValidateSolverSets( world );
+	return ( b2ShapeId ){ created->id + 1, bodyId.world0, created->generation };
+}
+
+b2ShapeId b2CreateCircleShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Circle* circle )
+{ // Creates a circle shape on the body; the circle is copied into the shape
+	return b2CreateShape( bodyId, def, circle, b2_circleShape );
+}
+
+b2ShapeId b2CreateCapsuleShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Capsule* capsule )
+{
+	// A capsule whose two centers lie within linear slop of each other is created as a circle at their midpoint
+	bool degenerate = b2DistanceSquared( capsule->center1, capsule->center2 ) <= B2_LINEAR_SLOP * B2_LINEAR_SLOP;
+	if ( degenerate == false )
+	{
+		return b2CreateShape( bodyId, def, capsule, b2_capsuleShape );
+	}
+	b2Circle midCircle = { b2Lerp( capsule->center1, capsule->center2, 0.5f ), capsule->radius };
+	return b2CreateShape( bodyId, def, &midCircle, b2_circleShape );
+}
+
+b2ShapeId b2CreatePolygonShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Polygon* polygon )
+{ // Creates a polygon shape on the body; the polygon is copied into the shape
+	B2_ASSERT( b2IsValidFloat( polygon->radius ) && polygon->radius >= 0.0f ); // the polygon radius may not be negative
+	return b2CreateShape( bodyId, def, polygon, b2_polygonShape );
+}
+
+b2ShapeId b2CreateSegmentShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Segment* segment )
+{
+	// Segments no longer than the linear slop are rejected: assert, then return the null shape id
+	const float minLengthSqr = B2_LINEAR_SLOP * B2_LINEAR_SLOP;
+	if ( b2DistanceSquared( segment->point1, segment->point2 ) <= minLengthSqr )
+	{
+		B2_ASSERT( false );
+		return b2_nullShapeId;
+	}
+	return b2CreateShape( bodyId, def, segment, b2_segmentShape );
+}
+
+// Destroy a shape on a body: unlink it, drop its proxy, contacts and sensor, and recycle its id. This doesn't need to be called when destroying a body.
+static void b2DestroyShapeInternal( b2World* world, b2Shape* shape, b2Body* body, bool wakeBodies )
+{
+	int shapeId = shape->id;
+
+	// Remove the shape from the body's doubly linked list.
+	if ( shape->prevShapeId != B2_NULL_INDEX )
+	{
+		b2Shape* prevShape = b2ShapeArray_Get( &world->shapes, shape->prevShapeId );
+		prevShape->nextShapeId = shape->nextShapeId;
+	}
+
+	if ( shape->nextShapeId != B2_NULL_INDEX )
+	{
+		b2Shape* nextShape = b2ShapeArray_Get( &world->shapes, shape->nextShapeId );
+		nextShape->prevShapeId = shape->prevShapeId;
+	}
+
+	if ( shapeId == body->headShapeId )
+	{
+		body->headShapeId = shape->nextShapeId;
+	}
+
+	body->shapeCount -= 1;
+
+	// Remove from broad-phase.
+	b2DestroyShapeProxy( shape, &world->broadPhase );
+
+	// Destroy any contacts associated with the shape. A contact key packs the contact id (key >> 1) and the edge index (key & 1).
+	int contactKey = body->headContactKey;
+	while ( contactKey != B2_NULL_INDEX )
+	{
+		int contactId = contactKey >> 1;
+		int edgeIndex = contactKey & 1;
+
+		b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+		contactKey = contact->edges[edgeIndex].nextKey; // read the next key before this contact may be destroyed
+
+		if ( contact->shapeIdA == shapeId || contact->shapeIdB == shapeId )
+		{
+			b2DestroyContact( world, contact, wakeBodies );
+		}
+	}
+	// A sensor shape also owns an entry in world->sensors that has to be torn down
+	if ( shape->sensorIndex != B2_NULL_INDEX )
+	{
+		b2Sensor* sensor = b2SensorArray_Get( &world->sensors, shape->sensorIndex );
+		for ( int i = 0; i < sensor->overlaps2.count; ++i ) // one end-touch event per entry in overlaps2
+		{
+			b2ShapeRef* ref = sensor->overlaps2.data + i;
+			b2SensorEndTouchEvent event = {
+				.sensorShapeId =
+					{
+						.index1 = shapeId + 1,
+						.generation = shape->generation,
+						.world0 = world->worldId,
+					},
+				.visitorShapeId =
+					{
+						.index1 = ref->shapeId + 1,
+						.generation = ref->generation,
+						.world0 = world->worldId,
+					},
+			};
+
+			b2SensorEndTouchEventArray_Push( world->sensorEndEvents + world->endEventArrayIndex, event );
+		}
+
+		// Destroy sensor
+		b2ShapeRefArray_Destroy( &sensor->overlaps1 );
+		b2ShapeRefArray_Destroy( &sensor->overlaps2 );
+
+		int movedIndex = b2SensorArray_RemoveSwap( &world->sensors, shape->sensorIndex );
+		if ( movedIndex != B2_NULL_INDEX )
+		{
+			// Fixup moved sensor: another sensor now occupies this index, so its shape must point here
+			b2Sensor* movedSensor = b2SensorArray_Get( &world->sensors, shape->sensorIndex );
+			b2Shape* otherSensorShape = b2ShapeArray_Get( &world->shapes, movedSensor->shapeId );
+			otherSensorShape->sensorIndex = shape->sensorIndex;
+		}
+	}
+
+	// Return shape to free list.
+	b2FreeId( &world->shapeIdPool, shapeId );
+	shape->id = B2_NULL_INDEX; // marks the slot free; b2CreateShapeInternal asserts this when the id is reused
+
+	b2ValidateSolverSets( world );
+}
+
+void b2DestroyShape( b2ShapeId shapeId, bool updateBodyMass )
+{
+	// Public entry point for removing a single shape from its body.
+	// Nothing happens when b2GetWorldLocked does not return a world.
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world != NULL )
+	{
+		b2Shape* doomed = b2GetShape( world, shapeId );
+		b2Body* owner = b2BodyArray_Get( &world->bodies, doomed->bodyId );
+
+		// need to wake bodies because this might be a static body
+		b2DestroyShapeInternal( world, doomed, owner, true );
+
+		// Mass data is refreshed only on request
+		if ( updateBodyMass )
+		{
+			b2UpdateBodyMassData( world, owner );
+		}
+	}
+}
+
+b2ChainId b2CreateChain( b2BodyId bodyId, const b2ChainDef* def )
+{ // Creates a chain on the body as a run of b2_chainSegmentShape shapes that store their neighboring points as ghosts
+	B2_CHECK_DEF( def );
+	B2_ASSERT( def->count >= 4 ); // an open chain yields count - 3 segments, so four points is the minimum
+	B2_ASSERT( def->materialCount == 1 || def->materialCount == def->count ); // one shared material or one per point
+
+	b2World* world = b2GetWorldLocked( bodyId.world0 );
+	if ( world == NULL )
+	{
+		return ( b2ChainId ){ 0 };
+	}
+
+	b2Body* body = b2GetBodyFullId( world, bodyId );
+	b2Transform transform = b2GetBodyTransformQuick( world, body );
+	// An id equal to the array count is brand new and needs a pushed slot; otherwise the id was recycled
+	int chainId = b2AllocId( &world->chainIdPool );
+
+	if ( chainId == world->chainShapes.count )
+	{
+		b2ChainShapeArray_Push( &world->chainShapes, ( b2ChainShape ){ 0 } );
+	}
+	else
+	{
+		B2_ASSERT( world->chainShapes.data[chainId].id == B2_NULL_INDEX );
+	}
+
+	b2ChainShape* chainShape = b2ChainShapeArray_Get( &world->chainShapes, chainId );
+
+	chainShape->id = chainId;
+	chainShape->bodyId = body->id;
+	chainShape->nextChainId = body->headChainId; // new chains go to the front of the body's chain list
+	chainShape->generation += 1;
+	// The chain keeps its own copy of the materials, checked as they are copied
+	int materialCount = def->materialCount;
+	chainShape->materialCount = materialCount;
+	chainShape->materials = b2Alloc( materialCount * sizeof( b2SurfaceMaterial ) );
+
+	for ( int i = 0; i < materialCount; ++i )
+	{
+		const b2SurfaceMaterial* material = def->materials + i;
+		B2_ASSERT( b2IsValidFloat( material->friction ) && material->friction >= 0.0f );
+		B2_ASSERT( b2IsValidFloat( material->restitution ) && material->restitution >= 0.0f );
+		B2_ASSERT( b2IsValidFloat( material->rollingResistance ) && material->rollingResistance >= 0.0f );
+		B2_ASSERT( b2IsValidFloat( material->tangentSpeed ) );
+
+		chainShape->materials[i] = *material;
+	}
+
+	body->headChainId = chainId;
+	// One shape definition is reused for every segment; only its material changes per segment
+	b2ShapeDef shapeDef = b2DefaultShapeDef();
+	shapeDef.userData = def->userData;
+	shapeDef.filter = def->filter;
+	shapeDef.enableSensorEvents = def->enableSensorEvents;
+	shapeDef.enableContactEvents = false;
+	shapeDef.enableHitEvents = false;
+
+	const b2Vec2* points = def->points;
+	int n = def->count;
+
+	if ( def->isLoop )
+	{
+		chainShape->count = n;
+		chainShape->shapeIndices = b2Alloc( chainShape->count * sizeof( int ) );
+		// Segment i spans point i to point i + 1; the previous and following points become its ghosts
+		b2ChainSegment chainSegment;
+
+		int prevIndex = n - 1;
+		for ( int i = 0; i < n - 2; ++i )
+		{
+			chainSegment.ghost1 = points[prevIndex];
+			chainSegment.segment.point1 = points[i];
+			chainSegment.segment.point2 = points[i + 1];
+			chainSegment.ghost2 = points[i + 2];
+			chainSegment.chainId = chainId;
+			prevIndex = i;
+
+			int materialIndex = materialCount == 1 ? 0 : i;
+			shapeDef.material = def->materials[materialIndex];
+
+			b2Shape* shape = b2CreateShapeInternal( world, body, transform, &shapeDef, &chainSegment, b2_chainSegmentShape );
+			chainShape->shapeIndices[i] = shape->id;
+		}
+		// Second to last segment: ghost2 wraps around to the first point
+		{
+			chainSegment.ghost1 = points[n - 3];
+			chainSegment.segment.point1 = points[n - 2];
+			chainSegment.segment.point2 = points[n - 1];
+			chainSegment.ghost2 = points[0];
+			chainSegment.chainId = chainId;
+
+			int materialIndex = materialCount == 1 ? 0 : n - 2;
+			shapeDef.material = def->materials[materialIndex];
+
+			b2Shape* shape = b2CreateShapeInternal( world, body, transform, &shapeDef, &chainSegment, b2_chainSegmentShape );
+			chainShape->shapeIndices[n - 2] = shape->id;
+		}
+		// Closing segment from the last point back to the first point
+		{
+			chainSegment.ghost1 = points[n - 2];
+			chainSegment.segment.point1 = points[n - 1];
+			chainSegment.segment.point2 = points[0];
+			chainSegment.ghost2 = points[1];
+			chainSegment.chainId = chainId;
+
+			int materialIndex = materialCount == 1 ? 0 : n - 1;
+			shapeDef.material = def->materials[materialIndex];
+
+			b2Shape* shape = b2CreateShapeInternal( world, body, transform, &shapeDef, &chainSegment, b2_chainSegmentShape );
+			chainShape->shapeIndices[n - 1] = shape->id;
+		}
+	}
+	else
+	{
+		chainShape->count = n - 3; // the first and last points act only as ghosts
+		chainShape->shapeIndices = b2Alloc( chainShape->count * sizeof( int ) );
+
+		b2ChainSegment chainSegment;
+
+		for ( int i = 0; i < n - 3; ++i )
+		{
+			chainSegment.ghost1 = points[i];
+			chainSegment.segment.point1 = points[i + 1];
+			chainSegment.segment.point2 = points[i + 2];
+			chainSegment.ghost2 = points[i + 3];
+			chainSegment.chainId = chainId;
+
+			// Material is associated with leading point of solid segment
+			int materialIndex = materialCount == 1 ? 0 : i + 1;
+			shapeDef.material = def->materials[materialIndex];
+
+			b2Shape* shape = b2CreateShapeInternal( world, body, transform, &shapeDef, &chainSegment, b2_chainSegmentShape );
+			chainShape->shapeIndices[i] = shape->id;
+		}
+	}
+
+	b2ChainId id = { chainId + 1, world->worldId, chainShape->generation };
+	return id;
+}
+
+// Frees the chain's shape index and material arrays and resets both pointers to NULL.
+void b2FreeChainData( b2ChainShape* chain )
+{
+	b2Free( chain->shapeIndices, chain->count * sizeof( int ) );
+	b2Free( chain->materials, chain->materialCount * sizeof( b2SurfaceMaterial ) );
+	chain->shapeIndices = NULL;
+	chain->materials = NULL;
+}
+
+void b2DestroyChain( b2ChainId chainId )
+{ // Destroys a chain: unlinks it from its body, destroys every segment shape, frees its arrays and recycles its id
+	b2World* world = b2GetWorldLocked( chainId.world0 );
+	if ( world == NULL )
+	{
+		return;
+	}
+
+	b2ChainShape* chain = b2GetChainShape( world, chainId );
+
+	b2Body* body = b2BodyArray_Get( &world->bodies, chain->bodyId );
+
+	// Remove the chain from the body's singly linked list.
+	int* chainIdPtr = &body->headChainId; // address of the link currently being examined
+	bool found = false;
+	while ( *chainIdPtr != B2_NULL_INDEX )
+	{
+		if ( *chainIdPtr == chain->id )
+		{
+			*chainIdPtr = chain->nextChainId; // splice the chain out by rewriting the link that pointed to it
+			found = true;
+			break;
+		}
+
+		chainIdPtr = &( world->chainShapes.data[*chainIdPtr].nextChainId );
+	}
+
+	B2_ASSERT( found == true );
+	if ( found == false )
+	{
+		return;
+	}
+	// Destroy every segment shape recorded in the chain's index array
+	int count = chain->count;
+	for ( int i = 0; i < count; ++i )
+	{
+		int shapeId = chain->shapeIndices[i];
+		b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+		bool wakeBodies = true;
+		b2DestroyShapeInternal( world, shape, body, wakeBodies );
+	}
+
+	b2FreeChainData( chain );
+
+	// Return chain to free list.
+	b2FreeId( &world->chainIdPool, chain->id );
+	chain->id = B2_NULL_INDEX; // marks the slot free; b2CreateChain asserts this when the id is reused
+
+	b2ValidateSolverSets( world );
+}
+
+b2WorldId b2Chain_GetWorld( b2ChainId chainId )
+{
+	// The public world id stores world0 + 1 together with the world's current generation
+	return ( b2WorldId ){ chainId.world0 + 1, b2GetWorld( chainId.world0 )->generation };
+}
+
+int b2Chain_GetSegmentCount( b2ChainId chainId )
+{
+	// Reports zero when b2GetWorldLocked does not hand back a world
+	b2World* world = b2GetWorldLocked( chainId.world0 );
+	if ( world != NULL )
+	{
+		return b2GetChainShape( world, chainId )->count;
+	}
+
+	return 0;
+}
+
+int b2Chain_GetSegments( b2ChainId chainId, b2ShapeId* segmentArray, int capacity )
+{
+	// Fills segmentArray with the ids of the chain's segment shapes, limited by capacity, and returns
+	// the smaller of the segment count and capacity (zero when b2GetWorldLocked returns NULL).
+	b2World* world = b2GetWorldLocked( chainId.world0 );
+	if ( world == NULL )
+	{
+		return 0;
+	}
+
+	b2ChainShape* chain = b2GetChainShape( world, chainId );
+	const int written = b2MinInt( chain->count, capacity );
+	for ( int k = 0; k < written; ++k )
+	{
+		const int index = chain->shapeIndices[k];
+		segmentArray[k] = ( b2ShapeId ){ index + 1, chainId.world0, b2ShapeArray_Get( &world->shapes, index )->generation };
+	}
+
+	return written;
+}
+
+b2AABB b2ComputeShapeAABB( const b2Shape* shape, b2Transform xf )
+{
+	// Bounds of the shape's geometry under xf; chain segments use their embedded segment
+	switch ( shape->type )
+	{
+		case b2_circleShape:
+			return b2ComputeCircleAABB( &shape->circle, xf );
+		case b2_capsuleShape:
+			return b2ComputeCapsuleAABB( &shape->capsule, xf );
+		case b2_polygonShape:
+			return b2ComputePolygonAABB( &shape->polygon, xf );
+		case b2_segmentShape:
+			return b2ComputeSegmentAABB( &shape->segment, xf );
+		case b2_chainSegmentShape:
+			return b2ComputeSegmentAABB( &shape->chainSegment.segment, xf );
+		default:
+			break;
+	}
+	// Unknown type: assert, then fall back to a degenerate box at the transform position
+	B2_ASSERT( false );
+	return ( b2AABB ){ xf.p, xf.p };
+}
+
+b2Vec2 b2GetShapeCentroid( const b2Shape* shape )
+{
+	switch ( shape->type ) // center of the stored geometry; unrecognized types yield the zero vector
+	{
+		case b2_circleShape:
+			return shape->circle.center;
+		case b2_polygonShape:
+			return shape->polygon.centroid;
+		case b2_capsuleShape:
+			return b2Lerp( shape->capsule.center1, shape->capsule.center2, 0.5f );
+		case b2_segmentShape:
+			return b2Lerp( shape->segment.point1, shape->segment.point2, 0.5f );
+		case b2_chainSegmentShape:
+			return b2Lerp( shape->chainSegment.segment.point1, shape->chainSegment.segment.point2, 0.5f );
+		default:
+			return b2Vec2_zero;
+	}
+}
+
+// todo_erin maybe compute this on shape creation
+float b2GetShapePerimeter( const b2Shape* shape )
+{
+	switch ( shape->type )
+	{
+		case b2_circleShape:
+			return 2.0f * B2_PI * shape->circle.radius;
+		case b2_capsuleShape:
+			return 2.0f * b2Length( b2Sub( shape->capsule.center1, shape->capsule.center2 ) ) +
+				   2.0f * B2_PI * shape->capsule.radius;
+		case b2_polygonShape:
+		{
+			// Sum of the edge lengths plus the circumference added by the polygon radius
+			const b2Polygon* poly = &shape->polygon;
+			int count = poly->count;
+			B2_ASSERT( count > 0 );
+			float perimeter = 2.0f * B2_PI * poly->radius;
+			for ( int i = 0, j = count - 1; i < count; j = i, ++i )
+			{
+				// j trails i by one vertex and starts at the last vertex so the loop is closed
+				perimeter += b2Length( b2Sub( poly->vertices[i], poly->vertices[j] ) );
+			}
+
+			return perimeter;
+		}
+		// Segments report twice their length
+		case b2_segmentShape:
+			return 2.0f * b2Length( b2Sub( shape->segment.point1, shape->segment.point2 ) );
+		case b2_chainSegmentShape:
+			return 2.0f * b2Length( b2Sub( shape->chainSegment.segment.point1, shape->chainSegment.segment.point2 ) );
+		default:
+			return 0.0f;
+	}
+}
+
+// This projects the shape perimeter onto an infinite line
+float b2GetShapeProjectedPerimeter( const b2Shape* shape, b2Vec2 line )
+{
+	switch ( shape->type )
+	{
+		case b2_circleShape:
+			return 2.0f * shape->circle.radius;
+
+		case b2_capsuleShape:
+		{
+			// Projected center-to-center vector plus one radius at each end
+			b2Vec2 core = b2Sub( shape->capsule.center2, shape->capsule.center1 );
+			return b2AbsFloat( b2Dot( core, line ) ) + 2.0f * shape->capsule.radius;
+		}
+
+		case b2_polygonShape:
+		{
+			// Width of the interval covered by the projected vertices, widened by the polygon radius
+			const b2Polygon* poly = &shape->polygon;
+			int count = poly->count;
+			B2_ASSERT( count > 0 );
+			float lower = b2Dot( poly->vertices[0], line );
+			float upper = lower;
+			for ( int i = 1; i < count; ++i )
+			{
+				float projection = b2Dot( poly->vertices[i], line );
+				lower = b2MinFloat( lower, projection );
+				upper = b2MaxFloat( upper, projection );
+			}
+
+			return ( upper - lower ) + 2.0f * poly->radius;
+		}
+
+		case b2_segmentShape:
+		{
+			// No radius term: only the distance between the projected end points
+			const b2Segment* edge = &shape->segment;
+			return b2AbsFloat( b2Dot( edge->point2, line ) - b2Dot( edge->point1, line ) );
+		}
+
+		case b2_chainSegmentShape:
+		{
+			// Same as a plain segment, using the embedded segment
+			const b2Segment* edge = &shape->chainSegment.segment;
+			return b2AbsFloat( b2Dot( edge->point2, line ) - b2Dot( edge->point1, line ) );
+		}
+
+		default:
+			return 0.0f;
+	}
+}
+
+b2MassData b2ComputeShapeMass( const b2Shape* shape )
+{
+	switch ( shape->type ) // types without a mass routine here (e.g. segments) report zeroed mass data
+	{
+		case b2_circleShape:
+			return b2ComputeCircleMass( &shape->circle, shape->density );
+		case b2_polygonShape:
+			return b2ComputePolygonMass( &shape->polygon, shape->density );
+		case b2_capsuleShape:
+			return b2ComputeCapsuleMass( &shape->capsule, shape->density );
+		default:
+			return ( b2MassData ){ 0 };
+	}
+}
+
+b2ShapeExtent b2ComputeShapeExtent( const b2Shape* shape, b2Vec2 localCenter )
+{
+	b2ShapeExtent result = { 0 }; // stays zeroed for unrecognized shape types
+
+	switch ( shape->type )
+	{
+		case b2_circleShape:
+		{
+			const b2Circle* circle = &shape->circle;
+			result.minExtent = circle->radius;
+			result.maxExtent = b2Length( b2Sub( circle->center, localCenter ) ) + circle->radius;
+		}
+		break;
+
+		case b2_capsuleShape:
+		{
+			const b2Capsule* capsule = &shape->capsule;
+			float d1 = b2LengthSquared( b2Sub( capsule->center1, localCenter ) );
+			float d2 = b2LengthSquared( b2Sub( capsule->center2, localCenter ) );
+			result.minExtent = capsule->radius;
+			result.maxExtent = sqrtf( b2MaxFloat( d1, d2 ) ) + capsule->radius;
+		}
+		break;
+
+		case b2_polygonShape:
+		{
+			const b2Polygon* poly = &shape->polygon;
+			float smallestOffset = B2_HUGE;
+			float largestSqr = 0.0f;
+			for ( int i = 0; i < poly->count; ++i )
+			{
+				b2Vec2 vertex = poly->vertices[i];
+				// Offset of this vertex from the centroid along normals[i]
+				float offset = b2Dot( poly->normals[i], b2Sub( vertex, poly->centroid ) );
+				smallestOffset = b2MinFloat( smallestOffset, offset );
+
+				// Farthest vertex from localCenter, tracked as a squared distance
+				largestSqr = b2MaxFloat( largestSqr, b2LengthSquared( b2Sub( vertex, localCenter ) ) );
+			}
+
+			result.minExtent = smallestOffset + poly->radius;
+			result.maxExtent = sqrtf( largestSqr ) + poly->radius;
+		}
+		break;
+
+		case b2_segmentShape:
+		{
+			float d1 = b2LengthSquared( b2Sub( shape->segment.point1, localCenter ) );
+			float d2 = b2LengthSquared( b2Sub( shape->segment.point2, localCenter ) );
+			result.minExtent = 0.0f;
+			result.maxExtent = sqrtf( b2MaxFloat( d1, d2 ) );
+		}
+		break;
+
+		case b2_chainSegmentShape:
+		{
+			float d1 = b2LengthSquared( b2Sub( shape->chainSegment.segment.point1, localCenter ) );
+			float d2 = b2LengthSquared( b2Sub( shape->chainSegment.segment.point2, localCenter ) );
+			result.minExtent = 0.0f;
+			result.maxExtent = sqrtf( b2MaxFloat( d1, d2 ) );
+		}
+		break;
+
+		default:
+			break;
+	}
+
+	return result;
+}
+
+b2CastOutput b2RayCastShape( const b2RayCastInput* input, const b2Shape* shape, b2Transform transform )
+{
+	// Express the ray in the shape's local frame; every other input field is copied unchanged
+	b2RayCastInput local = *input;
+	local.translation = b2InvRotateVector( transform.q, input->translation );
+	local.origin = b2InvTransformPoint( transform, input->origin );
+	b2CastOutput result = { 0 };
+	switch ( shape->type )
+	{
+		case b2_circleShape:
+			result = b2RayCastCircle( &local, &shape->circle );
+			break;
+		case b2_capsuleShape:
+			result = b2RayCastCapsule( &local, &shape->capsule );
+			break;
+		case b2_polygonShape:
+			result = b2RayCastPolygon( &local, &shape->polygon );
+			break;
+		case b2_segmentShape:
+			result = b2RayCastSegment( &local, &shape->segment, false );
+			break;
+		case b2_chainSegmentShape:
+			result = b2RayCastSegment( &local, &shape->chainSegment.segment, true );
+			break;
+		default:
+			return result;
+	}
+	// Point and normal go back to the caller's frame (applied whether or not the ray hit)
+	result.normal = b2RotateVector( transform.q, result.normal );
+	result.point = b2TransformPoint( transform, result.point );
+	return result;
+}
+
+b2CastOutput b2ShapeCastShape( const b2ShapeCastInput* input, const b2Shape* shape, b2Transform transform )
+{
+	// Express the cast proxy and its translation in the shape's local frame
+	b2ShapeCastInput local = *input;
+	local.translation = b2InvRotateVector( transform.q, input->translation );
+	for ( int i = 0; i < local.proxy.count; ++i )
+	{
+		local.proxy.points[i] = b2InvTransformPoint( transform, input->proxy.points[i] );
+	}
+
+	// Dispatch on the target shape type; unrecognized types return the zeroed output
+	b2CastOutput result = { 0 };
+	switch ( shape->type )
+	{
+		case b2_circleShape:
+			result = b2ShapeCastCircle( &local, &shape->circle );
+			break;
+		case b2_capsuleShape:
+			result = b2ShapeCastCapsule( &local, &shape->capsule );
+			break;
+		case b2_polygonShape:
+			result = b2ShapeCastPolygon( &local, &shape->polygon );
+			break;
+		case b2_segmentShape:
+			result = b2ShapeCastSegment( &local, &shape->segment );
+			break;
+		case b2_chainSegmentShape:
+			result = b2ShapeCastSegment( &local, &shape->chainSegment.segment );
+			break;
+		default:
+			return result;
+	}
+	// Point and normal go back to the caller's frame
+	result.normal = b2RotateVector( transform.q, result.normal );
+	result.point = b2TransformPoint( transform, result.point );
+	return result;
+}
+
+b2PlaneResult b2CollideMover( const b2Shape* shape, b2Transform transform, const b2Capsule* mover )
+{
+	// The mover capsule is tested in the shape's local frame
+	b2Capsule local = {
+		.center1 = b2InvTransformPoint( transform, mover->center1 ),
+		.center2 = b2InvTransformPoint( transform, mover->center2 ),
+		.radius = mover->radius,
+	};
+	b2PlaneResult result = { 0 };
+	switch ( shape->type )
+	{
+		case b2_circleShape:
+			result = b2CollideMoverAndCircle( &shape->circle, &local );
+			break;
+		case b2_capsuleShape:
+			result = b2CollideMoverAndCapsule( &shape->capsule, &local );
+			break;
+		case b2_polygonShape:
+			result = b2CollideMoverAndPolygon( &shape->polygon, &local );
+			break;
+		case b2_segmentShape:
+			result = b2CollideMoverAndSegment( &shape->segment, &local );
+			break;
+		case b2_chainSegmentShape:
+			result = b2CollideMoverAndSegment( &shape->chainSegment.segment, &local );
+			break;
+		default:
+			return result;
+	}
+
+	// Only a hit carries a plane; its normal is rotated back out of the local frame
+	if ( result.hit )
+	{
+		result.plane.normal = b2RotateVector( transform.q, result.plane.normal );
+	}
+	return result;
+}
+
+void b2CreateShapeProxy( b2Shape* shape, b2BroadPhase* bp, b2BodyType type, b2Transform transform, bool forcePairCreation )
+{ // Computes the shape's AABBs for the given transform and registers its fat AABB with the broad-phase
+	B2_ASSERT( shape->proxyKey == B2_NULL_INDEX ); // the shape must not already have a proxy
+
+	b2UpdateShapeAABBs( shape, transform, type );
+
+	// Create proxies in the broad-phase.
+	shape->proxyKey =
+		b2BroadPhase_CreateProxy( bp, type, shape->fatAABB, shape->filter.categoryBits, shape->id, forcePairCreation );
+	B2_ASSERT( B2_PROXY_TYPE( shape->proxyKey ) < b2_bodyTypeCount );
+}
+
+void b2DestroyShapeProxy( b2Shape* shape, b2BroadPhase* bp )
+{ // Removes the shape's broad-phase proxy if it has one
+	if ( shape->proxyKey != B2_NULL_INDEX )
+	{
+		b2BroadPhase_DestroyProxy( bp, shape->proxyKey );
+		shape->proxyKey = B2_NULL_INDEX; // reset so a repeated call does nothing
+	}
+}
+
+b2ShapeProxy b2MakeShapeDistanceProxy( const b2Shape* shape )
+{
+	// NOTE(review): two-point proxies pass the address of the first point with count 2 - presumably the second point follows it
+	switch ( shape->type )
+	{
+		case b2_circleShape:
+			return b2MakeProxy( &shape->circle.center, 1, shape->circle.radius );
+		case b2_capsuleShape:
+			return b2MakeProxy( &shape->capsule.center1, 2, shape->capsule.radius );
+		case b2_polygonShape:
+			return b2MakeProxy( shape->polygon.vertices, shape->polygon.count, shape->polygon.radius );
+		case b2_segmentShape:
+			return b2MakeProxy( &shape->segment.point1, 2, 0.0f );
+		case b2_chainSegmentShape:
+			return b2MakeProxy( &shape->chainSegment.segment.point1, 2, 0.0f );
+		default:
+			break;
+	}
+	// Unknown type: assert, then return a zeroed proxy
+	B2_ASSERT( false );
+	return ( b2ShapeProxy ){ 0 };
+}
+
+b2BodyId b2Shape_GetBody( b2ShapeId shapeId )
+{
+	// Resolve the shape, then build a public body id from the body id stored on it
+	b2World* world = b2GetWorld( shapeId.world0 );
+	return b2MakeBodyId( world, b2GetShape( world, shapeId )->bodyId );
+}
+
+b2WorldId b2Shape_GetWorld( b2ShapeId shapeId )
+{
+	// The public world id stores world0 + 1 together with the world's current generation
+	return ( b2WorldId ){ shapeId.world0 + 1, b2GetWorld( shapeId.world0 )->generation };
+}
+
+void b2Shape_SetUserData( b2ShapeId shapeId, void* userData )
+{
+	// The pointer is stored on the shape as-is
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2GetShape( world, shapeId )->userData = userData;
+}
+
+void* b2Shape_GetUserData( b2ShapeId shapeId )
+{
+	// Returns the pointer stored on the shape
+	const b2Shape* target = b2GetShape( b2GetWorld( shapeId.world0 ), shapeId );
+	return target->userData;
+}
+
+bool b2Shape_IsSensor( b2ShapeId shapeId )
+{
+	// Only sensor shapes are given a sensor index at creation
+	const b2Shape* target = b2GetShape( b2GetWorld( shapeId.world0 ), shapeId );
+	return target->sensorIndex != B2_NULL_INDEX;
+}
+
+bool b2Shape_TestPoint( b2ShapeId shapeId, b2Vec2 point )
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* target = b2GetShape( world, shapeId );
+
+	// The containment tests work on the point expressed in the body's local frame
+	b2Vec2 local = b2InvTransformPoint( b2GetBodyTransform( world, target->bodyId ), point );
+
+	switch ( target->type )
+	{
+		case b2_circleShape:
+			return b2PointInCircle( local, &target->circle );
+
+		case b2_polygonShape:
+			return b2PointInPolygon( local, &target->polygon );
+
+		case b2_capsuleShape:
+			return b2PointInCapsule( local, &target->capsule );
+		// Every other shape type (e.g. segments) reports false
+		default:
+			return false;
+	}
+}
+
+// todo_erin untested
+b2CastOutput b2Shape_RayCast( b2ShapeId shapeId, const b2RayCastInput* input )
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* target = b2GetShape( world, shapeId );
+	b2Transform transform = b2GetBodyTransform( world, target->bodyId );
+	// input in local coordinates
+	b2RayCastInput local = {
+		.origin = b2InvTransformPoint( transform, input->origin ),
+		.translation = b2InvRotateVector( transform.q, input->translation ),
+		.maxFraction = input->maxFraction,
+	};
+
+	b2CastOutput result = { 0 };
+	switch ( target->type )
+	{
+		case b2_circleShape:
+			result = b2RayCastCircle( &local, &target->circle );
+			break;
+
+		case b2_capsuleShape:
+			result = b2RayCastCapsule( &local, &target->capsule );
+			break;
+
+		case b2_polygonShape:
+			result = b2RayCastPolygon( &local, &target->polygon );
+			break;
+
+		case b2_segmentShape:
+			result = b2RayCastSegment( &local, &target->segment, false );
+			break;
+
+		case b2_chainSegmentShape:
+			result = b2RayCastSegment( &local, &target->chainSegment.segment, true );
+			break;
+
+		default:
+			B2_ASSERT( false );
+			return result;
+	}
+
+	// A miss is returned untouched
+	if ( result.hit == false )
+	{
+		return result;
+	}
+	// convert to world coordinates
+	result.point = b2TransformPoint( transform, result.point );
+	result.normal = b2RotateVector( transform.q, result.normal );
+	return result;
+}
+
+// Sets the shape density. The body's mass data is recomputed only when updateBodyMass is true
+// and the density actually changed.
+void b2Shape_SetDensity( b2ShapeId shapeId, float density, bool updateBodyMass )
+{
+	B2_ASSERT( b2IsValidFloat( density ) && density >= 0.0f );
+
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return;
+	}
+
+	b2Shape* target = b2GetShape( world, shapeId );
+	if ( density != target->density )
+	{
+		target->density = density;
+
+		// an unchanged density never reaches this expensive function
+		if ( updateBodyMass )
+		{
+			b2Body* owner = b2BodyArray_Get( &world->bodies, target->bodyId );
+			b2UpdateBodyMassData( world, owner );
+		}
+	}
+}
+
+float b2Shape_GetDensity( b2ShapeId shapeId )
+{
+	// Returns the density stored on the shape
+	const b2Shape* target = b2GetShape( b2GetWorld( shapeId.world0 ), shapeId );
+	return target->density;
+}
+
+void b2Shape_SetFriction( b2ShapeId shapeId, float friction )
+{
+	B2_ASSERT( b2IsValidFloat( friction ) && friction >= 0.0f );
+
+	// Writes are refused while the world is locked: assert first, then skip the write
+	b2World* world = b2GetWorld( shapeId.world0 );
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked == false )
+	{
+		b2Shape* target = b2GetShape( world, shapeId );
+		target->friction = friction;
+	}
+}
+
+float b2Shape_GetFriction( b2ShapeId shapeId ) // Return the friction value currently stored on the shape.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->friction;
+}
+
+void b2Shape_SetRestitution( b2ShapeId shapeId, float restitution ) // Overwrite the shape's restitution value (must be a valid float >= 0).
+{
+	B2_ASSERT( b2IsValidFloat( restitution ) && restitution >= 0.0f );
+
+	b2World* world = b2GetWorld( shapeId.world0 );
+	B2_ASSERT( world->locked == false );
+	if ( world->locked )
+	{
+		return; // locked world: leave the shape untouched (reached only if the assert above did not stop execution)
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->restitution = restitution;
+}
+
+float b2Shape_GetRestitution( b2ShapeId shapeId ) // Return the restitution value currently stored on the shape.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->restitution;
+}
+
+void b2Shape_SetMaterial( b2ShapeId shapeId, int material ) // Store a user material id on the shape; the value is not validated here.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	B2_ASSERT( world->locked == false );
+	if ( world->locked )
+	{
+		return; // locked world: leave the shape untouched (reached only if the assert above did not stop execution)
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->userMaterialId = material;
+}
+
+int b2Shape_GetMaterial( b2ShapeId shapeId ) // Return the user material id stored on the shape.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->userMaterialId;
+}
+
+b2Filter b2Shape_GetFilter( b2ShapeId shapeId ) // Return a copy of the shape's collision filter.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->filter;
+}
+
+static void b2ResetProxy( b2World* world, b2Shape* shape, bool wakeBodies, bool destroyProxy ) // Drop the shape's contacts, refresh its AABBs, then move or recreate its broad-phase proxy.
+{
+	b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId );
+
+	int shapeId = shape->id;
+
+	// destroy all contacts associated with this shape (walks the body's contact list, which may hold other shapes' contacts)
+	int contactKey = body->headContactKey;
+	while ( contactKey != B2_NULL_INDEX )
+	{
+		int contactId = contactKey >> 1; // key layout: contact id in the upper bits
+		int edgeIndex = contactKey & 1;	 // ... and the edge index (0 or 1) in the low bit
+
+		b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+		contactKey = contact->edges[edgeIndex].nextKey; // read the next key before the contact may be destroyed below
+
+		if ( contact->shapeIdA == shapeId || contact->shapeIdB == shapeId )
+		{
+			b2DestroyContact( world, contact, wakeBodies );
+		}
+	}
+
+	b2Transform transform = b2GetBodyTransformQuick( world, body );
+	if ( shape->proxyKey != B2_NULL_INDEX )
+	{
+		b2BodyType proxyType = B2_PROXY_TYPE( shape->proxyKey ); // reuse the type encoded in the existing proxy key
+		b2UpdateShapeAABBs( shape, transform, proxyType );
+
+		if ( destroyProxy )
+		{
+			b2BroadPhase_DestroyProxy( &world->broadPhase, shape->proxyKey );
+
+			bool forcePairCreation = true;
+			shape->proxyKey = b2BroadPhase_CreateProxy( &world->broadPhase, proxyType, shape->fatAABB, shape->filter.categoryBits,
+														shapeId, forcePairCreation );
+		}
+		else
+		{
+			b2BroadPhase_MoveProxy( &world->broadPhase, shape->proxyKey, shape->fatAABB ); // keep the proxy, just update its bounds
+		}
+	}
+	else
+	{
+		b2BodyType proxyType = body->type; // shape has no proxy: only the cached AABBs are refreshed
+		b2UpdateShapeAABBs( shape, transform, proxyType );
+	}
+
+	b2ValidateSolverSets( world );
+}
+
+void b2Shape_SetFilter( b2ShapeId shapeId, b2Filter filter ) // Replace the shape's collision filter and reset its contacts/proxy; no-op if the filter is unchanged.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	if ( filter.maskBits == shape->filter.maskBits && filter.categoryBits == shape->filter.categoryBits &&
+		 filter.groupIndex == shape->filter.groupIndex )
+	{
+		return; // all three compared fields match: nothing to do
+	}
+
+	// If the category bits change, I need to destroy the proxy because it affects the tree sorting.
+	bool destroyProxy = filter.categoryBits != shape->filter.categoryBits; // must be computed before the assignment below
+
+	shape->filter = filter;
+
+	// need to wake bodies because a filter change may destroy contacts
+	bool wakeBodies = true;
+	b2ResetProxy( world, shape, wakeBodies, destroyProxy );
+
+	// note: this does not immediately update sensor overlaps. Instead sensor
+	// overlaps are updated the next time step
+}
+
+void b2Shape_EnableSensorEvents( b2ShapeId shapeId, bool flag ) // Set the shape's enableSensorEvents flag.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->enableSensorEvents = flag;
+}
+
+bool b2Shape_AreSensorEventsEnabled( b2ShapeId shapeId ) // Return the shape's enableSensorEvents flag.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->enableSensorEvents;
+}
+
+void b2Shape_EnableContactEvents( b2ShapeId shapeId, bool flag ) // Set the shape's enableContactEvents flag.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->enableContactEvents = flag;
+}
+
+bool b2Shape_AreContactEventsEnabled( b2ShapeId shapeId ) // Return the shape's enableContactEvents flag.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->enableContactEvents;
+}
+
+void b2Shape_EnablePreSolveEvents( b2ShapeId shapeId, bool flag ) // Set the shape's enablePreSolveEvents flag.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->enablePreSolveEvents = flag;
+}
+
+bool b2Shape_ArePreSolveEventsEnabled( b2ShapeId shapeId ) // Return the shape's enablePreSolveEvents flag.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->enablePreSolveEvents;
+}
+
+void b2Shape_EnableHitEvents( b2ShapeId shapeId, bool flag ) // Set the shape's enableHitEvents flag.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->enableHitEvents = flag;
+}
+
+bool b2Shape_AreHitEventsEnabled( b2ShapeId shapeId ) // Return the shape's enableHitEvents flag.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->enableHitEvents;
+}
+
+b2ShapeType b2Shape_GetType( b2ShapeId shapeId ) // Return the shape's type tag (selects which geometry member is active).
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->type;
+}
+
+b2Circle b2Shape_GetCircle( b2ShapeId shapeId ) // Return a copy of the circle geometry; asserts that the shape is a circle.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	B2_ASSERT( shape->type == b2_circleShape );
+	return shape->circle;
+}
+
+b2Segment b2Shape_GetSegment( b2ShapeId shapeId ) // Return a copy of the segment geometry; asserts that the shape is a segment.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	B2_ASSERT( shape->type == b2_segmentShape );
+	return shape->segment;
+}
+
+b2ChainSegment b2Shape_GetChainSegment( b2ShapeId shapeId ) // Return a copy of the chain-segment geometry; asserts that the shape is a chain segment.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	B2_ASSERT( shape->type == b2_chainSegmentShape );
+	return shape->chainSegment;
+}
+
+b2Capsule b2Shape_GetCapsule( b2ShapeId shapeId ) // Return a copy of the capsule geometry; asserts that the shape is a capsule.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	B2_ASSERT( shape->type == b2_capsuleShape );
+	return shape->capsule;
+}
+
+b2Polygon b2Shape_GetPolygon( b2ShapeId shapeId ) // Return a copy of the polygon geometry; asserts that the shape is a polygon.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	B2_ASSERT( shape->type == b2_polygonShape );
+	return shape->polygon;
+}
+
+void b2Shape_SetCircle( b2ShapeId shapeId, const b2Circle* circle ) // Replace the shape's geometry with *circle and switch its type to circle.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->circle = *circle; // circle is dereferenced unconditionally: it must not be NULL
+	shape->type = b2_circleShape;
+
+	// need to wake bodies so they can react to the shape change
+	bool wakeBodies = true;
+	bool destroyProxy = true; // always recreate the broad-phase proxy; body mass data is not recomputed here
+	b2ResetProxy( world, shape, wakeBodies, destroyProxy );
+}
+
+void b2Shape_SetCapsule( b2ShapeId shapeId, const b2Capsule* capsule ) // Replace the shape's geometry with *capsule and switch its type to capsule.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->capsule = *capsule; // capsule is dereferenced unconditionally: it must not be NULL
+	shape->type = b2_capsuleShape;
+
+	// need to wake bodies so they can react to the shape change
+	bool wakeBodies = true;
+	bool destroyProxy = true; // always recreate the broad-phase proxy; body mass data is not recomputed here
+	b2ResetProxy( world, shape, wakeBodies, destroyProxy );
+}
+
+void b2Shape_SetSegment( b2ShapeId shapeId, const b2Segment* segment ) // Replace the shape's geometry with *segment and switch its type to segment.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->segment = *segment; // segment is dereferenced unconditionally: it must not be NULL
+	shape->type = b2_segmentShape;
+
+	// need to wake bodies so they can react to the shape change
+	bool wakeBodies = true;
+	bool destroyProxy = true; // always recreate the broad-phase proxy; body mass data is not recomputed here
+	b2ResetProxy( world, shape, wakeBodies, destroyProxy );
+}
+
+void b2Shape_SetPolygon( b2ShapeId shapeId, const b2Polygon* polygon ) // Replace the shape's geometry with *polygon and switch its type to polygon.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	shape->polygon = *polygon; // polygon is dereferenced unconditionally: it must not be NULL
+	shape->type = b2_polygonShape;
+
+	// need to wake bodies so they can react to the shape change
+	bool wakeBodies = true;
+	bool destroyProxy = true; // always recreate the broad-phase proxy; body mass data is not recomputed here
+	b2ResetProxy( world, shape, wakeBodies, destroyProxy );
+}
+
+b2ChainId b2Shape_GetParentChain( b2ShapeId shapeId ) // Return the id of the chain owning this chain-segment shape, or a zeroed b2ChainId otherwise.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	b2Shape* shape = b2GetShape( world, shapeId );
+	if ( shape->type == b2_chainSegmentShape )
+	{
+		int chainId = shape->chainSegment.chainId;
+		if ( chainId != B2_NULL_INDEX )
+		{
+			b2ChainShape* chain = b2ChainShapeArray_Get( &world->chainShapes, chainId );
+			b2ChainId id = { chainId + 1, shapeId.world0, chain->generation }; // public id stores the array index plus one
+			return id;
+		}
+	}
+
+	return ( b2ChainId ){ 0 }; // not a chain segment, or the segment has no chain
+}
+
+void b2Chain_SetFriction( b2ChainId chainId, float friction ) // Apply one friction value to every material and every segment shape of the chain.
+{
+	B2_ASSERT( b2IsValidFloat( friction ) && friction >= 0.0f );
+
+	b2World* world = b2GetWorldLocked( chainId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2ChainShape* chainShape = b2GetChainShape( world, chainId );
+
+	int materialCount = chainShape->materialCount;
+	for ( int i = 0; i < materialCount; ++i )
+	{
+		chainShape->materials[i].friction = friction; // keep the chain's own material table in sync
+	}
+
+	int count = chainShape->count;
+
+	for ( int i = 0; i < count; ++i )
+	{
+		int shapeId = chainShape->shapeIndices[i];
+		b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+		shape->friction = friction; // and push the value onto each segment shape
+	}
+}
+
+float b2Chain_GetFriction( b2ChainId chainId ) // Return the friction of the chain's first material only (materials[0]).
+{
+	b2World* world = b2GetWorld( chainId.world0 );
+	b2ChainShape* chainShape = b2GetChainShape( world, chainId );
+	return chainShape->materials[0].friction; // NOTE(review): assumes materialCount >= 1 — confirm at chain creation
+}
+
+void b2Chain_SetRestitution( b2ChainId chainId, float restitution ) // Apply one restitution value to every material and every segment shape of the chain.
+{
+	B2_ASSERT( b2IsValidFloat( restitution ) && restitution >= 0.0f ); // same non-negative range check as b2Shape_SetRestitution
+
+	b2World* world = b2GetWorldLocked( chainId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2ChainShape* chainShape = b2GetChainShape( world, chainId );
+
+	int materialCount = chainShape->materialCount;
+	for ( int i = 0; i < materialCount; ++i )
+	{
+		chainShape->materials[i].restitution = restitution; // keep the chain's own material table in sync
+	}
+
+	int count = chainShape->count;
+
+	for ( int i = 0; i < count; ++i )
+	{
+		int shapeId = chainShape->shapeIndices[i];
+		b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+		shape->restitution = restitution; // and push the value onto each segment shape
+	}
+}
+
+float b2Chain_GetRestitution( b2ChainId chainId ) // Return the restitution of the chain's first material only (materials[0]).
+{
+	b2World* world = b2GetWorld( chainId.world0 );
+	b2ChainShape* chainShape = b2GetChainShape( world, chainId );
+	return chainShape->materials[0].restitution; // NOTE(review): assumes materialCount >= 1 — confirm at chain creation
+}
+
+void b2Chain_SetMaterial( b2ChainId chainId, int material ) // Apply one user material id to every material and every segment shape of the chain.
+{
+	b2World* world = b2GetWorldLocked( chainId.world0 );
+	if ( world == NULL )
+	{
+		return; // b2GetWorldLocked gave no world: do nothing
+	}
+
+	b2ChainShape* chainShape = b2GetChainShape( world, chainId );
+	int materialCount = chainShape->materialCount;
+	for ( int i = 0; i < materialCount; ++i )
+	{
+		chainShape->materials[i].userMaterialId = material; // keep the chain's own material table in sync
+	}
+
+	int count = chainShape->count;
+
+	for ( int i = 0; i < count; ++i )
+	{
+		int shapeId = chainShape->shapeIndices[i];
+		b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+		shape->userMaterialId = material; // and push the value onto each segment shape
+	}
+}
+
+int b2Chain_GetMaterial( b2ChainId chainId ) // Return the user material id of the chain's first material only (materials[0]).
+{
+	b2World* world = b2GetWorld( chainId.world0 );
+	b2ChainShape* chainShape = b2GetChainShape( world, chainId );
+	return chainShape->materials[0].userMaterialId; // NOTE(review): assumes materialCount >= 1 — confirm at chain creation
+}
+
+int b2Shape_GetContactCapacity( b2ShapeId shapeId ) // Return an upper bound on the contact count for this shape (the whole body's contact count).
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return 0; // b2GetWorldLocked gave no world
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	if ( shape->sensorIndex != B2_NULL_INDEX )
+	{
+		return 0; // shape is a sensor: reported as having no contacts
+	}
+
+	b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId );
+
+	// Conservative and fast: counts contacts of every shape on the body, not just this one
+	return body->contactCount;
+}
+
+int b2Shape_GetContactData( b2ShapeId shapeId, b2ContactData* contactData, int capacity ) // Copy up to `capacity` touching contacts of this shape into contactData; returns the number written.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return 0; // b2GetWorldLocked gave no world
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	if ( shape->sensorIndex != B2_NULL_INDEX )
+	{
+		return 0; // shape is a sensor: reported as having no contacts
+	}
+
+	b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId );
+	int contactKey = body->headContactKey;
+	int index = 0;
+	while ( contactKey != B2_NULL_INDEX && index < capacity ) // stops early once the output buffer is full
+	{
+		int contactId = contactKey >> 1; // key layout: contact id in the upper bits
+		int edgeIndex = contactKey & 1;	 // ... and the edge index (0 or 1) in the low bit
+
+		b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+
+		// Does contact involve this shape and is it touching? (index1 is one-based, contact shape ids are zero-based)
+		if ( ( contact->shapeIdA == shapeId.index1 - 1 || contact->shapeIdB == shapeId.index1 - 1 ) &&
+			 ( contact->flags & b2_contactTouchingFlag ) != 0 )
+		{
+			b2Shape* shapeA = world->shapes.data + contact->shapeIdA;
+			b2Shape* shapeB = world->shapes.data + contact->shapeIdB;
+
+			contactData[index].shapeIdA = ( b2ShapeId ){ shapeA->id + 1, shapeId.world0, shapeA->generation };
+			contactData[index].shapeIdB = ( b2ShapeId ){ shapeB->id + 1, shapeId.world0, shapeB->generation };
+
+			b2ContactSim* contactSim = b2GetContactSim( world, contact );
+			contactData[index].manifold = contactSim->manifold;
+			index += 1;
+		}
+
+		contactKey = contact->edges[edgeIndex].nextKey;
+	}
+
+	B2_ASSERT( index <= capacity );
+
+	return index;
+}
+
+int b2Shape_GetSensorCapacity( b2ShapeId shapeId ) // Return the number of entries in the sensor's overlaps2 array; 0 for non-sensor shapes.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return 0; // b2GetWorldLocked gave no world
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	if ( shape->sensorIndex == B2_NULL_INDEX )
+	{
+		return 0; // not a sensor
+	}
+
+	b2Sensor* sensor = b2SensorArray_Get( &world->sensors, shape->sensorIndex );
+	return sensor->overlaps2.count;
+}
+
+int b2Shape_GetSensorOverlaps( b2ShapeId shapeId, b2ShapeId* overlaps, int capacity ) // Copy up to `capacity` overlapping shape ids of a sensor into overlaps; returns the number written.
+{
+	b2World* world = b2GetWorldLocked( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return 0; // b2GetWorldLocked gave no world
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	if ( shape->sensorIndex == B2_NULL_INDEX )
+	{
+		return 0; // not a sensor
+	}
+
+	b2Sensor* sensor = b2SensorArray_Get( &world->sensors, shape->sensorIndex );
+
+	int count = b2MinInt( sensor->overlaps2.count, capacity ); // never write past the caller's buffer
+	b2ShapeRef* refs = sensor->overlaps2.data;
+	for ( int i = 0; i < count; ++i )
+	{
+		overlaps[i] = ( b2ShapeId ){
+			.index1 = refs[i].shapeId + 1, // public ids are one-based
+			.generation = refs[i].generation,
+			.world0 = shapeId.world0,
+		};
+	}
+
+	return count;
+}
+
+b2AABB b2Shape_GetAABB( b2ShapeId shapeId ) // Return the shape's cached aabb field (not recomputed here).
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return ( b2AABB ){ 0 }; // no world: zeroed box
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return shape->aabb;
+}
+
+b2MassData b2Shape_GetMassData( b2ShapeId shapeId ) // Return the result of b2ComputeShapeMass for this shape (computed on each call).
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return ( b2MassData ){ 0 }; // no world: zeroed mass data
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	return b2ComputeShapeMass( shape );
+}
+
+b2Vec2 b2Shape_GetClosestPoint( b2ShapeId shapeId, b2Vec2 target ) // Return pointA of a shape-distance query between this shape and the point `target`.
+{
+	b2World* world = b2GetWorld( shapeId.world0 );
+	if ( world == NULL )
+	{
+		return ( b2Vec2 ){ 0 }; // no world: zero vector
+	}
+
+	b2Shape* shape = b2GetShape( world, shapeId );
+	b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId );
+	b2Transform transform = b2GetBodyTransformQuick( world, body );
+
+	b2DistanceInput input;
+	input.proxyA = b2MakeShapeDistanceProxy( shape ); // proxy A: the shape, placed by the body transform
+	input.proxyB = b2MakeProxy( &target, 1, 0.0f );	 // proxy B: a single point with zero radius
+	input.transformA = transform;
+	input.transformB = b2Transform_identity; // target is used as given, without a further transform
+	input.useRadii = true;
+
+	b2SimplexCache cache = { 0 };
+	b2DistanceOutput output = b2ShapeDistance(&input, &cache, NULL, 0 );
+
+	return output.pointA;
+}
diff --git a/src/vendor/box2d/shape.h b/src/vendor/box2d/shape.h
new file mode 100644
index 0000000..9dbb0be
--- /dev/null
+++ b/src/vendor/box2d/shape.h
@@ -0,0 +1,123 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+
+#include "box2d/types.h"
+
+typedef struct b2BroadPhase b2BroadPhase;
+typedef struct b2World b2World;
+
+typedef struct b2Shape // Internal per-shape record stored in world->shapes.
+{
+	int id; // zero-based shape index; public b2ShapeId.index1 is id + 1
+	int bodyId; // index of the owning body in world->bodies
+	int prevShapeId;
+	int nextShapeId; // next shape on the same body (list starts at body->headShapeId, ends at B2_NULL_INDEX)
+	int sensorIndex; // index into world->sensors, or B2_NULL_INDEX when the shape is not a sensor
+	b2ShapeType type; // selects the active member of the geometry union below
+	float density;
+	float friction;
+	float restitution;
+	float rollingResistance;
+	float tangentSpeed;
+	int userMaterialId;
+
+	b2AABB aabb;
+	b2AABB fatAABB; // the box handed to the broad-phase when creating or moving the proxy
+	b2Vec2 localCentroid; // centroid in body-local space (transformed by body transforms in CCD)
+	int proxyKey; // broad-phase proxy key, or B2_NULL_INDEX when the shape has no proxy
+
+	b2Filter filter;
+	void* userData;
+	uint32_t customColor;
+
+	union
+	{
+		b2Capsule capsule;
+		b2Circle circle;
+		b2Polygon polygon;
+		b2Segment segment;
+		b2ChainSegment chainSegment;
+	};
+
+	uint16_t generation; // copied into b2ShapeId.generation when a public id is built
+	bool enableSensorEvents;
+	bool enableContactEvents;
+	bool enableHitEvents;
+	bool enablePreSolveEvents;
+	bool enlargedAABB;
+} b2Shape;
+
+typedef struct b2ChainShape // Internal per-chain record stored in world->chainShapes.
+{
+	int id;
+	int bodyId;
+	int nextChainId;
+	int count; // number of entries in shapeIndices
+	int materialCount; // number of entries in materials
+	int* shapeIndices; // indices into world->shapes of the chain's segment shapes
+	b2SurfaceMaterial* materials; // per-chain material table; the chain getters read entry 0
+	uint16_t generation; // copied into b2ChainId.generation when a public id is built
+} b2ChainShape;
+
+typedef struct b2ShapeExtent // Result of b2ComputeShapeExtent: a minimum and maximum extent of a shape about a local center.
+{
+	float minExtent; // the CCD fallback in solver.c uses 0.25f * minExtent as a proxy radius
+	float maxExtent;
+} b2ShapeExtent;
+
+// Sensors are shapes that live in the broad-phase but never have contacts.
+// At the end of the time step all sensors are queried for overlap with any other shapes.
+// Sensors ignore body type and sleeping.
+// Sensors generate events when there is a new overlap or an overlap disappears.
+// The sensor overlaps don't get cleared until the next time step regardless of the overlapped
+// shapes being destroyed.
+// When a sensor is destroyed. (NOTE(review): this sentence is incomplete; the intended statement is not recoverable here.)
+typedef struct
+{
+	b2IntArray overlaps;
+} b2SensorOverlaps;
+
+void b2CreateShapeProxy( b2Shape* shape, b2BroadPhase* bp, b2BodyType type, b2Transform transform, bool forcePairCreation );
+void b2DestroyShapeProxy( b2Shape* shape, b2BroadPhase* bp );
+
+void b2FreeChainData( b2ChainShape* chain );
+
+b2MassData b2ComputeShapeMass( const b2Shape* shape ); // result is returned directly by b2Shape_GetMassData
+b2ShapeExtent b2ComputeShapeExtent( const b2Shape* shape, b2Vec2 localCenter );
+b2AABB b2ComputeShapeAABB( const b2Shape* shape, b2Transform transform );
+b2Vec2 b2GetShapeCentroid( const b2Shape* shape );
+float b2GetShapePerimeter( const b2Shape* shape );
+float b2GetShapeProjectedPerimeter( const b2Shape* shape, b2Vec2 line );
+
+b2ShapeProxy b2MakeShapeDistanceProxy( const b2Shape* shape ); // proxy form of the shape, used as input to distance and time-of-impact queries
+
+b2CastOutput b2RayCastShape( const b2RayCastInput* input, const b2Shape* shape, b2Transform transform ); // casts in shape-local space; a hit's point and normal are returned in world space
+b2CastOutput b2ShapeCastShape( const b2ShapeCastInput* input, const b2Shape* shape, b2Transform transform );
+
+b2PlaneResult b2CollideMoverAndCircle( const b2Circle* shape, const b2Capsule* mover );
+b2PlaneResult b2CollideMoverAndCapsule( const b2Capsule* shape, const b2Capsule* mover );
+b2PlaneResult b2CollideMoverAndPolygon( const b2Polygon* shape, const b2Capsule* mover );
+b2PlaneResult b2CollideMoverAndSegment( const b2Segment* shape, const b2Capsule* mover );
+b2PlaneResult b2CollideMover( const b2Shape* shape, b2Transform transform, const b2Capsule* mover );
+
+static inline float b2GetShapeRadius( const b2Shape* shape ) // Return the radius of the active geometry, or 0 for types without one.
+{
+	switch ( shape->type )
+	{
+		case b2_capsuleShape:
+			return shape->capsule.radius;
+		case b2_circleShape:
+			return shape->circle.radius;
+		case b2_polygonShape:
+			return shape->polygon.radius;
+		default:
+			return 0.0f; // every other shape type (e.g. segment, chain segment) reports zero radius
+	}
+}
+
+B2_ARRAY_INLINE( b2ChainShape, b2ChainShape ) // NOTE(review): presumably generates the b2ChainShapeArray_* inline accessors used in shape.c — confirm in array.h
+B2_ARRAY_INLINE( b2Shape, b2Shape ) // NOTE(review): presumably generates the b2ShapeArray_* inline accessors used in shape.c/solver.c — confirm in array.h
diff --git a/src/vendor/box2d/solver.c b/src/vendor/box2d/solver.c
new file mode 100644
index 0000000..4f009a8
--- /dev/null
+++ b/src/vendor/box2d/solver.c
@@ -0,0 +1,2038 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "solver.h"
+
+#include "array.h"
+#include "atomic.h"
+#include "bitset.h"
+#include "body.h"
+#include "contact.h"
+#include "contact_solver.h"
+#include "core.h"
+#include "ctz.h"
+#include "island.h"
+#include "joint.h"
+#include "shape.h"
+#include "solver_set.h"
+#include "arena_allocator.h"
+#include "world.h"
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+// b2Pause: per-compiler/per-architecture CPU pause/yield hint, empty where none is selected. Compare to SDL_CPUPauseInstruction
+#if ( defined( __GNUC__ ) || defined( __clang__ ) ) && ( defined( __i386__ ) || defined( __x86_64__ ) )
+static inline void b2Pause( void ) // GCC/Clang on x86/x64: "pause" instruction
+{
+	__asm__ __volatile__( "pause\n" );
+}
+#elif ( defined( __arm__ ) && defined( __ARM_ARCH ) && __ARM_ARCH >= 7 ) || defined( __aarch64__ )
+static inline void b2Pause( void ) // ARMv7+ / AArch64: "yield" instruction with a memory clobber
+{
+	__asm__ __volatile__( "yield" ::: "memory" );
+}
+#elif defined( _MSC_VER ) && ( defined( _M_IX86 ) || defined( _M_X64 ) )
+//#include <immintrin.h> // NOTE(review): include is disabled; _mm_pause must be declared by another header — confirm
+static inline void b2Pause( void ) // MSVC on x86/x64: _mm_pause intrinsic
+{
+	_mm_pause();
+}
+#elif defined( _MSC_VER ) && ( defined( _M_ARM ) || defined( _M_ARM64 ) )
+static inline void b2Pause( void ) // MSVC on ARM/ARM64: __yield intrinsic
+{
+	__yield();
+}
+#else
+static inline void b2Pause( void ) // any other toolchain/architecture: intentionally a no-op
+{
+}
+#endif
+
+typedef struct b2WorkerContext // NOTE(review): users of this struct lie outside the reviewed chunk; field roles below are unconfirmed
+{
+	b2StepContext* context;
+	int workerIndex;
+	void* userTask;
+} b2WorkerContext;
+
+// Integrate velocities and apply damping for the body states/sims in [startIndex, endIndex); also clamps linear and angular speed
+static void b2IntegrateVelocitiesTask( int startIndex, int endIndex, b2StepContext* context )
+{
+	b2TracyCZoneNC( integrate_velocity, "IntVel", b2_colorDeepPink, true );
+
+	b2BodyState* states = context->states;
+	b2BodySim* sims = context->sims; // states[i] and sims[i] are read as the same body
+
+	b2Vec2 gravity = context->world->gravity;
+	float h = context->h; // time step used for this integration
+	float maxLinearSpeed = context->maxLinearVelocity;
+	float maxAngularSpeed = B2_MAX_ROTATION * context->inv_dt;
+	float maxLinearSpeedSquared = maxLinearSpeed * maxLinearSpeed; // squared limits allow comparing without a sqrt
+	float maxAngularSpeedSquared = maxAngularSpeed * maxAngularSpeed;
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		b2BodySim* sim = sims + i;
+		b2BodyState* state = states + i;
+
+		b2Vec2 v = state->linearVelocity;
+		float w = state->angularVelocity;
+
+		// Apply forces, torque, gravity, and damping
+		// Damping model:
+		// Differential equation: dv/dt + c * v = 0
+		// Solution: v(t) = v0 * exp(-c * t)
+		// Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v(t) * exp(-c * dt)
+		// v2 = exp(-c * dt) * v1
+		// Pade approximation:
+		// v2 = v1 * 1 / (1 + c * dt)
+		float linearDamping = 1.0f / ( 1.0f + h * sim->linearDamping );
+		float angularDamping = 1.0f / ( 1.0f + h * sim->angularDamping );
+
+		// Gravity scale will be zero for kinematic bodies (any body with invMass <= 0 gets no gravity here)
+		float gravityScale = sim->invMass > 0.0f ? sim->gravityScale : 0.0f;
+
+		// lvd = h * im * f + h * g
+		b2Vec2 linearVelocityDelta = b2Add( b2MulSV( h * sim->invMass, sim->force ), b2MulSV( h * gravityScale, gravity ) );
+		float angularVelocityDelta = h * sim->invInertia * sim->torque;
+
+		v = b2MulAdd( linearVelocityDelta, linearDamping, v ); // v = delta + damping * v: damping scales only the old velocity
+		w = angularVelocityDelta + angularDamping * w;
+
+		// Clamp to max linear speed
+		if ( b2Dot( v, v ) > maxLinearSpeedSquared )
+		{
+			float ratio = maxLinearSpeed / b2Length( v );
+			v = b2MulSV( ratio, v ); // rescale to the limit, keeping the direction
+			sim->isSpeedCapped = true;
+		}
+
+		// Clamp to max angular speed (skipped for bodies with allowFastRotation set)
+		if ( w * w > maxAngularSpeedSquared && sim->allowFastRotation == false )
+		{
+			float ratio = maxAngularSpeed / b2AbsFloat( w );
+			w *= ratio; // rescale to the limit, keeping the sign
+			sim->isSpeedCapped = true;
+		}
+
+		state->linearVelocity = v;
+		state->angularVelocity = w;
+	}
+
+	b2TracyCZoneEnd( integrate_velocity );
+}
+
+static void b2PrepareJointsTask( int startIndex, int endIndex, b2StepContext* context ) // Call b2PrepareJoint on context->joints[startIndex..endIndex).
+{
+	b2TracyCZoneNC( prepare_joints, "PrepJoints", b2_colorOldLace, true );
+
+	b2JointSim** joints = context->joints; // array of pointers, unlike the per-color joint arrays used when solving
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		b2JointSim* joint = joints[i];
+		b2PrepareJoint( joint, context );
+	}
+
+	b2TracyCZoneEnd( prepare_joints );
+}
+
+static void b2WarmStartJointsTask( int startIndex, int endIndex, b2StepContext* context, int colorIndex ) // Call b2WarmStartJoint on joints [startIndex, endIndex) of one graph color.
+{
+	b2TracyCZoneNC( warm_joints, "WarmJoints", b2_colorGold, true );
+
+	b2GraphColor* color = context->graph->colors + colorIndex;
+	b2JointSim* joints = color->jointSims.data; // joints of this color are stored by value, contiguously
+	B2_ASSERT( 0 <= startIndex && startIndex < color->jointSims.count );
+	B2_ASSERT( startIndex <= endIndex && endIndex <= color->jointSims.count );
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		b2JointSim* joint = joints + i;
+		b2WarmStartJoint( joint, context );
+	}
+
+	b2TracyCZoneEnd( warm_joints );
+}
+
+static void b2SolveJointsTask( int startIndex, int endIndex, b2StepContext* context, int colorIndex, bool useBias ) // Call b2SolveJoint on joints [startIndex, endIndex) of one graph color.
+{
+	b2TracyCZoneNC( solve_joints, "SolveJoints", b2_colorLemonChiffon, true );
+
+	b2GraphColor* color = context->graph->colors + colorIndex;
+	b2JointSim* joints = color->jointSims.data; // joints of this color are stored by value, contiguously
+	B2_ASSERT( 0 <= startIndex && startIndex < color->jointSims.count );
+	B2_ASSERT( startIndex <= endIndex && endIndex <= color->jointSims.count );
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		b2JointSim* joint = joints + i;
+		b2SolveJoint( joint, context, useBias ); // useBias is forwarded unchanged to every joint
+	}
+
+	b2TracyCZoneEnd( solve_joints );
+}
+
+static void b2IntegratePositionsTask( int startIndex, int endIndex, b2StepContext* context ) // Advance deltaRotation/deltaPosition of states [startIndex, endIndex) by one step h.
+{
+	b2TracyCZoneNC( integrate_positions, "IntPos", b2_colorDarkSeaGreen, true );
+
+	b2BodyState* states = context->states;
+	float h = context->h;
+
+	B2_ASSERT( startIndex <= endIndex );
+
+	for ( int i = startIndex; i < endIndex; ++i )
+	{
+		b2BodyState* state = states + i;
+		state->deltaRotation = b2IntegrateRotation( state->deltaRotation, h * state->angularVelocity );
+		state->deltaPosition = b2MulAdd( state->deltaPosition, h, state->linearVelocity ); // deltaPosition += h * linearVelocity
+	}
+
+	b2TracyCZoneEnd( integrate_positions );
+}
+
+struct b2ContinuousContext // State shared between b2SolveContinuous and b2ContinuousQueryCallback for one fast body.
+{
+	b2World* world;
+	b2BodySim* fastBodySim; // the fast body being swept
+	b2Shape* fastShape; // the shape of that body currently being tested; updated per shape
+	b2Vec2 centroid1, centroid2; // fastShape's centroid under the sweep's start and end transforms
+	b2Sweep sweep; // sweep of the fast body
+	float fraction; // starts at 1.0f; the callback lowers it to the earliest accepted hit fraction
+};
+
+// b2DynamicTree_Query callback for continuous collision: may lower context->fraction to an earlier hit; always returns true to continue the query
+static bool b2ContinuousQueryCallback( int proxyId, uint64_t userData, void* context )
+{
+	B2_UNUSED( proxyId );
+
+	int shapeId = (int)userData; // the tree's user data carries the shape id
+
+	struct b2ContinuousContext* continuousContext = context;
+	b2Shape* fastShape = continuousContext->fastShape;
+	b2BodySim* fastBodySim = continuousContext->fastBodySim;
+
+	// Skip same shape
+	if ( shapeId == fastShape->id )
+	{
+		return true;
+	}
+
+	b2World* world = continuousContext->world;
+
+	b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+
+	// Skip same body
+	if ( shape->bodyId == fastShape->bodyId )
+	{
+		return true;
+	}
+
+	// Skip sensors
+	if ( shape->sensorIndex != B2_NULL_INDEX )
+	{
+		return true;
+	}
+
+	// Skip filtered shapes
+	bool canCollide = b2ShouldShapesCollide( fastShape->filter, shape->filter );
+	if ( canCollide == false )
+	{
+		return true;
+	}
+
+	b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId );
+
+	b2BodySim* bodySim = b2GetBodySim( world, body );
+	B2_ASSERT( body->type == b2_staticBody || fastBodySim->isBullet ); // non-static trees are only queried for bullets
+
+	// Skip bullets
+	if ( bodySim->isBullet )
+	{
+		return true;
+	}
+
+	// Skip filtered bodies
+	b2Body* fastBody = b2BodyArray_Get( &world->bodies, fastBodySim->bodyId );
+	canCollide = b2ShouldBodiesCollide( world, fastBody, body );
+	if ( canCollide == false )
+	{
+		return true;
+	}
+
+	// Custom user filtering (optional callback, hence the NULL check)
+	b2CustomFilterFcn* customFilterFcn = world->customFilterFcn;
+	if ( customFilterFcn != NULL )
+	{
+		b2ShapeId idA = { shape->id + 1, world->worldId, shape->generation };
+		b2ShapeId idB = { fastShape->id + 1, world->worldId, fastShape->generation };
+		canCollide = customFilterFcn( idA, idB, world->customFilterContext );
+		if ( canCollide == false )
+		{
+			return true;
+		}
+	}
+
+	// Prevent pausing on chain segment junctions
+	if ( shape->type == b2_chainSegmentShape )
+	{
+		b2Transform transform = bodySim->transform;
+		b2Vec2 p1 = b2TransformPoint( transform, shape->chainSegment.segment.point1 );
+		b2Vec2 p2 = b2TransformPoint( transform, shape->chainSegment.segment.point2 );
+		b2Vec2 e = b2Sub( p2, p1 );
+		float length;
+		e = b2GetLengthAndNormalize( &length, e );
+		if (length > B2_LINEAR_SLOP)
+		{
+			b2Vec2 c1 = continuousContext->centroid1;
+			float offset1 = b2Cross( b2Sub( c1, p1 ), e ); // signed offset of the start centroid from the segment line
+			b2Vec2 c2 = continuousContext->centroid2;
+			float offset2 = b2Cross( b2Sub( c2, p1 ), e ); // signed offset of the end centroid from the segment line
+
+			// todo this should use the min extent of the fast shape, not the body
+			const float allowedFraction = 0.25f;
+			if ( offset1 < 0.0f || offset1 - offset2 < allowedFraction * fastBodySim->minExtent )
+			{
+				// Minimal clipping
+				return true;
+			}
+		}
+	}
+
+	// todo_erin testing early out for segments
+#if 0
+	if ( shape->type == b2_segmentShape )
+	{
+		b2Transform transform = bodySim->transform;
+		b2Vec2 p1 = b2TransformPoint( transform, shape->segment.point1 );
+		b2Vec2 p2 = b2TransformPoint( transform, shape->segment.point2 );
+		b2Vec2 e = b2Sub( p2, p1 );
+		b2Vec2 c1 = continuousContext->centroid1;
+		b2Vec2 c2 = continuousContext->centroid2;
+		float offset1 = b2Cross( b2Sub( c1, p1 ), e );
+		float offset2 = b2Cross( b2Sub( c2, p1 ), e );
+
+		if ( offset1 > 0.0f && offset2 > 0.0f )
+		{
+			// Start and end centroids are both on the positive side of the segment line
+			return true;
+		}
+
+		if ( offset1 < 0.0f && offset2 < 0.0f )
+		{
+			// Start and end centroids are both on the negative side of the segment line
+			return true;
+		}
+	}
+#endif
+
+	b2TOIInput input;
+	input.proxyA = b2MakeShapeDistanceProxy( shape );
+	input.proxyB = b2MakeShapeDistanceProxy( fastShape );
+	input.sweepA = b2MakeSweep( bodySim );
+	input.sweepB = continuousContext->sweep;
+	input.maxFraction = continuousContext->fraction; // no need to search past the earliest hit found so far
+
+	float hitFraction = continuousContext->fraction;
+
+	bool didHit = false;
+	b2TOIOutput output = b2TimeOfImpact( &input );
+	if ( 0.0f < output.fraction && output.fraction < continuousContext->fraction )
+	{
+		hitFraction = output.fraction;
+		didHit = true;
+	}
+	else if ( 0.0f == output.fraction )
+	{
+		// fallback to TOI of a small circle around the fast shape centroid
+		b2Vec2 centroid = b2GetShapeCentroid( fastShape );
+		b2ShapeExtent extent = b2ComputeShapeExtent( fastShape, centroid );
+		float radius = 0.25f * extent.minExtent;
+		input.proxyB = b2MakeProxy( &centroid, 1, radius );
+		output = b2TimeOfImpact( &input );
+		if ( 0.0f < output.fraction && output.fraction < continuousContext->fraction )
+		{
+			hitFraction = output.fraction;
+			didHit = true;
+		}
+	}
+
+	if ( didHit && ( shape->enablePreSolveEvents || fastShape->enablePreSolveEvents ) && world->preSolveFcn != NULL )
+	{
+		// Pre-solve is expensive because I need to compute a temporary manifold. Skipped when no callback is registered.
+		b2Transform transformA = b2GetSweepTransform( &input.sweepA, hitFraction );
+		b2Transform transformB = b2GetSweepTransform( &input.sweepB, hitFraction );
+		b2Manifold manifold = b2ComputeManifold( shape, transformA, fastShape, transformB );
+		b2ShapeId shapeIdA = { shape->id + 1, world->worldId, shape->generation };
+		b2ShapeId shapeIdB = { fastShape->id + 1, world->worldId, fastShape->generation };
+
+		// The user may modify the temporary manifold here but it doesn't matter. They will be able to
+		// modify the real manifold in the discrete solver.
+		didHit = world->preSolveFcn( shapeIdA, shapeIdB, &manifold, world->preSolveContext ); // a false return discards this hit
+	}
+
+	if ( didHit )
+	{
+		continuousContext->fraction = hitFraction;
+	}
+
+	return true;
+}
+
+// Continuous collision of a fast dynamic body versus static shapes (bullets also sweep kinematic and dynamic shapes)
+static void b2SolveContinuous( b2World* world, int bodySimIndex )
+{
+	b2TracyCZoneNC( ccd, "CCD", b2_colorDarkGoldenRod, true );
+
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	b2BodySim* fastBodySim = b2BodySimArray_Get( &awakeSet->bodySims, bodySimIndex );
+	B2_ASSERT( fastBodySim->isFast );
+
+	b2Sweep sweep = b2MakeSweep( fastBodySim );
+
+	b2Transform xf1; // body origin transform built from the sweep's first pose (q1, c1)
+	xf1.q = sweep.q1;
+	xf1.p = b2Sub( sweep.c1, b2RotateVector( sweep.q1, sweep.localCenter ) );
+
+	b2Transform xf2; // body origin transform built from the sweep's second pose (q2, c2)
+	xf2.q = sweep.q2;
+	xf2.p = b2Sub( sweep.c2, b2RotateVector( sweep.q2, sweep.localCenter ) );
+
+	b2DynamicTree* staticTree = world->broadPhase.trees + b2_staticBody;
+	b2DynamicTree* kinematicTree = world->broadPhase.trees + b2_kinematicBody;
+	b2DynamicTree* dynamicTree = world->broadPhase.trees + b2_dynamicBody;
+	b2Body* fastBody = b2BodyArray_Get( &world->bodies, fastBodySim->bodyId );
+
+	struct b2ContinuousContext context;
+	context.world = world;
+	context.sweep = sweep;
+	context.fastBodySim = fastBodySim;
+	context.fraction = 1.0f; // 1 means no impact accepted yet; the query callback lowers it on a hit
+
+	bool isBullet = fastBodySim->isBullet;
+
+	int shapeId = fastBody->headShapeId;
+	while ( shapeId != B2_NULL_INDEX )
+	{
+		b2Shape* fastShape = b2ShapeArray_Get( &world->shapes, shapeId );
+		shapeId = fastShape->nextShapeId; // advance first: the sensor case below uses continue
+
+		context.fastShape = fastShape;
+		context.centroid1 = b2TransformPoint( xf1, fastShape->localCentroid );
+		context.centroid2 = b2TransformPoint( xf2, fastShape->localCentroid );
+
+		b2AABB box1 = fastShape->aabb;
+		b2AABB box2 = b2ComputeShapeAABB( fastShape, xf2 );
+		b2AABB box = b2AABB_Union( box1, box2 ); // stored bounds merged with the bounds at xf2
+
+		// Store this to avoid double computation in the case there is no impact event
+		fastShape->aabb = box2;
+
+		// No continuous collision for sensors (but still need the updated bounds)
+		if ( fastShape->sensorIndex != B2_NULL_INDEX )
+		{
+			continue;
+		}
+
+		b2DynamicTree_Query( staticTree, box, B2_DEFAULT_MASK_BITS, b2ContinuousQueryCallback, &context );
+
+		if ( isBullet )
+		{
+			b2DynamicTree_Query( kinematicTree, box, B2_DEFAULT_MASK_BITS, b2ContinuousQueryCallback, &context );
+			b2DynamicTree_Query( dynamicTree, box, B2_DEFAULT_MASK_BITS, b2ContinuousQueryCallback, &context );
+		}
+	}
+
+	const float speculativeDistance = B2_SPECULATIVE_DISTANCE;
+	const float aabbMargin = B2_AABB_MARGIN;
+
+	if ( context.fraction < 1.0f )
+	{
+		// Handle time of impact event: interpolate the sweep to the smallest accepted hit fraction
+		b2Rot q = b2NLerp( sweep.q1, sweep.q2, context.fraction );
+		b2Vec2 c = b2Lerp( sweep.c1, sweep.c2, context.fraction );
+		b2Vec2 origin = b2Sub( c, b2RotateVector( q, sweep.localCenter ) );
+
+		// Advance body
+		b2Transform transform = { origin, q };
+		fastBodySim->transform = transform;
+		fastBodySim->center = c;
+		fastBodySim->rotation0 = q;
+		fastBodySim->center0 = c;
+
+		// Update body move event
+		b2BodyMoveEvent* event = b2BodyMoveEventArray_Get( &world->bodyMoveEvents, bodySimIndex );
+		event->transform = transform;
+
+		// Prepare AABBs for broad-phase.
+		// Even though a body is fast, it may not move much. So the
+		// AABB may not need enlargement.
+
+		shapeId = fastBody->headShapeId;
+		while ( shapeId != B2_NULL_INDEX )
+		{
+			b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+
+			// Must recompute aabb at the interpolated transform
+			b2AABB aabb = b2ComputeShapeAABB( shape, transform );
+			aabb.lowerBound.x -= speculativeDistance;
+			aabb.lowerBound.y -= speculativeDistance;
+			aabb.upperBound.x += speculativeDistance;
+			aabb.upperBound.y += speculativeDistance;
+			shape->aabb = aabb;
+
+			if ( b2AABB_Contains( shape->fatAABB, aabb ) == false )
+			{
+				b2AABB fatAABB;
+				fatAABB.lowerBound.x = aabb.lowerBound.x - aabbMargin;
+				fatAABB.lowerBound.y = aabb.lowerBound.y - aabbMargin;
+				fatAABB.upperBound.x = aabb.upperBound.x + aabbMargin;
+				fatAABB.upperBound.y = aabb.upperBound.y + aabbMargin;
+				shape->fatAABB = fatAABB;
+
+				shape->enlargedAABB = true;
+				fastBodySim->enlargeAABB = true;
+			}
+
+			shapeId = shape->nextShapeId;
+		}
+	}
+	else
+	{
+		// No time of impact event
+
+		// Advance body
+		fastBodySim->rotation0 = fastBodySim->transform.q;
+		fastBodySim->center0 = fastBodySim->center;
+
+		// Prepare AABBs for broad-phase
+		shapeId = fastBody->headShapeId;
+		while ( shapeId != B2_NULL_INDEX )
+		{
+			b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+
+			// shape->aabb is still valid from above (it was set to the bounds at xf2 in the query loop)
+
+			if ( b2AABB_Contains( shape->fatAABB, shape->aabb ) == false )
+			{
+				b2AABB fatAABB;
+				fatAABB.lowerBound.x = shape->aabb.lowerBound.x - aabbMargin;
+				fatAABB.lowerBound.y = shape->aabb.lowerBound.y - aabbMargin;
+				fatAABB.upperBound.x = shape->aabb.upperBound.x + aabbMargin;
+				fatAABB.upperBound.y = shape->aabb.upperBound.y + aabbMargin;
+				shape->fatAABB = fatAABB;
+
+				shape->enlargedAABB = true;
+				fastBodySim->enlargeAABB = true;
+			}
+
+			shapeId = shape->nextShapeId;
+		}
+	}
+
+	b2TracyCZoneEnd( ccd );
+}
+// Task over awake body sims [startIndex, endIndex): applies solver deltas, fills move events, updates sleep state, handles fast bodies, refreshes AABBs
+static void b2FinalizeBodiesTask( int startIndex, int endIndex, uint32_t threadIndex, void* context )
+{
+	b2TracyCZoneNC( finalize_transfprms, "Transforms", b2_colorMediumSeaGreen, true );
+
+	b2StepContext* stepContext = context; // context is the b2StepContext for this step
+	b2World* world = stepContext->world;
+	bool enableSleep = world->enableSleep;
+	b2BodyState* states = stepContext->states;
+	b2BodySim* sims = stepContext->sims;
+	b2Body* bodies = world->bodies.data;
+	float timeStep = stepContext->dt;
+	float invTimeStep = stepContext->inv_dt;
+
+	uint16_t worldId = world->worldId;
+
+	// The body move event array should already have the correct size
+	B2_ASSERT( endIndex <= world->bodyMoveEvents.count );
+	b2BodyMoveEvent* moveEvents = world->bodyMoveEvents.data;
+
+	b2BitSet* enlargedSimBitSet = &world->taskContexts.data[threadIndex].enlargedSimBitSet;
+	b2BitSet* awakeIslandBitSet = &world->taskContexts.data[threadIndex].awakeIslandBitSet;
+	b2TaskContext* taskContext = world->taskContexts.data + threadIndex; // per-thread context selected by threadIndex
+
+	bool enableContinuous = world->enableContinuous;
+
+	const float speculativeDistance = B2_SPECULATIVE_DISTANCE;
+	const float aabbMargin = B2_AABB_MARGIN;
+
+	B2_ASSERT( startIndex <= endIndex );
+
+	for ( int simIndex = startIndex; simIndex < endIndex; ++simIndex )
+	{
+		b2BodyState* state = states + simIndex;
+		b2BodySim* sim = sims + simIndex;
+
+		b2Vec2 v = state->linearVelocity;
+		float w = state->angularVelocity;
+
+		B2_ASSERT( b2IsValidVec2( v ) );
+		B2_ASSERT( b2IsValidFloat( w ) );
+
+		sim->center = b2Add( sim->center, state->deltaPosition );
+		sim->transform.q = b2NormalizeRot( b2MulRot( state->deltaRotation, sim->transform.q ) );
+
+		// Use the velocity of the farthest point on the body to account for rotation.
+		float maxVelocity = b2Length( v ) + b2AbsFloat( w ) * sim->maxExtent;
+
+		// Sleep needs to observe position correction as well as true velocity.
+		float maxDeltaPosition = b2Length( state->deltaPosition ) + b2AbsFloat( state->deltaRotation.s ) * sim->maxExtent;
+
+		// Position correction is not as important for sleep as true velocity.
+		float positionSleepFactor = 0.5f;
+
+		float sleepVelocity = b2MaxFloat( maxVelocity, positionSleepFactor * invTimeStep * maxDeltaPosition );
+
+		// reset state deltas
+		state->deltaPosition = b2Vec2_zero;
+		state->deltaRotation = b2Rot_identity;
+
+		sim->transform.p = b2Sub( sim->center, b2RotateVector( sim->transform.q, sim->localCenter ) );
+
+		// cache miss here, however I need the shape list below
+		b2Body* body = bodies + sim->bodyId;
+		body->bodyMoveIndex = simIndex;
+		moveEvents[simIndex].transform = sim->transform;
+		moveEvents[simIndex].bodyId = ( b2BodyId ){ sim->bodyId + 1, worldId, body->generation };
+		moveEvents[simIndex].userData = body->userData;
+		moveEvents[simIndex].fellAsleep = false;
+
+		// reset applied force and torque
+		sim->force = b2Vec2_zero;
+		sim->torque = 0.0f;
+
+		body->isSpeedCapped = sim->isSpeedCapped;
+		sim->isSpeedCapped = false;
+
+		sim->isFast = false; // set again below if the body qualifies for continuous collision
+
+		if ( enableSleep == false || body->enableSleep == false || sleepVelocity > body->sleepThreshold )
+		{
+			// Body is not sleepy
+			body->sleepTime = 0.0f;
+
+			if ( body->type == b2_dynamicBody && enableContinuous && maxVelocity * timeStep > 0.5f * sim->minExtent )
+			{
+				// This flag is only retained for debug draw
+				sim->isFast = true;
+
+				// Store in fast array for the continuous collision stage
+				// This is deterministic because the order of TOI sweeps doesn't matter
+				if ( sim->isBullet )
+				{
+					int bulletIndex = b2AtomicFetchAddInt( &stepContext->bulletBodyCount, 1 ); // reserve a slot in the bullet list
+					stepContext->bulletBodies[bulletIndex] = simIndex;
+				}
+				else
+				{
+					b2SolveContinuous( world, simIndex ); // non-bullets run continuous collision right here
+				}
+			}
+			else
+			{
+				// Body is safe to advance
+				sim->center0 = sim->center;
+				sim->rotation0 = sim->transform.q;
+			}
+		}
+		else
+		{
+			// Body is safe to advance and is falling asleep
+			sim->center0 = sim->center;
+			sim->rotation0 = sim->transform.q;
+			body->sleepTime += timeStep;
+		}
+
+		// Any single body in an island can keep it awake
+		b2Island* island = b2IslandArray_Get( &world->islands, body->islandId );
+		if ( body->sleepTime < B2_TIME_TO_SLEEP )
+		{
+			// keep island awake
+			int islandIndex = island->localIndex;
+			b2SetBit( awakeIslandBitSet, islandIndex );
+		}
+		else if ( island->constraintRemoveCount > 0 )
+		{
+			// body wants to sleep but its island needs splitting first
+			if ( body->sleepTime > taskContext->splitSleepTime )
+			{
+				// pick the sleepiest candidate
+				taskContext->splitIslandId = body->islandId;
+				taskContext->splitSleepTime = body->sleepTime;
+			}
+		}
+
+		// Update shapes AABBs
+		b2Transform transform = sim->transform;
+		bool isFast = sim->isFast;
+		int shapeId = body->headShapeId;
+		while ( shapeId != B2_NULL_INDEX )
+		{
+			b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+
+			if ( isFast )
+			{
+				// For fast non-bullet bodies the AABB has already been updated in b2SolveContinuous
+				// For fast bullet bodies the AABB will be updated at a later stage
+
+				// Add to enlarged shapes regardless of AABB changes.
+				// Bit-set to keep the move array sorted
+				b2SetBit( enlargedSimBitSet, simIndex );
+			}
+			else
+			{
+				b2AABB aabb = b2ComputeShapeAABB( shape, transform );
+				aabb.lowerBound.x -= speculativeDistance;
+				aabb.lowerBound.y -= speculativeDistance;
+				aabb.upperBound.x += speculativeDistance;
+				aabb.upperBound.y += speculativeDistance;
+				shape->aabb = aabb;
+
+				B2_ASSERT( shape->enlargedAABB == false );
+
+				if ( b2AABB_Contains( shape->fatAABB, aabb ) == false )
+				{
+					b2AABB fatAABB;
+					fatAABB.lowerBound.x = aabb.lowerBound.x - aabbMargin;
+					fatAABB.lowerBound.y = aabb.lowerBound.y - aabbMargin;
+					fatAABB.upperBound.x = aabb.upperBound.x + aabbMargin;
+					fatAABB.upperBound.y = aabb.upperBound.y + aabbMargin;
+					shape->fatAABB = fatAABB;
+
+					shape->enlargedAABB = true;
+
+					// Bit-set to keep the move array sorted
+					b2SetBit( enlargedSimBitSet, simIndex );
+				}
+			}
+
+			shapeId = shape->nextShapeId;
+		}
+	}
+
+	b2TracyCZoneEnd( finalize_transfprms );
+}
+
+/*
+ typedef enum b2SolverStageType
+{
+	b2_stagePrepareJoints,
+	b2_stagePrepareContacts,
+	b2_stageIntegrateVelocities,
+	b2_stageWarmStart,
+	b2_stageSolve,
+	b2_stageIntegratePositions,
+	b2_stageRelax,
+	b2_stageRestitution,
+	b2_stageStoreImpulses
+} b2SolverStageType;
+
+typedef enum b2SolverBlockType
+{
+	b2_bodyBlock,
+	b2_jointBlock,
+	b2_contactBlock,
+	b2_graphJointBlock,
+	b2_graphContactBlock
+} b2SolverBlockType;
+*/
+
+static void b2ExecuteBlock( b2SolverStage* stage, b2StepContext* context, b2SolverBlock* block )
+{
+	// Runs one work block of a solver stage. The block's element range [first, last) is handed to
+	// the task routine that matches the stage type. Graph stages additionally pick the contact or
+	// joint routine from the block type and forward the stage's color index.
+	const int first = block->startIndex;
+	const int last = first + block->count;
+	const b2SolverBlockType kind = block->blockType;
+	const b2SolverStageType stageKind = stage->type;
+
+	switch ( stageKind )
+	{
+		case b2_stagePrepareJoints:
+			b2PrepareJointsTask( first, last, context );
+			break;
+
+		case b2_stagePrepareContacts:
+			b2PrepareContactsTask( first, last, context );
+			break;
+
+		case b2_stageIntegrateVelocities:
+			b2IntegrateVelocitiesTask( first, last, context );
+			break;
+
+		case b2_stageIntegratePositions:
+			b2IntegratePositionsTask( first, last, context );
+			break;
+
+		case b2_stageStoreImpulses:
+			b2StoreImpulsesTask( first, last, context );
+			break;
+
+		case b2_stageWarmStart:
+			// Warm starting can be switched off on the world, in which case the block is a no-op
+			if ( context->world->enableWarmStarting == false )
+			{
+				break;
+			}
+
+			if ( kind == b2_graphContactBlock )
+			{
+				b2WarmStartContactsTask( first, last, context, stage->colorIndex );
+			}
+			else if ( kind == b2_graphJointBlock )
+			{
+				b2WarmStartJointsTask( first, last, context, stage->colorIndex );
+			}
+			break;
+
+		case b2_stageSolve:
+		case b2_stageRelax:
+		{
+			// Same routines for both stages: the last argument is true for solve and false for relax
+			const bool isSolveStage = stageKind == b2_stageSolve;
+			if ( kind == b2_graphContactBlock )
+			{
+				b2SolveContactsTask( first, last, context, stage->colorIndex, isSolveStage );
+			}
+			else if ( kind == b2_graphJointBlock )
+			{
+				b2SolveJointsTask( first, last, context, stage->colorIndex, isSolveStage );
+			}
+			break;
+		}
+
+		case b2_stageRestitution:
+			if ( kind == b2_graphContactBlock )
+			{
+				b2ApplyRestitutionTask( first, last, context, stage->colorIndex );
+			}
+			break;
+	}
+}
+
+static inline int GetWorkerStartIndex( int workerIndex, int blockCount, int workerCount )
+{
+	if ( blockCount > workerCount )
+	{
+		// Uneven split: each of the first 'extra' workers owns one more block, shifting later starts
+		int share = blockCount / workerCount;
+		int extra = blockCount % workerCount;
+		return share * workerIndex + b2MinInt( extra, workerIndex );
+	}
+	return workerIndex < blockCount ? workerIndex : B2_NULL_INDEX;
+}
+// Runs blocks of one stage on a worker: each block is claimed by CAS of its syncIndex (previousSyncIndex -> syncIndex), forward then backward
+static void b2ExecuteStage( b2SolverStage* stage, b2StepContext* context, int previousSyncIndex, int syncIndex, int workerIndex )
+{
+	int completedCount = 0;
+	b2SolverBlock* blocks = stage->blocks;
+	int blockCount = stage->blockCount;
+
+	int expectedSyncIndex = previousSyncIndex;
+
+	int startIndex = GetWorkerStartIndex( workerIndex, blockCount, context->workerCount );
+	if ( startIndex == B2_NULL_INDEX )
+	{
+		return; // fewer blocks than workers: no start block is assigned to this worker
+	}
+
+	B2_ASSERT( 0 <= startIndex && startIndex < blockCount );
+
+	int blockIndex = startIndex;
+
+	while ( b2AtomicCompareExchangeInt( &blocks[blockIndex].syncIndex, expectedSyncIndex, syncIndex ) == true )
+	{
+		B2_ASSERT( stage->type != b2_stagePrepareContacts || syncIndex < 2 );
+
+		B2_ASSERT( completedCount < blockCount );
+
+		b2ExecuteBlock( stage, context, blocks + blockIndex );
+
+		completedCount += 1;
+		blockIndex += 1;
+		if ( blockIndex >= blockCount )
+		{
+			// Keep looking for work
+			blockIndex = 0;
+		}
+
+		expectedSyncIndex = previousSyncIndex;
+	}
+
+	// Search backwards for blocks
+	blockIndex = startIndex - 1;
+	while ( true )
+	{
+		if ( blockIndex < 0 )
+		{
+			blockIndex = blockCount - 1; // wrap around to the last block
+		}
+
+		expectedSyncIndex = previousSyncIndex;
+
+		if ( b2AtomicCompareExchangeInt( &blocks[blockIndex].syncIndex, expectedSyncIndex, syncIndex ) == false )
+		{
+			break;
+		}
+
+		b2ExecuteBlock( stage, context, blocks + blockIndex );
+		completedCount += 1;
+		blockIndex -= 1;
+	}
+
+	(void)b2AtomicFetchAddInt( &stage->completionCount, completedCount ); // report how many blocks this worker ran
+}
+// Main-thread side of a stage: a lone block runs inline; otherwise syncBits is published, worker 0 runs its share, then spins until all blocks are counted
+static void b2ExecuteMainStage( b2SolverStage* stage, b2StepContext* context, uint32_t syncBits )
+{
+	int blockCount = stage->blockCount;
+	if ( blockCount == 0 )
+	{
+		return;
+	}
+
+	if ( blockCount == 1 )
+	{
+		b2ExecuteBlock( stage, context, stage->blocks );
+	}
+	else
+	{
+		b2AtomicStoreU32( &context->atomicSyncBits, syncBits );
+
+		int syncIndex = ( syncBits >> 16 ) & 0xFFFF; // upper 16 bits hold the sync index
+		B2_ASSERT( syncIndex > 0 );
+		int previousSyncIndex = syncIndex - 1;
+
+		b2ExecuteStage( stage, context, previousSyncIndex, syncIndex, 0 );
+
+		// todo consider using the cycle counter as well
+		while ( b2AtomicLoadInt( &stage->completionCount ) != blockCount )
+		{
+			b2Pause();
+		}
+
+		b2AtomicStoreInt( &stage->completionCount, 0 ); // clear the counter before the stage is used again
+	}
+}
+// Solver task run by every worker: worker 0 sequences the stages through atomicSyncBits, the others spin and run blocks of the published stage.
+// This should not use the thread index because thread 0 can be called twice by enkiTS.
+static void b2SolverTask( int startIndex, int endIndex, uint32_t threadIndexIgnore, void* taskContext )
+{
+	B2_UNUSED( startIndex, endIndex, threadIndexIgnore );
+
+	b2WorkerContext* workerContext = taskContext;
+	int workerIndex = workerContext->workerIndex;
+	b2StepContext* context = workerContext->context;
+	int activeColorCount = context->activeColorCount;
+	b2SolverStage* stages = context->stages;
+	b2Profile* profile = &context->world->profile;
+
+	if ( workerIndex == 0 )
+	{
+		// Main thread synchronizes the workers and does work itself.
+		//
+		// Stages are re-used by loops so that I don't need more stages for large iteration counts.
+		// The sync indices grow monotonically for the body/graph/constraint groupings because they share solver blocks.
+		// The stage index and sync indices are combined into sync bits for atomic synchronization.
+		// The workers need to compute the previous sync index for a given stage so that CAS works correctly. This
+		// setup makes this easy to do.
+
+		/*
+		b2_stagePrepareJoints,
+		b2_stagePrepareContacts,
+		b2_stageIntegrateVelocities,
+		b2_stageWarmStart,
+		b2_stageSolve,
+		b2_stageIntegratePositions,
+		b2_stageRelax,
+		b2_stageRestitution,
+		b2_stageStoreImpulses
+		*/
+
+		uint64_t ticks = b2GetTicks();
+
+		int bodySyncIndex = 1;
+		int stageIndex = 0;
+
+		// This stage loops over all awake joints
+		uint32_t jointSyncIndex = 1;
+		uint32_t syncBits = ( jointSyncIndex << 16 ) | stageIndex; // upper 16 bits: sync index, lower 16 bits: stage index
+		B2_ASSERT( stages[stageIndex].type == b2_stagePrepareJoints );
+		b2ExecuteMainStage( stages + stageIndex, context, syncBits );
+		stageIndex += 1;
+		jointSyncIndex += 1;
+
+		// This stage loops over all contact constraints
+		uint32_t contactSyncIndex = 1;
+		syncBits = ( contactSyncIndex << 16 ) | stageIndex;
+		B2_ASSERT( stages[stageIndex].type == b2_stagePrepareContacts );
+		b2ExecuteMainStage( stages + stageIndex, context, syncBits );
+		stageIndex += 1;
+		contactSyncIndex += 1;
+
+		int graphSyncIndex = 1;
+
+		// Single-threaded overflow work. These constraints don't fit in the graph coloring.
+		b2PrepareOverflowJoints( context );
+		b2PrepareOverflowContacts( context );
+
+		profile->prepareConstraints += b2GetMillisecondsAndReset( &ticks );
+
+		int subStepCount = context->subStepCount;
+		for ( int i = 0; i < subStepCount; ++i )
+		{
+			// stage index restarted each iteration
+			// syncBits still increases monotonically because the upper bits increase each iteration
+			int iterStageIndex = stageIndex;
+
+			// integrate velocities
+			syncBits = ( bodySyncIndex << 16 ) | iterStageIndex;
+			B2_ASSERT( stages[iterStageIndex].type == b2_stageIntegrateVelocities );
+			b2ExecuteMainStage( stages + iterStageIndex, context, syncBits );
+			iterStageIndex += 1;
+			bodySyncIndex += 1;
+
+			profile->integrateVelocities += b2GetMillisecondsAndReset( &ticks );
+
+			// warm start constraints
+			b2WarmStartOverflowJoints( context );
+			b2WarmStartOverflowContacts( context );
+
+			for ( int colorIndex = 0; colorIndex < activeColorCount; ++colorIndex )
+			{
+				syncBits = ( graphSyncIndex << 16 ) | iterStageIndex;
+				B2_ASSERT( stages[iterStageIndex].type == b2_stageWarmStart );
+				b2ExecuteMainStage( stages + iterStageIndex, context, syncBits );
+				iterStageIndex += 1;
+			}
+			graphSyncIndex += 1;
+
+			profile->warmStart += b2GetMillisecondsAndReset( &ticks );
+
+			// solve constraints
+			bool useBias = true;
+			b2SolveOverflowJoints( context, useBias );
+			b2SolveOverflowContacts( context, useBias );
+
+			for ( int colorIndex = 0; colorIndex < activeColorCount; ++colorIndex )
+			{
+				syncBits = ( graphSyncIndex << 16 ) | iterStageIndex;
+				B2_ASSERT( stages[iterStageIndex].type == b2_stageSolve );
+				b2ExecuteMainStage( stages + iterStageIndex, context, syncBits );
+				iterStageIndex += 1;
+			}
+			graphSyncIndex += 1;
+
+			profile->solveImpulses += b2GetMillisecondsAndReset( &ticks );
+
+			// integrate positions
+			B2_ASSERT( stages[iterStageIndex].type == b2_stageIntegratePositions );
+			syncBits = ( bodySyncIndex << 16 ) | iterStageIndex;
+			b2ExecuteMainStage( stages + iterStageIndex, context, syncBits );
+			iterStageIndex += 1;
+			bodySyncIndex += 1;
+
+			profile->integratePositions += b2GetMillisecondsAndReset( &ticks );
+
+			// relax constraints
+			useBias = false;
+			b2SolveOverflowJoints( context, useBias );
+			b2SolveOverflowContacts( context, useBias );
+
+			for ( int colorIndex = 0; colorIndex < activeColorCount; ++colorIndex )
+			{
+				syncBits = ( graphSyncIndex << 16 ) | iterStageIndex;
+				B2_ASSERT( stages[iterStageIndex].type == b2_stageRelax );
+				b2ExecuteMainStage( stages + iterStageIndex, context, syncBits );
+				iterStageIndex += 1;
+			}
+			graphSyncIndex += 1;
+
+			profile->relaxImpulses += b2GetMillisecondsAndReset( &ticks );
+		}
+
+		// advance the stage according to the sub-stepping tasks just completed
+		// integrate velocities / warm start / solve / integrate positions / relax
+		stageIndex += 1 + activeColorCount + activeColorCount + 1 + activeColorCount;
+
+		// Restitution
+		{
+			b2ApplyOverflowRestitution( context );
+
+			int iterStageIndex = stageIndex;
+			for ( int colorIndex = 0; colorIndex < activeColorCount; ++colorIndex )
+			{
+				syncBits = ( graphSyncIndex << 16 ) | iterStageIndex;
+				B2_ASSERT( stages[iterStageIndex].type == b2_stageRestitution );
+				b2ExecuteMainStage( stages + iterStageIndex, context, syncBits );
+				iterStageIndex += 1;
+			}
+			// graphSyncIndex += 1;
+			stageIndex += activeColorCount;
+		}
+
+		profile->applyRestitution += b2GetMillisecondsAndReset( &ticks );
+
+		b2StoreOverflowImpulses( context );
+
+		syncBits = ( contactSyncIndex << 16 ) | stageIndex;
+		B2_ASSERT( stages[stageIndex].type == b2_stageStoreImpulses );
+		b2ExecuteMainStage( stages + stageIndex, context, syncBits );
+
+		profile->storeImpulses += b2GetMillisecondsAndReset( &ticks );
+
+		// Signal workers to finish
+		b2AtomicStoreU32( &context->atomicSyncBits, UINT_MAX );
+
+		B2_ASSERT( stageIndex + 1 == context->stageCount );
+		return;
+	}
+
+	// Worker spins and waits for work
+	uint32_t lastSyncBits = 0;
+	// uint64_t maxSpinTime = 10;
+	while ( true )
+	{
+		// Spin until the main thread changes the sync bits. This can waste significant time overall, but it is necessary for
+		// parallel simulation with graph coloring.
+		uint32_t syncBits;
+		int spinCount = 0;
+		while ( ( syncBits = b2AtomicLoadU32( &context->atomicSyncBits ) ) == lastSyncBits )
+		{
+			if ( spinCount > 5 )
+			{
+				b2Yield();
+				spinCount = 0;
+			}
+			else
+			{
+				// Using the cycle counter helps to account for variation in mm_pause timing across different
+				// CPUs. However, this is X64 only.
+				// uint64_t prev = __rdtsc();
+				// do
+				//{
+				//	b2Pause();
+				//}
+				// while ((__rdtsc() - prev) < maxSpinTime);
+				// maxSpinTime += 10;
+				b2Pause();
+				b2Pause();
+				spinCount += 1;
+			}
+		}
+
+		if ( syncBits == UINT_MAX )
+		{
+			// sentinel hit: the main thread stores UINT_MAX once all stages are done
+			break;
+		}
+
+		int stageIndex = syncBits & 0xFFFF;
+		B2_ASSERT( stageIndex < context->stageCount );
+
+		int syncIndex = ( syncBits >> 16 ) & 0xFFFF;
+		B2_ASSERT( syncIndex > 0 );
+
+		int previousSyncIndex = syncIndex - 1;
+
+		b2SolverStage* stage = stages + stageIndex;
+		b2ExecuteStage( stage, context, previousSyncIndex, syncIndex, workerIndex );
+
+		lastSyncBits = syncBits;
+	}
+}
+
+static void b2BulletBodyTask( int startIndex, int endIndex, uint32_t threadIndex, void* taskContext )
+{
+	// Task entry point: runs continuous collision for bullet list entries [startIndex, endIndex)
+	B2_UNUSED( threadIndex );
+	b2TracyCZoneNC( bullet_body_task, "Bullet", b2_colorLightSkyBlue, true );
+
+	b2StepContext* step = taskContext;
+	B2_ASSERT( startIndex <= endIndex );
+
+	int slot = startIndex;
+	while ( slot < endIndex )
+	{
+		b2SolveContinuous( step->world, step->bulletBodies[slot] );
+		slot += 1;
+	}
+
+	b2TracyCZoneEnd( bullet_body_task );
+}
+// Right-shift amount used below to turn a contact count into a count of SIMD-wide groups
+#if B2_SIMD_WIDTH == 8
+#define B2_SIMD_SHIFT 3 // 1 << 3 == 8
+#elif B2_SIMD_WIDTH == 4
+#define B2_SIMD_SHIFT 2 // 1 << 2 == 4
+#else
+#define B2_SIMD_SHIFT 0 // any other width: no shift
+#endif
+
+// Solve with graph coloring
+void b2Solve( b2World* world, b2StepContext* stepContext )
+{
+	world->stepIndex += 1;
+
+	// Merge islands
+	{
+		b2TracyCZoneNC( merge, "Merge", b2_colorLightGoldenRodYellow, true );
+		uint64_t mergeTicks = b2GetTicks();
+
+		b2MergeAwakeIslands( world );
+
+		world->profile.mergeIslands = b2GetMilliseconds( mergeTicks );
+		b2TracyCZoneEnd( merge );
+	}
+
+	// Are there any awake bodies? This scenario should not be important for profiling.
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	int awakeBodyCount = awakeSet->bodySims.count;
+	if ( awakeBodyCount == 0 )
+	{
+		// Nothing to simulate, however the tree rebuild must be finished.
+		if ( world->userTreeTask != NULL )
+		{
+			world->finishTaskFcn( world->userTreeTask, world->userTaskContext );
+			world->userTreeTask = NULL;
+			world->activeTaskCount -= 1;
+		}
+
+		b2ValidateNoEnlarged( &world->broadPhase );
+		return;
+	}
+
+	// Solve constraints using graph coloring
+	{
+		// Prepare buffers for bullets
+		b2AtomicStoreInt(&stepContext->bulletBodyCount, 0);
+		stepContext->bulletBodies = b2AllocateArenaItem( &world->arena, awakeBodyCount * sizeof( int ), "bullet bodies" );
+
+		b2TracyCZoneNC( prepare_stages, "Prepare Stages", b2_colorDarkOrange, true );
+		uint64_t prepareTicks = b2GetTicks();
+
+		b2ConstraintGraph* graph = &world->constraintGraph;
+		b2GraphColor* colors = graph->colors;
+
+		stepContext->sims = awakeSet->bodySims.data;
+		stepContext->states = awakeSet->bodyStates.data;
+
+		// count contacts, joints, and colors
+		int awakeJointCount = 0;
+		int activeColorCount = 0;
+		for ( int i = 0; i < B2_GRAPH_COLOR_COUNT - 1; ++i )
+		{
+			int perColorContactCount = colors[i].contactSims.count;
+			int perColorJointCount = colors[i].jointSims.count;
+			int occupancyCount = perColorContactCount + perColorJointCount;
+			activeColorCount += occupancyCount > 0 ? 1 : 0;
+			awakeJointCount += perColorJointCount;
+		}
+
+		// prepare for move events
+		b2BodyMoveEventArray_Resize( &world->bodyMoveEvents, awakeBodyCount );
+
+		// Each worker receives at most M blocks of work. The workers may receive less blocks if there is not sufficient work.
+		// Each block of work has a minimum number of elements (block size). This in turn may limit the number of blocks.
+		// If there are many elements then the block size is increased so there are still at most M blocks of work per worker.
+		// M is a tunable number that has two goals:
+		// 1. keep M small to reduce overhead
+		// 2. keep M large enough for other workers to be able to steal work
+		// The block size is a power of two to make math efficient.
+
+		int workerCount = world->workerCount;
+		const int blocksPerWorker = 4;
+		const int maxBlockCount = blocksPerWorker * workerCount;
+
+		// Configure blocks for tasks that parallel-for bodies
+		int bodyBlockSize = 1 << 5;
+		int bodyBlockCount;
+		if ( awakeBodyCount > bodyBlockSize * maxBlockCount )
+		{
+			// Too many blocks, increase block size
+			bodyBlockSize = awakeBodyCount / maxBlockCount;
+			bodyBlockCount = maxBlockCount;
+		}
+		else
+		{
+			bodyBlockCount = ( ( awakeBodyCount - 1 ) >> 5 ) + 1;
+		}
+
+		// Configure blocks for tasks parallel-for each active graph color
+		// The blocks are a mix of SIMD contact blocks and joint blocks
+		int activeColorIndices[B2_GRAPH_COLOR_COUNT];
+
+		int colorContactCounts[B2_GRAPH_COLOR_COUNT];
+		int colorContactBlockSizes[B2_GRAPH_COLOR_COUNT];
+		int colorContactBlockCounts[B2_GRAPH_COLOR_COUNT];
+
+		int colorJointCounts[B2_GRAPH_COLOR_COUNT];
+		int colorJointBlockSizes[B2_GRAPH_COLOR_COUNT];
+		int colorJointBlockCounts[B2_GRAPH_COLOR_COUNT];
+
+		int graphBlockCount = 0;
+
+		// c is the active color index
+		int simdContactCount = 0;
+		int c = 0;
+		for ( int i = 0; i < B2_GRAPH_COLOR_COUNT - 1; ++i )
+		{
+			int colorContactCount = colors[i].contactSims.count;
+			int colorJointCount = colors[i].jointSims.count;
+
+			if ( colorContactCount + colorJointCount > 0 )
+			{
+				activeColorIndices[c] = i;
+
+				// 4/8-way SIMD
+				int colorContactCountSIMD = colorContactCount > 0 ? ( ( colorContactCount - 1 ) >> B2_SIMD_SHIFT ) + 1 : 0;
+
+				colorContactCounts[c] = colorContactCountSIMD;
+
+				// determine the number of contact work blocks for this color
+				if ( colorContactCountSIMD > blocksPerWorker * maxBlockCount )
+				{
+					// too many contact blocks
+					colorContactBlockSizes[c] = colorContactCountSIMD / maxBlockCount;
+					colorContactBlockCounts[c] = maxBlockCount;
+				}
+				else if ( colorContactCountSIMD > 0 )
+				{
+					// dividing by blocksPerWorker (4)
+					colorContactBlockSizes[c] = blocksPerWorker;
+					colorContactBlockCounts[c] = ( ( colorContactCountSIMD - 1 ) >> 2 ) + 1;
+				}
+				else
+				{
+					// no contacts in this color
+					colorContactBlockSizes[c] = 0;
+					colorContactBlockCounts[c] = 0;
+				}
+
+				colorJointCounts[c] = colorJointCount;
+
+				// determine number of joint work blocks for this color
+				if ( colorJointCount > blocksPerWorker * maxBlockCount )
+				{
+					// too many joint blocks
+					colorJointBlockSizes[c] = colorJointCount / maxBlockCount;
+					colorJointBlockCounts[c] = maxBlockCount;
+				}
+				else if ( colorJointCount > 0 )
+				{
+					// dividing by blocksPerWorker (4)
+					colorJointBlockSizes[c] = blocksPerWorker;
+					colorJointBlockCounts[c] = ( ( colorJointCount - 1 ) >> 2 ) + 1;
+				}
+				else
+				{
+					colorJointBlockSizes[c] = 0;
+					colorJointBlockCounts[c] = 0;
+				}
+
+				graphBlockCount += colorContactBlockCounts[c] + colorJointBlockCounts[c];
+				simdContactCount += colorContactCountSIMD;
+				c += 1;
+			}
+		}
+		activeColorCount = c;
+
+		// Gather contact pointers for easy parallel-for traversal. Some may be NULL due to SIMD remainders.
+		b2ContactSim** contacts = b2AllocateArenaItem(
+			&world->arena, B2_SIMD_WIDTH * simdContactCount * sizeof( b2ContactSim* ), "contact pointers" );
+
+		// Gather joint pointers for easy parallel-for traversal.
+		b2JointSim** joints =
+			b2AllocateArenaItem( &world->arena, awakeJointCount * sizeof( b2JointSim* ), "joint pointers" );
+
+		int simdConstraintSize = b2GetContactConstraintSIMDByteCount();
+		b2ContactConstraintSIMD* simdContactConstraints =
+			b2AllocateArenaItem( &world->arena, simdContactCount * simdConstraintSize, "contact constraint" );
+
+		int overflowContactCount = colors[B2_OVERFLOW_INDEX].contactSims.count;
+		b2ContactConstraint* overflowContactConstraints = b2AllocateArenaItem(
+			&world->arena, overflowContactCount * sizeof( b2ContactConstraint ), "overflow contact constraint" );
+
+		graph->colors[B2_OVERFLOW_INDEX].overflowConstraints = overflowContactConstraints;
+
+		// Distribute transient constraints to each graph color and build flat arrays of contact and joint pointers
+		{
+			int contactBase = 0;
+			int jointBase = 0;
+			for ( int i = 0; i < activeColorCount; ++i )
+			{
+				int j = activeColorIndices[i];
+				b2GraphColor* color = colors + j;
+
+				int colorContactCount = color->contactSims.count;
+
+				if ( colorContactCount == 0 )
+				{
+					color->simdConstraints = NULL;
+				}
+				else
+				{
+					color->simdConstraints =
+						(b2ContactConstraintSIMD*)( (uint8_t*)simdContactConstraints + contactBase * simdConstraintSize );
+
+					for ( int k = 0; k < colorContactCount; ++k )
+					{
+						contacts[B2_SIMD_WIDTH * contactBase + k] = color->contactSims.data + k;
+					}
+
+					// remainder
+					int colorContactCountSIMD = ( ( colorContactCount - 1 ) >> B2_SIMD_SHIFT ) + 1;
+					for ( int k = colorContactCount; k < B2_SIMD_WIDTH * colorContactCountSIMD; ++k )
+					{
+						contacts[B2_SIMD_WIDTH * contactBase + k] = NULL;
+					}
+
+					contactBase += colorContactCountSIMD;
+				}
+
+				int colorJointCount = color->jointSims.count;
+				for ( int k = 0; k < colorJointCount; ++k )
+				{
+					joints[jointBase + k] = color->jointSims.data + k;
+				}
+				jointBase += colorJointCount;
+			}
+
+			B2_ASSERT( contactBase == simdContactCount );
+			B2_ASSERT( jointBase == awakeJointCount );
+		}
+
+		// Define work blocks for preparing contacts and storing contact impulses
+		int contactBlockSize = blocksPerWorker;
+		int contactBlockCount = simdContactCount > 0 ? ( ( simdContactCount - 1 ) >> 2 ) + 1 : 0;
+		if ( simdContactCount > contactBlockSize * maxBlockCount )
+		{
+			// Too many blocks, increase block size
+			contactBlockSize = simdContactCount / maxBlockCount;
+			contactBlockCount = maxBlockCount;
+		}
+
+		// Define work blocks for preparing joints
+		int jointBlockSize = blocksPerWorker;
+		int jointBlockCount = awakeJointCount > 0 ? ( ( awakeJointCount - 1 ) >> 2 ) + 1 : 0;
+		if ( awakeJointCount > jointBlockSize * maxBlockCount )
+		{
+			// Too many blocks, increase block size
+			jointBlockSize = awakeJointCount / maxBlockCount;
+			jointBlockCount = maxBlockCount;
+		}
+
+		int stageCount = 0;
+
+		// b2_stagePrepareJoints
+		stageCount += 1;
+		// b2_stagePrepareContacts
+		stageCount += 1;
+		// b2_stageIntegrateVelocities
+		stageCount += 1;
+		// b2_stageWarmStart
+		stageCount += activeColorCount;
+		// b2_stageSolve
+		stageCount += activeColorCount;
+		// b2_stageIntegratePositions
+		stageCount += 1;
+		// b2_stageRelax
+		stageCount += activeColorCount;
+		// b2_stageRestitution
+		stageCount += activeColorCount;
+		// b2_stageStoreImpulses
+		stageCount += 1;
+
+		b2SolverStage* stages = b2AllocateArenaItem( &world->arena, stageCount * sizeof( b2SolverStage ), "stages" );
+		b2SolverBlock* bodyBlocks =
+			b2AllocateArenaItem( &world->arena, bodyBlockCount * sizeof( b2SolverBlock ), "body blocks" );
+		b2SolverBlock* contactBlocks =
+			b2AllocateArenaItem( &world->arena, contactBlockCount * sizeof( b2SolverBlock ), "contact blocks" );
+		b2SolverBlock* jointBlocks =
+			b2AllocateArenaItem( &world->arena, jointBlockCount * sizeof( b2SolverBlock ), "joint blocks" );
+		b2SolverBlock* graphBlocks =
+			b2AllocateArenaItem( &world->arena, graphBlockCount * sizeof( b2SolverBlock ), "graph blocks" );
+
+		// Split an awake island. This modifies:
+		// - stack allocator
+		// - world island array and solver set
+		// - island indices on bodies, contacts, and joints
+		// I'm squeezing this task in here because it may be expensive and this is a safe place to put it.
+		// Note: cannot split islands in parallel with FinalizeBodies
+		void* splitIslandTask = NULL;
+		if ( world->splitIslandId != B2_NULL_INDEX )
+		{
+			splitIslandTask = world->enqueueTaskFcn( &b2SplitIslandTask, 1, 1, world, world->userTaskContext );
+			world->taskCount += 1;
+			world->activeTaskCount += splitIslandTask == NULL ? 0 : 1;
+		}
+
+		// Prepare body work blocks
+		for ( int i = 0; i < bodyBlockCount; ++i )
+		{
+			b2SolverBlock* block = bodyBlocks + i;
+			block->startIndex = i * bodyBlockSize;
+			block->count = (int16_t)bodyBlockSize;
+			block->blockType = b2_bodyBlock;
+			b2AtomicStoreInt(&block->syncIndex, 0);
+		}
+		bodyBlocks[bodyBlockCount - 1].count = (int16_t)( awakeBodyCount - ( bodyBlockCount - 1 ) * bodyBlockSize );
+
+		// Prepare joint work blocks
+		for ( int i = 0; i < jointBlockCount; ++i )
+		{
+			b2SolverBlock* block = jointBlocks + i;
+			block->startIndex = i * jointBlockSize;
+			block->count = (int16_t)jointBlockSize;
+			block->blockType = b2_jointBlock;
+			b2AtomicStoreInt( &block->syncIndex, 0 );
+		}
+
+		if ( jointBlockCount > 0 )
+		{
+			jointBlocks[jointBlockCount - 1].count = (int16_t)( awakeJointCount - ( jointBlockCount - 1 ) * jointBlockSize );
+		}
+
+		// Prepare contact work blocks
+		for ( int i = 0; i < contactBlockCount; ++i )
+		{
+			b2SolverBlock* block = contactBlocks + i;
+			block->startIndex = i * contactBlockSize;
+			block->count = (int16_t)contactBlockSize;
+			block->blockType = b2_contactBlock;
+			b2AtomicStoreInt( &block->syncIndex, 0 );
+		}
+
+		if ( contactBlockCount > 0 )
+		{
+			contactBlocks[contactBlockCount - 1].count =
+				(int16_t)( simdContactCount - ( contactBlockCount - 1 ) * contactBlockSize );
+		}
+
+		// Prepare graph work blocks
+		b2SolverBlock* graphColorBlocks[B2_GRAPH_COLOR_COUNT];
+		b2SolverBlock* baseGraphBlock = graphBlocks;
+
+		for ( int i = 0; i < activeColorCount; ++i )
+		{
+			graphColorBlocks[i] = baseGraphBlock;
+
+			int colorJointBlockCount = colorJointBlockCounts[i];
+			int colorJointBlockSize = colorJointBlockSizes[i];
+			for ( int j = 0; j < colorJointBlockCount; ++j )
+			{
+				b2SolverBlock* block = baseGraphBlock + j;
+				block->startIndex = j * colorJointBlockSize;
+				block->count = (int16_t)colorJointBlockSize;
+				block->blockType = b2_graphJointBlock;
+				b2AtomicStoreInt( &block->syncIndex, 0 );
+			}
+
+			if ( colorJointBlockCount > 0 )
+			{
+				baseGraphBlock[colorJointBlockCount - 1].count =
+					(int16_t)( colorJointCounts[i] - ( colorJointBlockCount - 1 ) * colorJointBlockSize );
+				baseGraphBlock += colorJointBlockCount;
+			}
+
+			int colorContactBlockCount = colorContactBlockCounts[i];
+			int colorContactBlockSize = colorContactBlockSizes[i];
+			for ( int j = 0; j < colorContactBlockCount; ++j )
+			{
+				b2SolverBlock* block = baseGraphBlock + j;
+				block->startIndex = j * colorContactBlockSize;
+				block->count = (int16_t)colorContactBlockSize;
+				block->blockType = b2_graphContactBlock;
+				b2AtomicStoreInt( &block->syncIndex, 0 );
+			}
+
+			if ( colorContactBlockCount > 0 )
+			{
+				baseGraphBlock[colorContactBlockCount - 1].count =
+					(int16_t)( colorContactCounts[i] - ( colorContactBlockCount - 1 ) * colorContactBlockSize );
+				baseGraphBlock += colorContactBlockCount;
+			}
+		}
+
+		B2_ASSERT( (ptrdiff_t)(baseGraphBlock - graphBlocks) == graphBlockCount );
+
+		b2SolverStage* stage = stages;
+
+		// Prepare joints
+		stage->type = b2_stagePrepareJoints;
+		stage->blocks = jointBlocks;
+		stage->blockCount = jointBlockCount;
+		stage->colorIndex = -1;
+		b2AtomicStoreInt(&stage->completionCount, 0);
+		stage += 1;
+
+		// Prepare contacts
+		stage->type = b2_stagePrepareContacts;
+		stage->blocks = contactBlocks;
+		stage->blockCount = contactBlockCount;
+		stage->colorIndex = -1;
+		b2AtomicStoreInt( &stage->completionCount, 0 );
+		stage += 1;
+
+		// Integrate velocities
+		stage->type = b2_stageIntegrateVelocities;
+		stage->blocks = bodyBlocks;
+		stage->blockCount = bodyBlockCount;
+		stage->colorIndex = -1;
+		b2AtomicStoreInt( &stage->completionCount, 0 );
+		stage += 1;
+
+		// Warm start
+		for ( int i = 0; i < activeColorCount; ++i )
+		{
+			stage->type = b2_stageWarmStart;
+			stage->blocks = graphColorBlocks[i];
+			stage->blockCount = colorJointBlockCounts[i] + colorContactBlockCounts[i];
+			stage->colorIndex = activeColorIndices[i];
+			b2AtomicStoreInt( &stage->completionCount, 0 );
+			stage += 1;
+		}
+
+		// Solve graph
+		for ( int i = 0; i < activeColorCount; ++i )
+		{
+			stage->type = b2_stageSolve;
+			stage->blocks = graphColorBlocks[i];
+			stage->blockCount = colorJointBlockCounts[i] + colorContactBlockCounts[i];
+			stage->colorIndex = activeColorIndices[i];
+			b2AtomicStoreInt( &stage->completionCount, 0 );
+			stage += 1;
+		}
+
+		// Integrate positions
+		stage->type = b2_stageIntegratePositions;
+		stage->blocks = bodyBlocks;
+		stage->blockCount = bodyBlockCount;
+		stage->colorIndex = -1;
+		b2AtomicStoreInt( &stage->completionCount, 0 );
+		stage += 1;
+
+		// Relax constraints
+		for ( int i = 0; i < activeColorCount; ++i )
+		{
+			stage->type = b2_stageRelax;
+			stage->blocks = graphColorBlocks[i];
+			stage->blockCount = colorJointBlockCounts[i] + colorContactBlockCounts[i];
+			stage->colorIndex = activeColorIndices[i];
+			b2AtomicStoreInt( &stage->completionCount, 0 );
+			stage += 1;
+		}
+
+		// Restitution
+		// Note: joint blocks mixed in, could have joint limit restitution
+		for ( int i = 0; i < activeColorCount; ++i )
+		{
+			stage->type = b2_stageRestitution;
+			stage->blocks = graphColorBlocks[i];
+			stage->blockCount = colorJointBlockCounts[i] + colorContactBlockCounts[i];
+			stage->colorIndex = activeColorIndices[i];
+			b2AtomicStoreInt( &stage->completionCount, 0 );
+			stage += 1;
+		}
+
+		// Store impulses
+		stage->type = b2_stageStoreImpulses;
+		stage->blocks = contactBlocks;
+		stage->blockCount = contactBlockCount;
+		stage->colorIndex = -1;
+		b2AtomicStoreInt( &stage->completionCount, 0 );
+		stage += 1;
+
+		B2_ASSERT( (int)( stage - stages ) == stageCount );
+
+		B2_ASSERT( workerCount <= B2_MAX_WORKERS );
+		b2WorkerContext workerContext[B2_MAX_WORKERS];
+
+		stepContext->graph = graph;
+		stepContext->joints = joints;
+		stepContext->contacts = contacts;
+		stepContext->simdContactConstraints = simdContactConstraints;
+		stepContext->activeColorCount = activeColorCount;
+		stepContext->workerCount = workerCount;
+		stepContext->stageCount = stageCount;
+		stepContext->stages = stages;
+		b2AtomicStoreU32(&stepContext->atomicSyncBits, 0);
+
+		world->profile.prepareStages = b2GetMillisecondsAndReset( &prepareTicks );
+		b2TracyCZoneEnd( prepare_stages );
+
+		b2TracyCZoneNC( solve_constraints, "Solve Constraints", b2_colorIndigo, true );
+		uint64_t constraintTicks = b2GetTicks();
+
+		// Must use worker index because thread 0 can be assigned multiple tasks by enkiTS
+		for ( int i = 0; i < workerCount; ++i )
+		{
+			workerContext[i].context = stepContext;
+			workerContext[i].workerIndex = i;
+			workerContext[i].userTask = world->enqueueTaskFcn( b2SolverTask, 1, 1, workerContext + i, world->userTaskContext );
+			world->taskCount += 1;
+			world->activeTaskCount += workerContext[i].userTask == NULL ? 0 : 1;
+		}
+
+		// Finish island split
+		if ( splitIslandTask != NULL )
+		{
+			world->finishTaskFcn( splitIslandTask, world->userTaskContext );
+			world->activeTaskCount -= 1;
+		}
+		world->splitIslandId = B2_NULL_INDEX;
+
+		// Finish constraint solve
+		for ( int i = 0; i < workerCount; ++i )
+		{
+			if ( workerContext[i].userTask != NULL )
+			{
+				world->finishTaskFcn( workerContext[i].userTask, world->userTaskContext );
+				world->activeTaskCount -= 1;
+			}
+		}
+
+		world->profile.solveConstraints = b2GetMillisecondsAndReset( &constraintTicks );
+		b2TracyCZoneEnd( solve_constraints );
+
+		b2TracyCZoneNC( update_transforms, "Update Transforms", b2_colorMediumSeaGreen, true );
+		uint64_t transformTicks = b2GetTicks();
+
+		// Prepare contact, enlarged body, and island bit sets used in body finalization.
+		int awakeIslandCount = awakeSet->islandSims.count;
+		for ( int i = 0; i < world->workerCount; ++i )
+		{
+			b2TaskContext* taskContext = world->taskContexts.data + i;
+			b2SetBitCountAndClear( &taskContext->enlargedSimBitSet, awakeBodyCount );
+			b2SetBitCountAndClear( &taskContext->awakeIslandBitSet, awakeIslandCount );
+			taskContext->splitIslandId = B2_NULL_INDEX;
+			taskContext->splitSleepTime = 0.0f;
+		}
+
+		// Finalize bodies. Must happen after the constraint solver and after island splitting.
+		void* finalizeBodiesTask =
+			world->enqueueTaskFcn( b2FinalizeBodiesTask, awakeBodyCount, 64, stepContext, world->userTaskContext );
+		world->taskCount += 1;
+		if ( finalizeBodiesTask != NULL )
+		{
+			world->finishTaskFcn( finalizeBodiesTask, world->userTaskContext );
+		}
+
+		b2FreeArenaItem( &world->arena, graphBlocks );
+		b2FreeArenaItem( &world->arena, jointBlocks );
+		b2FreeArenaItem( &world->arena, contactBlocks );
+		b2FreeArenaItem( &world->arena, bodyBlocks );
+		b2FreeArenaItem( &world->arena, stages );
+		b2FreeArenaItem( &world->arena, overflowContactConstraints );
+		b2FreeArenaItem( &world->arena, simdContactConstraints );
+		b2FreeArenaItem( &world->arena, joints );
+		b2FreeArenaItem( &world->arena, contacts );
+
+		world->profile.transforms = b2GetMilliseconds( transformTicks );
+		b2TracyCZoneEnd( update_transforms );
+	}
+
+	// Report hit events
+	// todo_erin perhaps optimize this with a bitset
+	// todo_erin perhaps do this in parallel with other work below
+	{
+		b2TracyCZoneNC( hit_events, "Hit Events", b2_colorRosyBrown, true );
+		uint64_t hitTicks = b2GetTicks();
+
+		B2_ASSERT( world->contactHitEvents.count == 0 );
+
+		float threshold = world->hitEventThreshold;
+		b2GraphColor* colors = world->constraintGraph.colors;
+		for ( int i = 0; i < B2_GRAPH_COLOR_COUNT; ++i )
+		{
+			b2GraphColor* color = colors + i;
+			int contactCount = color->contactSims.count;
+			b2ContactSim* contactSims = color->contactSims.data;
+			for ( int j = 0; j < contactCount; ++j )
+			{
+				b2ContactSim* contactSim = contactSims + j;
+				if ( ( contactSim->simFlags & b2_simEnableHitEvent ) == 0 )
+				{
+					continue;
+				}
+
+				b2ContactHitEvent event = { 0 };
+				event.approachSpeed = threshold;
+
+				bool hit = false;
+				int pointCount = contactSim->manifold.pointCount;
+				for ( int k = 0; k < pointCount; ++k )
+				{
+					b2ManifoldPoint* mp = contactSim->manifold.points + k;
+					float approachSpeed = -mp->normalVelocity;
+
+					// Need to check total impulse because the point may be speculative and not colliding
+					if ( approachSpeed > event.approachSpeed && mp->totalNormalImpulse > 0.0f )
+					{
+						event.approachSpeed = approachSpeed;
+						event.point = mp->point;
+						hit = true;
+					}
+				}
+
+				if ( hit == true )
+				{
+					event.normal = contactSim->manifold.normal;
+
+					b2Shape* shapeA = b2ShapeArray_Get( &world->shapes, contactSim->shapeIdA );
+					b2Shape* shapeB = b2ShapeArray_Get( &world->shapes, contactSim->shapeIdB );
+
+					event.shapeIdA = ( b2ShapeId ){ shapeA->id + 1, world->worldId, shapeA->generation };
+					event.shapeIdB = ( b2ShapeId ){ shapeB->id + 1, world->worldId, shapeB->generation };
+
+					b2ContactHitEventArray_Push( &world->contactHitEvents, event );
+				}
+			}
+		}
+
+		world->profile.hitEvents = b2GetMilliseconds( hitTicks );
+		b2TracyCZoneEnd( hit_events );
+	}
+
+	{
+		b2TracyCZoneNC( refit_bvh, "Refit BVH", b2_colorFireBrick, true );
+		uint64_t refitTicks = b2GetTicks();
+
+		// Finish the user tree task that was queued earlier in the time step. This must be complete before touching the
+		// broad-phase.
+		if ( world->userTreeTask != NULL )
+		{
+			world->finishTaskFcn( world->userTreeTask, world->userTaskContext );
+			world->userTreeTask = NULL;
+			world->activeTaskCount -= 1;
+		}
+
+		b2ValidateNoEnlarged( &world->broadPhase );
+
+		// Gather bits for all sim bodies that have enlarged AABBs
+		b2BitSet* enlargedBodyBitSet = &world->taskContexts.data[0].enlargedSimBitSet;
+		for ( int i = 1; i < world->workerCount; ++i )
+		{
+			b2InPlaceUnion( enlargedBodyBitSet, &world->taskContexts.data[i].enlargedSimBitSet );
+		}
+
+		// Enlarge broad-phase proxies and build move array
+		// Apply shape AABB changes to broad-phase. This also create the move array which must be
+		// in deterministic order. I'm tracking sim bodies because the number of shape ids can be huge.
+		// This has to happen before bullets are processed.
+		{
+			b2BroadPhase* broadPhase = &world->broadPhase;
+			uint32_t wordCount = enlargedBodyBitSet->blockCount;
+			uint64_t* bits = enlargedBodyBitSet->bits;
+
+			// Fast array access is important here
+			b2Body* bodyArray = world->bodies.data;
+			b2BodySim* bodySimArray = awakeSet->bodySims.data;
+			b2Shape* shapeArray = world->shapes.data;
+
+			for ( uint32_t k = 0; k < wordCount; ++k )
+			{
+				uint64_t word = bits[k];
+				while ( word != 0 )
+				{
+					uint32_t ctz = b2CTZ64( word );
+					uint32_t bodySimIndex = 64 * k + ctz;
+
+					b2BodySim* bodySim = bodySimArray + bodySimIndex;
+
+					b2Body* body = bodyArray + bodySim->bodyId;
+
+					int shapeId = body->headShapeId;
+					if ( bodySim->isBullet && bodySim->isFast )
+					{
+						// Fast bullet bodies don't have their final AABB yet
+						while ( shapeId != B2_NULL_INDEX )
+						{
+							b2Shape* shape = shapeArray + shapeId;
+
+							// Shape is fast. It's aabb will be enlarged in continuous collision.
+							// Update the move array here for determinism because bullets are processed
+							// below in non-deterministic order.
+							b2BufferMove( broadPhase, shape->proxyKey );
+
+							shapeId = shape->nextShapeId;
+						}
+					}
+					else
+					{
+						while ( shapeId != B2_NULL_INDEX )
+						{
+							b2Shape* shape = shapeArray + shapeId;
+
+							// The AABB may not have been enlarged, despite the body being flagged as enlarged.
+							// For example, a body with multiple shapes may have not have all shapes enlarged.
+							// A fast body may have been flagged as enlarged despite having no shapes enlarged.
+							if ( shape->enlargedAABB )
+							{
+								b2BroadPhase_EnlargeProxy( broadPhase, shape->proxyKey, shape->fatAABB );
+								shape->enlargedAABB = false;
+							}
+
+							shapeId = shape->nextShapeId;
+						}
+					}
+
+					// Clear the smallest set bit
+					word = word & ( word - 1 );
+				}
+			}
+		}
+
+		b2ValidateBroadphase( &world->broadPhase );
+
+		world->profile.refit = b2GetMilliseconds( refitTicks );
+		b2TracyCZoneEnd( refit_bvh );
+	}
+
+	int bulletBodyCount = b2AtomicLoadInt( &stepContext->bulletBodyCount );
+	if ( bulletBodyCount > 0 )
+	{
+		b2TracyCZoneNC( bullets, "Bullets", b2_colorLightYellow, true );
+		uint64_t bulletTicks = b2GetTicks();
+
+		// Fast bullet bodies
+		// Note: a bullet body may be moving slow
+		int minRange = 8;
+		void* userBulletBodyTask = world->enqueueTaskFcn( &b2BulletBodyTask, bulletBodyCount, minRange, stepContext,
+														  world->userTaskContext );
+		world->taskCount += 1;
+		if ( userBulletBodyTask != NULL )
+		{
+			world->finishTaskFcn( userBulletBodyTask, world->userTaskContext );
+		}
+
+		// Serially enlarge broad-phase proxies for bullet shapes
+		b2BroadPhase* broadPhase = &world->broadPhase;
+		b2DynamicTree* dynamicTree = broadPhase->trees + b2_dynamicBody;
+
+		// Fast array access is important here
+		b2Body* bodyArray = world->bodies.data;
+		b2BodySim* bodySimArray = awakeSet->bodySims.data;
+		b2Shape* shapeArray = world->shapes.data;
+
+		// Serially enlarge broad-phase proxies for bullet shapes
+		int* bulletBodySimIndices = stepContext->bulletBodies;
+
+		// This loop has non-deterministic order but it shouldn't affect the result
+		for ( int i = 0; i < bulletBodyCount; ++i )
+		{
+			b2BodySim* bulletBodySim = bodySimArray + bulletBodySimIndices[i];
+			if ( bulletBodySim->enlargeAABB == false )
+			{
+				continue;
+			}
+
+			// clear flag
+			bulletBodySim->enlargeAABB = false;
+
+			int bodyId = bulletBodySim->bodyId;
+			B2_ASSERT( 0 <= bodyId && bodyId < world->bodies.count );
+			b2Body* bulletBody = bodyArray + bodyId;
+
+			int shapeId = bulletBody->headShapeId;
+			while ( shapeId != B2_NULL_INDEX )
+			{
+				b2Shape* shape = shapeArray + shapeId;
+				if ( shape->enlargedAABB == false )
+				{
+					shapeId = shape->nextShapeId;
+					continue;
+				}
+
+				// clear flag
+				shape->enlargedAABB = false;
+
+				int proxyKey = shape->proxyKey;
+				int proxyId = B2_PROXY_ID( proxyKey );
+				B2_ASSERT( B2_PROXY_TYPE( proxyKey ) == b2_dynamicBody );
+
+				// all fast bullet shapes should already be in the move buffer
+				B2_ASSERT( b2ContainsKey( &broadPhase->moveSet, proxyKey + 1 ) );
+
+				b2DynamicTree_EnlargeProxy( dynamicTree, proxyId, shape->fatAABB );
+
+				shapeId = shape->nextShapeId;
+			}
+		}
+
+		world->profile.bullets = b2GetMilliseconds( bulletTicks );
+		b2TracyCZoneEnd( bullets );
+	}
+
+	// Need to free this even if no bullets got processed.
+	b2FreeArenaItem( &world->arena, stepContext->bulletBodies );
+	stepContext->bulletBodies = NULL;
+	b2AtomicStoreInt(&stepContext->bulletBodyCount, 0);
+
+	// Island sleeping
+	// This must be done last because putting islands to sleep invalidates the enlarged body bits.
+	// todo_erin figure out how to do this in parallel with tree refit
+	if ( world->enableSleep == true )
+	{
+		b2TracyCZoneNC( sleep_islands, "Island Sleep", b2_colorLightSlateGray, true );
+		uint64_t sleepTicks = b2GetTicks();
+
+		// Collect split island candidate for the next time step. No need to split if sleeping is disabled.
+		B2_ASSERT( world->splitIslandId == B2_NULL_INDEX );
+		float splitSleepTimer = 0.0f;
+		for ( int i = 0; i < world->workerCount; ++i )
+		{
+			b2TaskContext* taskContext = world->taskContexts.data + i;
+			if ( taskContext->splitIslandId != B2_NULL_INDEX && taskContext->splitSleepTime >= splitSleepTimer )
+			{
+				B2_ASSERT( taskContext->splitSleepTime > 0.0f );
+
+				// Tie breaking for determinism. Largest island id wins. Needed due to work stealing.
+				if ( taskContext->splitSleepTime == splitSleepTimer && taskContext->splitIslandId < world->splitIslandId )
+				{
+					continue;
+				}
+
+				world->splitIslandId = taskContext->splitIslandId;
+				splitSleepTimer = taskContext->splitSleepTime;
+			}
+		}
+
+		b2BitSet* awakeIslandBitSet = &world->taskContexts.data[0].awakeIslandBitSet;
+		for ( int i = 1; i < world->workerCount; ++i )
+		{
+			b2InPlaceUnion( awakeIslandBitSet, &world->taskContexts.data[i].awakeIslandBitSet );
+		}
+
+		// Need to process in reverse because this moves islands to sleeping solver sets.
+		b2IslandSim* islands = awakeSet->islandSims.data;
+		int count = awakeSet->islandSims.count;
+		for ( int islandIndex = count - 1; islandIndex >= 0; islandIndex -= 1 )
+		{
+			if ( b2GetBit( awakeIslandBitSet, islandIndex ) == true )
+			{
+				// this island is still awake
+				continue;
+			}
+
+			b2IslandSim* island = islands + islandIndex;
+			int islandId = island->islandId;
+
+			b2TrySleepIsland( world, islandId );
+		}
+
+		b2ValidateSolverSets( world );
+
+		world->profile.sleepIslands = b2GetMilliseconds( sleepTicks );
+		b2TracyCZoneEnd( sleep_islands );
+	}
+}
diff --git a/src/vendor/box2d/solver.h b/src/vendor/box2d/solver.h
new file mode 100644
index 0000000..2a2d4a1
--- /dev/null
+++ b/src/vendor/box2d/solver.h
@@ -0,0 +1,155 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "box2d/math_functions.h"
+
+#include "core.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+typedef struct b2BodySim b2BodySim;
+typedef struct b2BodyState b2BodyState;
+typedef struct b2ContactSim b2ContactSim;
+typedef struct b2JointSim b2JointSim;
+typedef struct b2World b2World;
+
+typedef struct b2Softness
+{
+	float biasRate; // from b2MakeSoft: w / d, where w = 2*pi*hertz and d = 2*zeta + h*w (0 when hertz == 0)
+	float massScale; // from b2MakeSoft: k * s, where k = h*w*d and s = 1 / ( 1 + k ) (1 when hertz == 0)
+	float impulseScale; // from b2MakeSoft: s (0 when hertz == 0)
+} b2Softness;
+
+typedef enum b2SolverStageType // enumerators are listed in the order the per-step stage array is filled
+{
+	b2_stagePrepareJoints, // single stage over the joint blocks
+	b2_stagePrepareContacts, // single stage over the contact blocks
+	b2_stageIntegrateVelocities, // single stage over the body blocks
+	b2_stageWarmStart, // one stage per active color, over that color's graph blocks
+	b2_stageSolve, // one stage per active color, over that color's graph blocks
+	b2_stageIntegratePositions, // single stage over the body blocks
+	b2_stageRelax, // one stage per active color, over that color's graph blocks
+	b2_stageRestitution, // one stage per active color; joint blocks are included as well
+	b2_stageStoreImpulses // single stage over the contact blocks
+} b2SolverStageType;
+
+typedef enum b2SolverBlockType // stored narrowed to int16_t in b2SolverBlock::blockType
+{
+	b2_bodyBlock, // range of awake bodies
+	b2_jointBlock, // range of the flat awake joint pointer array
+	b2_contactBlock, // range of the SIMD contact constraints (counted in SIMD-wide groups)
+	b2_graphJointBlock, // range of the joints of a single graph color
+	b2_graphContactBlock // range of the contacts of a single graph color
+} b2SolverBlockType;
+
+// Each block of work has a sync index that gets incremented when a worker claims the block. This ensures only a single worker
+// claims a block, yet lets work be distributed dynamically across multiple workers (work stealing). This also reduces contention
+// on a single block index atomic. For non-iterative stages the sync index is simply set to one. For iterative stages (solver
+// iteration) the same block of work is executed once per iteration and the atomic sync index is shared across iterations, so it
+// increases monotonically.
+typedef struct b2SolverBlock
+{
+	int startIndex; // first item of the block: block number times the block size of its range
+	int16_t count; // item count, narrowed from int by an explicit cast; the last block of a range gets the remainder
+	int16_t blockType; // b2SolverBlockType
+	// todo consider false sharing of this atomic
+	b2AtomicInt syncIndex; // stored as 0 when the block is prepared for a step
+} b2SolverBlock;
+
+// Each stage must be completed before going to the next stage.
+// Non-iterative stages use a stage instance once while iterative stages re-use the same instance each iteration.
+typedef struct b2SolverStage
+{
+	b2SolverStageType type;
+	b2SolverBlock* blocks; // not owned: several stages point at the same body, contact, or per-color block arrays
+	int blockCount; // for per-color stages: that color's joint block count plus its contact block count
+	int colorIndex; // graph color index for per-color stages, -1 for all other stages
+	// todo consider false sharing of this atomic
+	b2AtomicInt completionCount; // stored as 0 when the stage is set up
+} b2SolverStage;
+
+// Context for a time step. Recreated each time step.
+typedef struct b2StepContext
+{
+	// time step
+	float dt;
+
+	// inverse time step (0 if dt == 0).
+	float inv_dt;
+
+	// sub-step
+	float h;
+	float inv_h; // presumably the inverse of h, by analogy with inv_dt - confirm where the context is filled
+
+	int subStepCount;
+
+	b2Softness jointSoftness;
+	b2Softness contactSoftness;
+	b2Softness staticSoftness;
+
+	float restitutionThreshold;
+	float maxLinearVelocity;
+
+	struct b2World* world;
+	struct b2ConstraintGraph* graph; // assigned while the solver stages are prepared
+
+	// shortcut to body states from awake set
+	b2BodyState* states;
+
+	// shortcut to body sims from awake set
+	b2BodySim* sims;
+
+	// array of all shape ids for shapes that have enlarged AABBs
+	int* enlargedShapes;
+	int enlargedShapeCount;
+
+	// Array of bullet bodies that need continuous collision handling
+	int* bulletBodies; // indices into the awake set's body sims; arena item, freed even when no bullets were processed
+	b2AtomicInt bulletBodyCount; // loaded before bullet processing and stored back to 0 once bulletBodies is freed
+
+	// joint pointers for simplified parallel-for access.
+	b2JointSim** joints; // one entry per awake joint, filled color by color
+
+	// contact pointers for simplified parallel-for access.
+	// - parallel-for collide with no gaps
+	// - parallel-for prepare and store contacts with NULL gaps for SIMD remainders
+	// despite being an array of pointers, these are contiguous sub-arrays corresponding
+	// to constraint graph colors
+	b2ContactSim** contacts;
+
+	struct b2ContactConstraintSIMD* simdContactConstraints; // one arena buffer; each active color uses a slice of it
+	int activeColorCount;
+	int workerCount;
+
+	b2SolverStage* stages; // arena array of stageCount entries, rebuilt while the solver stages are prepared
+	int stageCount;
+	bool enableWarmStarting;
+
+	// todo padding to prevent false sharing
+	char dummy1[64];
+
+	// sync index (16-bits) | stage type (16-bits)
+	b2AtomicU32 atomicSyncBits; // stored as 0 before the solver tasks are enqueued
+
+	char dummy2[64]; // presumably padding after the atomic, mirroring dummy1
+
+} b2StepContext;
+
+// Softness from hertz, zeta, h (presumably frequency, damping ratio, sub-step); hertz == 0 yields { 0, 1, 0 }.
+static inline b2Softness b2MakeSoft( float hertz, float zeta, float h )
+{
+	if ( hertz != 0.0f )
+	{
+		float w = 2.0f * B2_PI * hertz;
+		float d = 2.0f * zeta + h * w;
+		float k = h * w * d;
+		float s = 1.0f / ( 1.0f + k );
+		return ( b2Softness ){ .biasRate = w / d, .massScale = k * s, .impulseScale = s };
+	}
+	return ( b2Softness ){ .biasRate = 0.0f, .massScale = 1.0f, .impulseScale = 0.0f };
+}
+
+void b2Solve( b2World* world, b2StepContext* stepContext );
diff --git a/src/vendor/box2d/solver_set.c b/src/vendor/box2d/solver_set.c
new file mode 100644
index 0000000..cbc7553
--- /dev/null
+++ b/src/vendor/box2d/solver_set.c
@@ -0,0 +1,613 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "solver_set.h"
+
+#include "body.h"
+#include "constraint_graph.h"
+#include "contact.h"
+#include "core.h"
+#include "island.h"
+#include "joint.h"
+#include "world.h"
+
+#include <string.h>
+
+B2_ARRAY_SOURCE( b2SolverSet, b2SolverSet )
+
+void b2DestroySolverSet( b2World* world, int setIndex )
+{
+	b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, setIndex );
+	b2BodySimArray_Destroy( &set->bodySims );
+	b2BodyStateArray_Destroy( &set->bodyStates );
+	b2ContactSimArray_Destroy( &set->contactSims );
+	b2JointSimArray_Destroy( &set->jointSims );
+	b2IslandSimArray_Destroy( &set->islandSims );
+	b2FreeId( &world->solverSetIdPool, setIndex );
+	*set = ( b2SolverSet ){ 0 };
+	set->setIndex = B2_NULL_INDEX;
+}
+
+// Wake a solver set. Does not merge islands.
+// Contacts can be in several places:
+// 1. non-touching contacts in the disabled set
+// 2. non-touching contacts already in the awake set
+// 3. touching contacts in the sleeping set
+// This handles contact types 1 and 3. Type 2 doesn't need any action.
+void b2WakeSolverSet( b2World* world, int setIndex )
+{
+	B2_ASSERT( setIndex >= b2_firstSleepingSet );
+	b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, setIndex );
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	b2SolverSet* disabledSet = b2SolverSetArray_Get( &world->solverSets, b2_disabledSet );
+
+	b2Body* bodies = world->bodies.data;
+
+	int bodyCount = set->bodySims.count;
+	for ( int i = 0; i < bodyCount; ++i )
+	{
+		b2BodySim* simSrc = set->bodySims.data + i;
+
+		b2Body* body = bodies + simSrc->bodyId;
+		B2_ASSERT( body->setIndex == setIndex );
+		body->setIndex = b2_awakeSet;
+		body->localIndex = awakeSet->bodySims.count;
+
+		// Reset sleep timer
+		body->sleepTime = 0.0f;
+
+		b2BodySim* simDst = b2BodySimArray_Add( &awakeSet->bodySims );
+		memcpy( simDst, simSrc, sizeof( b2BodySim ) );
+
+		b2BodyState* state = b2BodyStateArray_Add( &awakeSet->bodyStates );
+		*state = b2_identityBodyState;
+
+		// move non-touching contacts from disabled set to awake set
+		int contactKey = body->headContactKey;
+		while ( contactKey != B2_NULL_INDEX )
+		{
+			int edgeIndex = contactKey & 1;
+			int contactId = contactKey >> 1;
+
+			b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+
+			contactKey = contact->edges[edgeIndex].nextKey;
+
+			if ( contact->setIndex != b2_disabledSet )
+			{
+				B2_ASSERT( contact->setIndex == b2_awakeSet || contact->setIndex == setIndex );
+				continue;
+			}
+
+			int localIndex = contact->localIndex;
+			b2ContactSim* contactSim = b2ContactSimArray_Get( &disabledSet->contactSims, localIndex );
+
+			B2_ASSERT( ( contact->flags & b2_contactTouchingFlag ) == 0 && contactSim->manifold.pointCount == 0 );
+
+			contact->setIndex = b2_awakeSet;
+			contact->localIndex = awakeSet->contactSims.count;
+			b2ContactSim* awakeContactSim = b2ContactSimArray_Add( &awakeSet->contactSims );
+			memcpy( awakeContactSim, contactSim, sizeof( b2ContactSim ) );
+
+			int movedLocalIndex = b2ContactSimArray_RemoveSwap( &disabledSet->contactSims, localIndex );
+			if ( movedLocalIndex != B2_NULL_INDEX )
+			{
+				// fix moved element
+				b2ContactSim* movedContactSim = disabledSet->contactSims.data + localIndex;
+				b2Contact* movedContact = b2ContactArray_Get( &world->contacts, movedContactSim->contactId );
+				B2_ASSERT( movedContact->localIndex == movedLocalIndex );
+				movedContact->localIndex = localIndex;
+			}
+		}
+	}
+
+	// transfer touching contacts from sleeping set to contact graph
+	{
+		int contactCount = set->contactSims.count;
+		for ( int i = 0; i < contactCount; ++i )
+		{
+			b2ContactSim* contactSim = set->contactSims.data + i;
+			b2Contact* contact = b2ContactArray_Get( &world->contacts, contactSim->contactId );
+			B2_ASSERT( contact->flags & b2_contactTouchingFlag );
+			B2_ASSERT( contactSim->simFlags & b2_simTouchingFlag );
+			B2_ASSERT( contactSim->manifold.pointCount > 0 );
+			B2_ASSERT( contact->setIndex == setIndex );
+			b2AddContactToGraph( world, contactSim, contact );
+			contact->setIndex = b2_awakeSet;
+		}
+	}
+
+	// transfer joints from sleeping set to awake set
+	{
+		int jointCount = set->jointSims.count;
+		for ( int i = 0; i < jointCount; ++i )
+		{
+			b2JointSim* jointSim = set->jointSims.data + i;
+			b2Joint* joint = b2JointArray_Get( &world->joints, jointSim->jointId );
+			B2_ASSERT( joint->setIndex == setIndex );
+			b2AddJointToGraph( world, jointSim, joint );
+			joint->setIndex = b2_awakeSet;
+		}
+	}
+
+	// transfer island from sleeping set to awake set
+	// Usually a sleeping set has only one island, but it is possible
+	// that joints are created between sleeping islands and they
+	// are moved to the same sleeping set.
+	{
+		int islandCount = set->islandSims.count;
+		for ( int i = 0; i < islandCount; ++i )
+		{
+			b2IslandSim* islandSrc = set->islandSims.data + i;
+			b2Island* island = b2IslandArray_Get( &world->islands, islandSrc->islandId );
+			island->setIndex = b2_awakeSet;
+			island->localIndex = awakeSet->islandSims.count;
+			b2IslandSim* islandDst = b2IslandSimArray_Add( &awakeSet->islandSims );
+			memcpy( islandDst, islandSrc, sizeof( b2IslandSim ) );
+		}
+	}
+
+	// destroy the sleeping set
+	b2DestroySolverSet( world, setIndex );
+
+	b2ValidateSolverSets( world );
+}
+
+void b2TrySleepIsland( b2World* world, int islandId )
+{
+	b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+	B2_ASSERT( island->setIndex == b2_awakeSet );
+
+	// cannot put an island to sleep while it has a pending split
+	if ( island->constraintRemoveCount > 0 )
+	{
+		return;
+	}
+
+	// island is sleeping
+	// - create new sleeping solver set
+	// - move island to sleeping solver set
+	// - identify non-touching contacts that should move to sleeping solver set or disabled set
+	// - remove old island
+	// - fix island
+	int sleepSetId = b2AllocId( &world->solverSetIdPool );
+	if ( sleepSetId == world->solverSets.count )
+	{
+		b2SolverSet set = { 0 };
+		set.setIndex = B2_NULL_INDEX;
+		b2SolverSetArray_Push( &world->solverSets, set );
+	}
+
+	b2SolverSet* sleepSet = b2SolverSetArray_Get( &world->solverSets, sleepSetId );
+	*sleepSet = ( b2SolverSet ){ 0 };
+
+	// grab awake set after creating the sleep set because the solver set array may have been resized
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	B2_ASSERT( 0 <= island->localIndex && island->localIndex < awakeSet->islandSims.count );
+
+	sleepSet->setIndex = sleepSetId;
+	sleepSet->bodySims = b2BodySimArray_Create( island->bodyCount );
+	sleepSet->contactSims = b2ContactSimArray_Create( island->contactCount );
+	sleepSet->jointSims = b2JointSimArray_Create( island->jointCount );
+
+	// move awake bodies to sleeping set
+	// this shuffles around bodies in the awake set
+	{
+		b2SolverSet* disabledSet = b2SolverSetArray_Get( &world->solverSets, b2_disabledSet );
+		int bodyId = island->headBody;
+		while ( bodyId != B2_NULL_INDEX )
+		{
+			b2Body* body = b2BodyArray_Get( &world->bodies, bodyId );
+			B2_ASSERT( body->setIndex == b2_awakeSet );
+			B2_ASSERT( body->islandId == islandId );
+
+			// Update the body move event to indicate this body fell asleep
+			// It could happen the body is forced asleep before it ever moves.
+			if ( body->bodyMoveIndex != B2_NULL_INDEX )
+			{
+				b2BodyMoveEvent* moveEvent = b2BodyMoveEventArray_Get( &world->bodyMoveEvents, body->bodyMoveIndex );
+				B2_ASSERT( moveEvent->bodyId.index1 - 1 == bodyId );
+				B2_ASSERT( moveEvent->bodyId.generation == body->generation );
+				moveEvent->fellAsleep = true;
+				body->bodyMoveIndex = B2_NULL_INDEX;
+			}
+
+			int awakeBodyIndex = body->localIndex;
+			b2BodySim* awakeSim = b2BodySimArray_Get( &awakeSet->bodySims, awakeBodyIndex );
+
+			// move body sim to sleep set
+			int sleepBodyIndex = sleepSet->bodySims.count;
+			b2BodySim* sleepBodySim = b2BodySimArray_Add( &sleepSet->bodySims );
+			memcpy( sleepBodySim, awakeSim, sizeof( b2BodySim ) );
+
+			int movedIndex = b2BodySimArray_RemoveSwap( &awakeSet->bodySims, awakeBodyIndex );
+			if ( movedIndex != B2_NULL_INDEX )
+			{
+				// fix local index on moved element
+				b2BodySim* movedSim = awakeSet->bodySims.data + awakeBodyIndex;
+				int movedId = movedSim->bodyId;
+				b2Body* movedBody = b2BodyArray_Get( &world->bodies, movedId );
+				B2_ASSERT( movedBody->localIndex == movedIndex );
+				movedBody->localIndex = awakeBodyIndex;
+			}
+
+			// destroy state, no need to clone
+			b2BodyStateArray_RemoveSwap( &awakeSet->bodyStates, awakeBodyIndex );
+
+			body->setIndex = sleepSetId;
+			body->localIndex = sleepBodyIndex;
+
+			// Move non-touching contacts to the disabled set.
+			// Non-touching contacts may exist between sleeping islands and there is no clear ownership.
+			int contactKey = body->headContactKey;
+			while ( contactKey != B2_NULL_INDEX )
+			{
+				int contactId = contactKey >> 1;
+				int edgeIndex = contactKey & 1;
+
+				b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+
+				B2_ASSERT( contact->setIndex == b2_awakeSet || contact->setIndex == b2_disabledSet );
+				contactKey = contact->edges[edgeIndex].nextKey;
+
+				if ( contact->setIndex == b2_disabledSet )
+				{
+					// already moved to disabled set by another body in the island
+					continue;
+				}
+
+				if ( contact->colorIndex != B2_NULL_INDEX )
+				{
+					// contact is touching and will be moved separately
+					B2_ASSERT( ( contact->flags & b2_contactTouchingFlag ) != 0 );
+					continue;
+				}
+
+				// the other body may still be awake, it still may go to sleep and then it will be responsible
+				// for moving this contact to the disabled set.
+				int otherEdgeIndex = edgeIndex ^ 1;
+				int otherBodyId = contact->edges[otherEdgeIndex].bodyId;
+				b2Body* otherBody = b2BodyArray_Get( &world->bodies, otherBodyId );
+				if ( otherBody->setIndex == b2_awakeSet )
+				{
+					continue;
+				}
+
+				int localIndex = contact->localIndex;
+				b2ContactSim* contactSim = b2ContactSimArray_Get( &awakeSet->contactSims, localIndex );
+
+				B2_ASSERT( contactSim->manifold.pointCount == 0 );
+				B2_ASSERT( ( contact->flags & b2_contactTouchingFlag ) == 0 );
+
+				// move the non-touching contact to the disabled set
+				contact->setIndex = b2_disabledSet;
+				contact->localIndex = disabledSet->contactSims.count;
+				b2ContactSim* disabledContactSim = b2ContactSimArray_Add( &disabledSet->contactSims );
+				memcpy( disabledContactSim, contactSim, sizeof( b2ContactSim ) );
+
+				int movedLocalIndex = b2ContactSimArray_RemoveSwap( &awakeSet->contactSims, localIndex );
+				if ( movedLocalIndex != B2_NULL_INDEX )
+				{
+					// fix moved element
+					b2ContactSim* movedContactSim = awakeSet->contactSims.data + localIndex;
+					b2Contact* movedContact = b2ContactArray_Get( &world->contacts, movedContactSim->contactId );
+					B2_ASSERT( movedContact->localIndex == movedLocalIndex );
+					movedContact->localIndex = localIndex;
+				}
+			}
+
+			bodyId = body->islandNext;
+		}
+	}
+
+	// move touching contacts
+	// this shuffles contacts in the awake set
+	{
+		int contactId = island->headContact;
+		while ( contactId != B2_NULL_INDEX )
+		{
+			b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+			B2_ASSERT( contact->setIndex == b2_awakeSet );
+			B2_ASSERT( contact->islandId == islandId );
+			int colorIndex = contact->colorIndex;
+			B2_ASSERT( 0 <= colorIndex && colorIndex < B2_GRAPH_COLOR_COUNT );
+
+			b2GraphColor* color = world->constraintGraph.colors + colorIndex;
+
+			// Remove bodies from graph coloring associated with this constraint
+			if ( colorIndex != B2_OVERFLOW_INDEX )
+			{
+				// might clear a bit for a static body, but this has no effect
+				b2ClearBit( &color->bodySet, contact->edges[0].bodyId );
+				b2ClearBit( &color->bodySet, contact->edges[1].bodyId );
+			}
+
+			int localIndex = contact->localIndex;
+			b2ContactSim* awakeContactSim = b2ContactSimArray_Get( &color->contactSims, localIndex );
+
+			int sleepContactIndex = sleepSet->contactSims.count;
+			b2ContactSim* sleepContactSim = b2ContactSimArray_Add( &sleepSet->contactSims );
+			memcpy( sleepContactSim, awakeContactSim, sizeof( b2ContactSim ) );
+
+			int movedLocalIndex = b2ContactSimArray_RemoveSwap( &color->contactSims, localIndex );
+			if ( movedLocalIndex != B2_NULL_INDEX )
+			{
+				// fix moved element
+				b2ContactSim* movedContactSim = color->contactSims.data + localIndex;
+				b2Contact* movedContact = b2ContactArray_Get( &world->contacts, movedContactSim->contactId );
+				B2_ASSERT( movedContact->localIndex == movedLocalIndex );
+				movedContact->localIndex = localIndex;
+			}
+
+			contact->setIndex = sleepSetId;
+			contact->colorIndex = B2_NULL_INDEX;
+			contact->localIndex = sleepContactIndex;
+
+			contactId = contact->islandNext;
+		}
+	}
+
+	// move joints
+	// this shuffles joints in the awake set
+	{
+		int jointId = island->headJoint;
+		while ( jointId != B2_NULL_INDEX )
+		{
+			b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+			B2_ASSERT( joint->setIndex == b2_awakeSet );
+			B2_ASSERT( joint->islandId == islandId );
+			int colorIndex = joint->colorIndex;
+			int localIndex = joint->localIndex;
+
+			B2_ASSERT( 0 <= colorIndex && colorIndex < B2_GRAPH_COLOR_COUNT );
+
+			b2GraphColor* color = world->constraintGraph.colors + colorIndex;
+
+			b2JointSim* awakeJointSim = b2JointSimArray_Get( &color->jointSims, localIndex );
+
+			if ( colorIndex != B2_OVERFLOW_INDEX )
+			{
+				// might clear a bit for a static body, but this has no effect
+				b2ClearBit( &color->bodySet, joint->edges[0].bodyId );
+				b2ClearBit( &color->bodySet, joint->edges[1].bodyId );
+			}
+
+			int sleepJointIndex = sleepSet->jointSims.count;
+			b2JointSim* sleepJointSim = b2JointSimArray_Add( &sleepSet->jointSims );
+			memcpy( sleepJointSim, awakeJointSim, sizeof( b2JointSim ) );
+
+			int movedIndex = b2JointSimArray_RemoveSwap( &color->jointSims, localIndex );
+			if ( movedIndex != B2_NULL_INDEX )
+			{
+				// fix moved element
+				b2JointSim* movedJointSim = color->jointSims.data + localIndex;
+				int movedId = movedJointSim->jointId;
+				b2Joint* movedJoint = b2JointArray_Get( &world->joints, movedId );
+				B2_ASSERT( movedJoint->localIndex == movedIndex );
+				movedJoint->localIndex = localIndex;
+			}
+
+			joint->setIndex = sleepSetId;
+			joint->colorIndex = B2_NULL_INDEX;
+			joint->localIndex = sleepJointIndex;
+
+			jointId = joint->islandNext;
+		}
+	}
+
+	// move island struct
+	{
+		B2_ASSERT( island->setIndex == b2_awakeSet );
+
+		int islandIndex = island->localIndex;
+		b2IslandSim* sleepIsland = b2IslandSimArray_Add( &sleepSet->islandSims );
+		sleepIsland->islandId = islandId;
+
+		int movedIslandIndex = b2IslandSimArray_RemoveSwap( &awakeSet->islandSims, islandIndex );
+		if ( movedIslandIndex != B2_NULL_INDEX )
+		{
+			// fix index on moved element
+			b2IslandSim* movedIslandSim = awakeSet->islandSims.data + islandIndex;
+			int movedIslandId = movedIslandSim->islandId;
+			b2Island* movedIsland = b2IslandArray_Get( &world->islands, movedIslandId );
+			B2_ASSERT( movedIsland->localIndex == movedIslandIndex );
+			movedIsland->localIndex = islandIndex;
+		}
+
+		island->setIndex = sleepSetId;
+		island->localIndex = 0;
+	}
+
+	b2ValidateSolverSets( world );
+}
+
+// This is called when joints are created between sets. I want to allow the sets
+// to continue sleeping if both are asleep. Otherwise one set is woken.
+// Islands will get merged when the set is woken.
+void b2MergeSolverSets( b2World* world, int setId1, int setId2 )
+{
+	B2_ASSERT( setId1 >= b2_firstSleepingSet );
+	B2_ASSERT( setId2 >= b2_firstSleepingSet );
+	b2SolverSet* set1 = b2SolverSetArray_Get( &world->solverSets, setId1 );
+	b2SolverSet* set2 = b2SolverSetArray_Get( &world->solverSets, setId2 );
+
+	// Move the fewest number of bodies
+	if ( set1->bodySims.count < set2->bodySims.count )
+	{
+		b2SolverSet* tempSet = set1;
+		set1 = set2;
+		set2 = tempSet;
+
+		int tempId = setId1;
+		setId1 = setId2;
+		setId2 = tempId;
+	}
+
+	// transfer bodies
+	{
+		b2Body* bodies = world->bodies.data;
+		int bodyCount = set2->bodySims.count;
+		for ( int i = 0; i < bodyCount; ++i )
+		{
+			b2BodySim* simSrc = set2->bodySims.data + i;
+
+			b2Body* body = bodies + simSrc->bodyId;
+			B2_ASSERT( body->setIndex == setId2 );
+			body->setIndex = setId1;
+			body->localIndex = set1->bodySims.count;
+
+			b2BodySim* simDst = b2BodySimArray_Add( &set1->bodySims );
+			memcpy( simDst, simSrc, sizeof( b2BodySim ) );
+		}
+	}
+
+	// transfer contacts
+	{
+		int contactCount = set2->contactSims.count;
+		for ( int i = 0; i < contactCount; ++i )
+		{
+			b2ContactSim* contactSrc = set2->contactSims.data + i;
+
+			b2Contact* contact = b2ContactArray_Get( &world->contacts, contactSrc->contactId );
+			B2_ASSERT( contact->setIndex == setId2 );
+			contact->setIndex = setId1;
+			contact->localIndex = set1->contactSims.count;
+
+			b2ContactSim* contactDst = b2ContactSimArray_Add( &set1->contactSims );
+			memcpy( contactDst, contactSrc, sizeof( b2ContactSim ) );
+		}
+	}
+
+	// transfer joints
+	{
+		int jointCount = set2->jointSims.count;
+		for ( int i = 0; i < jointCount; ++i )
+		{
+			b2JointSim* jointSrc = set2->jointSims.data + i;
+
+			b2Joint* joint = b2JointArray_Get( &world->joints, jointSrc->jointId );
+			B2_ASSERT( joint->setIndex == setId2 );
+			joint->setIndex = setId1;
+			joint->localIndex = set1->jointSims.count;
+
+			b2JointSim* jointDst = b2JointSimArray_Add( &set1->jointSims );
+			memcpy( jointDst, jointSrc, sizeof( b2JointSim ) );
+		}
+	}
+
+	// transfer islands
+	{
+		int islandCount = set2->islandSims.count;
+		for ( int i = 0; i < islandCount; ++i )
+		{
+			b2IslandSim* islandSrc = set2->islandSims.data + i;
+			int islandId = islandSrc->islandId;
+
+			b2Island* island = b2IslandArray_Get( &world->islands, islandId );
+			island->setIndex = setId1;
+			island->localIndex = set1->islandSims.count;
+
+			b2IslandSim* islandDst = b2IslandSimArray_Add( &set1->islandSims );
+			memcpy( islandDst, islandSrc, sizeof( b2IslandSim ) );
+		}
+	}
+
+	// destroy the merged set
+	b2DestroySolverSet( world, setId2 );
+
+	b2ValidateSolverSets( world );
+}
+
+void b2TransferBody( b2World* world, b2SolverSet* targetSet, b2SolverSet* sourceSet, b2Body* body )
+{
+	B2_ASSERT( targetSet != sourceSet );
+
+	int sourceIndex = body->localIndex;
+	b2BodySim* sourceSim = b2BodySimArray_Get( &sourceSet->bodySims, sourceIndex );
+
+	int targetIndex = targetSet->bodySims.count;
+	b2BodySim* targetSim = b2BodySimArray_Add( &targetSet->bodySims );
+	memcpy( targetSim, sourceSim, sizeof( b2BodySim ) );
+
+	// Remove body sim from solver set that owns it
+	int movedIndex = b2BodySimArray_RemoveSwap( &sourceSet->bodySims, sourceIndex );
+	if ( movedIndex != B2_NULL_INDEX )
+	{
+		// Fix moved body index
+		b2BodySim* movedSim = sourceSet->bodySims.data + sourceIndex;
+		int movedId = movedSim->bodyId;
+		b2Body* movedBody = b2BodyArray_Get( &world->bodies, movedId );
+		B2_ASSERT( movedBody->localIndex == movedIndex );
+		movedBody->localIndex = sourceIndex;
+	}
+
+	if ( sourceSet->setIndex == b2_awakeSet )
+	{
+		b2BodyStateArray_RemoveSwap( &sourceSet->bodyStates, sourceIndex );
+	}
+	else if ( targetSet->setIndex == b2_awakeSet )
+	{
+		b2BodyState* state = b2BodyStateArray_Add( &targetSet->bodyStates );
+		*state = b2_identityBodyState;
+	}
+
+	body->setIndex = targetSet->setIndex;
+	body->localIndex = targetIndex;
+}
+
+void b2TransferJoint( b2World* world, b2SolverSet* targetSet, b2SolverSet* sourceSet, b2Joint* joint )
+{
+	B2_ASSERT( targetSet != sourceSet );
+
+	int localIndex = joint->localIndex;
+	int colorIndex = joint->colorIndex;
+
+	// Retrieve source.
+	b2JointSim* sourceSim;
+	if ( sourceSet->setIndex == b2_awakeSet )
+	{
+		B2_ASSERT( 0 <= colorIndex && colorIndex < B2_GRAPH_COLOR_COUNT );
+		b2GraphColor* color = world->constraintGraph.colors + colorIndex;
+
+		sourceSim = b2JointSimArray_Get( &color->jointSims, localIndex );
+	}
+	else
+	{
+		B2_ASSERT( colorIndex == B2_NULL_INDEX );
+		sourceSim = b2JointSimArray_Get( &sourceSet->jointSims, localIndex );
+	}
+
+	// Create target and copy. Fix joint.
+	if ( targetSet->setIndex == b2_awakeSet )
+	{
+		b2AddJointToGraph( world, sourceSim, joint );
+		joint->setIndex = b2_awakeSet;
+	}
+	else
+	{
+		joint->setIndex = targetSet->setIndex;
+		joint->localIndex = targetSet->jointSims.count;
+		joint->colorIndex = B2_NULL_INDEX;
+
+		b2JointSim* targetSim = b2JointSimArray_Add( &targetSet->jointSims );
+		memcpy( targetSim, sourceSim, sizeof( b2JointSim ) );
+	}
+
+	// Destroy source.
+	if ( sourceSet->setIndex == b2_awakeSet )
+	{
+		b2RemoveJointFromGraph( world, joint->edges[0].bodyId, joint->edges[1].bodyId, colorIndex, localIndex );
+	}
+	else
+	{
+		int movedIndex = b2JointSimArray_RemoveSwap( &sourceSet->jointSims, localIndex );
+		if ( movedIndex != B2_NULL_INDEX )
+		{
+			// fix swapped element
+			b2JointSim* movedJointSim = sourceSet->jointSims.data + localIndex;
+			int movedId = movedJointSim->jointId;
+			b2Joint* movedJoint = b2JointArray_Get( &world->joints, movedId );
+			movedJoint->localIndex = localIndex;
+		}
+	}
+}
diff --git a/src/vendor/box2d/solver_set.h b/src/vendor/box2d/solver_set.h
new file mode 100644
index 0000000..540673a
--- /dev/null
+++ b/src/vendor/box2d/solver_set.h
@@ -0,0 +1,57 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+
+typedef struct b2Body b2Body;
+typedef struct b2Joint b2Joint;
+typedef struct b2World b2World;
+
+// This holds solver set data. The following sets are used:
+// - static set for all static bodies (no contacts or joints)
+// - awake (active) set for all awake bodies with body states (touching contacts and joints live in the constraint graph)
+// - disabled set for disabled bodies and their joints
+// - all further sets are sleeping island sets along with their contacts and joints
+// The purpose of solver sets is to achieve high memory locality.
+// https://www.youtube.com/watch?v=nZNd5FjSquk
+typedef struct b2SolverSet
+{
+	// Body array. Empty for unused set.
+	b2BodySimArray bodySims;
+
+	// Body state only exists for active set
+	b2BodyStateArray bodyStates;
+
+	// This holds sleeping/disabled joints. Empty for static/active set.
+	b2JointSimArray jointSims;
+
+	// This holds touching contacts for sleeping sets and non-touching contacts for the awake set.
+	// The disabled set also holds the non-touching contacts of bodies that fell asleep.
+	b2ContactSimArray contactSims;
+
+	// The awake set has an array of islands. Sleeping sets normally have a single island. However, joints
+	// created between sleeping sets cause the sets to merge, leaving them with multiple islands. These sleeping
+	// islands will be naturally merged when the set is woken.
+	// The static and disabled sets have no islands.
+	// Islands live in the solver sets to limit the number of islands that need to be considered for sleeping.
+	b2IslandSimArray islandSims;
+
+	// Aligns with b2World::solverSetIdPool. Used to create a stable id for body/contact/joint/islands.
+	int setIndex;
+} b2SolverSet;
+
+void b2DestroySolverSet( b2World* world, int setIndex );
+
+void b2WakeSolverSet( b2World* world, int setIndex );
+void b2TrySleepIsland( b2World* world, int islandId );
+
+// Merge the set with fewer bodies into the other (set 2 into set 1 on a tie), then destroy the emptied set.
+// Warning: any pointers into these sets will be orphaned.
+void b2MergeSolverSets( b2World* world, int setIndex1, int setIndex2 );
+
+void b2TransferBody( b2World* world, b2SolverSet* targetSet, b2SolverSet* sourceSet, b2Body* body );
+void b2TransferJoint( b2World* world, b2SolverSet* targetSet, b2SolverSet* sourceSet, b2Joint* joint );
+
+B2_ARRAY_INLINE( b2SolverSet, b2SolverSet )
diff --git a/src/vendor/box2d/table.c b/src/vendor/box2d/table.c
new file mode 100644
index 0000000..9e1fdc8
--- /dev/null
+++ b/src/vendor/box2d/table.c
@@ -0,0 +1,238 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "table.h"
+
+#include "atomic.h"
+#include "core.h"
+#include "ctz.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+#if B2_SNOOP_TABLE_COUNTERS
+b2AtomicInt b2_findCount;
+b2AtomicInt b2_probeCount;
+#endif
+
+// todo compare with https://github.com/skeeto/scratch/blob/master/set32/set32.h
+
+// Create an empty hash set with room for at least `capacity` slots. The requested capacity
+// is raised to a minimum of 16 and rounded up to a power of 2, because slot lookup masks
+// the hash with (capacity - 1). All slots start zeroed, which marks them as empty.
+b2HashSet b2CreateSet( int capacity )
+{
+	b2HashSet set = { 0 };
+
+	// Capacity must be a power of 2
+	set.capacity = capacity > 16 ? b2RoundUpPowerOf2( capacity ) : 16;
+	set.count = 0;
+
+	// Size the storage from the adjusted capacity, not the requested one. Otherwise a
+	// request below 16 (or one that is not a power of 2) allocates fewer slots than
+	// set.capacity claims, and probing, growing and b2DestroySet touch memory past the end.
+	size_t byteCount = set.capacity * sizeof( b2SetItem );
+	set.items = b2Alloc( byteCount );
+	memset( set.items, 0, byteCount );
+
+	return set;
+}
+
+void b2DestroySet( b2HashSet* set )
+{
+	b2Free( set->items, set->capacity * sizeof( b2SetItem ) );
+	set->items = NULL;
+	set->count = 0;
+	set->capacity = 0;
+}
+
+void b2ClearSet( b2HashSet* set )
+{
+	set->count = 0;
+	memset( set->items, 0, set->capacity * sizeof( b2SetItem ) );
+}
+
+// I need a good hash because the keys are built from pairs of increasing integers.
+// A simple hash like hash = (integer1 XOR integer2) has many collisions.
+// https://lemire.me/blog/2018/08/15/fast-strongly-universal-64-bit-hashing-everywhere/
+// https://preshing.com/20130107/this-hash-set-is-faster-than-a-judy-array/
+// todo try: https://www.jandrewrogers.com/2019/02/12/fast-perfect-hashing/
+// todo try: https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+static uint32_t b2KeyHash( uint64_t key )
+{
+	// Murmur hash
+	uint64_t h = key;
+	h ^= h >> 33;
+	h *= 0xff51afd7ed558ccduLL;
+	h ^= h >> 33;
+	h *= 0xc4ceb9fe1a85ec53uLL;
+	h ^= h >> 33;
+
+	return (uint32_t)h;
+}
+
+static int b2FindSlot( const b2HashSet* set, uint64_t key, uint32_t hash )
+{
+#if B2_SNOOP_TABLE_COUNTERS
+	b2AtomicFetchAddInt( &b2_findCount, 1 );
+#endif
+
+	uint32_t mask = set->capacity - 1; // capacity is a power of 2, so masking wraps the index
+	const b2SetItem* slots = set->items;
+	int slot = hash & mask;
+	while ( slots[slot].hash != 0 && slots[slot].key != key )
+	{
+#if B2_SNOOP_TABLE_COUNTERS
+		b2AtomicFetchAddInt( &b2_probeCount, 1 );
+#endif
+		slot = ( slot + 1 ) & mask; // linear probe to the next slot, wrapping at the end
+	}
+
+	return slot; // either the slot holding key or the first empty slot on its probe path
+}
+
+static void b2AddKeyHaveCapacity( b2HashSet* set, uint64_t key, uint32_t hash )
+{
+	int index = b2FindSlot( set, key, hash );
+	b2SetItem* items = set->items;
+	B2_ASSERT( items[index].hash == 0 );
+
+	items[index].key = key;
+	items[index].hash = hash;
+	set->count += 1;
+}
+
+static void b2GrowTable( b2HashSet* set )
+{
+	uint32_t oldCount = set->count;
+	B2_UNUSED( oldCount );
+
+	uint32_t oldCapacity = set->capacity;
+	b2SetItem* oldItems = set->items;
+
+	set->count = 0;
+	// Capacity must be a power of 2
+	set->capacity = 2 * oldCapacity;
+	set->items = b2Alloc( set->capacity * sizeof( b2SetItem ) );
+	memset( set->items, 0, set->capacity * sizeof( b2SetItem ) );
+
+	// Transfer items into new array
+	for ( uint32_t i = 0; i < oldCapacity; ++i )
+	{
+		b2SetItem* item = oldItems + i;
+		if ( item->hash == 0 )
+		{
+			// this item was empty
+			continue;
+		}
+
+		b2AddKeyHaveCapacity( set, item->key, item->hash );
+	}
+
+	B2_ASSERT( set->count == oldCount );
+
+	b2Free( oldItems, oldCapacity * sizeof( b2SetItem ) );
+}
+
+bool b2ContainsKey( const b2HashSet* set, uint64_t key )
+{
+	// key of zero is a sentinel
+	B2_ASSERT( key != 0 );
+	uint32_t hash = b2KeyHash( key );
+	int index = b2FindSlot( set, key, hash );
+	return set->items[index].key == key;
+}
+
+int b2GetHashSetBytes( b2HashSet* set )
+{
+	return set->capacity * (int)sizeof( b2SetItem );
+}
+
+bool b2AddKey( b2HashSet* set, uint64_t key )
+{
+	// key of zero is a sentinel
+	B2_ASSERT( key != 0 );
+
+	uint32_t hash = b2KeyHash( key );
+	B2_ASSERT( hash != 0 );
+
+	int index = b2FindSlot( set, key, hash );
+	if ( set->items[index].hash != 0 )
+	{
+		// Already in set
+		B2_ASSERT( set->items[index].hash == hash && set->items[index].key == key );
+		return true;
+	}
+
+	if ( 2 * set->count >= set->capacity )
+	{
+		b2GrowTable( set );
+	}
+
+	b2AddKeyHaveCapacity( set, key, hash );
+	return false;
+}
+
+// Remove key and re-pack the items probed after it. Returns true if the key was found. See https://en.wikipedia.org/wiki/Open_addressing
+bool b2RemoveKey( b2HashSet* set, uint64_t key )
+{
+	uint32_t hash = b2KeyHash( key );
+	int i = b2FindSlot( set, key, hash );
+	b2SetItem* items = set->items;
+	if ( items[i].hash == 0 )
+	{
+		// Not in set: the probe ended on an empty slot
+		return false;
+	}
+
+	// Mark item i as unoccupied (a zero hash is the empty-slot marker)
+	items[i].key = 0;
+	items[i].hash = 0;
+
+	B2_ASSERT( set->count > 0 );
+	set->count -= 1;
+
+	// Attempt to fill item i: scan the occupied slots after the hole and pull back any item whose probe path crosses it
+	int j = i;
+	uint32_t capacity = set->capacity;
+	for ( ;; )
+	{
+		j = ( j + 1 ) & ( capacity - 1 );
+		if ( items[j].hash == 0 )
+		{
+			break;
+		}
+
+		// k is the home slot of item j: the first index probed for its hash
+		int k = items[j].hash & ( capacity - 1 );
+
+		// determine if k lies cyclically in (i,j]; if so item j is still reachable from k without crossing the hole at i
+		// i <= j: | i..k..j |
+		// i > j: |.k..j  i....| or |....j     i..k.|
+		if ( i <= j )
+		{
+			if ( i < k && k <= j )
+			{
+				continue;
+			}
+		}
+		else
+		{
+			if ( i < k || k <= j )
+			{
+				continue;
+			}
+		}
+
+		// Move j into i so a probe starting at k no longer stops early at the hole
+		items[i] = items[j];
+
+		// Mark item j as unoccupied
+		items[j].key = 0;
+		items[j].hash = 0;
+
+		i = j;
+	}
+
+	return true;
+}
diff --git a/src/vendor/box2d/table.h b/src/vendor/box2d/table.h
new file mode 100644
index 0000000..3c2d275
--- /dev/null
+++ b/src/vendor/box2d/table.h
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define B2_SHAPE_PAIR_KEY( K1, K2 ) ( ( K1 ) < ( K2 ) ? (uint64_t)( K1 ) << 32 | (uint64_t)( K2 ) : (uint64_t)( K2 ) << 32 | (uint64_t)( K1 ) )
+
+typedef struct b2SetItem
+{
+	uint64_t key;
+	uint32_t hash;
+} b2SetItem;
+
+typedef struct b2HashSet
+{
+	b2SetItem* items;
+	uint32_t capacity;
+	uint32_t count;
+} b2HashSet;
+
+b2HashSet b2CreateSet( int capacity );
+void b2DestroySet( b2HashSet* set );
+
+void b2ClearSet( b2HashSet* set );
+
+// Returns true if key was already in set
+bool b2AddKey( b2HashSet* set, uint64_t key );
+
+// Returns true if the key was found
+bool b2RemoveKey( b2HashSet* set, uint64_t key );
+
+bool b2ContainsKey( const b2HashSet* set, uint64_t key );
+
+int b2GetHashSetBytes( b2HashSet* set );
diff --git a/src/vendor/box2d/timer.c b/src/vendor/box2d/timer.c
new file mode 100644
index 0000000..e8e4935
--- /dev/null
+++ b/src/vendor/box2d/timer.c
@@ -0,0 +1,185 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "box2d/base.h"
+
+#include <stddef.h>
+
+#if defined( _MSC_VER )
+
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
+#endif
+
+#include <windows.h>
+
+static double s_invFrequency = 0.0;
+
+uint64_t b2GetTicks( void ) // raw QueryPerformanceCounter reading; b2GetMilliseconds scales it using QueryPerformanceFrequency
+{
+	LARGE_INTEGER counter;
+	QueryPerformanceCounter( &counter );
+	return (uint64_t)counter.QuadPart;
+}
+
+float b2GetMilliseconds( uint64_t ticks ) // milliseconds elapsed since `ticks`, an earlier b2GetTicks value
+{
+	if ( s_invFrequency == 0.0 ) // compute the tick-to-millisecond factor on first use
+	{
+		LARGE_INTEGER frequency;
+		QueryPerformanceFrequency( &frequency );
+
+		s_invFrequency = (double)frequency.QuadPart; // temporarily holds the reported counter frequency
+		if ( s_invFrequency > 0.0 ) // otherwise the factor stays 0: the result is 0 and initialization is retried on the next call
+		{
+			s_invFrequency = 1000.0 / s_invFrequency; // now milliseconds per tick
+		}
+	}
+
+	uint64_t ticksNow = b2GetTicks();
+	return (float)( s_invFrequency * ( ticksNow - ticks ) );
+}
+
+float b2GetMillisecondsAndReset( uint64_t* ticks ) // returns milliseconds elapsed since *ticks, then stores the current tick count in *ticks
+{
+	if ( s_invFrequency == 0.0 ) // same first-use initialization as in b2GetMilliseconds
+	{
+		LARGE_INTEGER frequency;
+		QueryPerformanceFrequency( &frequency );
+
+		s_invFrequency = (double)frequency.QuadPart; // temporarily holds the reported counter frequency
+		if ( s_invFrequency > 0.0 )
+		{
+			s_invFrequency = 1000.0 / s_invFrequency; // now milliseconds per tick
+		}
+	}
+
+	uint64_t ticksNow = b2GetTicks();
+	float ms = (float)( s_invFrequency * ( ticksNow - *ticks ) );
+	*ticks = ticksNow; // restart the measured interval at "now"
+	return ms;
+}
+
+void b2Yield( void )
+{
+	SwitchToThread();
+}
+
+#elif defined( __linux__ ) || defined( __EMSCRIPTEN__ )
+
+#include <sched.h>
+#include <time.h>
+
+uint64_t b2GetTicks( void ) // CLOCK_MONOTONIC reading expressed in nanoseconds
+{
+	struct timespec ts = { 0 }; // stays zero if clock_gettime fails, instead of holding indeterminate values
+	clock_gettime( CLOCK_MONOTONIC, &ts );
+	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec; // explicit unsigned math, no implicit signed-to-unsigned conversion
+}
+
+float b2GetMilliseconds( uint64_t ticks ) // milliseconds elapsed since `ticks`, an earlier nanosecond reading from b2GetTicks
+{
+	const uint64_t elapsedNs = b2GetTicks() - ticks;
+	return (float)( elapsedNs / 1000000.0 );
+}
+
+float b2GetMillisecondsAndReset( uint64_t* ticks ) // returns milliseconds elapsed since *ticks, then stores the current reading in *ticks
+{
+	const uint64_t now = b2GetTicks();
+	const uint64_t elapsedNs = now - *ticks;
+	*ticks = now;
+	return (float)( elapsedNs / 1000000.0 );
+}
+
+void b2Yield( void )
+{
+	sched_yield();
+}
+
+#elif defined( __APPLE__ )
+
+#include <mach/mach_time.h>
+#include <sched.h>
+#include <sys/time.h>
+
+static double s_invFrequency = 0.0;
+
+uint64_t b2GetTicks( void )
+{
+	return mach_absolute_time();
+}
+
+float b2GetMilliseconds( uint64_t ticks ) // milliseconds elapsed since `ticks`, an earlier b2GetTicks (mach_absolute_time) value
+{
+	if ( s_invFrequency == 0 ) // compute the tick-to-millisecond factor on first use
+	{
+		mach_timebase_info_data_t timebase;
+		mach_timebase_info( &timebase );
+
+		// numer / denom converts ticks to ns, the 1e-6 factor then converts ns to ms
+		s_invFrequency = 1e-6 * (double)timebase.numer / (double)timebase.denom;
+	}
+
+	uint64_t ticksNow = b2GetTicks();
+	return (float)( s_invFrequency * (ticksNow - ticks) );
+}
+
+float b2GetMillisecondsAndReset( uint64_t* ticks ) // returns milliseconds elapsed since *ticks, then stores the current tick count in *ticks
+{
+	if ( s_invFrequency == 0 ) // same first-use initialization as in b2GetMilliseconds
+	{
+		mach_timebase_info_data_t timebase;
+		mach_timebase_info( &timebase );
+
+		// numer / denom converts ticks to ns, the 1e-6 factor then converts ns to ms
+		s_invFrequency = 1e-6 * (double)timebase.numer / (double)timebase.denom;
+	}
+
+	uint64_t ticksNow = b2GetTicks();
+	float ms = (float)( s_invFrequency * ( ticksNow - *ticks ) );
+	*ticks = ticksNow; // restart the measured interval at "now"
+	return ms;
+}
+
+void b2Yield( void )
+{
+	sched_yield();
+}
+
+#else
+
+uint64_t b2GetTicks( void )
+{
+	return 0;
+}
+
+float b2GetMilliseconds( uint64_t ticks )
+{
+	( (void)( ticks ) );
+	return 0.0f;
+}
+
+float b2GetMillisecondsAndReset( uint64_t* ticks )
+{
+	( (void)( ticks ) );
+	return 0.0f;
+}
+
+void b2Yield( void )
+{
+}
+
+#endif
+
+// djb2 hash: starting from `hash`, folds in `count` bytes of `data` as digest = 33 * digest + byte (mod 2^32)
+// https://en.wikipedia.org/wiki/List_of_hash_functions
+uint32_t b2Hash( uint32_t hash, const uint8_t* data, int count )
+{
+	uint32_t digest = hash;
+	for ( int byteIndex = 0; byteIndex < count; ++byteIndex )
+	{
+		digest = 33u * digest + data[byteIndex];
+	}
+
+	return digest;
+}
diff --git a/src/vendor/box2d/types.c b/src/vendor/box2d/types.c
new file mode 100644
index 0000000..0c0a143
--- /dev/null
+++ b/src/vendor/box2d/types.c
@@ -0,0 +1,151 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "box2d/types.h"
+
+#include "constants.h"
+#include "core.h"
+
+b2WorldDef b2DefaultWorldDef( void ) // returns a zero-initialized b2WorldDef with the non-zero defaults below filled in
+{
+	b2WorldDef def = { 0 };
+	def.gravity.x = 0.0f;
+	def.gravity.y = -10.0f;
+	def.hitEventThreshold = 1.0f * b2_lengthUnitsPerMeter;
+	def.restitutionThreshold = 1.0f * b2_lengthUnitsPerMeter;
+	def.maxContactPushSpeed = 3.0f * b2_lengthUnitsPerMeter;
+	def.contactHertz = 30.0f; // float literal, matching the float field and the neighbouring defaults
+	def.contactDampingRatio = 10.0f;
+	def.jointHertz = 60.0f; // float literal, matching the float field and the neighbouring defaults
+	def.jointDampingRatio = 2.0f;
+	// 400 meters per second, faster than the speed of sound
+	def.maximumLinearSpeed = 400.0f * b2_lengthUnitsPerMeter;
+	def.enableSleep = true;
+	def.enableContinuous = true;
+	def.internalValue = B2_SECRET_COOKIE; // lets the library detect that the definition came from this function
+	return def;
+}
+
+b2BodyDef b2DefaultBodyDef( void ) // returns a zero-initialized b2BodyDef with the non-zero defaults below filled in
+{
+	b2BodyDef bodyDef = { 0 };
+	bodyDef.internalValue = B2_SECRET_COOKIE;
+	bodyDef.type = b2_staticBody;
+	bodyDef.rotation = b2Rot_identity;
+	bodyDef.gravityScale = 1.0f;
+	bodyDef.sleepThreshold = 0.05f * b2_lengthUnitsPerMeter;
+	bodyDef.isEnabled = true;
+	bodyDef.isAwake = true;
+	bodyDef.enableSleep = true;
+	return bodyDef;
+}
+
+b2Filter b2DefaultFilter( void ) // default category bits, default mask bits, and group index 0 (no group filtering)
+{
+	b2Filter defaults = { .categoryBits = B2_DEFAULT_CATEGORY_BITS, .maskBits = B2_DEFAULT_MASK_BITS, .groupIndex = 0 };
+	return defaults;
+}
+
+b2QueryFilter b2DefaultQueryFilter( void ) // default category bits and default mask bits
+{
+	b2QueryFilter defaults = { .categoryBits = B2_DEFAULT_CATEGORY_BITS, .maskBits = B2_DEFAULT_MASK_BITS };
+	return defaults;
+}
+
+b2ShapeDef b2DefaultShapeDef( void ) // returns a zero-initialized b2ShapeDef with the non-zero defaults below filled in
+{
+	b2ShapeDef shapeDef = { 0 };
+	shapeDef.internalValue = B2_SECRET_COOKIE;
+	shapeDef.filter = b2DefaultFilter();
+	shapeDef.density = 1.0f;
+	shapeDef.material.friction = 0.6f;
+	shapeDef.invokeContactCreation = true;
+	shapeDef.updateBodyMass = true;
+	return shapeDef;
+}
+
+b2SurfaceMaterial b2DefaultSurfaceMaterial( void )
+{
+	// Every field except friction defaults to zero.
+	b2SurfaceMaterial defaults = { 0 };
+	defaults.friction = 0.6f;
+
+	return defaults;
+}
+
+b2ChainDef b2DefaultChainDef( void ) // returns a zero-initialized b2ChainDef that points at one shared default material
+{
+	static const b2SurfaceMaterial defaultMaterial = { // never written; only exposed through the const b2ChainDef materials pointer
+		.friction = 0.6f,
+	};
+
+	b2ChainDef def = { 0 };
+	def.materials = &defaultMaterial;
+	def.materialCount = 1;
+	def.filter = b2DefaultFilter();
+	def.internalValue = B2_SECRET_COOKIE; // lets the library detect that the definition came from this function
+	return def;
+}
+
+static void b2EmptyDrawPolygon( const b2Vec2* vertices, int vertexCount, b2HexColor color, void* context )
+{
+	B2_UNUSED( vertices, vertexCount, color, context );
+}
+
+static void b2EmptyDrawSolidPolygon( b2Transform transform, const b2Vec2* vertices, int vertexCount, float radius,
+									 b2HexColor color, void* context )
+{
+	B2_UNUSED( transform, vertices, vertexCount, radius, color, context );
+}
+
+static void b2EmptyDrawCircle( b2Vec2 center, float radius, b2HexColor color, void* context )
+{
+	B2_UNUSED( center, radius, color, context );
+}
+
+static void b2EmptyDrawSolidCircle( b2Transform transform, float radius, b2HexColor color, void* context )
+{
+	B2_UNUSED( transform, radius, color, context );
+}
+
+static void b2EmptyDrawSolidCapsule( b2Vec2 p1, b2Vec2 p2, float radius, b2HexColor color, void* context )
+{
+	B2_UNUSED( p1, p2, radius, color, context );
+}
+
+static void b2EmptyDrawSegment( b2Vec2 p1, b2Vec2 p2, b2HexColor color, void* context )
+{
+	B2_UNUSED( p1, p2, color, context );
+}
+
+static void b2EmptyDrawTransform( b2Transform transform, void* context )
+{
+	B2_UNUSED( transform, context );
+}
+
+static void b2EmptyDrawPoint( b2Vec2 p, float size, b2HexColor color, void* context )
+{
+	B2_UNUSED( p, size, color, context );
+}
+
+static void b2EmptyDrawString( b2Vec2 p, const char* s, b2HexColor color, void* context )
+{
+	B2_UNUSED( p, s, color, context );
+}
+
+b2DebugDraw b2DefaultDebugDraw( void ) // returns a zero-initialized b2DebugDraw whose nine callbacks below point at this file's empty stubs
+{
+	b2DebugDraw draw = { 0 };
+
+	// Install the no-op stubs so a user can implement only some callbacks without leaving null function pointers.
+	draw.DrawPolygonFcn = b2EmptyDrawPolygon;
+	draw.DrawSolidPolygonFcn = b2EmptyDrawSolidPolygon;
+	draw.DrawCircleFcn = b2EmptyDrawCircle;
+	draw.DrawSolidCircleFcn = b2EmptyDrawSolidCircle;
+	draw.DrawSolidCapsuleFcn = b2EmptyDrawSolidCapsule;
+	draw.DrawSegmentFcn = b2EmptyDrawSegment;
+	draw.DrawTransformFcn = b2EmptyDrawTransform;
+	draw.DrawPointFcn = b2EmptyDrawPoint;
+	draw.DrawStringFcn = b2EmptyDrawString;
+	return draw;
+}
diff --git a/src/vendor/box2d/types.h b/src/vendor/box2d/types.h
new file mode 100644
index 0000000..b4a683f
--- /dev/null
+++ b/src/vendor/box2d/types.h
@@ -0,0 +1,1457 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "base.h"
+#include "collision.h"
+#include "id.h"
+#include "math_functions.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define B2_DEFAULT_CATEGORY_BITS 1
+#define B2_DEFAULT_MASK_BITS UINT64_MAX
+
+/// Task interface
+/// This is the prototype for a Box2D task. Your task system is expected to invoke the Box2D task with these arguments.
+/// The task spans a range of the parallel-for: [startIndex, endIndex)
+/// The worker index must correctly identify each worker in the user thread pool, expected in [0, workerCount).
+/// A worker must exist on only one thread at a time and is analogous to the thread index.
+/// The task context is the context pointer sent from Box2D when it is enqueued.
+/// The startIndex and endIndex are expected in the range [0, itemCount) where itemCount is the argument to b2EnqueueTaskCallback
+/// below. Box2D expects startIndex < endIndex and will execute a loop like this:
+///
+/// @code{.c}
+/// for (int i = startIndex; i < endIndex; ++i)
+/// {
+/// 	DoWork();
+/// }
+/// @endcode
+/// @ingroup world
+typedef void b2TaskCallback( int startIndex, int endIndex, uint32_t workerIndex, void* taskContext );
+
+/// These functions can be provided to Box2D to invoke a task system. These are designed to work well with enkiTS.
+/// Returns a pointer to the user's task object. May be nullptr. A nullptr indicates to Box2D that the work was executed
+/// serially within the callback and there is no need to call b2FinishTaskCallback.
+/// The itemCount is the number of Box2D work items that are to be partitioned among workers by the user's task system.
+/// This is essentially a parallel-for. The minRange parameter is a suggestion of the minimum number of items to assign
+/// per worker to reduce overhead. For example, suppose the task is small and that itemCount is 16. A minRange of 8 suggests
+/// that your task system should split the work items among just two workers, even if you have more available.
+/// In general the range [startIndex, endIndex) sent to b2TaskCallback should obey:
+/// endIndex - startIndex >= minRange
+/// The exception of course is when itemCount < minRange.
+/// @ingroup world
+typedef void* b2EnqueueTaskCallback( b2TaskCallback* task, int itemCount, int minRange, void* taskContext, void* userContext );
+
+/// Finishes a user task object that wraps a Box2D task.
+/// @ingroup world
+typedef void b2FinishTaskCallback( void* userTask, void* userContext );
+
+/// Optional friction mixing callback. This intentionally provides no context objects because this is called
+/// from a worker thread.
+/// @warning This function should not attempt to modify Box2D state or user application state.
+/// @ingroup world
+typedef float b2FrictionCallback( float frictionA, int userMaterialIdA, float frictionB, int userMaterialIdB );
+
+/// Optional restitution mixing callback. This intentionally provides no context objects because this is called
+/// from a worker thread.
+/// @warning This function should not attempt to modify Box2D state or user application state.
+/// @ingroup world
+typedef float b2RestitutionCallback( float restitutionA, int userMaterialIdA, float restitutionB, int userMaterialIdB );
+
+/// Result from b2World_RayCastClosest
+/// @ingroup world
+typedef struct b2RayResult
+{
+	b2ShapeId shapeId;
+	b2Vec2 point;
+	b2Vec2 normal;
+	float fraction;
+	int nodeVisits;
+	int leafVisits;
+	bool hit;
+} b2RayResult;
+
+/// World definition used to create a simulation world.
+/// Must be initialized using b2DefaultWorldDef().
+/// @ingroup world
+typedef struct b2WorldDef
+{
+	/// Gravity vector. Box2D has no up-vector defined.
+	b2Vec2 gravity;
+
+	/// Restitution speed threshold, usually in m/s. Collisions above this
+	/// speed have restitution applied (will bounce).
+	float restitutionThreshold;
+
+	/// Threshold speed for hit events. Usually meters per second.
+	float hitEventThreshold;
+
+	/// Contact stiffness. Cycles per second. Increasing this increases the speed of overlap recovery, but can introduce jitter.
+	float contactHertz;
+
+	/// Contact bounciness. Non-dimensional. You can speed up overlap recovery by decreasing this with
+	/// the trade-off that overlap resolution becomes more energetic.
+	float contactDampingRatio;
+
+	/// This parameter controls how fast overlap is resolved and usually has units of meters per second. This only
+	/// puts a cap on the resolution speed. The resolution speed is increased by increasing the hertz and/or
+	/// decreasing the damping ratio.
+	float maxContactPushSpeed;
+
+	/// Joint stiffness. Cycles per second.
+	float jointHertz;
+
+	/// Joint bounciness. Non-dimensional.
+	float jointDampingRatio;
+
+	/// Maximum linear speed. Usually meters per second.
+	float maximumLinearSpeed;
+
+	/// Optional mixing callback for friction. The default uses sqrt(frictionA * frictionB).
+	b2FrictionCallback* frictionCallback;
+
+	/// Optional mixing callback for restitution. The default uses max(restitutionA, restitutionB).
+	b2RestitutionCallback* restitutionCallback;
+
+	/// Can bodies go to sleep to improve performance
+	bool enableSleep;
+
+	/// Enable continuous collision
+	bool enableContinuous;
+
+	/// Number of workers to use with the provided task system. Box2D performs best when using only
+	/// performance cores and accessing a single L2 cache. Efficiency cores and hyper-threading provide
+	/// little benefit and may even harm performance.
+	/// @note Box2D does not create threads. This is the number of threads your application has created
+	/// that you are allocating to b2World_Step.
+	/// @warning Do not modify the default value unless you are also providing a task system and providing
+	/// task callbacks (enqueueTask and finishTask).
+	int workerCount;
+
+	/// Function to spawn tasks
+	b2EnqueueTaskCallback* enqueueTask;
+
+	/// Function to finish a task
+	b2FinishTaskCallback* finishTask;
+
+	/// User context that is provided to enqueueTask and finishTask
+	void* userTaskContext;
+
+	/// User data
+	void* userData;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2WorldDef;
+
+/// Use this to initialize your world definition
+/// @ingroup world
+B2_API b2WorldDef b2DefaultWorldDef( void );
+
+/// The body simulation type.
+/// Each body is one of these three types. The type determines how the body behaves in the simulation.
+/// @ingroup body
+typedef enum b2BodyType
+{
+	/// zero mass, zero velocity, may be manually moved
+	b2_staticBody = 0,
+
+	/// zero mass, velocity set by user, moved by solver
+	b2_kinematicBody = 1,
+
+	/// positive mass, velocity determined by forces, moved by solver
+	b2_dynamicBody = 2,
+
+	/// number of body types
+	b2_bodyTypeCount,
+} b2BodyType;
+
+/// A body definition holds all the data needed to construct a rigid body.
+/// You can safely re-use body definitions. Shapes are added to a body after construction.
+/// Body definitions are temporary objects used to bundle creation parameters.
+/// Must be initialized using b2DefaultBodyDef().
+/// @ingroup body
+typedef struct b2BodyDef
+{
+	/// The body type: static, kinematic, or dynamic.
+	b2BodyType type;
+
+	/// The initial world position of the body. Bodies should be created with the desired position.
+	/// @note Creating bodies at the origin and then moving them nearly doubles the cost of body creation, especially
+	/// if the body is moved after shapes have been added.
+	b2Vec2 position;
+
+	/// The initial world rotation of the body. Use b2MakeRot() if you have an angle.
+	b2Rot rotation;
+
+	/// The initial linear velocity of the body's origin. Usually in meters per second.
+	b2Vec2 linearVelocity;
+
+	/// The initial angular velocity of the body. Radians per second.
+	float angularVelocity;
+
+	/// Linear damping is used to reduce the linear velocity. The damping parameter
+	/// can be larger than 1 but the damping effect becomes sensitive to the
+	/// time step when the damping parameter is large.
+	/// Generally linear damping is undesirable because it makes objects move slowly
+	/// as if they are floating.
+	float linearDamping;
+
+	/// Angular damping is used to reduce the angular velocity. The damping parameter
+	/// can be larger than 1.0f but the damping effect becomes sensitive to the
+	/// time step when the damping parameter is large.
+	/// Angular damping can be used to slow down rotating bodies.
+	float angularDamping;
+
+	/// Scale the gravity applied to this body. Non-dimensional.
+	float gravityScale;
+
+	/// Sleep speed threshold, default is 0.05 meters per second
+	float sleepThreshold;
+
+	/// Optional body name for debugging. Up to 31 characters (excluding null termination)
+	const char* name;
+
+	/// Use this to store application specific body data.
+	void* userData;
+
+	/// Set this flag to false if this body should never fall asleep.
+	bool enableSleep;
+
+	/// Is this body initially awake or sleeping?
+	bool isAwake;
+
+	/// Should this body be prevented from rotating? Useful for characters.
+	bool fixedRotation;
+
+	/// Treat this body as a high speed object that performs continuous collision detection
+	/// against dynamic and kinematic bodies, but not other bullet bodies.
+	/// @warning Bullets should be used sparingly. They are not a solution for general dynamic-versus-dynamic
+	/// continuous collision. They may interfere with joint constraints.
+	bool isBullet;
+
+	/// Used to disable a body. A disabled body does not move or collide.
+	bool isEnabled;
+
+	/// This allows this body to bypass rotational speed limits. Should only be used
+	/// for circular objects, like wheels.
+	bool allowFastRotation;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2BodyDef;
+
+/// Use this to initialize your body definition
+/// @ingroup body
+B2_API b2BodyDef b2DefaultBodyDef( void );
+
+/// This is used to filter collision on shapes. It affects shape-vs-shape collision
+/// and shape-versus-query collision (such as b2World_CastRay).
+/// @ingroup shape
+typedef struct b2Filter
+{
+	/// The collision category bits. Normally you would just set one bit. The category bits should
+	/// represent your application object types. For example:
+	/// @code{.cpp}
+	/// enum MyCategories
+	/// {
+	///    Static  = 0x00000001,
+	///    Dynamic = 0x00000002,
+	///    Debris  = 0x00000004,
+	///    Player  = 0x00000008,
+	///    // etc
+	/// };
+	/// @endcode
+	uint64_t categoryBits;
+
+	/// The collision mask bits. This states the categories that this
+	/// shape would accept for collision.
+	/// For example, you may want your player to only collide with static objects
+	/// and other players.
+	/// @code{.c}
+	/// maskBits = Static | Player;
+	/// @endcode
+	uint64_t maskBits;
+
+	/// Collision groups allow a certain group of objects to never collide (negative)
+	/// or always collide (positive). A group index of zero has no effect. Non-zero group filtering
+	/// always wins against the mask bits.
+	/// For example, you may want ragdolls to collide with other ragdolls but you don't want
+	/// ragdoll self-collision. In this case you would give each ragdoll a unique negative group index
+	/// and apply that group index to all shapes on the ragdoll.
+	int groupIndex;
+} b2Filter;
+
+/// Use this to initialize your filter
+/// @ingroup shape
+B2_API b2Filter b2DefaultFilter( void );
+
+/// The query filter is used to filter collisions between queries and shapes. For example,
+/// you may want a ray-cast representing a projectile to hit players and the static environment
+/// but not debris.
+/// @ingroup shape
+typedef struct b2QueryFilter
+{
+	/// The collision category bits of this query. Normally you would just set one bit.
+	uint64_t categoryBits;
+
+	/// The collision mask bits. This states the shape categories that this
+	/// query would accept for collision.
+	uint64_t maskBits;
+} b2QueryFilter;
+
+/// Use this to initialize your query filter
+/// @ingroup shape
+B2_API b2QueryFilter b2DefaultQueryFilter( void );
+
+/// Shape type
+/// @ingroup shape
+typedef enum b2ShapeType
+{
+	/// A circle with an offset
+	b2_circleShape,
+
+	/// A capsule is an extruded circle
+	b2_capsuleShape,
+
+	/// A line segment
+	b2_segmentShape,
+
+	/// A convex polygon
+	b2_polygonShape,
+
+	/// A line segment owned by a chain shape
+	b2_chainSegmentShape,
+
+	/// The number of shape types
+	b2_shapeTypeCount
+} b2ShapeType;
+
+/// Surface materials allow chain shapes to have per segment surface properties.
+/// @ingroup shape
+typedef struct b2SurfaceMaterial
+{
+	/// The Coulomb (dry) friction coefficient, usually in the range [0,1].
+	float friction;
+
+	/// The coefficient of restitution (bounce) usually in the range [0,1].
+	/// https://en.wikipedia.org/wiki/Coefficient_of_restitution
+	float restitution;
+
+	/// The rolling resistance usually in the range [0,1].
+	float rollingResistance;
+
+	/// The tangent speed for conveyor belts
+	float tangentSpeed;
+
+	/// User material identifier. This is passed with query results and to friction and restitution
+	/// combining functions. It is not used internally.
+	int userMaterialId;
+
+	/// Custom debug draw color.
+	uint32_t customColor;
+} b2SurfaceMaterial;
+
+/// Use this to initialize your surface material
+/// @ingroup shape
+B2_API b2SurfaceMaterial b2DefaultSurfaceMaterial( void );
+
+/// Used to create a shape.
+/// This is a temporary object used to bundle shape creation parameters. You may use
+/// the same shape definition to create multiple shapes.
+/// Must be initialized using b2DefaultShapeDef().
+/// @ingroup shape
+typedef struct b2ShapeDef
+{
+	/// Use this to store application specific shape data.
+	void* userData;
+
+	/// The surface material for this shape.
+	b2SurfaceMaterial material;
+
+	/// The density, usually in kg/m^2.
+	/// This is not part of the surface material because this is for the interior, which may have
+	/// other considerations, such as being hollow. For example a wood barrel may be hollow or full of water.
+	float density;
+
+	/// Collision filtering data.
+	b2Filter filter;
+
+	/// A sensor shape generates overlap events but never generates a collision response.
+	/// Sensors do not have continuous collision. Instead, use a ray or shape cast for those scenarios.
+	/// Sensors still contribute to the body mass if they have non-zero density.
+	/// @note Sensor events are disabled by default.
+	/// @see enableSensorEvents
+	bool isSensor;
+
+	/// Enable sensor events for this shape. This applies to sensors and non-sensors. False by default, even for sensors.
+	bool enableSensorEvents;
+
+	/// Enable contact events for this shape. Only applies to kinematic and dynamic bodies. Ignored for sensors. False by default.
+	bool enableContactEvents;
+
+	/// Enable hit events for this shape. Only applies to kinematic and dynamic bodies. Ignored for sensors. False by default.
+	bool enableHitEvents;
+
+	/// Enable pre-solve contact events for this shape. Only applies to dynamic bodies. These are expensive
+	/// and must be carefully handled due to threading. Ignored for sensors.
+	bool enablePreSolveEvents;
+
+	/// When shapes are created they will scan the environment for collision the next time step. This can significantly slow down
+	/// static body creation when there are many static shapes.
+	/// This flag is ignored for dynamic and kinematic shapes which always invoke contact creation.
+	bool invokeContactCreation;
+
+	/// Should the body update the mass properties when this shape is created. Default is true.
+	bool updateBodyMass;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2ShapeDef;
+
+/// Use this to initialize your shape definition
+/// @ingroup shape
+B2_API b2ShapeDef b2DefaultShapeDef( void );
+
+/// Used to create a chain of line segments. This is designed to eliminate ghost collisions with some limitations.
+/// - chains are one-sided
+/// - chains have no mass and should be used on static bodies
+/// - chains have a counter-clockwise winding order (normal points right of segment direction)
+/// - chains are either a loop or open
+/// - a chain must have at least 4 points
+/// - the distance between any two points must be greater than B2_LINEAR_SLOP
+/// - a chain shape should not self intersect (this is not validated)
+/// - an open chain shape has NO COLLISION on the first and final edge
+/// - you may overlap two open chains on their first three and/or last three points to get smooth collision
+/// - a chain shape creates multiple line segment shapes on the body
+/// https://en.wikipedia.org/wiki/Polygonal_chain
+/// Must be initialized using b2DefaultChainDef().
+/// @warning Do not use chain shapes unless you understand the limitations. This is an advanced feature.
+/// @ingroup shape
+typedef struct b2ChainDef
+{
+	/// Use this to store application specific shape data.
+	void* userData;
+
+	/// An array of at least 4 points. These are cloned and may be temporary.
+	const b2Vec2* points;
+
+	/// The point count, must be 4 or more.
+	int count;
+
+	/// Surface materials for each segment. These are cloned.
+	const b2SurfaceMaterial* materials;
+
+	/// The material count. Must be 1 or count. This allows you to provide one
+	/// material for all segments or a unique material per segment.
+	int materialCount;
+
+	/// Contact filtering data.
+	b2Filter filter;
+
+	/// Indicates a closed chain formed by connecting the first and last points
+	bool isLoop;
+
+	/// Enable sensors to detect this chain. False by default.
+	bool enableSensorEvents;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2ChainDef;
+
+/// Use this to initialize your chain definition
+/// @ingroup shape
+B2_API b2ChainDef b2DefaultChainDef( void );
+
+//! @cond
+/// Profiling data. Times are in milliseconds.
+typedef struct b2Profile
+{
+	float step;
+	float pairs;
+	float collide;
+	float solve;
+	float mergeIslands;
+	float prepareStages;
+	float solveConstraints;
+	float prepareConstraints;
+	float integrateVelocities;
+	float warmStart;
+	float solveImpulses;
+	float integratePositions;
+	float relaxImpulses;
+	float applyRestitution;
+	float storeImpulses;
+	float splitIslands;
+	float transforms;
+	float hitEvents;
+	float refit;
+	float bullets;
+	float sleepIslands;
+	float sensors;
+} b2Profile;
+
+/// Counters that give details of the simulation size.
+typedef struct b2Counters
+{
+	int bodyCount;
+	int shapeCount;
+	int contactCount;
+	int jointCount;
+	int islandCount;
+	int stackUsed;
+	int staticTreeHeight;
+	int treeHeight;
+	int byteCount;
+	int taskCount;
+	int colorCounts[12];
+} b2Counters;
+//! @endcond
+
+/// Joint type enumeration
+///
+/// This is useful because all joint types use b2JointId and sometimes you
+/// want to get the type of a joint.
+/// @ingroup joint
+typedef enum b2JointType
+{
+	b2_distanceJoint,
+	b2_filterJoint,
+	b2_motorJoint,
+	b2_mouseJoint,
+	b2_prismaticJoint,
+	b2_revoluteJoint,
+	b2_weldJoint,
+	b2_wheelJoint,
+} b2JointType;
+
+/// Distance joint definition
+///
+/// This requires defining an anchor point on both
+/// bodies and the non-zero distance of the distance joint. The definition uses
+/// local anchor points so that the initial configuration can violate the
+/// constraint slightly. This helps when saving and loading a game.
+/// @ingroup distance_joint
+typedef struct b2DistanceJointDef
+{
+	/// The first attached body
+	b2BodyId bodyIdA;
+
+	/// The second attached body
+	b2BodyId bodyIdB;
+
+	/// The local anchor point relative to bodyA's origin
+	b2Vec2 localAnchorA;
+
+	/// The local anchor point relative to bodyB's origin
+	b2Vec2 localAnchorB;
+
+	/// The rest length of this joint. Clamped to a stable minimum value.
+	float length;
+
+	/// Enable the distance constraint to behave like a spring. If false
+	/// then the distance joint will be rigid, overriding the limit and motor.
+	bool enableSpring;
+
+	/// The spring linear stiffness Hertz, cycles per second
+	float hertz;
+
+	/// The spring linear damping ratio, non-dimensional
+	float dampingRatio;
+
+	/// Enable/disable the joint limit
+	bool enableLimit;
+
+	/// Minimum length. Clamped to a stable minimum value.
+	float minLength;
+
+	/// Maximum length. Must be greater than or equal to the minimum length.
+	float maxLength;
+
+	/// Enable/disable the joint motor
+	bool enableMotor;
+
+	/// The maximum motor force, usually in newtons
+	float maxMotorForce;
+
+	/// The desired motor speed, usually in meters per second
+	float motorSpeed;
+
+	/// Set this flag to true if the attached bodies should collide
+	bool collideConnected;
+
+	/// User data pointer
+	void* userData;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2DistanceJointDef;
+
+/// Use this to initialize your joint definition
+/// @ingroup distance_joint
+B2_API b2DistanceJointDef b2DefaultDistanceJointDef( void );
+
+/// A motor joint is used to control the relative motion between two bodies
+///
+/// A typical usage is to control the movement of a dynamic body with respect to the ground.
+/// @ingroup motor_joint
+typedef struct b2MotorJointDef
+{
+	/// The first attached body
+	b2BodyId bodyIdA;
+
+	/// The second attached body
+	b2BodyId bodyIdB;
+
+	/// Position of bodyB minus the position of bodyA, in bodyA's frame
+	b2Vec2 linearOffset;
+
+	/// The bodyB angle minus bodyA angle in radians
+	float angularOffset;
+
+	/// The maximum motor force in newtons
+	float maxForce;
+
+	/// The maximum motor torque in newton-meters
+	float maxTorque;
+
+	/// Position correction factor in the range [0,1]
+	float correctionFactor;
+
+	/// Set this flag to true if the attached bodies should collide
+	bool collideConnected;
+
+	/// User data pointer
+	void* userData;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2MotorJointDef;
+
+/// Use this to initialize your joint definition
+/// @ingroup motor_joint
+B2_API b2MotorJointDef b2DefaultMotorJointDef( void );
+
+/// A mouse joint is used to make a point on a body track a specified world point.
+///
+/// This is a soft constraint and allows the constraint to stretch without
+/// applying huge forces. This also applies a rotation constraint heuristic to improve control.
+/// @ingroup mouse_joint
+typedef struct b2MouseJointDef
+{
+	/// The first attached body. This is assumed to be static.
+	b2BodyId bodyIdA;
+
+	/// The second attached body.
+	b2BodyId bodyIdB;
+
+	/// The initial target point in world space
+	b2Vec2 target;
+
+	/// Stiffness in hertz
+	float hertz;
+
+	/// Damping ratio, non-dimensional
+	float dampingRatio;
+
+	/// Maximum force, typically in newtons
+	float maxForce;
+
+	/// Set this flag to true if the attached bodies should collide.
+	bool collideConnected;
+
+	/// User data pointer
+	void* userData;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2MouseJointDef;
+
+/// Use this to initialize your joint definition
+/// @ingroup mouse_joint
+B2_API b2MouseJointDef b2DefaultMouseJointDef( void );
+
+/// A filter joint is used to disable collision between two specific bodies.
+///
+/// @ingroup filter_joint
+typedef struct b2FilterJointDef
+{
+	/// The first attached body.
+	b2BodyId bodyIdA;
+
+	/// The second attached body.
+	b2BodyId bodyIdB;
+
+	/// User data pointer
+	void* userData;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2FilterJointDef;
+
+/// Use this to initialize your joint definition
+/// @ingroup filter_joint
+B2_API b2FilterJointDef b2DefaultFilterJointDef( void );
+
+/// Prismatic joint definition
+///
+/// This requires defining a line of motion using an axis and an anchor point.
+/// The definition uses local anchor points and a local axis so that the initial
+/// configuration can violate the constraint slightly. The joint translation is zero
+/// when the local anchor points coincide in world space.
+/// @ingroup prismatic_joint
+typedef struct b2PrismaticJointDef
+{
+	/// The first attached body
+	b2BodyId bodyIdA;
+
+	/// The second attached body
+	b2BodyId bodyIdB;
+
+	/// The local anchor point relative to bodyA's origin
+	b2Vec2 localAnchorA;
+
+	/// The local anchor point relative to bodyB's origin
+	b2Vec2 localAnchorB;
+
+	/// The local translation unit axis in bodyA
+	b2Vec2 localAxisA;
+
+	/// The constrained angle between the bodies: bodyB_angle - bodyA_angle
+	float referenceAngle;
+
+	/// Enable a linear spring along the prismatic joint axis
+	bool enableSpring;
+
+	/// The spring stiffness Hertz, cycles per second
+	float hertz;
+
+	/// The spring damping ratio, non-dimensional
+	float dampingRatio;
+
+	/// Enable/disable the joint limit
+	bool enableLimit;
+
+	/// The lower translation limit
+	float lowerTranslation;
+
+	/// The upper translation limit
+	float upperTranslation;
+
+	/// Enable/disable the joint motor
+	bool enableMotor;
+
+	/// The maximum motor force, typically in newtons
+	float maxMotorForce;
+
+	/// The desired motor speed, typically in meters per second
+	float motorSpeed;
+
+	/// Set this flag to true if the attached bodies should collide
+	bool collideConnected;
+
+	/// User data pointer
+	void* userData;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2PrismaticJointDef;
+
+/// Use this to initialize your joint definition
+/// @ingroup prismatic_joint
+B2_API b2PrismaticJointDef b2DefaultPrismaticJointDef( void );
+
+/// Revolute joint definition
+///
+/// This requires defining an anchor point where the bodies are joined.
+/// The definition uses local anchor points so that the
+/// initial configuration can violate the constraint slightly. You also need to
+/// specify the initial relative angle for joint limits. This helps when saving
+/// and loading a game.
+/// The local anchor points are measured from the body's origin
+/// rather than the center of mass because:
+/// 1. you might not know where the center of mass will be
+/// 2. if you add/remove shapes from a body and recompute the mass, the joints will be broken
+/// @ingroup revolute_joint
+typedef struct b2RevoluteJointDef
+{
+	/// The first attached body
+	b2BodyId bodyIdA;
+
+	/// The second attached body
+	b2BodyId bodyIdB;
+
+	/// The local anchor point relative to bodyA's origin
+	b2Vec2 localAnchorA;
+
+	/// The local anchor point relative to bodyB's origin
+	b2Vec2 localAnchorB;
+
+	/// The bodyB angle minus bodyA angle in the reference state (radians).
+	/// This defines the zero angle for the joint limit.
+	float referenceAngle;
+
+	/// Enable a rotational spring on the revolute hinge axis
+	bool enableSpring;
+
+	/// The spring stiffness Hertz, cycles per second
+	float hertz;
+
+	/// The spring damping ratio, non-dimensional
+	float dampingRatio;
+
+	/// A flag to enable joint limits
+	bool enableLimit;
+
+	/// The lower angle for the joint limit in radians. Minimum of -0.95*pi radians.
+	float lowerAngle;
+
+	/// The upper angle for the joint limit in radians. Maximum of 0.95*pi radians.
+	float upperAngle;
+
+	/// A flag to enable the joint motor
+	bool enableMotor;
+
+	/// The maximum motor torque, typically in newton-meters
+	float maxMotorTorque;
+
+	/// The desired motor speed in radians per second
+	float motorSpeed;
+
+	/// Scale the debug draw
+	float drawSize;
+
+	/// Set this flag to true if the attached bodies should collide
+	bool collideConnected;
+
+	/// User data pointer
+	void* userData;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2RevoluteJointDef;
+
+/// Use this to initialize your joint definition.
+/// @ingroup revolute_joint
+B2_API b2RevoluteJointDef b2DefaultRevoluteJointDef( void );
+
+/// Weld joint definition
+///
+/// A weld joint connects two bodies together rigidly. This constraint provides springs to mimic
+/// soft-body simulation.
+/// @note The approximate solver in Box2D cannot hold many bodies together rigidly
+/// @ingroup weld_joint
+typedef struct b2WeldJointDef
+{
+	/// The first attached body
+	b2BodyId bodyIdA;
+
+	/// The second attached body
+	b2BodyId bodyIdB;
+
+	/// The local anchor point relative to bodyA's origin
+	b2Vec2 localAnchorA;
+
+	/// The local anchor point relative to bodyB's origin
+	b2Vec2 localAnchorB;
+
+	/// The bodyB angle minus bodyA angle in the reference state (radians)
+	/// todo maybe make this a b2Rot
+	float referenceAngle;
+
+	/// Linear stiffness expressed as Hertz (cycles per second). Use zero for maximum stiffness.
+	float linearHertz;
+
+	/// Angular stiffness as Hertz (cycles per second). Use zero for maximum stiffness.
+	float angularHertz;
+
+	/// Linear damping ratio, non-dimensional. Use 1 for critical damping.
+	float linearDampingRatio;
+
+	/// Angular damping ratio, non-dimensional. Use 1 for critical damping.
+	float angularDampingRatio;
+
+	/// Set this flag to true if the attached bodies should collide
+	bool collideConnected;
+
+	/// User data pointer
+	void* userData;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2WeldJointDef;
+
+/// Use this to initialize your joint definition
+/// @ingroup weld_joint
+B2_API b2WeldJointDef b2DefaultWeldJointDef( void );
+
+/// Wheel joint definition
+///
+/// This requires defining a line of motion using an axis and an anchor point.
+/// The definition uses local anchor points and a local axis so that the initial
+/// configuration can violate the constraint slightly. The joint translation is zero
+/// when the local anchor points coincide in world space.
+/// @ingroup wheel_joint
+typedef struct b2WheelJointDef
+{
+	/// The first attached body
+	b2BodyId bodyIdA;
+
+	/// The second attached body
+	b2BodyId bodyIdB;
+
+	/// The local anchor point relative to bodyA's origin
+	b2Vec2 localAnchorA;
+
+	/// The local anchor point relative to bodyB's origin
+	b2Vec2 localAnchorB;
+
+	/// The local translation unit axis in bodyA
+	b2Vec2 localAxisA;
+
+	/// Enable a linear spring along the local axis
+	bool enableSpring;
+
+	/// Spring stiffness in Hertz
+	float hertz;
+
+	/// Spring damping ratio, non-dimensional
+	float dampingRatio;
+
+	/// Enable/disable the joint linear limit
+	bool enableLimit;
+
+	/// The lower translation limit
+	float lowerTranslation;
+
+	/// The upper translation limit
+	float upperTranslation;
+
+	/// Enable/disable the joint rotational motor
+	bool enableMotor;
+
+	/// The maximum motor torque, typically in newton-meters
+	float maxMotorTorque;
+
+	/// The desired motor speed in radians per second
+	float motorSpeed;
+
+	/// Set this flag to true if the attached bodies should collide
+	bool collideConnected;
+
+	/// User data pointer
+	void* userData;
+
+	/// Used internally to detect a valid definition. DO NOT SET.
+	int internalValue;
+} b2WheelJointDef;
+
+/// Use this to initialize your joint definition
+/// @ingroup wheel_joint
+B2_API b2WheelJointDef b2DefaultWheelJointDef( void );
+
+/// The explosion definition is used to configure options for explosions. Explosions
+/// consider shape geometry when computing the impulse.
+/// @ingroup world
+typedef struct b2ExplosionDef
+{
+	/// Mask bits to filter shapes
+	uint64_t maskBits;
+
+	/// The center of the explosion in world space
+	b2Vec2 position;
+
+	/// The radius of the explosion
+	float radius;
+
+	/// The falloff distance beyond the radius. Impulse is reduced to zero at this distance.
+	float falloff;
+
+	/// Impulse per unit length. This applies an impulse according to the shape perimeter that
+	/// is facing the explosion. Explosions only apply to circles, capsules, and polygons. This
+	/// may be negative for implosions.
+	float impulsePerLength;
+} b2ExplosionDef;
+
+/// Use this to initialize your explosion definition
+/// @ingroup world
+B2_API b2ExplosionDef b2DefaultExplosionDef( void );
+
+/**
+ * @defgroup events Events
+ * World event types.
+ *
+ * Events are used to collect events that occur during the world time step. These events
+ * are then available to query after the time step is complete. This is preferable to callbacks
+ * because Box2D uses multithreaded simulation.
+ *
+ * Also when events occur in the simulation step it may be problematic to modify the world, which is
+ * often what applications want to do when events occur.
+ *
+ * With event arrays, you can scan the events in a loop and modify the world. However, you need to be careful
+ * that some event data may become invalid. There are several samples that show how to do this safely.
+ *
+ * @{
+ */
+
+/// A begin touch event is generated when a shape starts to overlap a sensor shape.
+typedef struct b2SensorBeginTouchEvent
+{
+	/// The id of the sensor shape
+	b2ShapeId sensorShapeId;
+
+	/// The id of the dynamic shape that began touching the sensor shape
+	b2ShapeId visitorShapeId;
+} b2SensorBeginTouchEvent;
+
+/// An end touch event is generated when a shape stops overlapping a sensor shape.
+///	Causes include setting the transform, destroying a body or shape, or changing
+///	a filter. You will also get an end event if the sensor or visitor are destroyed.
+///	Therefore you should always confirm the shape id is valid using b2Shape_IsValid.
+typedef struct b2SensorEndTouchEvent
+{
+	/// The id of the sensor shape
+	///	@warning this shape may have been destroyed
+	///	@see b2Shape_IsValid
+	b2ShapeId sensorShapeId;
+
+	/// The id of the dynamic shape that stopped touching the sensor shape
+	///	@warning this shape may have been destroyed
+	///	@see b2Shape_IsValid
+	b2ShapeId visitorShapeId;
+
+} b2SensorEndTouchEvent;
+
+/// Sensor events are buffered in the Box2D world and are available
+/// as begin/end overlap event arrays after the time step is complete.
+/// Note: these may become invalid if bodies and/or shapes are destroyed
+typedef struct b2SensorEvents
+{
+	/// Array of sensor begin touch events
+	b2SensorBeginTouchEvent* beginEvents;
+
+	/// Array of sensor end touch events
+	b2SensorEndTouchEvent* endEvents;
+
+	/// The number of begin touch events
+	int beginCount;
+
+	/// The number of end touch events
+	int endCount;
+} b2SensorEvents;
+
+/// A begin touch event is generated when two shapes begin touching.
+typedef struct b2ContactBeginTouchEvent
+{
+	/// Id of the first shape
+	b2ShapeId shapeIdA;
+
+	/// Id of the second shape
+	b2ShapeId shapeIdB;
+
+	/// The initial contact manifold. This is recorded before the solver is called,
+	/// so all the impulses will be zero.
+	b2Manifold manifold;
+} b2ContactBeginTouchEvent;
+
+/// An end touch event is generated when two shapes stop touching.
+///	You will get an end event if you do anything that destroys contacts previous to the last
+///	world step. These include things like setting the transform, destroying a body
+///	or shape, or changing a filter or body type.
+typedef struct b2ContactEndTouchEvent
+{
+	/// Id of the first shape
+	///	@warning this shape may have been destroyed
+	///	@see b2Shape_IsValid
+	b2ShapeId shapeIdA;
+
+	/// Id of the second shape
+	///	@warning this shape may have been destroyed
+	///	@see b2Shape_IsValid
+	b2ShapeId shapeIdB;
+} b2ContactEndTouchEvent;
+
+/// A hit touch event is generated when two shapes collide with a speed faster than the hit speed threshold.
+typedef struct b2ContactHitEvent
+{
+	/// Id of the first shape
+	b2ShapeId shapeIdA;
+
+	/// Id of the second shape
+	b2ShapeId shapeIdB;
+
+	/// Point where the shapes hit
+	b2Vec2 point;
+
+	/// Normal vector pointing from shape A to shape B
+	b2Vec2 normal;
+
+	/// The speed the shapes are approaching. Always positive. Typically in meters per second.
+	float approachSpeed;
+} b2ContactHitEvent;
+
+/// Contact events are buffered in the Box2D world and are available
+/// as event arrays after the time step is complete.
+/// Note: these may become invalid if bodies and/or shapes are destroyed
+typedef struct b2ContactEvents
+{
+	/// Array of begin touch events
+	b2ContactBeginTouchEvent* beginEvents;
+
+	/// Array of end touch events
+	b2ContactEndTouchEvent* endEvents;
+
+	/// Array of hit events
+	b2ContactHitEvent* hitEvents;
+
+	/// Number of begin touch events
+	int beginCount;
+
+	/// Number of end touch events
+	int endCount;
+
+	/// Number of hit events
+	int hitCount;
+} b2ContactEvents;
+
+/// Body move events triggered when a body moves.
+/// Triggered when a body moves due to simulation. Not reported for bodies moved by the user.
+/// This also has a flag to indicate that the body went to sleep so the application can also
+/// sleep that actor/entity/object associated with the body.
+/// On the other hand if the flag does not indicate the body went to sleep then the application
+/// can treat the actor/entity/object associated with the body as awake.
+/// This is an efficient way for an application to update game object transforms rather than
+/// calling functions such as b2Body_GetTransform() because this data is delivered as a contiguous array
+/// and it is only populated with bodies that have moved.
+/// @note If sleeping is disabled all dynamic and kinematic bodies will trigger move events.
+typedef struct b2BodyMoveEvent
+{
+	b2Transform transform; ///< The moved body's transform
+	b2BodyId bodyId; ///< Id of the body that moved
+	void* userData; ///< User data pointer; presumably the moved body's user data (TODO confirm)
+	bool fellAsleep; ///< True if the body went to sleep, so the associated game object can sleep too
+} b2BodyMoveEvent;
+
+/// Body events are buffered in the Box2D world and are available
+/// as event arrays after the time step is complete.
+/// Note: this data becomes invalid if bodies are destroyed
+typedef struct b2BodyEvents
+{
+	/// Array of move events
+	b2BodyMoveEvent* moveEvents;
+
+	/// Number of move events
+	int moveCount;
+} b2BodyEvents;
+
+/// The contact data for two shapes. By convention the manifold normal points
+/// from shape A to shape B.
+/// @see b2Shape_GetContactData() and b2Body_GetContactData()
+typedef struct b2ContactData
+{
+	b2ShapeId shapeIdA;
+	b2ShapeId shapeIdB;
+	b2Manifold manifold;
+} b2ContactData;
+
+/**@}*/
+
+/// Prototype for a contact filter callback.
+/// This is called when a contact pair is considered for collision. This allows you to
+/// perform custom logic to prevent collision between shapes. This is only called if
+/// one of the two shapes has custom filtering enabled.
+/// Notes:
+/// - this function must be thread-safe
+/// - this is only called if one of the two shapes has enabled custom filtering
+/// - this is called only for awake dynamic bodies
+/// Return false if you want to disable the collision
+/// @see b2ShapeDef
+/// @warning Do not attempt to modify the world inside this callback
+/// @ingroup world
+typedef bool b2CustomFilterFcn( b2ShapeId shapeIdA, b2ShapeId shapeIdB, void* context );
+
+/// Prototype for a pre-solve callback.
+/// This is called after a contact is updated. This allows you to inspect a
+/// contact before it goes to the solver. If you are careful, you can modify the
+/// contact manifold (e.g. modify the normal).
+/// Notes:
+/// - this function must be thread-safe
+/// - this is only called if the shape has enabled pre-solve events
+/// - this is called only for awake dynamic bodies
+/// - this is not called for sensors
+/// - the supplied manifold has impulse values from the previous step
+/// Return false if you want to disable the contact this step
+/// @warning Do not attempt to modify the world inside this callback
+/// @ingroup world
+typedef bool b2PreSolveFcn( b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, void* context );
+
+/// Prototype callback for overlap queries.
+/// Called for each shape found in the query.
+/// @see b2World_OverlapAABB
+/// @return false to terminate the query.
+/// @ingroup world
+typedef bool b2OverlapResultFcn( b2ShapeId shapeId, void* context );
+
+/// Prototype callback for ray casts.
+/// Called for each shape found in the query. You control how the ray cast
+/// proceeds by returning a float:
+/// return -1: ignore this shape and continue
+/// return 0: terminate the ray cast
+/// return fraction: clip the ray to this point
+/// return 1: don't clip the ray and continue
+/// @param shapeId the shape hit by the ray
+/// @param point the point of initial intersection
+/// @param normal the normal vector at the point of intersection
+/// @param fraction the fraction along the ray at the point of intersection
+/// @param context the user context
+/// @return -1 to filter, 0 to terminate, fraction to clip the ray for closest hit, 1 to continue
+/// @see b2World_CastRay
+/// @ingroup world
+typedef float b2CastResultFcn( b2ShapeId shapeId, b2Vec2 point, b2Vec2 normal, float fraction, void* context );
+
+// Used to collect collision planes for character movers.
+// Return true to continue gathering planes.
+typedef bool b2PlaneResultFcn( b2ShapeId shapeId, const b2PlaneResult* plane, void* context );
+
+/// These colors are used for debug draw and mostly match the named SVG colors.
+/// See https://www.rapidtables.com/web/color/index.html
+/// https://johndecember.com/html/spec/colorsvg.html
+/// https://upload.wikimedia.org/wikipedia/commons/2/2b/SVG_Recognized_color_keyword_names.svg
+typedef enum b2HexColor
+{
+	b2_colorAliceBlue = 0xF0F8FF,
+	b2_colorAntiqueWhite = 0xFAEBD7,
+	b2_colorAqua = 0x00FFFF,
+	b2_colorAquamarine = 0x7FFFD4,
+	b2_colorAzure = 0xF0FFFF,
+	b2_colorBeige = 0xF5F5DC,
+	b2_colorBisque = 0xFFE4C4,
+	b2_colorBlack = 0x000000,
+	b2_colorBlanchedAlmond = 0xFFEBCD,
+	b2_colorBlue = 0x0000FF,
+	b2_colorBlueViolet = 0x8A2BE2,
+	b2_colorBrown = 0xA52A2A,
+	b2_colorBurlywood = 0xDEB887,
+	b2_colorCadetBlue = 0x5F9EA0,
+	b2_colorChartreuse = 0x7FFF00,
+	b2_colorChocolate = 0xD2691E,
+	b2_colorCoral = 0xFF7F50,
+	b2_colorCornflowerBlue = 0x6495ED,
+	b2_colorCornsilk = 0xFFF8DC,
+	b2_colorCrimson = 0xDC143C,
+	b2_colorCyan = 0x00FFFF,
+	b2_colorDarkBlue = 0x00008B,
+	b2_colorDarkCyan = 0x008B8B,
+	b2_colorDarkGoldenRod = 0xB8860B,
+	b2_colorDarkGray = 0xA9A9A9,
+	b2_colorDarkGreen = 0x006400,
+	b2_colorDarkKhaki = 0xBDB76B,
+	b2_colorDarkMagenta = 0x8B008B,
+	b2_colorDarkOliveGreen = 0x556B2F,
+	b2_colorDarkOrange = 0xFF8C00,
+	b2_colorDarkOrchid = 0x9932CC,
+	b2_colorDarkRed = 0x8B0000,
+	b2_colorDarkSalmon = 0xE9967A,
+	b2_colorDarkSeaGreen = 0x8FBC8F,
+	b2_colorDarkSlateBlue = 0x483D8B,
+	b2_colorDarkSlateGray = 0x2F4F4F,
+	b2_colorDarkTurquoise = 0x00CED1,
+	b2_colorDarkViolet = 0x9400D3,
+	b2_colorDeepPink = 0xFF1493,
+	b2_colorDeepSkyBlue = 0x00BFFF,
+	b2_colorDimGray = 0x696969,
+	b2_colorDodgerBlue = 0x1E90FF,
+	b2_colorFireBrick = 0xB22222,
+	b2_colorFloralWhite = 0xFFFAF0,
+	b2_colorForestGreen = 0x228B22,
+	b2_colorFuchsia = 0xFF00FF,
+	b2_colorGainsboro = 0xDCDCDC,
+	b2_colorGhostWhite = 0xF8F8FF,
+	b2_colorGold = 0xFFD700,
+	b2_colorGoldenRod = 0xDAA520,
+	b2_colorGray = 0x808080,
+	b2_colorGreen = 0x008000,
+	b2_colorGreenYellow = 0xADFF2F,
+	b2_colorHoneyDew = 0xF0FFF0,
+	b2_colorHotPink = 0xFF69B4,
+	b2_colorIndianRed = 0xCD5C5C,
+	b2_colorIndigo = 0x4B0082,
+	b2_colorIvory = 0xFFFFF0,
+	b2_colorKhaki = 0xF0E68C,
+	b2_colorLavender = 0xE6E6FA,
+	b2_colorLavenderBlush = 0xFFF0F5,
+	b2_colorLawnGreen = 0x7CFC00,
+	b2_colorLemonChiffon = 0xFFFACD,
+	b2_colorLightBlue = 0xADD8E6,
+	b2_colorLightCoral = 0xF08080,
+	b2_colorLightCyan = 0xE0FFFF,
+	b2_colorLightGoldenRodYellow = 0xFAFAD2,
+	b2_colorLightGray = 0xD3D3D3,
+	b2_colorLightGreen = 0x90EE90,
+	b2_colorLightPink = 0xFFB6C1,
+	b2_colorLightSalmon = 0xFFA07A,
+	b2_colorLightSeaGreen = 0x20B2AA,
+	b2_colorLightSkyBlue = 0x87CEFA,
+	b2_colorLightSlateGray = 0x778899,
+	b2_colorLightSteelBlue = 0xB0C4DE,
+	b2_colorLightYellow = 0xFFFFE0,
+	b2_colorLime = 0x00FF00,
+	b2_colorLimeGreen = 0x32CD32,
+	b2_colorLinen = 0xFAF0E6,
+	b2_colorMagenta = 0xFF00FF,
+	b2_colorMaroon = 0x800000,
+	b2_colorMediumAquaMarine = 0x66CDAA,
+	b2_colorMediumBlue = 0x0000CD,
+	b2_colorMediumOrchid = 0xBA55D3,
+	b2_colorMediumPurple = 0x9370DB,
+	b2_colorMediumSeaGreen = 0x3CB371,
+	b2_colorMediumSlateBlue = 0x7B68EE,
+	b2_colorMediumSpringGreen = 0x00FA9A,
+	b2_colorMediumTurquoise = 0x48D1CC,
+	b2_colorMediumVioletRed = 0xC71585,
+	b2_colorMidnightBlue = 0x191970,
+	b2_colorMintCream = 0xF5FFFA,
+	b2_colorMistyRose = 0xFFE4E1,
+	b2_colorMoccasin = 0xFFE4B5,
+	b2_colorNavajoWhite = 0xFFDEAD,
+	b2_colorNavy = 0x000080,
+	b2_colorOldLace = 0xFDF5E6,
+	b2_colorOlive = 0x808000,
+	b2_colorOliveDrab = 0x6B8E23,
+	b2_colorOrange = 0xFFA500,
+	b2_colorOrangeRed = 0xFF4500,
+	b2_colorOrchid = 0xDA70D6,
+	b2_colorPaleGoldenRod = 0xEEE8AA,
+	b2_colorPaleGreen = 0x98FB98,
+	b2_colorPaleTurquoise = 0xAFEEEE,
+	b2_colorPaleVioletRed = 0xDB7093,
+	b2_colorPapayaWhip = 0xFFEFD5,
+	b2_colorPeachPuff = 0xFFDAB9,
+	b2_colorPeru = 0xCD853F,
+	b2_colorPink = 0xFFC0CB,
+	b2_colorPlum = 0xDDA0DD,
+	b2_colorPowderBlue = 0xB0E0E6,
+	b2_colorPurple = 0x800080,
+	b2_colorRebeccaPurple = 0x663399,
+	b2_colorRed = 0xFF0000,
+	b2_colorRosyBrown = 0xBC8F8F,
+	b2_colorRoyalBlue = 0x4169E1,
+	b2_colorSaddleBrown = 0x8B4513,
+	b2_colorSalmon = 0xFA8072,
+	b2_colorSandyBrown = 0xF4A460,
+	b2_colorSeaGreen = 0x2E8B57,
+	b2_colorSeaShell = 0xFFF5EE,
+	b2_colorSienna = 0xA0522D,
+	b2_colorSilver = 0xC0C0C0,
+	b2_colorSkyBlue = 0x87CEEB,
+	b2_colorSlateBlue = 0x6A5ACD,
+	b2_colorSlateGray = 0x708090,
+	b2_colorSnow = 0xFFFAFA,
+	b2_colorSpringGreen = 0x00FF7F,
+	b2_colorSteelBlue = 0x4682B4,
+	b2_colorTan = 0xD2B48C,
+	b2_colorTeal = 0x008080,
+	b2_colorThistle = 0xD8BFD8,
+	b2_colorTomato = 0xFF6347,
+	b2_colorTurquoise = 0x40E0D0,
+	b2_colorViolet = 0xEE82EE,
+	b2_colorWheat = 0xF5DEB3,
+	b2_colorWhite = 0xFFFFFF,
+	b2_colorWhiteSmoke = 0xF5F5F5,
+	b2_colorYellow = 0xFFFF00,
+	b2_colorYellowGreen = 0x9ACD32,
+
+	b2_colorBox2DRed = 0xDC3132,
+	b2_colorBox2DBlue = 0x30AEBF,
+	b2_colorBox2DGreen = 0x8CC924,
+	b2_colorBox2DYellow = 0xFFEE8C
+} b2HexColor;
+
+/// This struct holds callbacks you can implement to draw a Box2D world.
+/// This structure should be zero initialized.
+/// @ingroup world
+typedef struct b2DebugDraw
+{
+	/// Draw a closed polygon provided in CCW order.
+	void ( *DrawPolygonFcn )( const b2Vec2* vertices, int vertexCount, b2HexColor color, void* context );
+
+	/// Draw a solid closed polygon provided in CCW order.
+	void ( *DrawSolidPolygonFcn )( b2Transform transform, const b2Vec2* vertices, int vertexCount, float radius, b2HexColor color,
+								void* context );
+
+	/// Draw a circle.
+	void ( *DrawCircleFcn )( b2Vec2 center, float radius, b2HexColor color, void* context );
+
+	/// Draw a solid circle.
+	void ( *DrawSolidCircleFcn )( b2Transform transform, float radius, b2HexColor color, void* context );
+
+	/// Draw a solid capsule.
+	void ( *DrawSolidCapsuleFcn )( b2Vec2 p1, b2Vec2 p2, float radius, b2HexColor color, void* context );
+
+	/// Draw a line segment.
+	void ( *DrawSegmentFcn )( b2Vec2 p1, b2Vec2 p2, b2HexColor color, void* context );
+
+	/// Draw a transform. Choose your own length scale.
+	void ( *DrawTransformFcn )( b2Transform transform, void* context );
+
+	/// Draw a point.
+	void ( *DrawPointFcn )( b2Vec2 p, float size, b2HexColor color, void* context );
+
+	/// Draw a string in world space
+	void ( *DrawStringFcn )( b2Vec2 p, const char* s, b2HexColor color, void* context );
+
+	/// Bounds to use if restricting drawing to a rectangular region
+	b2AABB drawingBounds;
+
+	/// Option to restrict drawing to a rectangular region. May suffer from unstable depth sorting.
+	bool useDrawingBounds;
+
+	/// Option to draw shapes
+	bool drawShapes;
+
+	/// Option to draw joints
+	bool drawJoints;
+
+	/// Option to draw additional information for joints
+	bool drawJointExtras;
+
+	/// Option to draw the bounding boxes for shapes
+	bool drawBounds;
+
+	/// Option to draw the mass and center of mass of dynamic bodies
+	bool drawMass;
+
+	/// Option to draw body names
+	bool drawBodyNames;
+
+	/// Option to draw contact points
+	bool drawContacts;
+
+	/// Option to visualize the graph coloring used for contacts and joints
+	bool drawGraphColors;
+
+	/// Option to draw contact normals
+	bool drawContactNormals;
+
+	/// Option to draw contact normal impulses
+	bool drawContactImpulses;
+
+	/// Option to draw contact feature ids
+	bool drawContactFeatures;
+
+	/// Option to draw contact friction impulses
+	bool drawFrictionImpulses;
+
+	/// Option to draw islands as bounding boxes
+	bool drawIslands;
+
+	/// User context that is passed as an argument to drawing callback functions
+	void* context;
+} b2DebugDraw;
+
+/// Use this to initialize your drawing interface. This allows you to implement a sub-set
+/// of the drawing functions.
+B2_API b2DebugDraw b2DefaultDebugDraw( void );
diff --git a/src/vendor/box2d/weld_joint.c b/src/vendor/box2d/weld_joint.c
new file mode 100644
index 0000000..a305d0a
--- /dev/null
+++ b/src/vendor/box2d/weld_joint.c
@@ -0,0 +1,310 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "body.h"
+#include "core.h"
+#include "joint.h"
+#include "solver.h"
+#include "solver_set.h"
+#include "world.h"
+
+// needed for dll export
+#include "box2d/box2d.h"
+
+float b2WeldJoint_GetReferenceAngle( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	return joint->weldJoint.referenceAngle;
+}
+
+void b2WeldJoint_SetReferenceAngle( b2JointId jointId, float angleInRadians )
+{
+	B2_ASSERT( b2IsValidFloat( angleInRadians ) );
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	joint->weldJoint.referenceAngle = b2ClampFloat(angleInRadians, -B2_PI, B2_PI);
+}
+
+void b2WeldJoint_SetLinearHertz( b2JointId jointId, float hertz )
+{
+	B2_ASSERT( b2IsValidFloat( hertz ) && hertz >= 0.0f );
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	joint->weldJoint.linearHertz = hertz;
+}
+
+float b2WeldJoint_GetLinearHertz( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	return joint->weldJoint.linearHertz;
+}
+
+void b2WeldJoint_SetLinearDampingRatio( b2JointId jointId, float dampingRatio )
+{
+	B2_ASSERT( b2IsValidFloat( dampingRatio ) && dampingRatio >= 0.0f );
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	joint->weldJoint.linearDampingRatio = dampingRatio;
+}
+
+float b2WeldJoint_GetLinearDampingRatio( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	return joint->weldJoint.linearDampingRatio;
+}
+
+void b2WeldJoint_SetAngularHertz( b2JointId jointId, float hertz )
+{
+	B2_ASSERT( b2IsValidFloat( hertz ) && hertz >= 0.0f );
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	joint->weldJoint.angularHertz = hertz;
+}
+
+float b2WeldJoint_GetAngularHertz( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	return joint->weldJoint.angularHertz;
+}
+
+void b2WeldJoint_SetAngularDampingRatio( b2JointId jointId, float dampingRatio )
+{
+	B2_ASSERT( b2IsValidFloat( dampingRatio ) && dampingRatio >= 0.0f );
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	joint->weldJoint.angularDampingRatio = dampingRatio;
+}
+
+float b2WeldJoint_GetAngularDampingRatio( b2JointId jointId )
+{
+	b2JointSim* joint = b2GetJointSimCheckType( jointId, b2_weldJoint );
+	return joint->weldJoint.angularDampingRatio;
+}
+
+b2Vec2 b2GetWeldJointForce( b2World* world, b2JointSim* base )
+{
+	b2Vec2 force = b2MulSV( world->inv_h, base->weldJoint.linearImpulse );
+	return force;
+}
+
+float b2GetWeldJointTorque( b2World* world, b2JointSim* base )
+{
+	return world->inv_h * base->weldJoint.angularImpulse;
+}
+
+// Point-to-point constraint
+// C = p2 - p1
+// Cdot = v2 - v1
+//      = v2 + cross(w2, r2) - v1 - cross(w1, r1)
+// J = [-I -r1_skew I r2_skew ]
+// Identity used:
+// w k % (rx i + ry j) = w * (-ry i + rx j)
+
+// Angle constraint
+// C = angle2 - angle1 - referenceAngle
+// Cdot = w2 - w1
+// J = [0 0 -1 0 0 1]
+// K = invI1 + invI2
+
+void b2PrepareWeldJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_weldJoint );
+
+	// chase body id to the solver set where the body lives
+	int idA = base->bodyIdA;
+	int idB = base->bodyIdB;
+
+	b2World* world = context->world;
+
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, idA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, idB );
+
+	B2_ASSERT( bodyA->setIndex == b2_awakeSet || bodyB->setIndex == b2_awakeSet );
+	b2SolverSet* setA = b2SolverSetArray_Get( &world->solverSets, bodyA->setIndex );
+	b2SolverSet* setB = b2SolverSetArray_Get( &world->solverSets, bodyB->setIndex );
+
+	int localIndexA = bodyA->localIndex;
+	int localIndexB = bodyB->localIndex;
+
+	b2BodySim* bodySimA = b2BodySimArray_Get( &setA->bodySims, localIndexA );
+	b2BodySim* bodySimB = b2BodySimArray_Get( &setB->bodySims, localIndexB );
+
+	float mA = bodySimA->invMass;
+	float iA = bodySimA->invInertia;
+	float mB = bodySimB->invMass;
+	float iB = bodySimB->invInertia;
+
+	base->invMassA = mA;
+	base->invMassB = mB;
+	base->invIA = iA;
+	base->invIB = iB;
+
+	b2WeldJoint* joint = &base->weldJoint;
+	joint->indexA = bodyA->setIndex == b2_awakeSet ? localIndexA : B2_NULL_INDEX;
+	joint->indexB = bodyB->setIndex == b2_awakeSet ? localIndexB : B2_NULL_INDEX;
+
+	b2Rot qA = bodySimA->transform.q;
+	b2Rot qB = bodySimB->transform.q;
+
+	joint->anchorA = b2RotateVector( qA, b2Sub( base->localOriginAnchorA, bodySimA->localCenter ) );
+	joint->anchorB = b2RotateVector( qB, b2Sub( base->localOriginAnchorB, bodySimB->localCenter ) );
+	joint->deltaCenter = b2Sub( bodySimB->center, bodySimA->center );
+	joint->deltaAngle = b2RelativeAngle( qB, qA ) - joint->referenceAngle;
+	joint->deltaAngle = b2UnwindAngle( joint->deltaAngle );
+
+	float ka = iA + iB;
+	joint->axialMass = ka > 0.0f ? 1.0f / ka : 0.0f;
+
+	if ( joint->linearHertz == 0.0f )
+	{
+		joint->linearSoftness = context->jointSoftness;
+	}
+	else
+	{
+		joint->linearSoftness = b2MakeSoft( joint->linearHertz, joint->linearDampingRatio, context->h );
+	}
+
+	if ( joint->angularHertz == 0.0f )
+	{
+		joint->angularSoftness = context->jointSoftness;
+	}
+	else
+	{
+		joint->angularSoftness = b2MakeSoft( joint->angularHertz, joint->angularDampingRatio, context->h );
+	}
+
+	if ( context->enableWarmStarting == false )
+	{
+		joint->linearImpulse = b2Vec2_zero;
+		joint->angularImpulse = 0.0f;
+	}
+}
+
+void b2WarmStartWeldJoint( b2JointSim* base, b2StepContext* context )
+{
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2WeldJoint* joint = &base->weldJoint;
+
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA );
+	b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+	stateA->linearVelocity = b2MulSub( stateA->linearVelocity, mA, joint->linearImpulse );
+	stateA->angularVelocity -= iA * ( b2Cross( rA, joint->linearImpulse ) + joint->angularImpulse );
+
+	stateB->linearVelocity = b2MulAdd( stateB->linearVelocity, mB, joint->linearImpulse );
+	stateB->angularVelocity += iB * ( b2Cross( rB, joint->linearImpulse ) + joint->angularImpulse );
+}
+
+void b2SolveWeldJoint( b2JointSim* base, b2StepContext* context, bool useBias )
+{
+	B2_ASSERT( base->type == b2_weldJoint );
+
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2WeldJoint* joint = &base->weldJoint;
+
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 vA = stateA->linearVelocity;
+	float wA = stateA->angularVelocity;
+	b2Vec2 vB = stateB->linearVelocity;
+	float wB = stateB->angularVelocity;
+
+	// angular constraint
+	{
+		float bias = 0.0f;
+		float massScale = 1.0f;
+		float impulseScale = 0.0f;
+		if ( useBias || joint->angularHertz > 0.0f )
+		{
+			float C = b2RelativeAngle( stateB->deltaRotation, stateA->deltaRotation ) + joint->deltaAngle;
+			bias = joint->angularSoftness.biasRate * C;
+			massScale = joint->angularSoftness.massScale;
+			impulseScale = joint->angularSoftness.impulseScale;
+		}
+
+		float Cdot = wB - wA;
+		float impulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * joint->angularImpulse;
+		joint->angularImpulse += impulse;
+
+		wA -= iA * impulse;
+		wB += iB * impulse;
+	}
+
+	// linear constraint
+	{
+		b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA );
+		b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+		b2Vec2 bias = b2Vec2_zero;
+		float massScale = 1.0f;
+		float impulseScale = 0.0f;
+		if ( useBias || joint->linearHertz > 0.0f )
+		{
+			b2Vec2 dcA = stateA->deltaPosition;
+			b2Vec2 dcB = stateB->deltaPosition;
+			b2Vec2 C = b2Add( b2Add( b2Sub( dcB, dcA ), b2Sub( rB, rA ) ), joint->deltaCenter );
+
+			bias = b2MulSV( joint->linearSoftness.biasRate, C );
+			massScale = joint->linearSoftness.massScale;
+			impulseScale = joint->linearSoftness.impulseScale;
+		}
+
+		b2Vec2 Cdot = b2Sub( b2Add( vB, b2CrossSV( wB, rB ) ), b2Add( vA, b2CrossSV( wA, rA ) ) );
+
+		b2Mat22 K;
+		K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB;
+		K.cy.x = -rA.y * rA.x * iA - rB.y * rB.x * iB;
+		K.cx.y = K.cy.x;
+		K.cy.y = mA + mB + rA.x * rA.x * iA + rB.x * rB.x * iB;
+		b2Vec2 b = b2Solve22( K, b2Add( Cdot, bias ) );
+
+		b2Vec2 impulse = {
+			-massScale * b.x - impulseScale * joint->linearImpulse.x,
+			-massScale * b.y - impulseScale * joint->linearImpulse.y,
+		};
+
+		joint->linearImpulse = b2Add( joint->linearImpulse, impulse );
+
+		vA = b2MulSub( vA, mA, impulse );
+		wA -= iA * b2Cross( rA, impulse );
+		vB = b2MulAdd( vB, mB, impulse );
+		wB += iB * b2Cross( rB, impulse );
+	}
+
+	stateA->linearVelocity = vA;
+	stateA->angularVelocity = wA;
+	stateB->linearVelocity = vB;
+	stateB->angularVelocity = wB;
+}
+
+#if 0
+void b2DumpWeldJoint()
+{
+	int32 indexA = m_bodyA->m_islandIndex;
+	int32 indexB = m_bodyB->m_islandIndex;
+
+	b2Dump("  b2WeldJointDef jd;\n");
+	b2Dump("  jd.bodyA = sims[%d];\n", indexA);
+	b2Dump("  jd.bodyB = sims[%d];\n", indexB);
+	b2Dump("  jd.collideConnected = bool(%d);\n", m_collideConnected);
+	b2Dump("  jd.localAnchorA.Set(%.9g, %.9g);\n", m_localAnchorA.x, m_localAnchorA.y);
+	b2Dump("  jd.localAnchorB.Set(%.9g, %.9g);\n", m_localAnchorB.x, m_localAnchorB.y);
+	b2Dump("  jd.referenceAngle = %.9g;\n", m_referenceAngle);
+	b2Dump("  jd.stiffness = %.9g;\n", m_stiffness);
+	b2Dump("  jd.damping = %.9g;\n", m_damping);
+	b2Dump("  joints[%d] = m_world->CreateJoint(&jd);\n", m_index);
+}
+#endif
diff --git a/src/vendor/box2d/wheel_joint.c b/src/vendor/box2d/wheel_joint.c
new file mode 100644
index 0000000..2201533
--- /dev/null
+++ b/src/vendor/box2d/wheel_joint.c
@@ -0,0 +1,549 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#include "body.h"
+#include "core.h"
+#include "joint.h"
+#include "solver.h"
+#include "solver_set.h"
+#include "world.h"
+
+// needed for dll export
+#include "box2d/box2d.h"
+
+#include <stdio.h>
+
+void b2WheelJoint_EnableSpring( b2JointId jointId, bool enableSpring )
+{
+	b2WheelJoint* wheel = &b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint;
+	if ( wheel->enableSpring == enableSpring )
+	{
+		return; // no state change, keep the accumulated spring impulse
+	}
+	wheel->springImpulse = 0.0f; // the flag flipped, so restart from a zero impulse
+	wheel->enableSpring = enableSpring;
+}
+
+bool b2WheelJoint_IsSpringEnabled( b2JointId jointId )
+{
+	// Report the spring flag stored on the wheel joint referenced by jointId.
+	return b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.enableSpring;
+}
+
+void b2WheelJoint_SetSpringHertz( b2JointId jointId, float hertz )
+{
+	// Stored as-is; b2PrepareWheelJoint feeds it to b2MakeSoft to build the spring softness.
+	b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.hertz = hertz;
+}
+
+float b2WheelJoint_GetSpringHertz( b2JointId jointId )
+{
+	// Return the spring frequency value stored on the wheel joint.
+	return b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.hertz;
+}
+
+void b2WheelJoint_SetSpringDampingRatio( b2JointId jointId, float dampingRatio )
+{
+	// Stored as-is; b2PrepareWheelJoint feeds it to b2MakeSoft together with the spring hertz.
+	b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.dampingRatio = dampingRatio;
+}
+
+float b2WheelJoint_GetSpringDampingRatio( b2JointId jointId )
+{
+	// Return the spring damping ratio stored on the wheel joint.
+	return b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.dampingRatio;
+}
+
+void b2WheelJoint_EnableLimit( b2JointId jointId, bool enableLimit )
+{
+	b2WheelJoint* wheel = &b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint;
+	if ( wheel->enableLimit == enableLimit )
+	{
+		return; // unchanged, keep the accumulated limit impulses
+	}
+	wheel->lowerImpulse = wheel->upperImpulse = 0.0f;
+	wheel->enableLimit = enableLimit;
+}
+
+bool b2WheelJoint_IsLimitEnabled( b2JointId jointId )
+{
+	// Report the translation-limit flag stored on the wheel joint.
+	return b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.enableLimit;
+}
+
+float b2WheelJoint_GetLowerLimit( b2JointId jointId )
+{
+	// Return the stored lower translation bound along the joint axis.
+	return b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.lowerTranslation;
+}
+
+float b2WheelJoint_GetUpperLimit( b2JointId jointId )
+{
+	// Return the stored upper translation bound along the joint axis.
+	return b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.upperTranslation;
+}
+
+void b2WheelJoint_SetLimits( b2JointId jointId, float lower, float upper )
+{
+	b2WheelJoint* wheel = &b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint;
+	if ( lower == wheel->lowerTranslation && upper == wheel->upperTranslation )
+	{
+		return; // same range, keep the accumulated limit impulses
+	}
+	wheel->lowerTranslation = b2MinFloat( lower, upper ); // arguments may arrive in either order; store them sorted
+	wheel->upperTranslation = b2MaxFloat( lower, upper );
+	wheel->lowerImpulse = wheel->upperImpulse = 0.0f;
+}
+
+void b2WheelJoint_EnableMotor( b2JointId jointId, bool enableMotor )
+{
+	b2WheelJoint* wheel = &b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint;
+	if ( enableMotor != wheel->enableMotor )
+	{
+		wheel->enableMotor = enableMotor;
+		wheel->motorImpulse = 0.0f; // drop the accumulated motor impulse on any on/off transition
+	}
+}
+
+bool b2WheelJoint_IsMotorEnabled( b2JointId jointId )
+{
+	// Report the motor flag stored on the wheel joint.
+	return b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.enableMotor;
+}
+
+void b2WheelJoint_SetMotorSpeed( b2JointId jointId, float motorSpeed )
+{
+	// b2SolveWheelJoint drives the relative angular velocity (wB - wA) toward this value.
+	b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.motorSpeed = motorSpeed;
+}
+
+float b2WheelJoint_GetMotorSpeed( b2JointId jointId )
+{
+	// Return the motor speed value stored on the wheel joint.
+	return b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.motorSpeed;
+}
+
+float b2WheelJoint_GetMotorTorque( b2JointId jointId )
+{
+	b2World* owner = b2GetWorld( jointId.world0 );
+	b2JointSim* sim = b2GetJointSimCheckType( jointId, b2_wheelJoint );
+	return sim->wheelJoint.motorImpulse * owner->inv_h; // accumulated motor impulse scaled by the world's inv_h
+}
+
+void b2WheelJoint_SetMaxMotorTorque( b2JointId jointId, float torque )
+{
+	// b2SolveWheelJoint clamps the motor impulse to +/- context->h * maxMotorTorque.
+	b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.maxMotorTorque = torque;
+}
+
+float b2WheelJoint_GetMaxMotorTorque( b2JointId jointId )
+{
+	// Return the motor torque cap stored on the wheel joint.
+	return b2GetJointSimCheckType( jointId, b2_wheelJoint )->wheelJoint.maxMotorTorque;
+}
+
+b2Vec2 b2GetWheelJointForce( b2World* world, b2JointSim* base )
+{
+	b2WheelJoint* wheel = &base->wheelJoint;
+	float invStep = world->inv_h;
+
+	// axisA is the value cached by b2PrepareWheelJoint, so the result is a frame behind
+	b2Vec2 axis = wheel->axisA;
+	b2Vec2 normal = b2LeftPerp( axis );
+
+	// scale the accumulated impulses by inv_h; the upper limit impulse acts against the axis
+	float axialForce = invStep * ( wheel->springImpulse + wheel->lowerImpulse - wheel->upperImpulse );
+	float perpForce = invStep * wheel->perpImpulse;
+	return b2Add( b2MulSV( perpForce, normal ), b2MulSV( axialForce, axis ) );
+}
+
+float b2GetWheelJointTorque( b2World* world, b2JointSim* base )
+{
+	return base->wheelJoint.motorImpulse * world->inv_h; // accumulated motor impulse scaled by inv_h
+}
+
+// Linear constraint (point-to-line)
+// d = pB - pA = xB + rB - xA - rA
+// C = dot(ay, d)
+// Cdot = dot(d, cross(wA, ay)) + dot(ay, vB + cross(wB, rB) - vA - cross(wA, rA))
+//      = -dot(ay, vA) - dot(cross(d + rA, ay), wA) + dot(ay, vB) + dot(cross(rB, ay), wB)
+// J = [-ay, -cross(d + rA, ay), ay, cross(rB, ay)]
+
+// Spring linear constraint
+// C = dot(ax, d)
+// Cdot = -dot(ax, vA) - dot(cross(d + rA, ax), wA) + dot(ax, vB) + dot(cross(rB, ax), wB)
+// J = [-ax -cross(d+rA, ax) ax cross(rB, ax)]
+
+// Motor rotational constraint
+// Cdot = wB - wA
+// J = [0 0 -1 0 0 1]
+
+void b2PrepareWheelJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_wheelJoint );
+	// Per-step setup: caches inverse masses, rotated anchors/axis and effective masses on the joint.
+	// chase body id to the solver set where the body lives
+	int idA = base->bodyIdA;
+	int idB = base->bodyIdB;
+
+	b2World* world = context->world;
+
+	b2Body* bodyA = b2BodyArray_Get( &world->bodies, idA );
+	b2Body* bodyB = b2BodyArray_Get( &world->bodies, idB );
+
+	B2_ASSERT( bodyA->setIndex == b2_awakeSet || bodyB->setIndex == b2_awakeSet ); // at least one body must be awake
+	b2SolverSet* setA = b2SolverSetArray_Get( &world->solverSets, bodyA->setIndex );
+	b2SolverSet* setB = b2SolverSetArray_Get( &world->solverSets, bodyB->setIndex );
+
+	int localIndexA = bodyA->localIndex;
+	int localIndexB = bodyB->localIndex;
+
+	b2BodySim* bodySimA = b2BodySimArray_Get( &setA->bodySims, localIndexA );
+	b2BodySim* bodySimB = b2BodySimArray_Get( &setB->bodySims, localIndexB );
+
+	float mA = bodySimA->invMass;
+	float iA = bodySimA->invInertia;
+	float mB = bodySimB->invMass;
+	float iB = bodySimB->invInertia;
+
+	base->invMassA = mA; // cached so warm start and solve do not need the body sims
+	base->invMassB = mB;
+	base->invIA = iA;
+	base->invIB = iB;
+
+	b2WheelJoint* joint = &base->wheelJoint;
+
+	joint->indexA = bodyA->setIndex == b2_awakeSet ? localIndexA : B2_NULL_INDEX; // null index: solver uses a dummy state
+	joint->indexB = bodyB->setIndex == b2_awakeSet ? localIndexB : B2_NULL_INDEX;
+
+	b2Rot qA = bodySimA->transform.q;
+	b2Rot qB = bodySimB->transform.q;
+
+	joint->anchorA = b2RotateVector( qA, b2Sub( base->localOriginAnchorA, bodySimA->localCenter ) ); // anchor relative to body center
+	joint->anchorB = b2RotateVector( qB, b2Sub( base->localOriginAnchorB, bodySimB->localCenter ) );
+	joint->axisA = b2RotateVector( qA, joint->localAxisA );
+	joint->deltaCenter = b2Sub( bodySimB->center, bodySimA->center );
+
+	b2Vec2 rA = joint->anchorA;
+	b2Vec2 rB = joint->anchorB;
+
+	b2Vec2 d = b2Add( joint->deltaCenter, b2Sub( rB, rA ) ); // d = pB - pA, see the derivation above
+	b2Vec2 axisA = joint->axisA;
+	b2Vec2 perpA = b2LeftPerp( axisA );
+
+	// perpendicular constraint (keep wheel on line)
+	float s1 = b2Cross( b2Add( d, rA ), perpA );
+	float s2 = b2Cross( rB, perpA );
+
+	float kp = mA + mB + iA * s1 * s1 + iB * s2 * s2;
+	joint->perpMass = kp > 0.0f ? 1.0f / kp : 0.0f; // zero effective mass when kp is not positive
+
+	// spring constraint
+	float a1 = b2Cross( b2Add( d, rA ), axisA );
+	float a2 = b2Cross( rB, axisA );
+
+	float ka = mA + mB + iA * a1 * a1 + iB * a2 * a2;
+	joint->axialMass = ka > 0.0f ? 1.0f / ka : 0.0f; // shared by the spring and both limits in b2SolveWheelJoint
+
+	joint->springSoftness = b2MakeSoft( joint->hertz, joint->dampingRatio, context->h );
+
+	float km = iA + iB; // the motor is purely angular
+	joint->motorMass = km > 0.0f ? 1.0f / km : 0.0f;
+
+	if ( context->enableWarmStarting == false ) // without warm starting, discard all accumulated impulses
+	{
+		joint->perpImpulse = 0.0f;
+		joint->springImpulse = 0.0f;
+		joint->motorImpulse = 0.0f;
+		joint->lowerImpulse = 0.0f;
+		joint->upperImpulse = 0.0f;
+	}
+}
+
+void b2WarmStartWheelJoint( b2JointSim* base, b2StepContext* context )
+{
+	B2_ASSERT( base->type == b2_wheelJoint );
+	// Applies the impulses accumulated on the joint to both body states before solving.
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2WheelJoint* joint = &base->wheelJoint;
+
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA ); // anchors updated by the rotation delta in the state
+	b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+	b2Vec2 d = b2Add( b2Add( b2Sub( stateB->deltaPosition, stateA->deltaPosition ), joint->deltaCenter ), b2Sub( rB, rA ) );
+	b2Vec2 axisA = b2RotateVector( stateA->deltaRotation, joint->axisA );
+	b2Vec2 perpA = b2LeftPerp( axisA );
+
+	float a1 = b2Cross( b2Add( d, rA ), axisA );
+	float a2 = b2Cross( rB, axisA );
+	float s1 = b2Cross( b2Add( d, rA ), perpA );
+	float s2 = b2Cross( rB, perpA );
+
+	float axialImpulse = joint->springImpulse + joint->lowerImpulse - joint->upperImpulse; // upper limit acts against the axis
+
+	b2Vec2 P = b2Add( b2MulSV( axialImpulse, axisA ), b2MulSV( joint->perpImpulse, perpA ) );
+	float LA = axialImpulse * a1 + joint->perpImpulse * s1 + joint->motorImpulse;
+	float LB = axialImpulse * a2 + joint->perpImpulse * s2 + joint->motorImpulse;
+
+	stateA->linearVelocity = b2MulSub( stateA->linearVelocity, mA, P ); // equal and opposite: subtracted from A, added to B
+	stateA->angularVelocity -= iA * LA;
+	stateB->linearVelocity = b2MulAdd( stateB->linearVelocity, mB, P );
+	stateB->angularVelocity += iB * LB;
+}
+
+void b2SolveWheelJoint( b2JointSim* base, b2StepContext* context, bool useBias )
+{
+	B2_ASSERT( base->type == b2_wheelJoint );
+	// Solves motor, spring, limits and the point-to-line constraint in that order, updating both body velocities.
+	float mA = base->invMassA;
+	float mB = base->invMassB;
+	float iA = base->invIA;
+	float iB = base->invIB;
+
+	// dummy state for static bodies
+	b2BodyState dummyState = b2_identityBodyState;
+
+	b2WheelJoint* joint = &base->wheelJoint;
+
+	b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA;
+	b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB;
+
+	b2Vec2 vA = stateA->linearVelocity;
+	float wA = stateA->angularVelocity;
+	b2Vec2 vB = stateB->linearVelocity;
+	float wB = stateB->angularVelocity;
+
+	bool fixedRotation = ( iA + iB == 0.0f ); // both inverse inertias are zero, so the motor is skipped
+
+	// current anchors
+	b2Vec2 rA = b2RotateVector( stateA->deltaRotation, joint->anchorA );
+	b2Vec2 rB = b2RotateVector( stateB->deltaRotation, joint->anchorB );
+
+	b2Vec2 d = b2Add( b2Add( b2Sub( stateB->deltaPosition, stateA->deltaPosition ), joint->deltaCenter ), b2Sub( rB, rA ) );
+	b2Vec2 axisA = b2RotateVector( stateA->deltaRotation, joint->axisA );
+	float translation = b2Dot( axisA, d ); // separation of the anchors along the joint axis
+
+	float a1 = b2Cross( b2Add( d, rA ), axisA );
+	float a2 = b2Cross( rB, axisA );
+
+	// motor constraint
+	if ( joint->enableMotor && fixedRotation == false )
+	{
+		float Cdot = wB - wA - joint->motorSpeed;
+		float impulse = -joint->motorMass * Cdot;
+		float oldImpulse = joint->motorImpulse;
+		float maxImpulse = context->h * joint->maxMotorTorque;
+		joint->motorImpulse = b2ClampFloat( joint->motorImpulse + impulse, -maxImpulse, maxImpulse ); // clamp the accumulated value
+		impulse = joint->motorImpulse - oldImpulse; // apply only the change that survived the clamp
+
+		wA -= iA * impulse;
+		wB += iB * impulse;
+	}
+
+	// spring constraint
+	if ( joint->enableSpring )
+	{
+		// This is a real spring and should be applied even during relax
+		float C = translation;
+		float bias = joint->springSoftness.biasRate * C;
+		float massScale = joint->springSoftness.massScale;
+		float impulseScale = joint->springSoftness.impulseScale;
+
+		float Cdot = b2Dot( axisA, b2Sub( vB, vA ) ) + a2 * wB - a1 * wA;
+		float impulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * joint->springImpulse;
+		joint->springImpulse += impulse;
+
+		b2Vec2 P = b2MulSV( impulse, axisA );
+		float LA = impulse * a1;
+		float LB = impulse * a2;
+
+		vA = b2MulSub( vA, mA, P );
+		wA -= iA * LA;
+		vB = b2MulAdd( vB, mB, P );
+		wB += iB * LB;
+	}
+
+	if ( joint->enableLimit )
+	{
+		// Lower limit
+		{
+			float C = translation - joint->lowerTranslation;
+			float bias = 0.0f;
+			float massScale = 1.0f;
+			float impulseScale = 0.0f;
+
+			if ( C > 0.0f )
+			{
+				// speculation
+				bias = C * context->inv_h;
+			}
+			else if ( useBias )
+			{
+				bias = context->jointSoftness.biasRate * C;
+				massScale = context->jointSoftness.massScale;
+				impulseScale = context->jointSoftness.impulseScale;
+			}
+
+			float Cdot = b2Dot( axisA, b2Sub( vB, vA ) ) + a2 * wB - a1 * wA;
+			float impulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * joint->lowerImpulse;
+			float oldImpulse = joint->lowerImpulse;
+			joint->lowerImpulse = b2MaxFloat( oldImpulse + impulse, 0.0f ); // accumulated limit impulse is kept non-negative
+			impulse = joint->lowerImpulse - oldImpulse;
+
+			b2Vec2 P = b2MulSV( impulse, axisA );
+			float LA = impulse * a1;
+			float LB = impulse * a2;
+
+			vA = b2MulSub( vA, mA, P );
+			wA -= iA * LA;
+			vB = b2MulAdd( vB, mB, P );
+			wB += iB * LB;
+		}
+
+		// Upper limit
+		// Note: signs are flipped to keep C positive when the constraint is satisfied.
+		// This also keeps the impulse positive when the limit is active.
+		{
+			// sign flipped
+			float C = joint->upperTranslation - translation;
+			float bias = 0.0f;
+			float massScale = 1.0f;
+			float impulseScale = 0.0f;
+
+			if ( C > 0.0f )
+			{
+				// speculation
+				bias = C * context->inv_h;
+			}
+			else if ( useBias )
+			{
+				bias = context->jointSoftness.biasRate * C;
+				massScale = context->jointSoftness.massScale;
+				impulseScale = context->jointSoftness.impulseScale;
+			}
+
+			// sign flipped on Cdot
+			float Cdot = b2Dot( axisA, b2Sub( vA, vB ) ) + a1 * wA - a2 * wB;
+			float impulse = -massScale * joint->axialMass * ( Cdot + bias ) - impulseScale * joint->upperImpulse;
+			float oldImpulse = joint->upperImpulse;
+			joint->upperImpulse = b2MaxFloat( oldImpulse + impulse, 0.0f ); // accumulated limit impulse is kept non-negative
+			impulse = joint->upperImpulse - oldImpulse;
+
+			b2Vec2 P = b2MulSV( impulse, axisA );
+			float LA = impulse * a1;
+			float LB = impulse * a2;
+
+			// sign flipped on applied impulse
+			vA = b2MulAdd( vA, mA, P );
+			wA += iA * LA;
+			vB = b2MulSub( vB, mB, P );
+			wB -= iB * LB;
+		}
+	}
+
+	// point to line constraint
+	{
+		b2Vec2 perpA = b2LeftPerp( axisA );
+
+		float bias = 0.0f;
+		float massScale = 1.0f;
+		float impulseScale = 0.0f;
+		if ( useBias ) // position correction is applied only when the caller asks for bias
+		{
+			float C = b2Dot( perpA, d );
+			bias = context->jointSoftness.biasRate * C;
+			massScale = context->jointSoftness.massScale;
+			impulseScale = context->jointSoftness.impulseScale;
+		}
+
+		float s1 = b2Cross( b2Add( d, rA ), perpA );
+		float s2 = b2Cross( rB, perpA );
+		float Cdot = b2Dot( perpA, b2Sub( vB, vA ) ) + s2 * wB - s1 * wA;
+
+		float impulse = -massScale * joint->perpMass * ( Cdot + bias ) - impulseScale * joint->perpImpulse;
+		joint->perpImpulse += impulse;
+
+		b2Vec2 P = b2MulSV( impulse, perpA );
+		float LA = impulse * s1;
+		float LB = impulse * s2;
+
+		vA = b2MulSub( vA, mA, P );
+		wA -= iA * LA;
+		vB = b2MulAdd( vB, mB, P );
+		wB += iB * LB;
+	}
+
+	stateA->linearVelocity = vA; // write the solved velocities back (into the dummy state for bodies not in the awake set)
+	stateA->angularVelocity = wA;
+	stateB->linearVelocity = vB;
+	stateB->angularVelocity = wB;
+}
+
+#if 0 // disabled: never compiled; reads an undeclared 'joint' and angle fields (lowerAngle/upperAngle) — appears stale
+void b2WheelJoint_Dump()
+{
+	int32 indexA = joint->bodyA->joint->islandIndex;
+	int32 indexB = joint->bodyB->joint->islandIndex;
+
+	b2Dump("  b2WheelJointDef jd;\n");
+	b2Dump("  jd.bodyA = sims[%d];\n", indexA);
+	b2Dump("  jd.bodyB = sims[%d];\n", indexB);
+	b2Dump("  jd.collideConnected = bool(%d);\n", joint->collideConnected);
+	b2Dump("  jd.localAnchorA.Set(%.9g, %.9g);\n", joint->localAnchorA.x, joint->localAnchorA.y);
+	b2Dump("  jd.localAnchorB.Set(%.9g, %.9g);\n", joint->localAnchorB.x, joint->localAnchorB.y);
+	b2Dump("  jd.referenceAngle = %.9g;\n", joint->referenceAngle);
+	b2Dump("  jd.enableLimit = bool(%d);\n", joint->enableLimit);
+	b2Dump("  jd.lowerAngle = %.9g;\n", joint->lowerAngle);
+	b2Dump("  jd.upperAngle = %.9g;\n", joint->upperAngle);
+	b2Dump("  jd.enableMotor = bool(%d);\n", joint->enableMotor);
+	b2Dump("  jd.motorSpeed = %.9g;\n", joint->motorSpeed);
+	b2Dump("  jd.maxMotorTorque = %.9g;\n", joint->maxMotorTorque);
+	b2Dump("  joints[%d] = joint->world->CreateJoint(&jd);\n", joint->index);
+}
+#endif
+
+void b2DrawWheelJoint( b2DebugDraw* draw, b2JointSim* base, b2Transform transformA, b2Transform transformB )
+{
+	B2_ASSERT( base->type == b2_wheelJoint );
+
+	b2WheelJoint* wheel = &base->wheelJoint;
+
+	// anchors mapped through each body's transform; the slide axis is rotated by body A
+	b2Vec2 anchorA = b2TransformPoint( transformA, base->localOriginAnchorA );
+	b2Vec2 anchorB = b2TransformPoint( transformB, base->localOriginAnchorB );
+	b2Vec2 slideAxis = b2RotateVector( transformA.q, wheel->localAxisA );
+	b2HexColor axisColor = b2_colorGray;
+	b2HexColor lowerColor = b2_colorGreen;
+	b2HexColor upperColor = b2_colorRed;
+	b2HexColor anchorBColor = b2_colorDimGray;
+	b2HexColor linkColor = b2_colorBlue;
+
+	draw->DrawSegmentFcn( anchorA, anchorB, linkColor, draw->context );
+
+	if ( wheel->enableLimit )
+	{
+		b2Vec2 lowerPoint = b2MulAdd( anchorA, wheel->lowerTranslation, slideAxis );
+		b2Vec2 upperPoint = b2MulAdd( anchorA, wheel->upperTranslation, slideAxis );
+		b2Vec2 tick = b2LeftPerp( slideAxis );
+		draw->DrawSegmentFcn( lowerPoint, upperPoint, axisColor, draw->context );
+		draw->DrawSegmentFcn( b2MulSub( lowerPoint, 0.1f, tick ), b2MulAdd( lowerPoint, 0.1f, tick ), lowerColor, draw->context );
+		draw->DrawSegmentFcn( b2MulSub( upperPoint, 0.1f, tick ), b2MulAdd( upperPoint, 0.1f, tick ), upperColor, draw->context );
+	}
+	else
+	{
+		draw->DrawSegmentFcn( b2MulSub( anchorA, 1.0f, slideAxis ), b2MulAdd( anchorA, 1.0f, slideAxis ), axisColor, draw->context );
+	}
+
+	draw->DrawPointFcn( anchorA, 5.0f, axisColor, draw->context );
+	draw->DrawPointFcn( anchorB, 5.0f, anchorBColor, draw->context );
+}
diff --git a/src/vendor/box2d/world.c b/src/vendor/box2d/world.c
new file mode 100644
index 0000000..c1ab50b
--- /dev/null
+++ b/src/vendor/box2d/world.c
@@ -0,0 +1,3303 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#if defined( _MSC_VER ) && !defined( _CRT_SECURE_NO_WARNINGS )
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include "world.h"
+
+#include "aabb.h"
+#include "arena_allocator.h"
+#include "array.h"
+#include "bitset.h"
+#include "body.h"
+#include "broad_phase.h"
+#include "constants.h"
+#include "constraint_graph.h"
+#include "contact.h"
+#include "core.h"
+#include "ctz.h"
+#include "island.h"
+#include "joint.h"
+#include "sensor.h"
+#include "shape.h"
+#include "solver.h"
+#include "solver_set.h"
+
+#include "box2d/box2d.h"
+
+#include <float.h>
+#include <stdio.h>
+#include <string.h>
+
+_Static_assert( B2_MAX_WORLDS > 0, "must be 1 or more" );
+_Static_assert( B2_MAX_WORLDS < UINT16_MAX, "B2_MAX_WORLDS limit exceeded" ); // world indices are stored as uint16_t
+b2World b2_worlds[B2_MAX_WORLDS]; // fixed table of world slots; b2CreateWorld claims the first slot with inUse == false
+
+B2_ARRAY_SOURCE( b2BodyMoveEvent, b2BodyMoveEvent ) // NOTE(review): presumably emits the b2<Type>Array_* definitions — confirm in array.h
+B2_ARRAY_SOURCE( b2ContactBeginTouchEvent, b2ContactBeginTouchEvent )
+B2_ARRAY_SOURCE( b2ContactEndTouchEvent, b2ContactEndTouchEvent )
+B2_ARRAY_SOURCE( b2ContactHitEvent, b2ContactHitEvent )
+B2_ARRAY_SOURCE( b2SensorBeginTouchEvent, b2SensorBeginTouchEvent )
+B2_ARRAY_SOURCE( b2SensorEndTouchEvent, b2SensorEndTouchEvent )
+B2_ARRAY_SOURCE( b2TaskContext, b2TaskContext )
+
+b2World* b2GetWorldFromId( b2WorldId id )
+{
+	// index1 is one-based (0 is the null id), so the slot is index1 - 1
+	int slot = id.index1 - 1;
+	B2_ASSERT( 0 <= slot && slot < B2_MAX_WORLDS );
+	B2_ASSERT( b2_worlds[slot].worldId == slot && b2_worlds[slot].generation == id.generation );
+	return &b2_worlds[slot];
+}
+
+b2World* b2GetWorld( int index )
+{
+	// index is the zero-based slot in b2_worlds; it is validated by assertions only
+	B2_ASSERT( index >= 0 && index < B2_MAX_WORLDS );
+	B2_ASSERT( b2_worlds[index].worldId == index );
+	return &b2_worlds[index];
+}
+
+b2World* b2GetWorldLocked( int index )
+{
+	// Same slot lookup as b2GetWorld, but a locked world trips an assert and yields NULL.
+	B2_ASSERT( index >= 0 && index < B2_MAX_WORLDS );
+	b2World* slot = &b2_worlds[index];
+	B2_ASSERT( slot->worldId == index );
+	if ( slot->locked == false )
+	{
+		return slot;
+	}
+	B2_ASSERT( false );
+	return NULL;
+}
+
+static void* b2DefaultAddTaskFcn( b2TaskCallback* task, int count, int minRange, void* taskContext, void* userContext )
+{
+	B2_UNUSED( minRange, userContext );
+	task( 0, count, 0, taskContext ); // run the whole range [0, count) right here as worker index 0
+	return NULL; // no task handle: the work has already finished
+}
+
+static void b2DefaultFinishTaskFcn( void* userTask, void* userContext )
+{
+	B2_UNUSED( userTask, userContext ); // nothing to wait for: b2DefaultAddTaskFcn runs the task before returning
+}
+
+static float b2DefaultFrictionCallback( float frictionA, int materialA, float frictionB, int materialB )
+{
+	B2_UNUSED( materialA, materialB );
+	return sqrtf( frictionA * frictionB ); // geometric mean of the two friction values
+}
+
+static float b2DefaultRestitutionCallback( float restitutionA, int materialA, float restitutionB, int materialB )
+{
+	B2_UNUSED( materialA, materialB );
+	return b2MaxFloat( restitutionA, restitutionB ); // the larger of the two restitution values wins
+}
+
+b2WorldId b2CreateWorld( const b2WorldDef* def )
+{
+	_Static_assert( B2_MAX_WORLDS < UINT16_MAX, "B2_MAX_WORLDS limit exceeded" );
+	B2_CHECK_DEF( def );
+
+	int worldId = B2_NULL_INDEX;
+	for ( int i = 0; i < B2_MAX_WORLDS; ++i ) // claim the first free slot in b2_worlds
+	{
+		if ( b2_worlds[i].inUse == false )
+		{
+			worldId = i;
+			break;
+		}
+	}
+
+	if ( worldId == B2_NULL_INDEX )
+	{
+		return (b2WorldId){ 0 }; // every slot is in use: return the null id
+	}
+
+	b2InitializeContactRegisters();
+
+	b2World* world = b2_worlds + worldId;
+	uint16_t generation = world->generation; // kept across the wipe below; b2DestroyWorld bumps it per slot reuse
+
+	*world = (b2World){ 0 };
+
+	world->worldId = (uint16_t)worldId;
+	world->generation = generation;
+	world->inUse = true;
+
+	world->arena = b2CreateArenaAllocator( 2048 );
+	b2CreateBroadPhase( &world->broadPhase );
+	b2CreateGraph( &world->constraintGraph, 16 );
+
+	// pools
+	world->bodyIdPool = b2CreateIdPool();
+	world->bodies = b2BodyArray_Create( 16 );
+	world->solverSets = b2SolverSetArray_Create( 8 );
+
+	// add empty static, active, and disabled body sets
+	world->solverSetIdPool = b2CreateIdPool();
+	b2SolverSet set = { 0 };
+
+	// static set
+	set.setIndex = b2AllocId( &world->solverSetIdPool );
+	b2SolverSetArray_Push( &world->solverSets, set );
+	B2_ASSERT( world->solverSets.data[b2_staticSet].setIndex == b2_staticSet );
+
+	// disabled set
+	set.setIndex = b2AllocId( &world->solverSetIdPool );
+	b2SolverSetArray_Push( &world->solverSets, set );
+	B2_ASSERT( world->solverSets.data[b2_disabledSet].setIndex == b2_disabledSet );
+
+	// awake set
+	set.setIndex = b2AllocId( &world->solverSetIdPool );
+	b2SolverSetArray_Push( &world->solverSets, set );
+	B2_ASSERT( world->solverSets.data[b2_awakeSet].setIndex == b2_awakeSet );
+
+	world->shapeIdPool = b2CreateIdPool();
+	world->shapes = b2ShapeArray_Create( 16 );
+
+	world->chainIdPool = b2CreateIdPool();
+	world->chainShapes = b2ChainShapeArray_Create( 4 );
+
+	world->contactIdPool = b2CreateIdPool();
+	world->contacts = b2ContactArray_Create( 16 );
+
+	world->jointIdPool = b2CreateIdPool();
+	world->joints = b2JointArray_Create( 16 );
+
+	world->islandIdPool = b2CreateIdPool();
+	world->islands = b2IslandArray_Create( 8 );
+
+	world->sensors = b2SensorArray_Create( 4 );
+
+	world->bodyMoveEvents = b2BodyMoveEventArray_Create( 4 );
+	world->sensorBeginEvents = b2SensorBeginTouchEventArray_Create( 4 );
+	world->sensorEndEvents[0] = b2SensorEndTouchEventArray_Create( 4 );
+	world->sensorEndEvents[1] = b2SensorEndTouchEventArray_Create( 4 );
+	world->contactBeginEvents = b2ContactBeginTouchEventArray_Create( 4 );
+	world->contactEndEvents[0] = b2ContactEndTouchEventArray_Create( 4 );
+	world->contactEndEvents[1] = b2ContactEndTouchEventArray_Create( 4 );
+	world->contactHitEvents = b2ContactHitEventArray_Create( 4 );
+	world->endEventArrayIndex = 0;
+
+	world->stepIndex = 0;
+	world->splitIslandId = B2_NULL_INDEX;
+	world->activeTaskCount = 0;
+	world->taskCount = 0;
+	world->gravity = def->gravity;
+	world->hitEventThreshold = def->hitEventThreshold;
+	world->restitutionThreshold = def->restitutionThreshold;
+	world->maxLinearSpeed = def->maximumLinearSpeed;
+	world->maxContactPushSpeed = def->maxContactPushSpeed;
+	world->contactHertz = def->contactHertz;
+	world->contactDampingRatio = def->contactDampingRatio;
+	world->jointHertz = def->jointHertz;
+	world->jointDampingRatio = def->jointDampingRatio;
+
+	if ( def->frictionCallback == NULL ) // fall back to the built-in mixing rule when the user supplies none
+	{
+		world->frictionCallback = b2DefaultFrictionCallback;
+	}
+	else
+	{
+		world->frictionCallback = def->frictionCallback;
+	}
+
+	if ( def->restitutionCallback == NULL )
+	{
+		world->restitutionCallback = b2DefaultRestitutionCallback;
+	}
+	else
+	{
+		world->restitutionCallback = def->restitutionCallback;
+	}
+
+	world->enableSleep = def->enableSleep;
+	world->locked = false;
+	world->enableWarmStarting = true;
+	world->enableContinuous = def->enableContinuous;
+	world->enableSpeculative = true;
+	world->userTreeTask = NULL;
+	world->userData = def->userData;
+
+	if ( def->workerCount > 0 && def->enqueueTask != NULL && def->finishTask != NULL ) // user task system needs all three
+	{
+		world->workerCount = b2MinInt( def->workerCount, B2_MAX_WORKERS );
+		world->enqueueTaskFcn = def->enqueueTask;
+		world->finishTaskFcn = def->finishTask;
+		world->userTaskContext = def->userTaskContext;
+	}
+	else
+	{
+		world->workerCount = 1; // single worker using the inline default task functions
+		world->enqueueTaskFcn = b2DefaultAddTaskFcn;
+		world->finishTaskFcn = b2DefaultFinishTaskFcn;
+		world->userTaskContext = NULL;
+	}
+
+	world->taskContexts = b2TaskContextArray_Create( world->workerCount );
+	b2TaskContextArray_Resize( &world->taskContexts, world->workerCount ); // one task context per worker
+
+	world->sensorTaskContexts = b2SensorTaskContextArray_Create( world->workerCount );
+	b2SensorTaskContextArray_Resize( &world->sensorTaskContexts, world->workerCount );
+
+	for ( int i = 0; i < world->workerCount; ++i )
+	{
+		world->taskContexts.data[i].contactStateBitSet = b2CreateBitSet( 1024 );
+		world->taskContexts.data[i].enlargedSimBitSet = b2CreateBitSet( 256 );
+		world->taskContexts.data[i].awakeIslandBitSet = b2CreateBitSet( 256 );
+
+		world->sensorTaskContexts.data[i].eventBits = b2CreateBitSet( 128 );
+	}
+
+	world->debugBodySet = b2CreateBitSet( 256 );
+	world->debugJointSet = b2CreateBitSet( 256 );
+	world->debugContactSet = b2CreateBitSet( 256 );
+	world->debugIslandSet = b2CreateBitSet( 256 );
+
+	// add one to worldId so that 0 represents a null b2WorldId
+	return (b2WorldId){ (uint16_t)( worldId + 1 ), world->generation };
+}
+
+void b2DestroyWorld( b2WorldId worldId )
+{
+	b2World* world = b2GetWorldFromId( worldId ); // asserts that the id's index and generation match the slot
+
+	b2DestroyBitSet( &world->debugBodySet );
+	b2DestroyBitSet( &world->debugJointSet );
+	b2DestroyBitSet( &world->debugContactSet );
+	b2DestroyBitSet( &world->debugIslandSet );
+
+	for ( int i = 0; i < world->workerCount; ++i ) // per-worker bit sets created in b2CreateWorld
+	{
+		b2DestroyBitSet( &world->taskContexts.data[i].contactStateBitSet );
+		b2DestroyBitSet( &world->taskContexts.data[i].enlargedSimBitSet );
+		b2DestroyBitSet( &world->taskContexts.data[i].awakeIslandBitSet );
+
+		b2DestroyBitSet( &world->sensorTaskContexts.data[i].eventBits );
+	}
+
+	b2TaskContextArray_Destroy( &world->taskContexts );
+	b2SensorTaskContextArray_Destroy( &world->sensorTaskContexts );
+
+	b2BodyMoveEventArray_Destroy( &world->bodyMoveEvents );
+	b2SensorBeginTouchEventArray_Destroy( &world->sensorBeginEvents );
+	b2SensorEndTouchEventArray_Destroy( world->sensorEndEvents + 0 );
+	b2SensorEndTouchEventArray_Destroy( world->sensorEndEvents + 1 );
+	b2ContactBeginTouchEventArray_Destroy( &world->contactBeginEvents );
+	b2ContactEndTouchEventArray_Destroy( world->contactEndEvents + 0 );
+	b2ContactEndTouchEventArray_Destroy( world->contactEndEvents + 1 );
+	b2ContactHitEventArray_Destroy( &world->contactHitEvents );
+
+	int chainCapacity = world->chainShapes.count;
+	for ( int i = 0; i < chainCapacity; ++i )
+	{
+		b2ChainShape* chain = world->chainShapes.data + i;
+		if ( chain->id != B2_NULL_INDEX ) // live chain: release its data before the array itself is destroyed
+		{
+			b2FreeChainData( chain );
+		}
+		else
+		{
+			B2_ASSERT( chain->shapeIndices == NULL );
+			B2_ASSERT( chain->materials == NULL );
+		}
+	}
+
+	int sensorCount = world->sensors.count;
+	for ( int i = 0; i < sensorCount; ++i ) // each sensor owns two overlap arrays
+	{
+		b2ShapeRefArray_Destroy( &world->sensors.data[i].overlaps1 );
+		b2ShapeRefArray_Destroy( &world->sensors.data[i].overlaps2 );
+	}
+
+	b2SensorArray_Destroy( &world->sensors );
+
+	b2BodyArray_Destroy( &world->bodies );
+	b2ShapeArray_Destroy( &world->shapes );
+	b2ChainShapeArray_Destroy( &world->chainShapes );
+	b2ContactArray_Destroy( &world->contacts );
+	b2JointArray_Destroy( &world->joints );
+	b2IslandArray_Destroy( &world->islands );
+
+	// Destroy solver sets
+	int setCapacity = world->solverSets.count;
+	for ( int i = 0; i < setCapacity; ++i )
+	{
+		b2SolverSet* set = world->solverSets.data + i;
+		if ( set->setIndex != B2_NULL_INDEX ) // entries with a null setIndex are skipped
+		{
+			b2DestroySolverSet( world, i );
+		}
+	}
+
+	b2SolverSetArray_Destroy( &world->solverSets );
+
+	b2DestroyGraph( &world->constraintGraph );
+	b2DestroyBroadPhase( &world->broadPhase );
+
+	b2DestroyIdPool( &world->bodyIdPool );
+	b2DestroyIdPool( &world->shapeIdPool );
+	b2DestroyIdPool( &world->chainIdPool );
+	b2DestroyIdPool( &world->contactIdPool );
+	b2DestroyIdPool( &world->jointIdPool );
+	b2DestroyIdPool( &world->islandIdPool );
+	b2DestroyIdPool( &world->solverSetIdPool );
+
+	b2DestroyArenaAllocator( &world->arena );
+
+	// Wipe world but preserve generation
+	uint16_t generation = world->generation;
+	*world = (b2World){ 0 }; // also clears inUse, which frees the slot for b2CreateWorld
+	world->worldId = 0; // already zero after the wipe above
+	world->generation = generation + 1; // ids issued for the destroyed world no longer match this slot
+}
+
+static void b2CollideTask( int startIndex, int endIndex, uint32_t threadIndex, void* context )
+{
+	b2TracyCZoneNC( collide_task, "Collide", b2_colorDodgerBlue, true );
+	// Updates contacts [startIndex, endIndex) of stepContext->contacts and flags state changes in this worker's bit set.
+	b2StepContext* stepContext = context;
+	b2World* world = stepContext->world;
+	B2_ASSERT( (int)threadIndex < world->workerCount );
+	b2TaskContext* taskContext = world->taskContexts.data + threadIndex; // each worker index has its own task context
+	b2ContactSim** contactSims = stepContext->contacts;
+	b2Shape* shapes = world->shapes.data;
+	b2Body* bodies = world->bodies.data;
+
+	B2_ASSERT( startIndex < endIndex );
+
+	for ( int contactIndex = startIndex; contactIndex < endIndex; ++contactIndex )
+	{
+		b2ContactSim* contactSim = contactSims[contactIndex];
+
+		int contactId = contactSim->contactId;
+
+		b2Shape* shapeA = shapes + contactSim->shapeIdA;
+		b2Shape* shapeB = shapes + contactSim->shapeIdB;
+
+		// Do proxies still overlap?
+		bool overlap = b2AABB_Overlaps( shapeA->fatAABB, shapeB->fatAABB );
+		if ( overlap == false )
+		{
+			contactSim->simFlags |= b2_simDisjoint; // fat AABBs separated: mark disjoint and clear the touching flag
+			contactSim->simFlags &= ~b2_simTouchingFlag;
+			b2SetBit( &taskContext->contactStateBitSet, contactId );
+		}
+		else
+		{
+			bool wasTouching = ( contactSim->simFlags & b2_simTouchingFlag );
+
+			// Update contact respecting shape/body order (A,B)
+			b2Body* bodyA = bodies + shapeA->bodyId;
+			b2Body* bodyB = bodies + shapeB->bodyId;
+			b2BodySim* bodySimA = b2GetBodySim( world, bodyA );
+			b2BodySim* bodySimB = b2GetBodySim( world, bodyB );
+
+			// avoid cache misses in b2PrepareContactsTask
+			contactSim->bodySimIndexA = bodyA->setIndex == b2_awakeSet ? bodyA->localIndex : B2_NULL_INDEX;
+			contactSim->invMassA = bodySimA->invMass;
+			contactSim->invIA = bodySimA->invInertia;
+
+			contactSim->bodySimIndexB = bodyB->setIndex == b2_awakeSet ? bodyB->localIndex : B2_NULL_INDEX;
+			contactSim->invMassB = bodySimB->invMass;
+			contactSim->invIB = bodySimB->invInertia;
+
+			b2Transform transformA = bodySimA->transform;
+			b2Transform transformB = bodySimB->transform;
+
+			b2Vec2 centerOffsetA = b2RotateVector( transformA.q, bodySimA->localCenter ); // local center rotated by the body rotation
+			b2Vec2 centerOffsetB = b2RotateVector( transformB.q, bodySimB->localCenter );
+
+			// This updates solid contacts and sensors
+			bool touching =
+				b2UpdateContact( world, contactSim, shapeA, transformA, centerOffsetA, shapeB, transformB, centerOffsetB );
+
+			// State changes that affect island connectivity. Also affects contact and sensor events.
+			if ( touching == true && wasTouching == false )
+			{
+				contactSim->simFlags |= b2_simStartedTouching;
+				b2SetBit( &taskContext->contactStateBitSet, contactId );
+			}
+			else if ( touching == false && wasTouching == true )
+			{
+				contactSim->simFlags |= b2_simStoppedTouching;
+				b2SetBit( &taskContext->contactStateBitSet, contactId );
+			}
+
+			// To make this work, the time of impact code needs to adjust the target
+			// distance based on the number of TOI events for a body.
+			// if (touching && bodySimB->isFast)
+			//{
+			//	b2Manifold* manifold = &contactSim->manifold;
+			//	int pointCount = manifold->pointCount;
+			//	for (int i = 0; i < pointCount; ++i)
+			//	{
+			//		// trick the solver into pushing the fast shapes apart
+			//		manifold->points[i].separation -= 0.25f * B2_SPECULATIVE_DISTANCE;
+			//	}
+			//}
+		}
+	}
+
+	b2TracyCZoneEnd( collide_task );
+}
+
+// Task callback that rebuilds the broad-phase trees of the b2World passed in as context.
+static void b2UpdateTreesTask( int startIndex, int endIndex, uint32_t threadIndex, void* context )
+{
+	// The index range and the thread index are not needed by this task.
+	B2_UNUSED( threadIndex );
+	B2_UNUSED( endIndex );
+	B2_UNUSED( startIndex );
+	b2TracyCZoneNC( tree_task, "Rebuild BVH", b2_colorFireBrick, true );
+
+	b2World* owner = context;
+	b2BroadPhase_RebuildTrees( &owner->broadPhase );
+	b2TracyCZoneEnd( tree_task );
+}
+
+// Appends a copy of contactSim to the awake set's contact sims and points contact at that copy.
+static void b2AddNonTouchingContact( b2World* world, b2Contact* contact, b2ContactSim* contactSim )
+{
+	B2_ASSERT( contact->setIndex == b2_awakeSet );
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	contact->localIndex = awakeSet->contactSims.count; // index of the element added below
+	contact->colorIndex = B2_NULL_INDEX;
+	b2ContactSim* slot = b2ContactSimArray_Add( &awakeSet->contactSims );
+	memcpy( slot, contactSim, sizeof( *slot ) );
+}
+
+// Swap-removes the contact sim at localIndex from a solver set and re-indexes the contact that was moved.
+static void b2RemoveNonTouchingContact( b2World* world, int setIndex, int localIndex )
+{
+	b2SolverSet* solverSet = b2SolverSetArray_Get( &world->solverSets, setIndex );
+	int movedFrom = b2ContactSimArray_RemoveSwap( &solverSet->contactSims, localIndex );
+	if ( movedFrom == B2_NULL_INDEX ) { return; } // RemoveSwap reported no moved element
+
+	// The moved contact sim now sits at localIndex, so its contact must point there.
+	b2Contact* owner = b2ContactArray_Get( &world->contacts, solverSet->contactSims.data[localIndex].contactId );
+	B2_ASSERT( owner->setIndex == setIndex );
+	B2_ASSERT( owner->localIndex == movedFrom );
+	B2_ASSERT( owner->colorIndex == B2_NULL_INDEX );
+	owner->localIndex = localIndex;
+}
+
+// Narrow-phase collision: updates all awake contact sims in parallel, then serially applies the flagged state changes.
+static void b2Collide( b2StepContext* context )
+{
+	b2World* world = context->world;
+
+	B2_ASSERT( world->workerCount > 0 );
+
+	b2TracyCZoneNC( collide, "Narrow Phase", b2_colorDodgerBlue, true );
+
+	// Task that can be done in parallel with the narrow-phase (it is enqueued here but not finished in this function)
+	// - rebuild the collision tree for dynamic and kinematic bodies to keep their query performance good
+	// todo_erin move this to start when contacts are being created
+	world->userTreeTask = world->enqueueTaskFcn( &b2UpdateTreesTask, 1, 1, world, world->userTaskContext );
+	world->taskCount += 1;
+	world->activeTaskCount += world->userTreeTask == NULL ? 0 : 1; // a NULL task handle is not counted as active
+
+	// Gather contacts into a single array for easier parallel-for: graph colors first, then the awake set's own contact sims
+	int contactCount = 0;
+	b2GraphColor* graphColors = world->constraintGraph.colors;
+	for ( int i = 0; i < B2_GRAPH_COLOR_COUNT; ++i )
+	{
+		contactCount += graphColors[i].contactSims.count;
+	}
+
+	int nonTouchingCount = world->solverSets.data[b2_awakeSet].contactSims.count;
+	contactCount += nonTouchingCount;
+
+	if ( contactCount == 0 )
+	{
+		b2TracyCZoneEnd( collide );
+		return;
+	}
+
+	b2ContactSim** contactSims = b2AllocateArenaItem( &world->arena, contactCount * sizeof( b2ContactSim* ), "contacts" );
+
+	int contactIndex = 0;
+	for ( int i = 0; i < B2_GRAPH_COLOR_COUNT; ++i )
+	{
+		b2GraphColor* color = graphColors + i;
+		int count = color->contactSims.count;
+		b2ContactSim* base = color->contactSims.data;
+		for ( int j = 0; j < count; ++j )
+		{
+			contactSims[contactIndex] = base + j;
+			contactIndex += 1;
+		}
+	}
+
+	{
+		b2ContactSim* base = world->solverSets.data[b2_awakeSet].contactSims.data;
+		for ( int i = 0; i < nonTouchingCount; ++i )
+		{
+			contactSims[contactIndex] = base + i;
+			contactIndex += 1;
+		}
+	}
+
+	B2_ASSERT( contactIndex == contactCount );
+
+	context->contacts = contactSims;
+
+	// Contact bit set on ids because contact pointers are unstable as they move between touching and not touching.
+	int contactIdCapacity = b2GetIdCapacity( &world->contactIdPool );
+	for ( int i = 0; i < world->workerCount; ++i )
+	{
+		b2SetBitCountAndClear( &world->taskContexts.data[i].contactStateBitSet, contactIdCapacity );
+	}
+
+	// Task should take at least 40us on a 4GHz CPU (10K cycles). NOTE(review): 40us at 4GHz is 160K cycles; confirm which figure is meant
+	int minRange = 64;
+	void* userCollideTask = world->enqueueTaskFcn( &b2CollideTask, contactCount, minRange, context, world->userTaskContext );
+	world->taskCount += 1;
+	if ( userCollideTask != NULL )
+	{
+		world->finishTaskFcn( userCollideTask, world->userTaskContext );
+	}
+
+	b2FreeArenaItem( &world->arena, contactSims );
+	context->contacts = NULL;
+	contactSims = NULL;
+
+	// Serially update contact state
+	// todo_erin bring this zone together with island merge
+	b2TracyCZoneNC( contact_state, "Contact State", b2_colorLightSlateGray, true );
+
+	// Bitwise OR all contact bits into the bit set of the first task context
+	b2BitSet* bitSet = &world->taskContexts.data[0].contactStateBitSet;
+	for ( int i = 1; i < world->workerCount; ++i )
+	{
+		b2InPlaceUnion( bitSet, &world->taskContexts.data[i].contactStateBitSet );
+	}
+
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+
+	int endEventArrayIndex = world->endEventArrayIndex;
+
+	const b2Shape* shapes = world->shapes.data;
+	uint16_t worldId = world->worldId;
+
+	// Process contact state changes. Iterate over set bits; each bit position is a contact id
+	for ( uint32_t k = 0; k < bitSet->blockCount; ++k )
+	{
+		uint64_t bits = bitSet->bits[k];
+		while ( bits != 0 )
+		{
+			uint32_t ctz = b2CTZ64( bits );
+			int contactId = (int)( 64 * k + ctz );
+
+			b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+			B2_ASSERT( contact->setIndex == b2_awakeSet );
+
+			int colorIndex = contact->colorIndex;
+			int localIndex = contact->localIndex;
+
+			b2ContactSim* contactSim = NULL;
+			if ( colorIndex != B2_NULL_INDEX )
+			{
+				// contact lives in constraint graph
+				B2_ASSERT( 0 <= colorIndex && colorIndex < B2_GRAPH_COLOR_COUNT );
+				b2GraphColor* color = graphColors + colorIndex;
+				contactSim = b2ContactSimArray_Get( &color->contactSims, localIndex );
+			}
+			else
+			{
+				contactSim = b2ContactSimArray_Get( &awakeSet->contactSims, localIndex );
+			}
+
+			const b2Shape* shapeA = shapes + contact->shapeIdA;
+			const b2Shape* shapeB = shapes + contact->shapeIdB;
+			b2ShapeId shapeIdA = { shapeA->id + 1, worldId, shapeA->generation };
+			b2ShapeId shapeIdB = { shapeB->id + 1, worldId, shapeB->generation };
+			uint32_t flags = contact->flags;
+			uint32_t simFlags = contactSim->simFlags; // flags are read up front because contactSim is updated or dropped in the branches below
+
+			if ( simFlags & b2_simDisjoint )
+			{
+				// Bounding boxes no longer overlap
+				b2DestroyContact( world, contact, false );
+				contact = NULL;
+				contactSim = NULL;
+			}
+			else if ( simFlags & b2_simStartedTouching )
+			{
+				B2_ASSERT( contact->islandId == B2_NULL_INDEX );
+				// Contact is solid
+				if ( flags & b2_contactEnableContactEvents )
+				{
+					b2ContactBeginTouchEvent event = { shapeIdA, shapeIdB, contactSim->manifold };
+					b2ContactBeginTouchEventArray_Push( &world->contactBeginEvents, event );
+				}
+
+				B2_ASSERT( contactSim->manifold.pointCount > 0 );
+				B2_ASSERT( contact->setIndex == b2_awakeSet );
+
+				// Link first because this wakes colliding bodies and ensures the body sims
+				// are in the correct place.
+				contact->flags |= b2_contactTouchingFlag;
+				b2LinkContact( world, contact );
+
+				// Make sure these didn't change
+				B2_ASSERT( contact->colorIndex == B2_NULL_INDEX );
+				B2_ASSERT( contact->localIndex == localIndex );
+
+				// Contact sim pointer may have become orphaned due to awake set growth,
+				// so I just need to refresh it.
+				contactSim = b2ContactSimArray_Get( &awakeSet->contactSims, localIndex );
+
+				contactSim->simFlags &= ~b2_simStartedTouching;
+
+				b2AddContactToGraph( world, contactSim, contact );
+				b2RemoveNonTouchingContact( world, b2_awakeSet, localIndex );
+				contactSim = NULL;
+			}
+			else if ( simFlags & b2_simStoppedTouching )
+			{
+				contactSim->simFlags &= ~b2_simStoppedTouching;
+
+				// Contact is solid
+				contact->flags &= ~b2_contactTouchingFlag;
+
+				if ( contact->flags & b2_contactEnableContactEvents )
+				{
+					b2ContactEndTouchEvent event = { shapeIdA, shapeIdB };
+					b2ContactEndTouchEventArray_Push( world->contactEndEvents + endEventArrayIndex, event );
+				}
+
+				B2_ASSERT( contactSim->manifold.pointCount == 0 );
+
+				b2UnlinkContact( world, contact );
+				int bodyIdA = contact->edges[0].bodyId;
+				int bodyIdB = contact->edges[1].bodyId;
+
+				b2AddNonTouchingContact( world, contact, contactSim ); // copy into the awake set before removing from the graph
+				b2RemoveContactFromGraph( world, bodyIdA, bodyIdB, colorIndex, localIndex );
+				contact = NULL;
+				contactSim = NULL;
+			}
+
+			// Clear the smallest set bit
+			bits = bits & ( bits - 1 );
+		}
+	}
+
+	b2ValidateSolverSets( world );
+	b2ValidateContacts( world );
+
+	b2TracyCZoneEnd( contact_state );
+	b2TracyCZoneEnd( collide );
+}
+
+// Advances the simulation of the given world by timeStep, split into subStepCount sub-steps.
+// Returns without doing anything when the world is locked. When timeStep is exactly zero, only
+// the per-step event arrays and the profile are reset and the end event buffers are swapped.
+// A subStepCount below one is treated as one.
+void b2World_Step( b2WorldId worldId, float timeStep, int subStepCount )
+{
+	B2_ASSERT( b2IsValidFloat( timeStep ) );
+	B2_ASSERT( 0 < subStepCount );
+
+	b2World* world = b2GetWorldFromId( worldId );
+	B2_ASSERT( world->locked == false );
+	if ( world->locked )
+	{
+		return;
+	}
+
+	// Reset the per-step event arrays first so that an early return below
+	// cannot leave stale events visible to the user.
+	b2BodyMoveEventArray_Clear( &world->bodyMoveEvents );
+	b2SensorBeginTouchEventArray_Clear( &world->sensorBeginEvents );
+	b2ContactBeginTouchEventArray_Clear( &world->contactBeginEvents );
+	b2ContactHitEventArray_Clear( &world->contactHitEvents );
+
+	// Every step starts with an empty profile.
+	world->profile = (b2Profile){ 0 };
+
+	if ( timeStep == 0.0f )
+	{
+		// Paused: switch to the other end event buffer and empty it, nothing else.
+		// todo_erin would be useful to still process collision while paused
+		int pausedIndex = 1 - world->endEventArrayIndex;
+		world->endEventArrayIndex = pausedIndex;
+		b2SensorEndTouchEventArray_Clear( world->sensorEndEvents + pausedIndex );
+		b2ContactEndTouchEventArray_Clear( world->contactEndEvents + pausedIndex );
+		return;
+	}
+
+	b2TracyCZoneNC( world_step, "Step", b2_colorBox2DGreen, true );
+
+	// The lock is released again on the last line of this function.
+	world->locked = true;
+	world->activeTaskCount = 0;
+	world->taskCount = 0;
+
+	uint64_t stepStart = b2GetTicks();
+
+	// Update collision pairs and create contacts
+	uint64_t pairStart = b2GetTicks();
+	b2UpdateBroadPhasePairs( world );
+	world->profile.pairs = b2GetMilliseconds( pairStart );
+
+	// Every member that is not named here starts out as zero, including inv_dt, h and inv_h.
+	b2StepContext context = {
+		.world = world,
+		.dt = timeStep,
+		.subStepCount = b2MaxInt( 1, subStepCount ),
+		.restitutionThreshold = world->restitutionThreshold,
+		.maxLinearVelocity = world->maxLinearSpeed,
+		.enableWarmStarting = world->enableWarmStarting,
+	};
+
+	// A non-positive step keeps the zeroed inverse step, sub-step and inverse sub-step.
+	if ( timeStep > 0.0f )
+	{
+		context.inv_dt = 1.0f / timeStep;
+		context.h = timeStep / context.subStepCount;
+		context.inv_h = context.subStepCount * context.inv_dt;
+	}
+
+	world->inv_h = context.inv_h;
+
+	// Hertz values get reduced for large time steps: contacts are capped at a quarter
+	// and joints at an eighth of the inverse sub-step.
+	float cappedContactHertz = b2MinFloat( world->contactHertz, 0.25f * context.inv_h );
+	float cappedJointHertz = b2MinFloat( world->jointHertz, 0.125f * context.inv_h );
+
+	// The static softness is built from twice the capped contact hertz.
+	context.contactSoftness = b2MakeSoft( cappedContactHertz, world->contactDampingRatio, context.h );
+	context.staticSoftness = b2MakeSoft( 2.0f * cappedContactHertz, world->contactDampingRatio, context.h );
+	context.jointSoftness = b2MakeSoft( cappedJointHertz, world->jointDampingRatio, context.h );
+
+	// Update contacts
+	uint64_t collideStart = b2GetTicks();
+	b2Collide( &context );
+	world->profile.collide = b2GetMilliseconds( collideStart );
+
+	// Integrate velocities, solve velocity constraints, and integrate positions.
+	if ( context.dt > 0.0f )
+	{
+		uint64_t solveStart = b2GetTicks();
+		b2Solve( world, &context );
+		world->profile.solve = b2GetMilliseconds( solveStart );
+	}
+
+	// Update sensors
+	uint64_t sensorStart = b2GetTicks();
+	b2OverlapSensors( world );
+	world->profile.sensors = b2GetMilliseconds( sensorStart );
+
+	world->profile.step = b2GetMilliseconds( stepStart );
+
+	// The arena is expected to hold no allocations at this point.
+	B2_ASSERT( b2GetArenaAllocation( &world->arena ) == 0 );
+
+	// Ensure stack is large enough
+	b2GrowArena( &world->arena );
+
+	// Make sure all tasks that were started were also finished
+	B2_ASSERT( world->activeTaskCount == 0 );
+
+	b2TracyCZoneEnd( world_step );
+
+	// Swap end event array buffers: flip the index, then empty the arrays it now selects.
+	world->endEventArrayIndex = 1 - world->endEventArrayIndex;
+	b2SensorEndTouchEventArray_Clear( world->sensorEndEvents + world->endEventArrayIndex );
+	b2ContactEndTouchEventArray_Clear( world->contactEndEvents + world->endEventArrayIndex );
+	world->locked = false;
+}
+
+// Draws a single shape through the callbacks in draw, placing it with transform xf.
+// Capsules, circles, polygons, segments and chain segments are handled; any other
+// shape type draws nothing.
+static void b2DrawShape( b2DebugDraw* draw, b2Shape* shape, b2Transform xf, b2HexColor color )
+{
+	switch ( shape->type )
+	{
+		case b2_capsuleShape:
+		{
+			b2Vec2 a = b2TransformPoint( xf, shape->capsule.center1 );
+			b2Vec2 b = b2TransformPoint( xf, shape->capsule.center2 );
+			draw->DrawSolidCapsuleFcn( a, b, shape->capsule.radius, color, draw->context );
+			break;
+		}
+
+		case b2_circleShape:
+		{
+			// The circle callback takes a transform, so move the local copy's origin to the circle center.
+			xf.p = b2TransformPoint( xf, shape->circle.center );
+			draw->DrawSolidCircleFcn( xf, shape->circle.radius, color, draw->context );
+			break;
+		}
+
+		case b2_polygonShape:
+		{
+			b2Polygon* polygon = &shape->polygon;
+			draw->DrawSolidPolygonFcn( xf, polygon->vertices, polygon->count, polygon->radius, color, draw->context );
+			break;
+		}
+
+		case b2_segmentShape:
+		{
+			b2Vec2 a = b2TransformPoint( xf, shape->segment.point1 );
+			b2Vec2 b = b2TransformPoint( xf, shape->segment.point2 );
+			draw->DrawSegmentFcn( a, b, color, draw->context );
+			break;
+		}
+
+		case b2_chainSegmentShape:
+		{
+			b2Vec2 a = b2TransformPoint( xf, shape->chainSegment.segment.point1 );
+			b2Vec2 b = b2TransformPoint( xf, shape->chainSegment.segment.point2 );
+			draw->DrawSegmentFcn( a, b, color, draw->context );
+			draw->DrawPointFcn( b, 4.0f, color, draw->context );
+			draw->DrawSegmentFcn( a, b2Lerp( a, b, 0.1f ), b2_colorPaleGreen, draw->context );
+			break;
+		}
+
+		default:
+			break;
+	}
+}
+
+struct DrawContext // bundle handed to DrawQueryCallback through the tree query's context pointer
+{
+	b2World* world; // world whose shapes, bodies and debugBodySet the callback uses
+	b2DebugDraw* draw; // draw flags and callbacks used by the callback
+};
+
+// Tree query callback used for bounded debug drawing. userData holds a shape id and context
+// points at a struct DrawContext. The shape's body is flagged in world->debugBodySet, then the
+// shape and/or its fat AABB are drawn according to the draw flags. Always returns true.
+static bool DrawQueryCallback( int proxyId, uint64_t userData, void* context )
+{
+	B2_UNUSED( proxyId );
+
+	struct DrawContext* query = context;
+	b2DebugDraw* draw = query->draw;
+	b2World* world = query->world;
+
+	int shapeId = (int)userData;
+	b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+	B2_ASSERT( shape->id == shapeId );
+
+	// Flag the owning body so it can be visited once the query is done.
+	b2SetBit( &world->debugBodySet, shape->bodyId );
+
+	if ( draw->drawShapes )
+	{
+		b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId );
+		b2BodySim* bodySim = b2GetBodySim( world, body );
+
+		// The first rule that matches decides the color; gray is used when none does.
+		b2HexColor color = b2_colorGray;
+		if ( shape->customColor != 0 )
+		{
+			color = shape->customColor;
+		}
+		else if ( body->type == b2_dynamicBody && body->mass == 0.0f )
+		{
+			// Bad body
+			color = b2_colorRed;
+		}
+		else if ( body->setIndex == b2_disabledSet )
+		{
+			color = b2_colorSlateGray;
+		}
+		else if ( shape->sensorIndex != B2_NULL_INDEX )
+		{
+			color = b2_colorWheat;
+		}
+		else if ( bodySim->isBullet && body->setIndex == b2_awakeSet )
+		{
+			color = b2_colorTurquoise;
+		}
+		else if ( body->isSpeedCapped )
+		{
+			color = b2_colorYellow;
+		}
+		else if ( bodySim->isFast )
+		{
+			color = b2_colorSalmon;
+		}
+		else if ( body->type == b2_staticBody )
+		{
+			color = b2_colorPaleGreen;
+		}
+		else if ( body->type == b2_kinematicBody )
+		{
+			color = b2_colorRoyalBlue;
+		}
+		else if ( body->setIndex == b2_awakeSet )
+		{
+			color = b2_colorPink;
+		}
+
+		b2DrawShape( draw, shape, bodySim->transform, color );
+	}
+
+	if ( draw->drawBounds )
+	{
+		// Outline of the shape's fat AABB, starting at the lower bound corner.
+		b2Vec2 lo = shape->fatAABB.lowerBound;
+		b2Vec2 hi = shape->fatAABB.upperBound;
+		b2Vec2 corners[4] = { { lo.x, lo.y },
+							  { hi.x, lo.y },
+							  { hi.x, hi.y },
+							  { lo.x, hi.y } };
+		draw->DrawPolygonFcn( corners, 4, b2_colorGold, draw->context );
+	}
+
+	return true;
+}
+
+// Debug draw limited to draw->drawingBounds: queries the broad-phase trees, then draws per-body data for the bodies found.
+// todo this has varying order for moving shapes, causing flicker when overlapping shapes are moving; solution: display order by shape id modulus 3, keep 3 buckets in GLSolid* and flush in 3 passes.
+static void b2DrawWithBounds( b2World* world, b2DebugDraw* draw )
+{
+	B2_ASSERT( b2IsValidAABB( draw->drawingBounds ) );
+
+	const float k_impulseScale = 1.0f;
+	const float k_axisScale = 0.3f;
+	b2HexColor speculativeColor = b2_colorGainsboro;
+	b2HexColor addColor = b2_colorGreen;
+	b2HexColor persistColor = b2_colorBlue;
+	b2HexColor normalColor = b2_colorDimGray;
+	b2HexColor impulseColor = b2_colorMagenta;
+	b2HexColor frictionColor = b2_colorYellow;
+
+	b2HexColor graphColors[B2_GRAPH_COLOR_COUNT] = { b2_colorRed,		b2_colorOrange,	   b2_colorYellow, b2_colorGreen,
+													 b2_colorCyan,		b2_colorBlue,	   b2_colorViolet, b2_colorPink,
+													 b2_colorChocolate, b2_colorGoldenRod, b2_colorCoral,  b2_colorBlack };
+
+	int bodyCapacity = b2GetIdCapacity( &world->bodyIdPool );
+	b2SetBitCountAndClear( &world->debugBodySet, bodyCapacity );
+
+	int jointCapacity = b2GetIdCapacity( &world->jointIdPool );
+	b2SetBitCountAndClear( &world->debugJointSet, jointCapacity );
+
+	int contactCapacity = b2GetIdCapacity( &world->contactIdPool );
+	b2SetBitCountAndClear( &world->debugContactSet, contactCapacity );
+
+	struct DrawContext drawContext = { world, draw };
+
+	for ( int i = 0; i < b2_bodyTypeCount; ++i ) // one tree per body type; DrawQueryCallback draws shapes and flags their bodies
+	{
+		b2DynamicTree_Query( world->broadPhase.trees + i, draw->drawingBounds, B2_DEFAULT_MASK_BITS, DrawQueryCallback,
+							 &drawContext );
+	}
+
+	uint32_t wordCount = world->debugBodySet.blockCount; // visit every body flagged during the queries
+	uint64_t* bits = world->debugBodySet.bits;
+	for ( uint32_t k = 0; k < wordCount; ++k )
+	{
+		uint64_t word = bits[k];
+		while ( word != 0 )
+		{
+			uint32_t ctz = b2CTZ64( word );
+			uint32_t bodyId = 64 * k + ctz;
+
+			b2Body* body = b2BodyArray_Get( &world->bodies, bodyId );
+
+			if ( draw->drawBodyNames && body->name[0] != 0 )
+			{
+				b2Vec2 offset = { 0.1f, 0.1f };
+				b2BodySim* bodySim = b2GetBodySim( world, body );
+
+				b2Transform transform = { bodySim->center, bodySim->transform.q };
+				draw->DrawTransformFcn( transform, draw->context );
+
+				b2Vec2 p = b2TransformPoint( transform, offset );
+
+				draw->DrawStringFcn( p, body->name, b2_colorBlueViolet, draw->context );
+			}
+
+			if ( draw->drawMass && body->type == b2_dynamicBody )
+			{
+				b2Vec2 offset = { 0.1f, 0.1f };
+				b2BodySim* bodySim = b2GetBodySim( world, body );
+
+				b2Transform transform = { bodySim->center, bodySim->transform.q };
+				draw->DrawTransformFcn( transform, draw->context );
+
+				b2Vec2 p = b2TransformPoint( transform, offset );
+
+				char buffer[32];
+				snprintf( buffer, 32, "  %.2f", body->mass );
+				draw->DrawStringFcn( p, buffer, b2_colorWhite, draw->context );
+			}
+
+			if ( draw->drawJoints )
+			{
+				int jointKey = body->headJointKey;
+				while ( jointKey != B2_NULL_INDEX )
+				{
+					int jointId = jointKey >> 1;
+					int edgeIndex = jointKey & 1;
+					b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+
+					// avoid double draw (debugJointSet records the joints already drawn)
+					if ( b2GetBit( &world->debugJointSet, jointId ) == false )
+					{
+						b2DrawJoint( draw, world, joint );
+						b2SetBit( &world->debugJointSet, jointId );
+					}
+					else
+					{
+						// todo testing (the statement below has no effect)
+						edgeIndex += 0;
+					}
+
+					jointKey = joint->edges[edgeIndex].nextKey;
+				}
+			}
+
+			const float linearSlop = B2_LINEAR_SLOP;
+			if ( draw->drawContacts && body->type == b2_dynamicBody && body->setIndex == b2_awakeSet )
+			{
+				int contactKey = body->headContactKey;
+				while ( contactKey != B2_NULL_INDEX )
+				{
+					int contactId = contactKey >> 1;
+					int edgeIndex = contactKey & 1;
+					b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+					contactKey = contact->edges[edgeIndex].nextKey; // advance before the possible continue below
+
+					if ( contact->setIndex != b2_awakeSet || contact->colorIndex == B2_NULL_INDEX )
+					{
+						continue;
+					}
+
+					// avoid double draw (debugContactSet records the contacts already drawn)
+					if ( b2GetBit( &world->debugContactSet, contactId ) == false )
+					{
+						B2_ASSERT( 0 <= contact->colorIndex && contact->colorIndex < B2_GRAPH_COLOR_COUNT );
+
+						b2GraphColor* gc = world->constraintGraph.colors + contact->colorIndex;
+						b2ContactSim* contactSim = b2ContactSimArray_Get( &gc->contactSims, contact->localIndex );
+						int pointCount = contactSim->manifold.pointCount;
+						b2Vec2 normal = contactSim->manifold.normal;
+						char buffer[32];
+
+						for ( int j = 0; j < pointCount; ++j )
+						{
+							b2ManifoldPoint* point = contactSim->manifold.points + j;
+
+							if ( draw->drawGraphColors )
+							{
+								// graph color
+								float pointSize = contact->colorIndex == B2_OVERFLOW_INDEX ? 7.5f : 5.0f;
+								draw->DrawPointFcn( point->point, pointSize, graphColors[contact->colorIndex], draw->context );
+								// g_draw.DrawString(point->position, "%d", point->color);
+							}
+							else if ( point->separation > linearSlop )
+							{
+								// Speculative
+								draw->DrawPointFcn( point->point, 5.0f, speculativeColor, draw->context );
+							}
+							else if ( point->persisted == false )
+							{
+								// Add
+								draw->DrawPointFcn( point->point, 10.0f, addColor, draw->context );
+							}
+							else if ( point->persisted == true )
+							{
+								// Persist
+								draw->DrawPointFcn( point->point, 5.0f, persistColor, draw->context );
+							}
+
+							if ( draw->drawContactNormals )
+							{
+								b2Vec2 p1 = point->point;
+								b2Vec2 p2 = b2MulAdd( p1, k_axisScale, normal );
+								draw->DrawSegmentFcn( p1, p2, normalColor, draw->context );
+							}
+							else if ( draw->drawContactImpulses )
+							{
+								b2Vec2 p1 = point->point;
+								b2Vec2 p2 = b2MulAdd( p1, k_impulseScale * point->normalImpulse, normal );
+								draw->DrawSegmentFcn( p1, p2, impulseColor, draw->context );
+								snprintf( buffer, B2_ARRAY_COUNT( buffer ), "%.1f", 1000.0f * point->normalImpulse );
+								draw->DrawStringFcn( p1, buffer, b2_colorWhite, draw->context );
+							}
+
+							if ( draw->drawContactFeatures )
+							{
+								snprintf( buffer, B2_ARRAY_COUNT( buffer ), "%d", point->id );
+								draw->DrawStringFcn( point->point, buffer, b2_colorOrange, draw->context );
+							}
+
+							if ( draw->drawFrictionImpulses )
+							{
+								b2Vec2 tangent = b2RightPerp( normal );
+								b2Vec2 p1 = point->point;
+								b2Vec2 p2 = b2MulAdd( p1, k_impulseScale * point->tangentImpulse, tangent );
+								draw->DrawSegmentFcn( p1, p2, frictionColor, draw->context );
+								snprintf( buffer, B2_ARRAY_COUNT( buffer ), "%.1f", 1000.0f * point->tangentImpulse );
+								draw->DrawStringFcn( p1, buffer, b2_colorWhite, draw->context );
+							}
+						}
+
+						b2SetBit( &world->debugContactSet, contactId );
+					}
+					else
+					{
+						// todo testing (the statement below has no effect)
+						edgeIndex += 0;
+					}
+
+					contactKey = contact->edges[edgeIndex].nextKey; // NOTE(review): repeats the advance done right after the contact lookup; same value
+				}
+			}
+
+			// Clear the smallest set bit
+			word = word & ( word - 1 );
+		}
+	}
+}
+
+// Draws the world through the callbacks in draw; returns without drawing while the world is locked.
+void b2World_Draw( b2WorldId worldId, b2DebugDraw* draw )
+{
+	b2World* world = b2GetWorldFromId( worldId );
+	B2_ASSERT( world->locked == false );
+	if ( world->locked )
+	{
+		return;
+	}
+
+	// todo it seems bounds drawing is fast enough for regular usage
+	if ( draw->useDrawingBounds )
+	{
+		b2DrawWithBounds( world, draw );
+		return;
+	}
+
+	if ( draw->drawShapes )
+	{
+		int setCount = world->solverSets.count;
+		for ( int setIndex = 0; setIndex < setCount; ++setIndex )
+		{
+			b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, setIndex );
+			int bodyCount = set->bodySims.count;
+			for ( int bodyIndex = 0; bodyIndex < bodyCount; ++bodyIndex )
+			{
+				b2BodySim* bodySim = set->bodySims.data + bodyIndex;
+				b2Body* body = b2BodyArray_Get( &world->bodies, bodySim->bodyId );
+				B2_ASSERT( body->setIndex == setIndex );
+
+				b2Transform xf = bodySim->transform;
+				int shapeId = body->headShapeId;
+				while ( shapeId != B2_NULL_INDEX )
+				{
+					b2Shape* shape = world->shapes.data + shapeId;
+					b2HexColor color;
+
+					if ( shape->customColor != 0 )
+					{
+						color = shape->customColor;
+					}
+					else if ( body->type == b2_dynamicBody && body->mass == 0.0f )
+					{
+						// Bad body
+						color = b2_colorRed;
+					}
+					else if ( body->setIndex == b2_disabledSet )
+					{
+						color = b2_colorSlateGray;
+					}
+					else if ( shape->sensorIndex != B2_NULL_INDEX )
+					{
+						color = b2_colorWheat;
+					}
+					else if ( bodySim->isBullet && body->setIndex == b2_awakeSet )
+					{
+						color = b2_colorTurquoise;
+					}
+					else if ( body->isSpeedCapped )
+					{
+						color = b2_colorYellow;
+					}
+					else if ( bodySim->isFast )
+					{
+						color = b2_colorSalmon;
+					}
+					else if ( body->type == b2_staticBody )
+					{
+						color = b2_colorPaleGreen;
+					}
+					else if ( body->type == b2_kinematicBody )
+					{
+						color = b2_colorRoyalBlue;
+					}
+					else if ( body->setIndex == b2_awakeSet )
+					{
+						color = b2_colorPink;
+					}
+					else
+					{
+						color = b2_colorGray;
+					}
+
+					b2DrawShape( draw, shape, xf, color );
+					shapeId = shape->nextShapeId;
+				}
+			}
+		}
+	}
+
+	if ( draw->drawJoints )
+	{
+		int count = world->joints.count;
+		for ( int i = 0; i < count; ++i )
+		{
+			b2Joint* joint = world->joints.data + i;
+			if ( joint->setIndex == B2_NULL_INDEX )
+			{
+				continue;
+			}
+
+			b2DrawJoint( draw, world, joint );
+		}
+	}
+
+	if ( draw->drawBounds )
+	{
+		b2HexColor color = b2_colorGold;
+
+		int setCount = world->solverSets.count;
+		for ( int setIndex = 0; setIndex < setCount; ++setIndex )
+		{
+			b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, setIndex );
+			int bodyCount = set->bodySims.count;
+			for ( int bodyIndex = 0; bodyIndex < bodyCount; ++bodyIndex )
+			{
+				b2BodySim* bodySim = set->bodySims.data + bodyIndex;
+
+				char buffer[32];
+				snprintf( buffer, 32, "%d", bodySim->bodyId );
+				draw->DrawStringFcn( bodySim->center, buffer, b2_colorWhite, draw->context );
+
+				b2Body* body = b2BodyArray_Get( &world->bodies, bodySim->bodyId );
+				B2_ASSERT( body->setIndex == setIndex );
+
+				int shapeId = body->headShapeId;
+				while ( shapeId != B2_NULL_INDEX )
+				{
+					b2Shape* shape = world->shapes.data + shapeId;
+					b2AABB aabb = shape->fatAABB;
+
+					b2Vec2 vs[4] = { { aabb.lowerBound.x, aabb.lowerBound.y },
+									 { aabb.upperBound.x, aabb.lowerBound.y },
+									 { aabb.upperBound.x, aabb.upperBound.y },
+									 { aabb.lowerBound.x, aabb.upperBound.y } };
+
+					draw->DrawPolygonFcn( vs, 4, color, draw->context );
+
+					shapeId = shape->nextShapeId;
+				}
+			}
+		}
+	}
+
+	if ( draw->drawBodyNames )
+	{
+		b2Vec2 offset = { 0.1f, 0.2f };
+		int count = world->bodies.count;
+		for ( int i = 0; i < count; ++i )
+		{
+			b2Body* body = world->bodies.data + i;
+			if ( body->setIndex == B2_NULL_INDEX )
+			{
+				continue;
+			}
+
+			if ( body->name[0] == 0 )
+			{
+				continue;
+			}
+
+			b2Transform transform = b2GetBodyTransformQuick( world, body );
+			b2Vec2 p = b2TransformPoint( transform, offset );
+
+			draw->DrawStringFcn( p, body->name, b2_colorBlueViolet, draw->context );
+		}
+	}
+
+	if ( draw->drawMass )
+	{
+		b2Vec2 offset = { 0.1f, 0.1f };
+		int setCount = world->solverSets.count;
+		for ( int setIndex = 0; setIndex < setCount; ++setIndex )
+		{
+			b2SolverSet* set = b2SolverSetArray_Get( &world->solverSets, setIndex );
+			int bodyCount = set->bodySims.count;
+			for ( int bodyIndex = 0; bodyIndex < bodyCount; ++bodyIndex )
+			{
+				b2BodySim* bodySim = set->bodySims.data + bodyIndex;
+
+				b2Transform transform = { bodySim->center, bodySim->transform.q };
+				draw->DrawTransformFcn( transform, draw->context );
+
+				b2Vec2 p = b2TransformPoint( transform, offset );
+
+				char buffer[32];
+				float mass = bodySim->invMass > 0.0f ? 1.0f / bodySim->invMass : 0.0f; // a non-positive inverse mass is shown as zero mass
+				snprintf( buffer, 32, "  %.2f", mass );
+				draw->DrawStringFcn( p, buffer, b2_colorWhite, draw->context );
+			}
+		}
+	}
+
+	if ( draw->drawContacts )
+	{
+		const float k_impulseScale = 1.0f;
+		const float k_axisScale = 0.3f;
+		const float linearSlop = B2_LINEAR_SLOP;
+
+		b2HexColor speculativeColor = b2_colorLightGray;
+		b2HexColor addColor = b2_colorGreen;
+		b2HexColor persistColor = b2_colorBlue;
+		b2HexColor normalColor = b2_colorDimGray;
+		b2HexColor impulseColor = b2_colorMagenta;
+		b2HexColor frictionColor = b2_colorYellow;
+
+		b2HexColor colors[B2_GRAPH_COLOR_COUNT] = { b2_colorRed,	   b2_colorOrange,	  b2_colorYellow, b2_colorGreen,
+													b2_colorCyan,	   b2_colorBlue,	  b2_colorViolet, b2_colorPink,
+													b2_colorChocolate, b2_colorGoldenRod, b2_colorCoral,  b2_colorBlack };
+
+		for ( int colorIndex = 0; colorIndex < B2_GRAPH_COLOR_COUNT; ++colorIndex )
+		{
+			b2GraphColor* graphColor = world->constraintGraph.colors + colorIndex;
+
+			int contactCount = graphColor->contactSims.count;
+			for ( int contactIndex = 0; contactIndex < contactCount; ++contactIndex )
+			{
+				b2ContactSim* contact = graphColor->contactSims.data + contactIndex;
+				int pointCount = contact->manifold.pointCount;
+				b2Vec2 normal = contact->manifold.normal;
+				char buffer[32];
+
+				for ( int j = 0; j < pointCount; ++j )
+				{
+					b2ManifoldPoint* point = contact->manifold.points + j;
+
+					if ( draw->drawGraphColors && 0 <= colorIndex && colorIndex < B2_GRAPH_COLOR_COUNT )
+					{
+						// graph color: colors has B2_GRAPH_COLOR_COUNT entries, so the upper bound must be strict
+						float pointSize = colorIndex == B2_OVERFLOW_INDEX ? 7.5f : 5.0f;
+						draw->DrawPointFcn( point->point, pointSize, colors[colorIndex], draw->context );
+					}
+					else if ( point->separation > linearSlop )
+					{
+						// Speculative
+						draw->DrawPointFcn( point->point, 5.0f, speculativeColor, draw->context );
+					}
+					else if ( point->persisted == false )
+					{
+						// Add
+						draw->DrawPointFcn( point->point, 10.0f, addColor, draw->context );
+					}
+					else if ( point->persisted == true )
+					{
+						// Persist
+						draw->DrawPointFcn( point->point, 5.0f, persistColor, draw->context );
+					}
+
+					if ( draw->drawContactNormals )
+					{
+						b2Vec2 p1 = point->point;
+						b2Vec2 p2 = b2MulAdd( p1, k_axisScale, normal );
+						draw->DrawSegmentFcn( p1, p2, normalColor, draw->context );
+					}
+					else if ( draw->drawContactImpulses )
+					{
+						b2Vec2 p1 = point->point;
+						b2Vec2 p2 = b2MulAdd( p1, k_impulseScale * point->normalImpulse, normal );
+						draw->DrawSegmentFcn( p1, p2, impulseColor, draw->context );
+						snprintf( buffer, B2_ARRAY_COUNT( buffer ), "%.2f", 1000.0f * point->normalImpulse );
+						draw->DrawStringFcn( p1, buffer, b2_colorWhite, draw->context );
+					}
+
+					if ( draw->drawContactFeatures )
+					{
+						snprintf( buffer, B2_ARRAY_COUNT( buffer ), "%d", point->id );
+						draw->DrawStringFcn( point->point, buffer, b2_colorOrange, draw->context );
+					}
+
+					if ( draw->drawFrictionImpulses )
+					{
+						b2Vec2 tangent = b2RightPerp( normal );
+						b2Vec2 p1 = point->point;
+						b2Vec2 p2 = b2MulAdd( p1, k_impulseScale * point->tangentImpulse, tangent );
+						draw->DrawSegmentFcn( p1, p2, frictionColor, draw->context );
+						snprintf( buffer, B2_ARRAY_COUNT( buffer ), "%.2f", point->tangentImpulse ); // NOTE(review): not scaled by 1000 like the normal impulse label; confirm intended
+						draw->DrawStringFcn( p1, buffer, b2_colorWhite, draw->context );
+					}
+				}
+			}
+		}
+	}
+
+	if ( draw->drawIslands )
+	{
+		int count = world->islands.count;
+		for ( int i = 0; i < count; ++i )
+		{
+			b2Island* island = world->islands.data + i;
+			if ( island->setIndex == B2_NULL_INDEX )
+			{
+				continue;
+			}
+
+			int shapeCount = 0;
+			b2AABB aabb = {
+				.lowerBound = { FLT_MAX, FLT_MAX },
+				.upperBound = { -FLT_MAX, -FLT_MAX },
+			};
+
+			int bodyId = island->headBody;
+			while ( bodyId != B2_NULL_INDEX )
+			{
+				b2Body* body = b2BodyArray_Get( &world->bodies, bodyId );
+				int shapeId = body->headShapeId;
+				while ( shapeId != B2_NULL_INDEX )
+				{
+					b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+					aabb = b2AABB_Union( aabb, shape->fatAABB );
+					shapeCount += 1;
+					shapeId = shape->nextShapeId;
+				}
+
+				bodyId = body->islandNext;
+			}
+
+			if ( shapeCount > 0 )
+			{
+				b2Vec2 vs[4] = { { aabb.lowerBound.x, aabb.lowerBound.y },
+								 { aabb.upperBound.x, aabb.lowerBound.y },
+								 { aabb.upperBound.x, aabb.upperBound.y },
+								 { aabb.lowerBound.x, aabb.upperBound.y } };
+
+				draw->DrawPolygonFcn( vs, 4, b2_colorOrangeRed, draw->context );
+			}
+		}
+	}
+}
+
+b2BodyEvents b2World_GetBodyEvents( b2WorldId worldId )
+{
+	// Expose the world's body move event buffer; a locked world reports no events.
+	b2BodyEvents moveEvents = { 0 };
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+	if ( w->locked == false )
+	{
+		moveEvents = (b2BodyEvents){ w->bodyMoveEvents.data, w->bodyMoveEvents.count };
+	}
+
+	return moveEvents;
+}
+
+b2SensorEvents b2World_GetSensorEvents( b2WorldId worldId )
+{
+	// Hand out the sensor begin events together with the end events stored in the
+	// previous end-event buffer. A locked world yields an empty result.
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	b2SensorEvents result = { 0 };
+	if ( w->locked )
+	{
+		return result;
+	}
+
+	// Careful to use previous buffer
+	int previousIndex = 1 - w->endEventArrayIndex;
+
+	result.beginEvents = w->sensorBeginEvents.data;
+	result.beginCount = w->sensorBeginEvents.count;
+
+	result.endEvents = w->sensorEndEvents[previousIndex].data;
+	result.endCount = w->sensorEndEvents[previousIndex].count;
+	return result;
+}
+
+b2ContactEvents b2World_GetContactEvents( b2WorldId worldId )
+{
+	// Hand out the contact begin and hit events together with the end events
+	// stored in the previous end-event buffer. A locked world yields an empty result.
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	b2ContactEvents result = { 0 };
+	if ( w->locked )
+	{
+		return result;
+	}
+
+	// Careful to use previous buffer
+	int previousIndex = 1 - w->endEventArrayIndex;
+
+	result.beginEvents = w->contactBeginEvents.data;
+	result.beginCount = w->contactBeginEvents.count;
+
+	result.endEvents = w->contactEndEvents[previousIndex].data;
+	result.endCount = w->contactEndEvents[previousIndex].count;
+
+	result.hitEvents = w->contactHitEvents.data;
+	result.hitCount = w->contactHitEvents.count;
+
+	return result;
+}
+
+bool b2World_IsValid( b2WorldId id )
+{
+	// index1 is one-based: accept only 1..B2_MAX_WORLDS
+	bool indexInRange = 1 <= id.index1 && id.index1 <= B2_MAX_WORLDS;
+	if ( indexInRange == false )
+	{
+		return false;
+	}
+
+	int slot = id.index1 - 1;
+	b2World* candidate = b2_worlds + slot;
+	if ( candidate->worldId != slot )
+	{
+		// world is not allocated
+		return false;
+	}
+	return candidate->generation == id.generation;
+}
+
+bool b2Body_IsValid( b2BodyId id )
+{
+	// Every stage below must pass for the id to be reported as valid:
+	// world slot in range, world allocated, body index in range, body live, generation equal.
+	if ( id.world0 >= B2_MAX_WORLDS )
+	{
+		// invalid world
+		return false;
+	}
+
+	b2World* owner = b2_worlds + id.world0;
+	if ( owner->worldId != id.world0 )
+	{
+		// world is free
+		return false;
+	}
+
+	// index1 is one-based
+	if ( id.index1 < 1 || id.index1 > owner->bodies.count )
+	{
+		// invalid index
+		return false;
+	}
+
+	b2Body* candidate = owner->bodies.data + ( id.index1 - 1 );
+	if ( candidate->setIndex == B2_NULL_INDEX )
+	{
+		// this was freed
+		return false;
+	}
+
+	B2_ASSERT( candidate->localIndex != B2_NULL_INDEX );
+
+	// a generation mismatch means this id is orphaned
+	bool sameGeneration = candidate->generation == id.generation;
+
+	return sameGeneration;
+}
+
+bool b2Shape_IsValid( b2ShapeId id )
+{
+	if ( id.world0 >= B2_MAX_WORLDS )
+	{
+		return false;
+	}
+
+	b2World* owner = b2_worlds + id.world0;
+	if ( owner->worldId != id.world0 )
+	{
+		// world is free
+		return false;
+	}
+
+	// index1 is one-based, the shape array is zero-based
+	int slot = id.index1 - 1;
+	if ( slot < 0 || slot >= owner->shapes.count )
+	{
+		return false;
+	}
+
+	b2Shape* candidate = owner->shapes.data + slot;
+	if ( candidate->id == B2_NULL_INDEX )
+	{
+		// shape is free
+		return false;
+	}
+
+	B2_ASSERT( candidate->id == slot );
+	return candidate->generation == id.generation;
+}
+
+bool b2Chain_IsValid( b2ChainId id )
+{
+	if ( id.world0 >= B2_MAX_WORLDS )
+	{
+		return false;
+	}
+
+	b2World* owner = b2_worlds + id.world0;
+	if ( owner->worldId != id.world0 )
+	{
+		// world is free
+		return false;
+	}
+
+	// index1 is one-based, the chain array is zero-based
+	int slot = id.index1 - 1;
+	if ( slot < 0 || slot >= owner->chainShapes.count )
+	{
+		return false;
+	}
+
+	b2ChainShape* candidate = owner->chainShapes.data + slot;
+	if ( candidate->id == B2_NULL_INDEX )
+	{
+		// chain is free
+		return false;
+	}
+
+	B2_ASSERT( candidate->id == slot );
+	return candidate->generation == id.generation;
+}
+
+bool b2Joint_IsValid( b2JointId id )
+{
+	if ( id.world0 >= B2_MAX_WORLDS )
+	{
+		return false;
+	}
+
+	b2World* owner = b2_worlds + id.world0;
+	if ( owner->worldId != id.world0 )
+	{
+		// world is free
+		return false;
+	}
+
+	// index1 is one-based, the joint array is zero-based
+	int slot = id.index1 - 1;
+	if ( slot < 0 || slot >= owner->joints.count )
+	{
+		return false;
+	}
+
+	b2Joint* candidate = owner->joints.data + slot;
+	if ( candidate->jointId == B2_NULL_INDEX )
+	{
+		// joint is free
+		return false;
+	}
+
+	B2_ASSERT( candidate->jointId == slot );
+	return candidate->generation == id.generation;
+}
+
+void b2World_EnableSleeping( b2WorldId worldId, bool flag )
+{
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	// Nothing to do when the world is locked or the setting is unchanged
+	if ( w->locked || w->enableSleep == flag )
+	{
+		return;
+	}
+
+	w->enableSleep = flag;
+	if ( flag )
+	{
+		return;
+	}
+
+	// Sleeping was just turned off: wake every sleeping set that still holds body sims
+	int sleepingEnd = w->solverSets.count;
+	for ( int setIndex = b2_firstSleepingSet; setIndex < sleepingEnd; ++setIndex )
+	{
+		b2SolverSet* sleeping = b2SolverSetArray_Get( &w->solverSets, setIndex );
+		if ( sleeping->bodySims.count <= 0 )
+		{
+			continue;
+		}
+		b2WakeSolverSet( w, setIndex );
+	}
+}
+
+bool b2World_IsSleepingEnabled( b2WorldId worldId )
+{
+	// Report the enableSleep flag stored on the world
+	return b2GetWorldFromId( worldId )->enableSleep;
+}
+
+void b2World_EnableWarmStarting( b2WorldId worldId, bool flag )
+{
+	// Store the warm starting flag; the write is skipped while the world is locked
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	if ( w->locked == false )
+	{
+		w->enableWarmStarting = flag;
+	}
+}
+
+bool b2World_IsWarmStartingEnabled( b2WorldId worldId )
+{
+	// Report the enableWarmStarting flag stored on the world
+	return b2GetWorldFromId( worldId )->enableWarmStarting;
+}
+
+int b2World_GetAwakeBodyCount( b2WorldId worldId )
+{
+	// Size of the body sim array held by the awake solver set
+	b2World* w = b2GetWorldFromId( worldId );
+	return b2SolverSetArray_Get( &w->solverSets, b2_awakeSet )->bodySims.count;
+}
+
+void b2World_EnableContinuous( b2WorldId worldId, bool flag )
+{
+	// Store the continuous flag; the write is skipped while the world is locked
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	if ( w->locked == false )
+	{
+		w->enableContinuous = flag;
+	}
+}
+
+bool b2World_IsContinuousEnabled( b2WorldId worldId )
+{
+	// Report the enableContinuous flag stored on the world
+	return b2GetWorldFromId( worldId )->enableContinuous;
+}
+
+void b2World_SetRestitutionThreshold( b2WorldId worldId, float value )
+{
+	// The value is passed through b2ClampFloat with bounds [0, FLT_MAX]; ignored while locked
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	if ( w->locked == false )
+	{
+		w->restitutionThreshold = b2ClampFloat( value, 0.0f, FLT_MAX );
+	}
+}
+
+float b2World_GetRestitutionThreshold( b2WorldId worldId )
+{
+	// Report the restitutionThreshold stored on the world
+	return b2GetWorldFromId( worldId )->restitutionThreshold;
+}
+
+void b2World_SetHitEventThreshold( b2WorldId worldId, float value )
+{
+	// The value is passed through b2ClampFloat with bounds [0, FLT_MAX]; ignored while locked
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	if ( w->locked == false )
+	{
+		w->hitEventThreshold = b2ClampFloat( value, 0.0f, FLT_MAX );
+	}
+}
+
+float b2World_GetHitEventThreshold( b2WorldId worldId )
+{
+	// Report the hitEventThreshold stored on the world
+	return b2GetWorldFromId( worldId )->hitEventThreshold;
+}
+
+void b2World_SetContactTuning( b2WorldId worldId, float hertz, float dampingRatio, float pushSpeed )
+{
+	// Each tuning value goes through b2ClampFloat with bounds [0, FLT_MAX]; nothing is stored while locked
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	if ( w->locked == false )
+	{
+		w->contactHertz = b2ClampFloat( hertz, 0.0f, FLT_MAX );
+		w->contactDampingRatio = b2ClampFloat( dampingRatio, 0.0f, FLT_MAX );
+		w->maxContactPushSpeed = b2ClampFloat( pushSpeed, 0.0f, FLT_MAX );
+	}
+}
+
+void b2World_SetJointTuning( b2WorldId worldId, float hertz, float dampingRatio )
+{
+	// Each tuning value goes through b2ClampFloat with bounds [0, FLT_MAX]; nothing is stored while locked
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	if ( w->locked == false )
+	{
+		w->jointHertz = b2ClampFloat( hertz, 0.0f, FLT_MAX );
+		w->jointDampingRatio = b2ClampFloat( dampingRatio, 0.0f, FLT_MAX );
+	}
+}
+
+void b2World_SetMaximumLinearSpeed( b2WorldId worldId, float maximumLinearSpeed )
+{
+	// The speed must be a valid, strictly positive float (checked by assert only)
+	B2_ASSERT( b2IsValidFloat( maximumLinearSpeed ) && 0.0f < maximumLinearSpeed );
+
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+
+	// A locked world keeps its current limit
+	if ( w->locked == false )
+	{
+		w->maxLinearSpeed = maximumLinearSpeed;
+	}
+}
+
+float b2World_GetMaximumLinearSpeed( b2WorldId worldId )
+{
+	// Report the maxLinearSpeed stored on the world
+	return b2GetWorldFromId( worldId )->maxLinearSpeed;
+}
+
+b2Profile b2World_GetProfile( b2WorldId worldId )
+{
+	// Return a copy of the profile struct stored on the world
+	return b2GetWorldFromId( worldId )->profile;
+}
+
+b2Counters b2World_GetCounters( b2WorldId worldId )
+{
+	// Gather id pool counts, broad-phase tree heights, memory figures and per-color constraint counts
+	b2World* w = b2GetWorldFromId( worldId );
+	b2Counters counters = { 0 };
+	counters.bodyCount = b2GetIdCount( &w->bodyIdPool );
+	counters.shapeCount = b2GetIdCount( &w->shapeIdPool );
+	counters.contactCount = b2GetIdCount( &w->contactIdPool );
+	counters.jointCount = b2GetIdCount( &w->jointIdPool );
+	counters.islandCount = b2GetIdCount( &w->islandIdPool );
+
+	// The static tree height is reported alone; treeHeight is the taller of the dynamic and kinematic trees
+	counters.staticTreeHeight = b2DynamicTree_GetHeight( w->broadPhase.trees + b2_staticBody );
+	int dynamicHeight = b2DynamicTree_GetHeight( w->broadPhase.trees + b2_dynamicBody );
+	int kinematicHeight = b2DynamicTree_GetHeight( w->broadPhase.trees + b2_kinematicBody );
+	counters.treeHeight = b2MaxInt( dynamicHeight, kinematicHeight );
+
+	counters.stackUsed = b2GetMaxArenaAllocation( &w->arena );
+	counters.byteCount = b2GetByteCount();
+	counters.taskCount = w->taskCount;
+
+	for ( int c = 0; c < B2_GRAPH_COLOR_COUNT; ++c )
+	{
+		counters.colorCounts[c] = w->constraintGraph.colors[c].contactSims.count + w->constraintGraph.colors[c].jointSims.count;
+	}
+	return counters;
+}
+
+void b2World_SetUserData( b2WorldId worldId, void* userData )
+{
+	// Store the caller's pointer on the world as-is
+	b2GetWorldFromId( worldId )->userData = userData;
+}
+
+void* b2World_GetUserData( b2WorldId worldId )
+{
+	// Return the pointer previously stored in the world's userData field
+	return b2GetWorldFromId( worldId )->userData;
+}
+
+void b2World_SetFrictionCallback( b2WorldId worldId, b2FrictionCallback* callback )
+{
+	// Install a user friction callback. Passing NULL falls back to
+	// b2DefaultFrictionCallback. The request is dropped while the world is locked.
+	b2World* w = b2GetWorldFromId( worldId );
+
+	if ( w->locked == false )
+	{
+		b2FrictionCallback* chosen = b2DefaultFrictionCallback;
+		if ( callback != NULL )
+		{
+			chosen = callback;
+		}
+
+		w->frictionCallback = chosen;
+	}
+}
+
+void b2World_SetRestitutionCallback( b2WorldId worldId, b2RestitutionCallback* callback )
+{
+	// Install a user restitution callback. Passing NULL falls back to
+	// b2DefaultRestitutionCallback. The request is dropped while the world is locked.
+	b2World* w = b2GetWorldFromId( worldId );
+
+	if ( w->locked == false )
+	{
+		b2RestitutionCallback* chosen = b2DefaultRestitutionCallback;
+		if ( callback != NULL )
+		{
+			chosen = callback;
+		}
+
+		w->restitutionCallback = chosen;
+	}
+}
+
+void b2World_DumpMemoryStats( b2WorldId worldId )
+{
+	// Write a byte-count report for the world's containers to "box2d_memory.txt"; returns silently if it cannot be opened
+	FILE* file = fopen( "box2d_memory.txt", "w" );
+	if ( file == NULL )
+	{
+		return;
+	}
+
+	b2World* world = b2GetWorldFromId( worldId );
+
+	// id pools
+	fprintf( file, "id pools\n" );
+	fprintf( file, "body ids: %d\n", b2GetIdBytes( &world->bodyIdPool ) );
+	fprintf( file, "solver set ids: %d\n", b2GetIdBytes( &world->solverSetIdPool ) );
+	fprintf( file, "joint ids: %d\n", b2GetIdBytes( &world->jointIdPool ) );
+	fprintf( file, "contact ids: %d\n", b2GetIdBytes( &world->contactIdPool ) );
+	fprintf( file, "island ids: %d\n", b2GetIdBytes( &world->islandIdPool ) );
+	fprintf( file, "shape ids: %d\n", b2GetIdBytes( &world->shapeIdPool ) );
+	fprintf( file, "chain ids: %d\n", b2GetIdBytes( &world->chainIdPool ) );
+	fprintf( file, "\n" );
+
+	// world arrays
+	fprintf( file, "world arrays\n" );
+	fprintf( file, "bodies: %d\n", b2BodyArray_ByteCount( &world->bodies ) );
+	fprintf( file, "solver sets: %d\n", b2SolverSetArray_ByteCount( &world->solverSets ) );
+	fprintf( file, "joints: %d\n", b2JointArray_ByteCount( &world->joints ) );
+	fprintf( file, "contacts: %d\n", b2ContactArray_ByteCount( &world->contacts ) );
+	fprintf( file, "islands: %d\n", b2IslandArray_ByteCount( &world->islands ) );
+	fprintf( file, "shapes: %d\n", b2ShapeArray_ByteCount( &world->shapes ) );
+	fprintf( file, "chains: %d\n", b2ChainShapeArray_ByteCount( &world->chainShapes ) );
+	fprintf( file, "\n" );
+
+	// broad-phase
+	fprintf( file, "broad-phase\n" );
+	fprintf( file, "static tree: %d\n", b2DynamicTree_GetByteCount( world->broadPhase.trees + b2_staticBody ) );
+	fprintf( file, "kinematic tree: %d\n", b2DynamicTree_GetByteCount( world->broadPhase.trees + b2_kinematicBody ) );
+	fprintf( file, "dynamic tree: %d\n", b2DynamicTree_GetByteCount( world->broadPhase.trees + b2_dynamicBody ) );
+	b2HashSet* moveSet = &world->broadPhase.moveSet;
+	fprintf( file, "moveSet: %d (%d, %d)\n", b2GetHashSetBytes( moveSet ), moveSet->count, moveSet->capacity );
+	fprintf( file, "moveArray: %d\n", b2IntArray_ByteCount( &world->broadPhase.moveArray ) );
+	b2HashSet* pairSet = &world->broadPhase.pairSet;
+	fprintf( file, "pairSet: %d (%d, %d)\n", b2GetHashSetBytes( pairSet ), pairSet->count, pairSet->capacity );
+	fprintf( file, "\n" );
+
+	// solver sets
+	int bodySimCapacity = 0;
+	int bodyStateCapacity = 0;
+	int jointSimCapacity = 0;
+	int contactSimCapacity = 0;
+	int islandSimCapacity = 0;
+	int solverSetCapacity = world->solverSets.count;
+	for ( int i = 0; i < solverSetCapacity; ++i )
+	{
+		b2SolverSet* set = world->solverSets.data + i;
+		if ( set->setIndex == B2_NULL_INDEX )
+		{
+			continue;
+		}
+
+		bodySimCapacity += set->bodySims.capacity;
+		bodyStateCapacity += set->bodyStates.capacity;
+		jointSimCapacity += set->jointSims.capacity;
+		contactSimCapacity += set->contactSims.capacity;
+		islandSimCapacity += set->islandSims.capacity;
+	}
+
+	fprintf( file, "solver sets\n" );
+	fprintf( file, "body sim: %d\n", bodySimCapacity * (int)sizeof( b2BodySim ) );
+	fprintf( file, "body state: %d\n", bodyStateCapacity * (int)sizeof( b2BodyState ) );
+	fprintf( file, "joint sim: %d\n", jointSimCapacity * (int)sizeof( b2JointSim ) );
+	fprintf( file, "contact sim: %d\n", contactSimCapacity * (int)sizeof( b2ContactSim ) );
+	fprintf( file, "island sim: %d\n", islandSimCapacity * (int)sizeof( b2IslandSim ) );
+	fprintf( file, "\n" );
+
+	// constraint graph
+	int bodyBitSetBytes = 0;
+	contactSimCapacity = 0;
+	jointSimCapacity = 0;
+	for ( int i = 0; i < B2_GRAPH_COLOR_COUNT; ++i )
+	{
+		b2GraphColor* c = world->constraintGraph.colors + i;
+		bodyBitSetBytes += b2GetBitSetBytes( &c->bodySet );
+		contactSimCapacity += c->contactSims.capacity;
+		jointSimCapacity += c->jointSims.capacity;
+	}
+
+	fprintf( file, "constraint graph\n" );
+	fprintf( file, "body bit sets: %d\n", bodyBitSetBytes );
+	fprintf( file, "joint sim: %d\n", jointSimCapacity * (int)sizeof( b2JointSim ) );
+	fprintf( file, "contact sim: %d\n", contactSimCapacity * (int)sizeof( b2ContactSim ) );
+	fprintf( file, "\n" );
+
+	// stack allocator
+	fprintf( file, "stack allocator: %d\n\n", world->arena.capacity );
+
+	// todo: chain shapes are not reported yet
+
+	fclose( file );
+}
+
+typedef struct WorldQueryContext // state handed to TreeQueryCallback by b2World_OverlapAABB
+{
+	b2World* world; // world whose shape array the callback reads
+	b2OverlapResultFcn* fcn; // user callback invoked for each shape that passes the filter
+	b2QueryFilter filter; // category/mask bits tested against each shape's filter
+	void* userContext; // passed through to fcn unchanged
+} WorldQueryContext;
+
+static bool TreeQueryCallback( int proxyId, uint64_t userData, void* context )
+{
+	// Tree visitor for b2World_OverlapAABB. Shapes rejected by the filter return true;
+	// otherwise the user callback's result is returned.
+	B2_UNUSED( proxyId );
+
+	WorldQueryContext* query = context;
+	b2World* w = query->world;
+
+	int shapeIndex = (int)userData;
+	b2Shape* candidate = b2ShapeArray_Get( &w->shapes, shapeIndex );
+
+	// Both sides must accept each other's category bits
+	bool queryAccepts = ( candidate->filter.categoryBits & query->filter.maskBits ) != 0;
+	bool shapeAccepts = ( candidate->filter.maskBits & query->filter.categoryBits ) != 0;
+	if ( queryAccepts == false || shapeAccepts == false )
+	{
+		return true;
+	}
+
+	b2ShapeId id = { shapeIndex + 1, w->worldId, candidate->generation };
+	return query->fcn( id, query->userContext );
+}
+
+b2TreeStats b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context )
+{
+	// Query each broad-phase tree with aabb, reporting shapes through TreeQueryCallback.
+	// The returned stats are the summed node and leaf visits; a locked world returns zeros.
+	b2TreeStats totals = { 0 };
+
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+	if ( w->locked )
+	{
+		return totals;
+	}
+
+	B2_ASSERT( b2IsValidAABB( aabb ) );
+
+	WorldQueryContext query = { .world = w, .fcn = fcn, .filter = filter, .userContext = context };
+	b2DynamicTree* trees = w->broadPhase.trees;
+	for ( int treeIndex = 0; treeIndex < b2_bodyTypeCount; ++treeIndex )
+	{
+		b2TreeStats visited = b2DynamicTree_Query( trees + treeIndex, aabb, filter.maskBits, TreeQueryCallback, &query );
+		totals.nodeVisits += visited.nodeVisits;
+		totals.leafVisits += visited.leafVisits;
+	}
+
+	return totals;
+}
+
+typedef struct WorldOverlapContext // state handed to TreeOverlapCallback by b2World_OverlapShape
+{
+	b2World* world; // world whose shape and body arrays the callback reads
+	b2OverlapResultFcn* fcn; // user callback invoked for each shape that passes filter and distance test
+	b2QueryFilter filter; // category/mask bits tested against each shape's filter
+	const b2ShapeProxy* proxy; // query shape, copied into proxyA of the distance input
+	void* userContext; // passed through to fcn unchanged
+} WorldOverlapContext;
+
+static bool TreeOverlapCallback( int proxyId, uint64_t userData, void* context )
+{
+	// Tree visitor for b2World_OverlapShape. A shape reaches the user callback only if it
+	// passes the category/mask filter and its distance to the query proxy is within tolerance.
+	B2_UNUSED( proxyId );
+
+	WorldOverlapContext* overlap = context;
+	b2World* w = overlap->world;
+
+	int shapeIndex = (int)userData;
+	b2Shape* candidate = b2ShapeArray_Get( &w->shapes, shapeIndex );
+
+	bool queryAccepts = ( candidate->filter.categoryBits & overlap->filter.maskBits ) != 0;
+	bool shapeAccepts = ( candidate->filter.maskBits & overlap->filter.categoryBits ) != 0;
+	if ( queryAccepts == false || shapeAccepts == false )
+	{
+		return true;
+	}
+
+	b2Body* owner = b2BodyArray_Get( &w->bodies, candidate->bodyId );
+	b2Transform bodyTransform = b2GetBodyTransformQuick( w, owner );
+
+	// The query proxy is used with the identity transform; the shape uses its body transform
+	b2DistanceInput distanceInput;
+	distanceInput.proxyA = *overlap->proxy;
+	distanceInput.proxyB = b2MakeShapeDistanceProxy( candidate );
+	distanceInput.transformA = b2Transform_identity;
+	distanceInput.transformB = bodyTransform;
+	distanceInput.useRadii = true;
+
+	b2SimplexCache simplexCache = { 0 };
+	b2DistanceOutput distanceOutput = b2ShapeDistance( &distanceInput, &simplexCache, NULL, 0 );
+
+	const float tolerance = 0.1f * B2_LINEAR_SLOP;
+	if ( distanceOutput.distance > tolerance )
+	{
+		return true;
+	}
+
+	b2ShapeId id = { candidate->id + 1, w->worldId, candidate->generation };
+	return overlap->fcn( id, overlap->userContext );
+}
+
+b2TreeStats b2World_OverlapShape( b2WorldId worldId, const b2ShapeProxy* proxy, b2QueryFilter filter, b2OverlapResultFcn* fcn,
+								  void* context )
+{
+	// Query every broad-phase tree with the box built from the proxy; TreeOverlapCallback
+	// performs the distance test. A locked world returns zeroed stats.
+	b2TreeStats totals = { 0 };
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+	if ( w->locked )
+	{
+		return totals;
+	}
+
+	b2AABB queryBox = b2MakeAABB( proxy->points, proxy->count, proxy->radius );
+	WorldOverlapContext overlap = {
+		.world = w, .fcn = fcn, .filter = filter, .proxy = proxy, .userContext = context,
+	};
+
+	b2DynamicTree* trees = w->broadPhase.trees;
+	for ( int treeIndex = 0; treeIndex < b2_bodyTypeCount; ++treeIndex )
+	{
+		b2TreeStats visited = b2DynamicTree_Query( trees + treeIndex, queryBox, filter.maskBits, TreeOverlapCallback, &overlap );
+		totals.nodeVisits += visited.nodeVisits;
+		totals.leafVisits += visited.leafVisits;
+	}
+
+	return totals;
+}
+
+typedef struct WorldRayCastContext // state shared by RayCastCallback and ShapeCastCallback
+{
+	b2World* world; // world whose shape and body arrays the callbacks read
+	b2CastResultFcn* fcn; // user callback invoked for each hit shape
+	b2QueryFilter filter; // category/mask bits tested against each shape's filter
+	float fraction; // last fcn return value that fell in [0, 1]; the cast functions start it at 1.0f
+	void* userContext; // passed through to fcn unchanged
+} WorldRayCastContext;
+
+static float RayCastCallback( const b2RayCastInput* input, int proxyId, uint64_t userData, void* context )
+{
+	// Tree visitor for world ray casts. Shapes that are filtered out or missed return
+	// input->maxFraction; on a hit the user callback decides the returned fraction.
+	B2_UNUSED( proxyId );
+
+	WorldRayCastContext* cast = context;
+	b2World* w = cast->world;
+
+	int shapeIndex = (int)userData;
+	b2Shape* candidate = b2ShapeArray_Get( &w->shapes, shapeIndex );
+
+	bool queryAccepts = ( candidate->filter.categoryBits & cast->filter.maskBits ) != 0;
+	bool shapeAccepts = ( candidate->filter.maskBits & cast->filter.categoryBits ) != 0;
+	if ( queryAccepts == false || shapeAccepts == false )
+	{
+		return input->maxFraction;
+	}
+
+	b2Body* owner = b2BodyArray_Get( &w->bodies, candidate->bodyId );
+	b2Transform bodyTransform = b2GetBodyTransformQuick( w, owner );
+	b2CastOutput hitInfo = b2RayCastShape( input, candidate, bodyTransform );
+	if ( hitInfo.hit == false )
+	{
+		return input->maxFraction;
+	}
+
+	b2ShapeId id = { shapeIndex + 1, w->worldId, candidate->generation };
+	float userFraction = cast->fcn( id, hitInfo.point, hitInfo.normal, hitInfo.fraction, cast->userContext );
+
+	// The user may return -1 to skip this shape, so only fractions in [0, 1] are recorded
+	if ( 0.0f <= userFraction && userFraction <= 1.0f )
+	{
+		cast->fraction = userFraction;
+	}
+
+	return userFraction;
+}
+
+b2TreeStats b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn,
+							 void* context )
+{
+	// Cast a ray through every broad-phase tree, reporting hits through fcn. The fraction
+	// recorded by RayCastCallback becomes the max fraction for the remaining trees.
+	b2TreeStats totals = { 0 };
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+	if ( w->locked )
+	{
+		return totals;
+	}
+
+	B2_ASSERT( b2IsValidVec2( origin ) );
+	B2_ASSERT( b2IsValidVec2( translation ) );
+
+	b2RayCastInput ray = { origin, translation, 1.0f };
+	WorldRayCastContext cast = { .world = w, .fcn = fcn, .filter = filter, .fraction = 1.0f, .userContext = context };
+	b2DynamicTree* trees = w->broadPhase.trees;
+	for ( int treeIndex = 0; treeIndex < b2_bodyTypeCount; ++treeIndex )
+	{
+		b2TreeStats visited = b2DynamicTree_RayCast( trees + treeIndex, &ray, filter.maskBits, RayCastCallback, &cast );
+		totals.nodeVisits += visited.nodeVisits;
+		totals.leafVisits += visited.leafVisits;
+
+		// A zero fraction leaves nothing of the ray for the remaining trees
+		if ( cast.fraction == 0.0f )
+		{
+			break;
+		}
+
+		ray.maxFraction = cast.fraction;
+	}
+
+	return totals;
+}
+
+// Closest-hit callback: stores each reported hit in the b2RayResult passed as context and returns its fraction.
+static float b2RayCastClosestFcn( b2ShapeId shapeId, b2Vec2 point, b2Vec2 normal, float fraction, void* context )
+{
+	b2RayResult* closest = context;
+	closest->hit = true;
+	closest->fraction = fraction;
+	closest->point = point;
+	closest->normal = normal;
+	closest->shapeId = shapeId;
+	return fraction;
+}
+
+b2RayResult b2World_CastRayClosest( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter )
+{
+	// Cast a ray and keep the hit recorded by b2RayCastClosestFcn. The result also
+	// accumulates the tree traversal counts. A locked world returns a zeroed result.
+	b2RayResult closest = { 0 };
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+	if ( w->locked )
+	{
+		return closest;
+	}
+
+	B2_ASSERT( b2IsValidVec2( origin ) );
+	B2_ASSERT( b2IsValidVec2( translation ) );
+
+	b2RayCastInput ray = { origin, translation, 1.0f };
+	WorldRayCastContext cast = { .world = w, .fcn = b2RayCastClosestFcn, .filter = filter, .fraction = 1.0f, .userContext = &closest };
+
+	b2DynamicTree* trees = w->broadPhase.trees;
+	for ( int treeIndex = 0; treeIndex < b2_bodyTypeCount; ++treeIndex )
+	{
+		b2TreeStats visited = b2DynamicTree_RayCast( trees + treeIndex, &ray, filter.maskBits, RayCastCallback, &cast );
+		closest.nodeVisits += visited.nodeVisits;
+		closest.leafVisits += visited.leafVisits;
+		if ( cast.fraction == 0.0f )
+		{
+			break;
+		}
+
+		ray.maxFraction = cast.fraction;
+	}
+
+	return closest;
+}
+
+static float ShapeCastCallback( const b2ShapeCastInput* input, int proxyId, uint64_t userData, void* context )
+{
+	// Tree visitor for world shape casts. Shapes that are filtered out or missed return
+	// input->maxFraction; on a hit the user callback decides the returned fraction.
+	B2_UNUSED( proxyId );
+
+	WorldRayCastContext* cast = context;
+	b2World* w = cast->world;
+
+	int shapeIndex = (int)userData;
+	b2Shape* candidate = b2ShapeArray_Get( &w->shapes, shapeIndex );
+
+	bool queryAccepts = ( candidate->filter.categoryBits & cast->filter.maskBits ) != 0;
+	bool shapeAccepts = ( candidate->filter.maskBits & cast->filter.categoryBits ) != 0;
+	if ( queryAccepts == false || shapeAccepts == false )
+	{
+		return input->maxFraction;
+	}
+
+	b2Body* owner = b2BodyArray_Get( &w->bodies, candidate->bodyId );
+	b2Transform bodyTransform = b2GetBodyTransformQuick( w, owner );
+
+	b2CastOutput hitInfo = b2ShapeCastShape( input, candidate, bodyTransform );
+	if ( hitInfo.hit == false )
+	{
+		return input->maxFraction;
+	}
+
+	b2ShapeId id = { shapeIndex + 1, w->worldId, candidate->generation };
+	float userFraction = cast->fcn( id, hitInfo.point, hitInfo.normal, hitInfo.fraction, cast->userContext );
+
+	// The user may return -1 to skip this shape, so only fractions in [0, 1] are recorded
+	if ( 0.0f <= userFraction && userFraction <= 1.0f )
+	{
+		cast->fraction = userFraction;
+	}
+
+	return userFraction;
+}
+
+b2TreeStats b2World_CastShape( b2WorldId worldId, const b2ShapeProxy* proxy, b2Vec2 translation, b2QueryFilter filter,
+								b2CastResultFcn* fcn, void* context )
+{
+	// Sweep the proxy along translation through every broad-phase tree, reporting hits through
+	// fcn. The fraction recorded by ShapeCastCallback becomes the max fraction for later trees.
+	b2TreeStats totals = { 0 };
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+	if ( w->locked )
+	{
+		return totals;
+	}
+
+	B2_ASSERT( b2IsValidVec2( translation ) );
+
+	b2ShapeCastInput sweep = { 0 };
+	sweep.maxFraction = 1.0f;
+	sweep.translation = translation;
+	sweep.proxy = *proxy;
+
+	WorldRayCastContext cast = { .world = w, .fcn = fcn, .filter = filter, .fraction = 1.0f, .userContext = context };
+	b2DynamicTree* trees = w->broadPhase.trees;
+	for ( int treeIndex = 0; treeIndex < b2_bodyTypeCount; ++treeIndex )
+	{
+		b2TreeStats visited = b2DynamicTree_ShapeCast( trees + treeIndex, &sweep, filter.maskBits, ShapeCastCallback, &cast );
+		totals.nodeVisits += visited.nodeVisits;
+		totals.leafVisits += visited.leafVisits;
+
+		if ( cast.fraction == 0.0f )
+		{
+			break;
+		}
+
+		sweep.maxFraction = cast.fraction;
+	}
+
+	return totals;
+}
+
+typedef struct b2MoverContext // NOTE(review): tag and typedef names differ; no use is visible in this part of the file
+{
+	b2World* world; // presumably the world being queried - TODO confirm against users of this type
+	b2QueryFilter filter; // presumably the category/mask query filter - TODO confirm
+	b2ShapeProxy proxy; // stored by value, not by pointer
+	b2Transform transform; // NOTE(review): purpose not visible here - confirm
+	void* userContext; // opaque user pointer
+} b2CharacterCallbackContext;
+
+typedef struct WorldMoverCastContext // state handed to MoverCastCallback by b2World_CastMover
+{
+	b2World* world; // world whose shape and body arrays the callback reads
+	b2QueryFilter filter; // category/mask bits tested against each shape's filter
+	float fraction; // last non-zero cast fraction recorded by the callback; starts at 1.0f
+} WorldMoverCastContext;
+
+static float MoverCastCallback( const b2ShapeCastInput* input, int proxyId, uint64_t userData, void* context )
+{
+	// Tree visitor for b2World_CastMover: records non-zero cast fractions and returns the recorded value
+	B2_UNUSED( proxyId );
+
+	WorldMoverCastContext* cast = context;
+	b2World* w = cast->world;
+
+	int shapeIndex = (int)userData;
+	b2Shape* candidate = b2ShapeArray_Get( &w->shapes, shapeIndex );
+
+	bool queryAccepts = ( candidate->filter.categoryBits & cast->filter.maskBits ) != 0;
+	bool shapeAccepts = ( candidate->filter.maskBits & cast->filter.categoryBits ) != 0;
+	if ( queryAccepts == false || shapeAccepts == false )
+	{
+		return cast->fraction;
+	}
+
+	b2Body* owner = b2BodyArray_Get( &w->bodies, candidate->bodyId );
+	b2Transform bodyTransform = b2GetBodyTransformQuick( w, owner );
+	b2CastOutput sweep = b2ShapeCastShape( input, candidate, bodyTransform );
+
+	// Ignore overlapping shapes: a zero fraction leaves the recorded value untouched
+	if ( sweep.fraction != 0.0f )
+	{
+		cast->fraction = sweep.fraction;
+	}
+
+	return cast->fraction;
+}
+
+float b2World_CastMover( b2WorldId worldId, const b2Capsule* mover, b2Vec2 translation, b2QueryFilter filter )
+{
+	B2_ASSERT( b2IsValidVec2( translation ) );
+	B2_ASSERT( mover->radius > 2.0f * B2_LINEAR_SLOP );
+
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+	if ( w->locked )
+	{
+		return 1.0f;
+	}
+
+	// The capsule is cast as a two point proxy with the capsule radius; encroachment is allowed
+	b2ShapeCastInput sweep = { 0 };
+	sweep.proxy.count = 2;
+	sweep.proxy.points[0] = mover->center1;
+	sweep.proxy.points[1] = mover->center2;
+	sweep.proxy.radius = mover->radius;
+	sweep.translation = translation;
+	sweep.maxFraction = 1.0f;
+	sweep.canEncroach = true;
+
+	WorldMoverCastContext cast = { .world = w, .filter = filter, .fraction = 1.0f };
+	b2DynamicTree* trees = w->broadPhase.trees;
+	for ( int treeIndex = 0; treeIndex < b2_bodyTypeCount; ++treeIndex )
+	{
+		b2DynamicTree_ShapeCast( trees + treeIndex, &sweep, filter.maskBits, MoverCastCallback, &cast );
+		if ( cast.fraction == 0.0f )
+		{
+			return 0.0f;
+		}
+
+		sweep.maxFraction = cast.fraction;
+	}
+
+	return cast.fraction;
+}
+
+typedef struct WorldMoverContext // state handed to TreeCollideCallback by b2World_CollideMover
+{
+	b2World* world; // world whose shape and body arrays the callback reads
+	b2PlaneResultFcn* fcn; // user callback invoked for each shape b2CollideMover reports as hit
+	b2QueryFilter filter; // category/mask bits tested against each shape's filter
+	b2Capsule mover; // by-value copy of the caller's capsule
+	void* userContext; // passed through to fcn unchanged
+} WorldMoverContext;
+
+static bool TreeCollideCallback( int proxyId, uint64_t userData, void* context )
+{
+	// Tree visitor for b2World_CollideMover. Shapes that are filtered out or not hit return
+	// true; on a hit the user callback's result is returned.
+	B2_UNUSED( proxyId );
+
+	WorldMoverContext* collide = context;
+	b2World* w = collide->world;
+
+	int shapeIndex = (int)userData;
+	b2Shape* candidate = b2ShapeArray_Get( &w->shapes, shapeIndex );
+
+	bool queryAccepts = ( candidate->filter.categoryBits & collide->filter.maskBits ) != 0;
+	bool shapeAccepts = ( candidate->filter.maskBits & collide->filter.categoryBits ) != 0;
+	if ( queryAccepts == false || shapeAccepts == false )
+	{
+		return true;
+	}
+
+	b2Body* owner = b2BodyArray_Get( &w->bodies, candidate->bodyId );
+	b2Transform bodyTransform = b2GetBodyTransformQuick( w, owner );
+
+	b2PlaneResult plane = b2CollideMover( candidate, bodyTransform, &collide->mover );
+	if ( plane.hit == false )
+	{
+		return true;
+	}
+
+	b2ShapeId id = { candidate->id + 1, w->worldId, candidate->generation };
+	return collide->fcn( id, &plane, collide->userContext );
+}
+
+// It is tempting to use a shape proxy for the mover, but that makes deep overlap hard to handle and
+// the extra generality may not be worth it.
+void b2World_CollideMover( b2WorldId worldId, const b2Capsule* mover, b2QueryFilter filter, b2PlaneResultFcn* fcn, void* context )
+{
+	b2World* w = b2GetWorldFromId( worldId );
+	B2_ASSERT( w->locked == false );
+	if ( w->locked )
+	{
+		return;
+	}
+
+	// Query box: the bounds of both capsule centers, grown by the radius on each axis
+	b2Vec2 extent = { mover->radius, mover->radius };
+	b2AABB queryBox = {
+		.lowerBound = b2Sub( b2Min( mover->center1, mover->center2 ), extent ),
+		.upperBound = b2Add( b2Max( mover->center1, mover->center2 ), extent ),
+	};
+
+	WorldMoverContext collide = { .world = w, .fcn = fcn, .filter = filter, .mover = *mover, .userContext = context };
+
+	b2DynamicTree* trees = w->broadPhase.trees;
+	for ( int treeIndex = 0; treeIndex < b2_bodyTypeCount; ++treeIndex )
+	{
+		b2DynamicTree_Query( trees + treeIndex, queryBox, filter.maskBits, TreeCollideCallback, &collide );
+	}
+}
+
+#if 0
+
+void b2World_Dump()
+{
+	if (m_locked)
+	{
+		return;
+	}
+
+	b2OpenDump("box2d_dump.inl");
+
+	b2Dump("b2Vec2 g(%.9g, %.9g);\n", m_gravity.x, m_gravity.y);
+	b2Dump("m_world->SetGravity(g);\n");
+
+	b2Dump("b2Body** sims = (b2Body**)b2Alloc(%d * sizeof(b2Body*));\n", m_bodyCount);
+	b2Dump("b2Joint** joints = (b2Joint**)b2Alloc(%d * sizeof(b2Joint*));\n", m_jointCount);
+
+	int32 i = 0;
+	for (b2Body* b = m_bodyList; b; b = b->m_next)
+	{
+		b->m_islandIndex = i;
+		b->Dump();
+		++i;
+	}
+
+	i = 0;
+	for (b2Joint* j = m_jointList; j; j = j->m_next)
+	{
+		j->m_index = i;
+		++i;
+	}
+
+	// First pass on joints, skip gear joints.
+	for (b2Joint* j = m_jointList; j; j = j->m_next)
+	{
+		if (j->m_type == e_gearJoint)
+		{
+			continue;
+		}
+
+		b2Dump("{\n");
+		j->Dump();
+		b2Dump("}\n");
+	}
+
+	// Second pass on joints, only gear joints.
+	for (b2Joint* j = m_jointList; j; j = j->m_next)
+	{
+		if (j->m_type != e_gearJoint)
+		{
+			continue;
+		}
+
+		b2Dump("{\n");
+		j->Dump();
+		b2Dump("}\n");
+	}
+
+	b2Dump("b2Free(joints);\n");
+	b2Dump("b2Free(sims);\n");
+	b2Dump("joints = nullptr;\n");
+	b2Dump("sims = nullptr;\n");
+
+	b2CloseDump();
+}
+#endif
+
+void b2World_SetCustomFilterCallback( b2WorldId worldId, b2CustomFilterFcn* fcn, void* context )
+{
+	b2World* w = b2GetWorldFromId( worldId ); // stored without checking w->locked
+	w->customFilterContext = context;
+	w->customFilterFcn = fcn;
+}
+
+void b2World_SetPreSolveCallback( b2WorldId worldId, b2PreSolveFcn* fcn, void* context )
+{
+	b2World* w = b2GetWorldFromId( worldId ); // stored without checking w->locked
+	w->preSolveContext = context;
+	w->preSolveFcn = fcn;
+}
+
+void b2World_SetGravity( b2WorldId worldId, b2Vec2 gravity )
+{
+	// Resolve the id to its world and overwrite the stored gravity vector.
+	b2GetWorldFromId( worldId )->gravity = gravity;
+}
+
+b2Vec2 b2World_GetGravity( b2WorldId worldId )
+{
+	// Resolve the id to its world and return a copy of the stored gravity vector.
+	return b2GetWorldFromId( worldId )->gravity;
+}
+
+struct ExplosionContext
+{
+	b2World* world; // world whose shapes, bodies and solver sets ExplosionCallback reads
+	b2Vec2 position; // explosion point; used as a one-point proxy in the distance query
+	float radius; // shapes no farther than this receive the unscaled impulse
+	float falloff; // band beyond radius where the impulse ramps down; shapes past radius + falloff are skipped
+	float impulsePerLength; // multiplied by the shape's projected perimeter to get the impulse magnitude
+};
+
+static bool ExplosionCallback( int proxyId, uint64_t userData, void* context )
+{
+	B2_UNUSED( proxyId );
+
+	// Tree query callback: userData carries the shape id, context points at the caller's ExplosionContext.
+	struct ExplosionContext* blast = context;
+	b2World* world = blast->world;
+	int shapeId = (int)userData;
+
+	b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+	b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId );
+	B2_ASSERT( body->type == b2_dynamicBody );
+
+	b2Transform xf = b2GetBodyTransformQuick( world, body );
+
+	// Distance between the shape (proxy A) and the explosion position as a single point (proxy B).
+	b2DistanceInput input;
+	input.proxyA = b2MakeShapeDistanceProxy( shape );
+	input.proxyB = b2MakeProxy( &blast->position, 1, 0.0f );
+	input.transformA = xf;
+	input.transformB = b2Transform_identity;
+	input.useRadii = true;
+
+	b2SimplexCache cache = { 0 };
+	b2DistanceOutput output = b2ShapeDistance( &input, &cache, NULL, 0 );
+
+	float radius = blast->radius;
+	float falloff = blast->falloff;
+	if ( output.distance > radius + falloff )
+	{
+		// Beyond radius + falloff: no impulse, but keep visiting other proxies.
+		return true;
+	}
+
+	b2WakeBody( world, body );
+
+	// Velocity state is only written for bodies that are in the awake set after the wake call.
+	if ( body->setIndex != b2_awakeSet )
+	{
+		return true;
+	}
+
+	// At zero distance the transformed shape centroid stands in for the closest point.
+	b2Vec2 target = output.pointA;
+	if ( output.distance == 0.0f )
+	{
+		target = b2TransformPoint( xf, b2GetShapeCentroid( shape ) );
+	}
+
+	// Unit vector from the explosion to the target, with +x as the fallback for a near-zero offset.
+	b2Vec2 offset = b2Sub( target, blast->position );
+	bool hasLength = b2LengthSquared( offset ) > 100.0f * FLT_EPSILON * FLT_EPSILON;
+	b2Vec2 direction = hasLength ? b2Normalize( offset ) : (b2Vec2){ 1.0f, 0.0f };
+
+	// The perimeter is projected using the line perpendicular to the direction, expressed in body-local space.
+	b2Vec2 localLine = b2InvRotateVector( xf.q, b2LeftPerp( direction ) );
+	float perimeter = b2GetShapeProjectedPerimeter( shape, localLine );
+
+	// Full strength up to radius, then a clamped linear ramp down across the falloff band.
+	float scale = 1.0f;
+	if ( output.distance > radius && falloff > 0.0f )
+	{
+		scale = b2ClampFloat( ( radius + falloff - output.distance ) / falloff, 0.0f, 1.0f );
+	}
+
+	b2Vec2 impulse = b2MulSV( blast->impulsePerLength * perimeter * scale, direction );
+
+	// Apply the impulse straight to the body's velocity state stored in the awake solver set.
+	b2SolverSet* awakeSet = b2SolverSetArray_Get( &world->solverSets, b2_awakeSet );
+	b2BodyState* state = b2BodyStateArray_Get( &awakeSet->bodyStates, body->localIndex );
+	b2BodySim* sim = b2BodySimArray_Get( &awakeSet->bodySims, body->localIndex );
+	b2Vec2 arm = b2Sub( target, sim->center );
+	state->linearVelocity = b2MulAdd( state->linearVelocity, sim->invMass, impulse );
+	state->angularVelocity += sim->invInertia * b2Cross( arm, impulse );
+
+	return true;
+}
+
+void b2World_Explode( b2WorldId worldId, const b2ExplosionDef* explosionDef )
+{
+	// Copy the definition into locals; the assertions and the query below work from these values.
+	b2Vec2 position = explosionDef->position;
+	float radius = explosionDef->radius;
+	float falloff = explosionDef->falloff;
+	float impulsePerLength = explosionDef->impulsePerLength;
+
+	B2_ASSERT( b2IsValidVec2( position ) );
+	B2_ASSERT( b2IsValidFloat( radius ) && radius >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( falloff ) && falloff >= 0.0f );
+	B2_ASSERT( b2IsValidFloat( impulsePerLength ) );
+
+	b2World* world = b2GetWorldFromId( worldId );
+	B2_ASSERT( world->locked == false );
+	if ( world->locked )
+	{
+		return;
+	}
+
+	struct ExplosionContext explosionContext = {
+		.world = world, .position = position, .radius = radius, .falloff = falloff, .impulsePerLength = impulsePerLength,
+	};
+
+	b2AABB aabb = { // query box: the position padded by radius + falloff on both axes
+		.lowerBound = { .x = position.x - ( radius + falloff ), .y = position.y - ( radius + falloff ) },
+		.upperBound = { .x = position.x + ( radius + falloff ), .y = position.y + ( radius + falloff ) },
+	};
+	b2DynamicTree_Query( &world->broadPhase.trees[b2_dynamicBody], aabb, explosionDef->maskBits, ExplosionCallback, &explosionContext );
+}
+
+void b2World_RebuildStaticTree( b2WorldId worldId )
+{
+	// Asserts that the world is unlocked; if asserts are disabled a locked world makes this a no-op.
+	b2World* world = b2GetWorldFromId( worldId );
+	B2_ASSERT( world->locked == false );
+
+	if ( world->locked == false )
+	{
+		// Rebuild the broad-phase tree stored at the b2_staticBody slot.
+		b2DynamicTree_Rebuild( &world->broadPhase.trees[b2_staticBody], true );
+	}
+}
+
+void b2World_EnableSpeculative( b2WorldId worldId, bool flag )
+{
+	// Resolve the id to its world and store the flag there.
+	b2GetWorldFromId( worldId )->enableSpeculative = flag;
+}
+
+#if B2_VALIDATE
+// When validating island ids I have to compare the root island
+// ids because islands are not merged until the next time step.
+static int b2GetRootIslandId( b2World* world, int islandId )
+{
+	// A null id has no island to look up, so it is returned unchanged.
+	if ( islandId == B2_NULL_INDEX )
+	{
+		return B2_NULL_INDEX;
+	}
+
+	// Follow parentIsland links upward; the id whose island has no parent is the root.
+	int currentId = islandId;
+	for ( ;; )
+	{
+		int parentId = b2IslandArray_Get( &world->islands, currentId )->parentIsland;
+		if ( parentId == B2_NULL_INDEX )
+		{
+			return currentId;
+		}
+		currentId = parentId;
+	}
+}
+
+// This validates island graph connectivity for each body
+void b2ValidateConnectivity( b2World* world )
+{
+	b2Body* bodies = world->bodies.data;
+	int bodyCapacity = world->bodies.count;
+	// Walk every slot of the body array; a slot whose id is B2_NULL_INDEX is checked as free and skipped.
+	for ( int bodyIndex = 0; bodyIndex < bodyCapacity; ++bodyIndex )
+	{
+		b2Body* body = bodies + bodyIndex;
+		if ( body->id == B2_NULL_INDEX )
+		{
+			b2ValidateFreeId( &world->bodyIdPool, bodyIndex );
+			continue;
+		}
+
+		b2ValidateUsedId( &world->bodyIdPool, bodyIndex );
+
+		B2_ASSERT( bodyIndex == body->id );
+
+		// Need to get the root island because islands are not merged until the next time step
+		int bodyIslandId = b2GetRootIslandId( world, body->islandId );
+		int bodySetIndex = body->setIndex;
+		// A contact key packs the contact id (key >> 1) and this body's edge index (key & 1).
+		int contactKey = body->headContactKey;
+		while ( contactKey != B2_NULL_INDEX )
+		{
+			int contactId = contactKey >> 1;
+			int edgeIndex = contactKey & 1;
+
+			b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+			// Touching contacts on a non-static body must share its root island; non-touching ones have no island.
+			bool touching = ( contact->flags & b2_contactTouchingFlag ) != 0;
+			if ( touching )
+			{
+				if ( bodySetIndex != b2_staticSet )
+				{
+					int contactIslandId = b2GetRootIslandId( world, contact->islandId );
+					B2_ASSERT( contactIslandId == bodyIslandId );
+				}
+			}
+			else
+			{
+				B2_ASSERT( contact->islandId == B2_NULL_INDEX );
+			}
+
+			contactKey = contact->edges[edgeIndex].nextKey;
+		}
+		// Joint keys are unpacked the same way; the opposite edge names the body on the other end.
+		int jointKey = body->headJointKey;
+		while ( jointKey != B2_NULL_INDEX )
+		{
+			int jointId = jointKey >> 1;
+			int edgeIndex = jointKey & 1;
+
+			b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+
+			int otherEdgeIndex = edgeIndex ^ 1;
+
+			b2Body* otherBody = b2BodyArray_Get( &world->bodies, joint->edges[otherEdgeIndex].bodyId );
+			// Joints touching a disabled body, or linking two static bodies, must not belong to an island.
+			if ( bodySetIndex == b2_disabledSet || otherBody->setIndex == b2_disabledSet )
+			{
+				B2_ASSERT( joint->islandId == B2_NULL_INDEX );
+			}
+			else if ( bodySetIndex == b2_staticSet )
+			{
+				if ( otherBody->setIndex == b2_staticSet )
+				{
+					B2_ASSERT( joint->islandId == B2_NULL_INDEX );
+				}
+			}
+			else
+			{
+				int jointIslandId = b2GetRootIslandId( world, joint->islandId );
+				B2_ASSERT( jointIslandId == bodyIslandId );
+			}
+
+			jointKey = joint->edges[edgeIndex].nextKey;
+		}
+	}
+}
+
+// Validates solver sets, but not island connectivity
+void b2ValidateSolverSets( b2World* world )
+{
+	B2_ASSERT( b2GetIdCapacity( &world->bodyIdPool ) == world->bodies.count );
+	B2_ASSERT( b2GetIdCapacity( &world->contactIdPool ) == world->contacts.count );
+	B2_ASSERT( b2GetIdCapacity( &world->jointIdPool ) == world->joints.count );
+	B2_ASSERT( b2GetIdCapacity( &world->islandIdPool ) == world->islands.count );
+	B2_ASSERT( b2GetIdCapacity( &world->solverSetIdPool ) == world->solverSets.count );
+	// Running totals; each is compared against the matching id pool count once the loops are done.
+	int activeSetCount = 0;
+	int totalBodyCount = 0;
+	int totalJointCount = 0;
+	int totalContactCount = 0;
+	int totalIslandCount = 0;
+
+	// Validate all solver sets
+	int setCount = world->solverSets.count;
+	for ( int setIndex = 0; setIndex < setCount; ++setIndex )
+	{
+		b2SolverSet* set = world->solverSets.data + setIndex;
+		if ( set->setIndex != B2_NULL_INDEX )
+		{
+			activeSetCount += 1;
+
+			if ( setIndex == b2_staticSet )
+			{
+				B2_ASSERT( set->contactSims.count == 0 );
+				B2_ASSERT( set->islandSims.count == 0 );
+				B2_ASSERT( set->bodyStates.count == 0 );
+			}
+			else if ( setIndex == b2_awakeSet )
+			{
+				B2_ASSERT( set->bodySims.count == set->bodyStates.count );
+				B2_ASSERT( set->jointSims.count == 0 );
+			}
+			else if ( setIndex == b2_disabledSet )
+			{
+				B2_ASSERT( set->islandSims.count == 0 );
+				B2_ASSERT( set->bodyStates.count == 0 );
+			}
+			else
+			{
+				B2_ASSERT( set->bodyStates.count == 0 );
+			}
+
+			// Validate bodies
+			{
+				b2Body* bodies = world->bodies.data;
+				B2_ASSERT( set->bodySims.count >= 0 );
+				totalBodyCount += set->bodySims.count;
+				for ( int i = 0; i < set->bodySims.count; ++i )
+				{
+					b2BodySim* bodySim = set->bodySims.data + i;
+
+					int bodyId = bodySim->bodyId;
+					B2_ASSERT( 0 <= bodyId && bodyId < world->bodies.count );
+					b2Body* body = bodies + bodyId;
+					B2_ASSERT( body->setIndex == setIndex );
+					B2_ASSERT( body->localIndex == i );
+					B2_ASSERT( body->generation == body->generation ); // NOTE(review): compares the field with itself, so it can never fail — confirm the intended operand
+
+					if ( setIndex == b2_disabledSet )
+					{
+						B2_ASSERT( body->headContactKey == B2_NULL_INDEX );
+					}
+
+					// Validate body shapes
+					int prevShapeId = B2_NULL_INDEX;
+					int shapeId = body->headShapeId;
+					while ( shapeId != B2_NULL_INDEX )
+					{
+						b2Shape* shape = b2ShapeArray_Get( &world->shapes, shapeId );
+						B2_ASSERT( shape->id == shapeId );
+						B2_ASSERT( shape->prevShapeId == prevShapeId );
+
+						if ( setIndex == b2_disabledSet )
+						{
+							B2_ASSERT( shape->proxyKey == B2_NULL_INDEX );
+						}
+						else if ( setIndex == b2_staticSet )
+						{
+							B2_ASSERT( B2_PROXY_TYPE( shape->proxyKey ) == b2_staticBody );
+						}
+						else
+						{
+							b2BodyType proxyType = B2_PROXY_TYPE( shape->proxyKey );
+							B2_ASSERT( proxyType == b2_kinematicBody || proxyType == b2_dynamicBody );
+						}
+
+						prevShapeId = shapeId;
+						shapeId = shape->nextShapeId;
+					}
+
+					// Validate body contacts
+					int contactKey = body->headContactKey;
+					while ( contactKey != B2_NULL_INDEX )
+					{
+						int contactId = contactKey >> 1;
+						int edgeIndex = contactKey & 1;
+
+						b2Contact* contact = b2ContactArray_Get( &world->contacts, contactId );
+						B2_ASSERT( contact->setIndex != b2_staticSet );
+						B2_ASSERT( contact->edges[0].bodyId == bodyId || contact->edges[1].bodyId == bodyId );
+						contactKey = contact->edges[edgeIndex].nextKey;
+					}
+
+					// Validate body joints
+					int jointKey = body->headJointKey;
+					while ( jointKey != B2_NULL_INDEX )
+					{
+						int jointId = jointKey >> 1;
+						int edgeIndex = jointKey & 1;
+
+						b2Joint* joint = b2JointArray_Get( &world->joints, jointId );
+
+						int otherEdgeIndex = edgeIndex ^ 1;
+
+						b2Body* otherBody = b2BodyArray_Get( &world->bodies, joint->edges[otherEdgeIndex].bodyId );
+
+						if ( setIndex == b2_disabledSet || otherBody->setIndex == b2_disabledSet )
+						{
+							B2_ASSERT( joint->setIndex == b2_disabledSet );
+						}
+						else if ( setIndex == b2_staticSet && otherBody->setIndex == b2_staticSet )
+						{
+							B2_ASSERT( joint->setIndex == b2_staticSet );
+						}
+						else if ( setIndex == b2_awakeSet )
+						{
+							B2_ASSERT( joint->setIndex == b2_awakeSet );
+						}
+						else if ( setIndex >= b2_firstSleepingSet )
+						{
+							B2_ASSERT( joint->setIndex == setIndex );
+						}
+
+						b2JointSim* jointSim = b2GetJointSim( world, joint );
+						B2_ASSERT( jointSim->jointId == jointId );
+						B2_ASSERT( jointSim->bodyIdA == joint->edges[0].bodyId );
+						B2_ASSERT( jointSim->bodyIdB == joint->edges[1].bodyId );
+
+						jointKey = joint->edges[edgeIndex].nextKey;
+					}
+				}
+			}
+
+			// Validate contacts
+			{
+				B2_ASSERT( set->contactSims.count >= 0 );
+				totalContactCount += set->contactSims.count;
+				for ( int i = 0; i < set->contactSims.count; ++i )
+				{
+					b2ContactSim* contactSim = set->contactSims.data + i;
+					b2Contact* contact = b2ContactArray_Get( &world->contacts, contactSim->contactId );
+					if ( setIndex == b2_awakeSet )
+					{
+						// contact should be non-touching if awake
+						// or it could be this contact hasn't been transferred yet
+						B2_ASSERT( contactSim->manifold.pointCount == 0 ||
+								   ( contactSim->simFlags & b2_simStartedTouching ) != 0 );
+					}
+					B2_ASSERT( contact->setIndex == setIndex );
+					B2_ASSERT( contact->colorIndex == B2_NULL_INDEX );
+					B2_ASSERT( contact->localIndex == i );
+				}
+			}
+
+			// Validate joints
+			{
+				B2_ASSERT( set->jointSims.count >= 0 );
+				totalJointCount += set->jointSims.count;
+				for ( int i = 0; i < set->jointSims.count; ++i )
+				{
+					b2JointSim* jointSim = set->jointSims.data + i;
+					b2Joint* joint = b2JointArray_Get( &world->joints, jointSim->jointId );
+					B2_ASSERT( joint->setIndex == setIndex );
+					B2_ASSERT( joint->colorIndex == B2_NULL_INDEX );
+					B2_ASSERT( joint->localIndex == i );
+				}
+			}
+
+			// Validate islands
+			{
+				B2_ASSERT( set->islandSims.count >= 0 );
+				totalIslandCount += set->islandSims.count;
+				for ( int i = 0; i < set->islandSims.count; ++i )
+				{
+					b2IslandSim* islandSim = set->islandSims.data + i;
+					b2Island* island = b2IslandArray_Get( &world->islands, islandSim->islandId );
+					B2_ASSERT( island->setIndex == setIndex );
+					B2_ASSERT( island->localIndex == i );
+				}
+			}
+		}
+		else
+		{
+			B2_ASSERT( set->bodySims.count == 0 );
+			B2_ASSERT( set->contactSims.count == 0 );
+			B2_ASSERT( set->jointSims.count == 0 );
+			B2_ASSERT( set->islandSims.count == 0 );
+			B2_ASSERT( set->bodyStates.count == 0 );
+		}
+	}
+	// Set, body and island totals are complete at this point and must agree with their id pools.
+	int setIdCount = b2GetIdCount( &world->solverSetIdPool );
+	B2_ASSERT( activeSetCount == setIdCount );
+
+	int bodyIdCount = b2GetIdCount( &world->bodyIdPool );
+	B2_ASSERT( totalBodyCount == bodyIdCount );
+
+	int islandIdCount = b2GetIdCount( &world->islandIdPool );
+	B2_ASSERT( totalIslandCount == islandIdCount );
+
+	// Validate constraint graph
+	for ( int colorIndex = 0; colorIndex < B2_GRAPH_COLOR_COUNT; ++colorIndex )
+	{
+		b2GraphColor* color = world->constraintGraph.colors + colorIndex;
+		{
+			B2_ASSERT( color->contactSims.count >= 0 );
+			totalContactCount += color->contactSims.count;
+			for ( int i = 0; i < color->contactSims.count; ++i )
+			{
+				b2ContactSim* contactSim = color->contactSims.data + i;
+				b2Contact* contact = b2ContactArray_Get( &world->contacts, contactSim->contactId );
+				// contact should be touching in the constraint graph or awaiting transfer to non-touching
+				B2_ASSERT( contactSim->manifold.pointCount > 0 ||
+						   ( contactSim->simFlags & ( b2_simStoppedTouching | b2_simDisjoint ) ) != 0 );
+				B2_ASSERT( contact->setIndex == b2_awakeSet );
+				B2_ASSERT( contact->colorIndex == colorIndex );
+				B2_ASSERT( contact->localIndex == i );
+
+				int bodyIdA = contact->edges[0].bodyId;
+				int bodyIdB = contact->edges[1].bodyId;
+
+				if ( colorIndex < B2_OVERFLOW_INDEX )
+				{
+					b2Body* bodyA = b2BodyArray_Get( &world->bodies, bodyIdA );
+					b2Body* bodyB = b2BodyArray_Get( &world->bodies, bodyIdB );
+					B2_ASSERT( b2GetBit( &color->bodySet, bodyIdA ) == ( bodyA->type != b2_staticBody ) );
+					B2_ASSERT( b2GetBit( &color->bodySet, bodyIdB ) == ( bodyB->type != b2_staticBody ) );
+				}
+			}
+		}
+
+		{
+			B2_ASSERT( color->jointSims.count >= 0 );
+			totalJointCount += color->jointSims.count;
+			for ( int i = 0; i < color->jointSims.count; ++i )
+			{
+				b2JointSim* jointSim = color->jointSims.data + i;
+				b2Joint* joint = b2JointArray_Get( &world->joints, jointSim->jointId );
+				B2_ASSERT( joint->setIndex == b2_awakeSet );
+				B2_ASSERT( joint->colorIndex == colorIndex );
+				B2_ASSERT( joint->localIndex == i );
+
+				int bodyIdA = joint->edges[0].bodyId;
+				int bodyIdB = joint->edges[1].bodyId;
+
+				if ( colorIndex < B2_OVERFLOW_INDEX )
+				{
+					b2Body* bodyA = b2BodyArray_Get( &world->bodies, bodyIdA );
+					b2Body* bodyB = b2BodyArray_Get( &world->bodies, bodyIdB );
+					B2_ASSERT( b2GetBit( &color->bodySet, bodyIdA ) == ( bodyA->type != b2_staticBody ) );
+					B2_ASSERT( b2GetBit( &color->bodySet, bodyIdB ) == ( bodyB->type != b2_staticBody ) );
+				}
+			}
+		}
+	}
+	// Contact and joint totals include both the solver sets and the graph colors, so they are checked last.
+	int contactIdCount = b2GetIdCount( &world->contactIdPool );
+	B2_ASSERT( totalContactCount == contactIdCount );
+	B2_ASSERT( totalContactCount == (int)world->broadPhase.pairSet.count );
+
+	int jointIdCount = b2GetIdCount( &world->jointIdPool );
+	B2_ASSERT( totalJointCount == jointIdCount );
+
+// Validate shapes
+// This is very slow on compounds
+#if 0
+	int shapeCapacity = b2Array(world->shapeArray).count;
+	for (int shapeIndex = 0; shapeIndex < shapeCapacity; shapeIndex += 1)
+	{
+		b2Shape* shape = world->shapeArray + shapeIndex;
+		if (shape->id != shapeIndex)
+		{
+			continue;
+		}
+
+		B2_ASSERT(0 <= shape->bodyId && shape->bodyId < b2Array(world->bodyArray).count);
+
+		b2Body* body = world->bodyArray + shape->bodyId;
+		B2_ASSERT(0 <= body->setIndex && body->setIndex < b2Array(world->solverSetArray).count);
+
+		b2SolverSet* set = world->solverSetArray + body->setIndex;
+		B2_ASSERT(0 <= body->localIndex && body->localIndex < set->sims.count);
+
+		b2BodySim* bodySim = set->sims.data + body->localIndex;
+		B2_ASSERT(bodySim->bodyId == shape->bodyId);
+
+		bool found = false;
+		int shapeCount = 0;
+		int index = body->headShapeId;
+		while (index != B2_NULL_INDEX)
+		{
+			b2CheckId(world->shapeArray, index);
+			b2Shape* s = world->shapeArray + index;
+			if (index == shapeIndex)
+			{
+				found = true;
+			}
+
+			index = s->nextShapeId;
+			shapeCount += 1;
+		}
+
+		B2_ASSERT(found);
+		B2_ASSERT(shapeCount == body->shapeCount);
+	}
+#endif
+}
+
+// Validate contact touching status.
+void b2ValidateContacts( b2World* world )
+{
+	int contactCount = world->contacts.count;
+	B2_ASSERT( contactCount == b2GetIdCapacity( &world->contactIdPool ) );
+	int allocatedContactCount = 0;
+	// Visit every slot of the contact array; slots whose contactId is B2_NULL_INDEX are skipped.
+	for ( int contactIndex = 0; contactIndex < contactCount; ++contactIndex )
+	{
+		b2Contact* contact = b2ContactArray_Get( &world->contacts, contactIndex );
+		if ( contact->contactId == B2_NULL_INDEX )
+		{
+			continue;
+		}
+
+		B2_ASSERT( contact->contactId == contactIndex );
+
+		allocatedContactCount += 1;
+
+		bool touching = ( contact->flags & b2_contactTouchingFlag ) != 0;
+
+		int setId = contact->setIndex;
+
+		if ( setId == b2_awakeSet )
+		{
+			// If touching and not a sensor
+			if ( touching )
+			{
+				B2_ASSERT( 0 <= contact->colorIndex && contact->colorIndex < B2_GRAPH_COLOR_COUNT );
+			}
+			else
+			{
+				B2_ASSERT( contact->colorIndex == B2_NULL_INDEX );
+			}
+		}
+		else if ( setId >= b2_firstSleepingSet )
+		{
+			// Only touching contacts allowed in a sleeping set
+			B2_ASSERT( touching == true );
+		}
+		else
+		{
+			// Sleeping and non-touching contacts or sensor contacts belong in the disabled set
+			B2_ASSERT( touching == false && setId == b2_disabledSet );
+		}
+		// The sim returned by b2GetContactSim must carry the same ids and touching state as the contact.
+		b2ContactSim* contactSim = b2GetContactSim( world, contact );
+		B2_ASSERT( contactSim->contactId == contactIndex );
+		B2_ASSERT( contactSim->bodyIdA == contact->edges[0].bodyId );
+		B2_ASSERT( contactSim->bodyIdB == contact->edges[1].bodyId );
+
+		// Sim touching is true for solid and sensor contacts
+		bool simTouching = ( contactSim->simFlags & b2_simTouchingFlag ) != 0;
+		B2_ASSERT( touching == simTouching );
+
+		B2_ASSERT( 0 <= contactSim->manifold.pointCount && contactSim->manifold.pointCount <= 2 );
+	}
+	// The number of allocated slots seen above must equal the id pool's count.
+	int contactIdCount = b2GetIdCount( &world->contactIdPool );
+	B2_ASSERT( allocatedContactCount == contactIdCount );
+}
+
+#else
+
+void b2ValidateConnectivity( b2World* world )
+{
+	B2_UNUSED( world ); // stub compiled when B2_VALIDATE is off: performs no checks
+}
+
+void b2ValidateSolverSets( b2World* world )
+{
+	B2_UNUSED( world ); // stub compiled when B2_VALIDATE is off: performs no checks
+}
+
+void b2ValidateContacts( b2World* world )
+{
+	B2_UNUSED( world ); // stub compiled when B2_VALIDATE is off: performs no checks
+}
+
+#endif
diff --git a/src/vendor/box2d/world.h b/src/vendor/box2d/world.h
new file mode 100644
index 0000000..fcc80c5
--- /dev/null
+++ b/src/vendor/box2d/world.h
@@ -0,0 +1,192 @@
+// SPDX-FileCopyrightText: 2023 Erin Catto
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "array.h"
+#include "bitset.h"
+#include "broad_phase.h"
+#include "constraint_graph.h"
+#include "id_pool.h"
+#include "arena_allocator.h"
+
+#include "box2d/types.h"
+
+enum b2SetType
+{
+	b2_staticSet = 0, // index of the solver set holding static sims
+	b2_disabledSet = 1, // index of the solver set holding disabled sims
+	b2_awakeSet = 2, // index of the solver set holding awake sims
+	b2_firstSleepingSet = 3, // solver sets at this index and above are sleeping sets
+};
+
+// Per thread task storage
+typedef struct b2TaskContext
+{
+	// These bits align with the b2ConstraintGraph::contactBlocks and signal a change in contact status
+	b2BitSet contactStateBitSet;
+
+	// Used to track bodies with shapes that have enlarged AABBs. This avoids having a bit array
+	// that is very large when there are many static shapes.
+	b2BitSet enlargedSimBitSet;
+
+	// Used to put islands to sleep
+	b2BitSet awakeIslandBitSet;
+
+	// Per worker split island candidate
+	float splitSleepTime;
+	int splitIslandId;
+	// NOTE(review): presumably one instance per worker, held in b2World::taskContexts — confirm
+} b2TaskContext;
+
+// The world struct manages all physics entities, dynamic simulation,  and asynchronous queries.
+// The world also contains efficient memory management facilities.
+typedef struct b2World
+{
+	b2ArenaAllocator arena;
+	b2BroadPhase broadPhase;
+	b2ConstraintGraph constraintGraph;
+
+	// The body id pool is used to allocate and recycle body ids. Body ids
+	// provide a stable identifier for users, but incur cache misses when used
+	// to access body data. Aligns with b2Body.
+	b2IdPool bodyIdPool;
+
+	// This is a sparse array that maps body ids to the body data
+	// stored in solver sets. Sims move within a set or across sets.
+	// Indices come from id pool.
+	b2BodyArray bodies;
+
+	// Provides free list for solver sets.
+	b2IdPool solverSetIdPool;
+
+	// Solver sets allow sims to be stored in contiguous arrays. The first
+	// set is all static sims. The second set is disabled sims. The third set is awake
+	// sims. The remaining sets are sleeping islands. See b2SetType.
+	b2SolverSetArray solverSets;
+
+	// Used to create stable ids for joints
+	b2IdPool jointIdPool;
+
+	// This is a sparse array that maps joint ids to the joint data stored in the constraint graph
+	// or in the solver sets.
+	b2JointArray joints;
+
+	// Used to create stable ids for contacts
+	b2IdPool contactIdPool;
+
+	// This is a sparse array that maps contact ids to the contact data stored in the constraint graph
+	// or in the solver sets.
+	b2ContactArray contacts;
+
+	// Used to create stable ids for islands
+	b2IdPool islandIdPool;
+
+	// This is a sparse array that maps island ids to the island data stored in the solver sets.
+	b2IslandArray islands;
+
+	b2IdPool shapeIdPool;
+	b2IdPool chainIdPool;
+
+	// These are sparse arrays that point into the pools above
+	b2ShapeArray shapes;
+	b2ChainShapeArray chainShapes;
+
+	// This is a dense array of sensor data.
+	b2SensorArray sensors;
+
+	// Per thread storage
+	b2TaskContextArray taskContexts;
+	b2SensorTaskContextArray sensorTaskContexts;
+
+	b2BodyMoveEventArray bodyMoveEvents;
+	b2SensorBeginTouchEventArray sensorBeginEvents;
+	b2ContactBeginTouchEventArray contactBeginEvents;
+
+	// End events are double buffered so that the user doesn't need to flush events
+	b2SensorEndTouchEventArray sensorEndEvents[2];
+	b2ContactEndTouchEventArray contactEndEvents[2];
+	int endEventArrayIndex;
+
+	b2ContactHitEventArray contactHitEvents;
+
+	// Used to track debug draw
+	b2BitSet debugBodySet;
+	b2BitSet debugJointSet;
+	b2BitSet debugContactSet;
+	b2BitSet debugIslandSet;
+
+	// Id that is incremented every time step
+	uint64_t stepIndex;
+
+	// Identify islands for splitting as follows:
+	// - I want to split islands so smaller islands can sleep
+	// - when a body comes to rest and its sleep timer trips, I can look at the island and flag it for splitting
+	//   if it has removed constraints
+	// - islands that have removed constraints must be split first because I don't want to wake bodies incorrectly
+	// - otherwise I can use the awake islands that have bodies wanting to sleep as the splitting candidates
+	// - if no bodies want to sleep then there is no reason to perform island splitting
+	int splitIslandId;
+
+	b2Vec2 gravity;
+	float hitEventThreshold;
+	float restitutionThreshold;
+	float maxLinearSpeed;
+	float maxContactPushSpeed;
+	float contactHertz;
+	float contactDampingRatio;
+	float jointHertz;
+	float jointDampingRatio;
+
+	b2FrictionCallback* frictionCallback;
+	b2RestitutionCallback* restitutionCallback;
+
+	uint16_t generation;
+
+	b2Profile profile;
+
+	b2PreSolveFcn* preSolveFcn;
+	void* preSolveContext;
+
+	b2CustomFilterFcn* customFilterFcn;
+	void* customFilterContext;
+
+	int workerCount;
+	b2EnqueueTaskCallback* enqueueTaskFcn;
+	b2FinishTaskCallback* finishTaskFcn;
+	void* userTaskContext;
+	void* userTreeTask;
+
+	void* userData;
+
+	// Remember the time step used for reporting forces and torques
+	float inv_h;
+
+	int activeTaskCount;
+	int taskCount;
+
+	uint16_t worldId;
+
+	bool enableSleep;
+	bool locked;
+	bool enableWarmStarting;
+	bool enableContinuous;
+	bool enableSpeculative;
+	bool inUse;
+} b2World;
+
+b2World* b2GetWorldFromId( b2WorldId id );
+b2World* b2GetWorld( int index );
+b2World* b2GetWorldLocked( int index );
+
+void b2ValidateConnectivity( b2World* world );
+void b2ValidateSolverSets( b2World* world );
+void b2ValidateContacts( b2World* world );
+
+B2_ARRAY_INLINE( b2BodyMoveEvent, b2BodyMoveEvent )
+B2_ARRAY_INLINE( b2ContactBeginTouchEvent, b2ContactBeginTouchEvent )
+B2_ARRAY_INLINE( b2ContactEndTouchEvent, b2ContactEndTouchEvent )
+B2_ARRAY_INLINE( b2ContactHitEvent, b2ContactHitEvent )
+B2_ARRAY_INLINE( b2SensorBeginTouchEvent, b2SensorBeginTouchEvent )
+B2_ARRAY_INLINE( b2SensorEndTouchEvent, b2SensorEndTouchEvent )
+B2_ARRAY_INLINE( b2TaskContext, b2TaskContext )