Made Really Fast

This commit is contained in:
OusmBlueNinja 2025-04-16 22:24:46 -05:00
parent aca2790710
commit 3c405aad55
4 changed files with 373 additions and 0 deletions

21
.vscode/c_cpp_properties.json vendored Normal file
View File

@ -0,0 +1,21 @@
{
"configurations": [
{
"name": "Win32",
"includePath": [
"${default}",
"${workspaceFolder}/**"
],
"defines": [
"_DEBUG",
"UNICODE",
"_UNICODE"
],
"windowsSdkVersion": "10.0.22621.0",
"compilerPath": "C:/msys64/mingw64/bin/g++.exe",
"cStandard": "c23",
"cppStandard": "c++23"
}
],
"version": 4
}

164
include/vector.h Normal file
View File

@ -0,0 +1,164 @@
// vector.h - Vector<T>: a minimal growable array with a malloc/realloc fast path for trivially copyable types
#pragma once
#include <cassert>
#include <cstddef>     // std::size_t, std::ptrdiff_t, std::max_align_t
#include <cstdlib> // malloc, realloc, free
#include <limits>      // std::numeric_limits
#include <memory>
#include <new> // std::bad_alloc
#include <stdexcept>   // std::out_of_range, std::length_error
#include <type_traits>
#include <utility>     // std::move, std::forward, std::move_if_noexcept
// GCC/Clang helpers for inlining, cold paths and branch hints.
// On other compilers they degrade to plain `inline`, nothing, and the bare
// condition respectively, so the header stays portable.
#if defined(__GNUC__)
#define UFV_ALWAYS_INLINE inline __attribute__((always_inline))
#define UFV_COLD __attribute__((cold))
#define UFV_UNLIKELY(cond) __builtin_expect(!!(cond), 0)
#else
#define UFV_ALWAYS_INLINE inline
#define UFV_COLD
#define UFV_UNLIKELY(cond) (cond)
#endif

// A minimal contiguous growable array.
//
// Storage is managed through three raw pointers: begin (_b), end of the live
// elements (_e) and end of the allocation (_cap). Two storage strategies are
// selected at compile time:
//   * POD path     - trivially copyable T with std::allocator and an alignment
//                    that malloc guarantees: storage comes from
//                    malloc/realloc/free so growth can extend the block in place.
//   * generic path - everything else: storage and element lifetime go through
//                    std::allocator_traits<Alloc>.
//
// The class owns its storage, so it defines all five special members: copies
// are deep, moves steal the buffer.
template<typename T, typename Alloc = std::allocator<T>>
class Vector {
    static_assert(!std::is_const_v<T>, "Vector<T> cannot hold const T");

    // Select the POD fast path only for trivially copyable T with the default
    // allocator. Over-aligned types are excluded because malloc/realloc only
    // guarantee alignof(std::max_align_t).
    static constexpr bool POD_PATH =
        std::is_trivially_copyable_v<T> &&
        std::is_same_v<Alloc, std::allocator<T>> &&
        alignof(T) <= alignof(std::max_align_t);

    using alloc_traits = std::allocator_traits<Alloc>;

public:
    using value_type      = T;
    using allocator_type  = Alloc;
    using size_type       = std::size_t;
    using reference       = T&;
    using const_reference = const T&;
    using iterator        = T*;
    using const_iterator  = const T*;

    // Creates an empty vector; no storage is allocated.
    Vector() noexcept(std::is_nothrow_default_constructible_v<Alloc>)
        : _b(nullptr), _e(nullptr), _cap(nullptr), _alloc() {}

    // Creates an empty vector that allocates through a copy of `a`.
    explicit Vector(const Alloc& a) noexcept
        : _b(nullptr), _e(nullptr), _cap(nullptr), _alloc(a) {}

    // Deep copy: allocates exactly other.size() slots and copy-constructs
    // every element. Throws whatever allocation or T's copy constructor throws.
    Vector(const Vector& other)
        : _b(nullptr), _e(nullptr), _cap(nullptr),
          _alloc(alloc_traits::select_on_container_copy_construction(other._alloc)) {
        try {
            append_copies_of(other);
        } catch (...) {
            release();  // the destructor does not run for a half-built object
            throw;
        }
    }

    // Steals other's buffer; `other` is left empty with no storage.
    Vector(Vector&& other) noexcept
        : _b(other._b), _e(other._e), _cap(other._cap),
          _alloc(std::move(other._alloc)) {
        other._b = other._e = other._cap = nullptr;
    }

    // Copy assignment. Existing capacity is reused when it is large enough.
    // Basic exception guarantee: if a copy throws, *this holds the elements
    // copied so far and remains valid.
    Vector& operator=(const Vector& other) {
        if (this == &other) return *this;
        if constexpr (alloc_traits::propagate_on_container_copy_assignment::value) {
            // Storage obtained from the old allocator must be returned to it
            // before the allocator is replaced by an unequal one.
            if (_alloc != other._alloc) release();
            _alloc = other._alloc;
        }
        clear();
        append_copies_of(other);
        return *this;
    }

    // Move assignment. Steals other's buffer whenever the allocator rules
    // allow it; otherwise moves element by element into our own storage.
    Vector& operator=(Vector&& other) noexcept(
        alloc_traits::propagate_on_container_move_assignment::value ||
        alloc_traits::is_always_equal::value) {
        if (this == &other) return *this;
        if constexpr (alloc_traits::propagate_on_container_move_assignment::value) {
            release();
            _alloc = std::move(other._alloc);
            take_storage(other);
        } else if (_alloc == other._alloc) {
            release();
            take_storage(other);
        } else {
            // Unequal, non-propagating allocators: other's buffer cannot be
            // freed through our allocator, so move the elements instead.
            clear();
            if (other.size() > capacity()) reallocate(other.size());
            for (T* src = other._b; src != other._e; ++src) {
                alloc_traits::construct(_alloc, _e, std::move(*src));
                ++_e;
            }
            other.clear();
        }
        return *this;
    }

    // Destroys all elements and frees the storage.
    ~Vector() noexcept { release(); }

    UFV_ALWAYS_INLINE size_type size() const noexcept { return static_cast<size_type>(_e - _b); }
    UFV_ALWAYS_INLINE size_type capacity() const noexcept { return static_cast<size_type>(_cap - _b); }
    UFV_ALWAYS_INLINE bool empty() const noexcept { return _b == _e; }

    // Largest element count this vector will ever try to allocate. Bounded by
    // PTRDIFF_MAX bytes so pointer differences and `n * sizeof(T)` cannot overflow.
    size_type max_size() const noexcept {
        const size_type byDiff =
            static_cast<size_type>(std::numeric_limits<std::ptrdiff_t>::max()) / sizeof(T);
        if constexpr (POD_PATH) {
            return byDiff;
        } else {
            const size_type byAlloc = static_cast<size_type>(alloc_traits::max_size(_alloc));
            return byAlloc < byDiff ? byAlloc : byDiff;
        }
    }

    UFV_ALWAYS_INLINE iterator begin() noexcept { return _b; }
    UFV_ALWAYS_INLINE const_iterator begin() const noexcept { return _b; }
    UFV_ALWAYS_INLINE iterator end() noexcept { return _e; }
    UFV_ALWAYS_INLINE const_iterator end() const noexcept { return _e; }

    // Unchecked element access; `i` must be < size().
    UFV_ALWAYS_INLINE reference operator[](size_type i) noexcept { return _b[i]; }
    UFV_ALWAYS_INLINE const_reference operator[](size_type i) const noexcept { return _b[i]; }

    // Checked element access; throws std::out_of_range when i >= size().
    UFV_ALWAYS_INLINE reference at(size_type i) {
        if (i >= size()) throw std::out_of_range("Vector::at");
        return _b[i];
    }
    UFV_ALWAYS_INLINE const_reference at(size_type i) const {
        if (i >= size()) throw std::out_of_range("Vector::at");
        return _b[i];
    }

    // Destroys every element (back to front); capacity is kept.
    UFV_ALWAYS_INLINE void clear() noexcept {
        if constexpr (!std::is_trivially_destructible_v<T>) {
            while (_e != _b) alloc_traits::destroy(_alloc, --_e);
        } else {
            _e = _b;
        }
    }

    // Cold path: only taken on growth.
    // Reallocates to the current capacity doubled (or 1 when empty), doubled
    // again until it is >= minNeeded. Throws std::length_error when minNeeded
    // exceeds max_size() and std::bad_alloc (or the allocator's exception)
    // when allocation fails; the vector is unchanged in both cases.
    UFV_COLD void reserve_grow(size_type minNeeded) {
        reallocate(next_capacity(minNeeded));
    }

    // Ensures capacity() >= n. The resulting capacity follows the doubling
    // policy of reserve_grow, so it may be larger than n.
    UFV_ALWAYS_INLINE void reserve(size_type n) {
        if (n > capacity()) reserve_grow(n);
    }

    // Releases unused capacity so that capacity() == size().
    // An empty vector gives back its storage entirely.
    void shrink_to_fit() {
        if (_e == _cap) return;  // already tight (covers the no-storage case)
        if (_b == _e) {
            release();
            return;
        }
        reallocate(size());
    }

    // Appends a copy of `v`. `v` may refer to an element of this vector.
    UFV_ALWAYS_INLINE void push_back(const T& v) { emplace_back(v); }

    // Appends `v` by move.
    UFV_ALWAYS_INLINE void push_back(T&& v) { emplace_back(std::move(v)); }

    // Constructs a new element in place from `args` and returns a reference
    // to it. Arguments may refer to elements of this vector.
    template<typename... Args>
    UFV_ALWAYS_INLINE reference emplace_back(Args&&... args) {
        if (UFV_UNLIKELY(_e == _cap)) return grow_emplace(std::forward<Args>(args)...);
        if constexpr (POD_PATH) {
            // Construct rather than assign: the slot holds no live object yet.
            ::new (static_cast<void*>(_e)) T(std::forward<Args>(args)...);
        } else {
            alloc_traits::construct(_alloc, _e, std::forward<Args>(args)...);
        }
        return *(_e++);
    }

    // Removes the last element; the vector must not be empty.
    UFV_ALWAYS_INLINE void pop_back() noexcept {
        assert(_e > _b);
        --_e;
        if constexpr (!std::is_trivially_destructible_v<T>)
            alloc_traits::destroy(_alloc, _e);
    }

private:
    // Destroys all elements, frees the storage and resets to the empty state.
    void release() noexcept {
        if constexpr (POD_PATH) {
            std::free(_b);
        } else {
            clear();
            if (_b) alloc_traits::deallocate(_alloc, _b, capacity());
        }
        _b = _e = _cap = nullptr;
    }

    // Takes over other's buffer. Precondition: *this owns no storage.
    void take_storage(Vector& other) noexcept {
        _b = other._b;
        _e = other._e;
        _cap = other._cap;
        other._b = other._e = other._cap = nullptr;
    }

    // Copy-constructs other's elements into *this. Precondition: empty().
    void append_copies_of(const Vector& other) {
        const size_type n = other.size();
        if (n == 0) return;
        if (n > capacity()) reallocate(n);
        if constexpr (POD_PATH) {
            _e = std::uninitialized_copy(other._b, other._e, _b);
        } else {
            // _e advances one element at a time so that a throwing copy
            // leaves only fully constructed elements inside [_b, _e).
            for (const T* src = other._b; src != other._e; ++src) {
                alloc_traits::construct(_alloc, _e, *src);
                ++_e;
            }
        }
    }

    // Growth policy: double the capacity (starting at 1) until it is
    // >= minNeeded, saturating at max_size() so neither the doubling nor the
    // later byte-size computation can overflow.
    size_type next_capacity(size_type minNeeded) const {
        const size_type limit = max_size();
        if (minNeeded > limit)
            throw std::length_error("Vector: requested capacity exceeds max_size()");
        const size_type cur = capacity();
        size_type newCap = 1;
        if (cur != 0) newCap = (cur > limit / 2) ? limit : cur * 2;
        while (newCap < minNeeded) newCap = (newCap > limit / 2) ? limit : newCap * 2;
        return newCap;
    }

    // Moves the storage to a block of exactly newCap elements.
    // Preconditions: size() <= newCap, 0 < newCap <= max_size().
    // Strong guarantee: on exception the vector is unchanged.
    void reallocate(size_type newCap) {
        const size_type oldSize = size();
        if constexpr (POD_PATH) {
            void* blk = _b
                ? std::realloc(_b, newCap * sizeof(T))
                : std::malloc(newCap * sizeof(T));
            if (!blk) throw std::bad_alloc();  // the old block is still valid
            _b = static_cast<T*>(blk);
            _e = _b + oldSize;
            _cap = _b + newCap;
        } else {
            T* newB = alloc_traits::allocate(_alloc, newCap);
            try {
                relocate_into(newB);
            } catch (...) {
                alloc_traits::deallocate(_alloc, newB, newCap);
                throw;
            }
            adopt(newB, oldSize, newCap);
        }
    }

    // Constructs copies of [_b, _e) at dst: moved when T's move cannot throw
    // (or T is not copyable), copied otherwise. The sources stay alive. If a
    // constructor throws, everything already built at dst is destroyed.
    void relocate_into(T* dst) {
        T* out = dst;
        try {
            for (T* src = _b; src != _e; ++src, ++out)
                alloc_traits::construct(_alloc, out, std::move_if_noexcept(*src));
        } catch (...) {
            while (out != dst) alloc_traits::destroy(_alloc, --out);
            throw;
        }
    }

    // Generic path only: drops the old elements and buffer, then switches to
    // a new buffer that already holds newSize constructed elements.
    void adopt(T* newB, size_type newSize, size_type newCap) noexcept {
        clear();
        if (_b) alloc_traits::deallocate(_alloc, _b, capacity());
        _b = newB;
        _e = newB + newSize;
        _cap = newB + newCap;
    }

    // Cold path of emplace_back: grow, then append. The new element is
    // materialised BEFORE the old buffer is released, so arguments that alias
    // an existing element (v.push_back(v[0])) stay valid.
    template<typename... Args>
    UFV_COLD reference grow_emplace(Args&&... args) {
        if constexpr (POD_PATH) {
            // realloc may free the old block, so build the value first.
            T tmp = T(std::forward<Args>(args)...);
            reserve_grow(size() + 1);
            ::new (static_cast<void*>(_e)) T(std::move(tmp));
        } else {
            const size_type oldSize = size();
            const size_type newCap = next_capacity(oldSize + 1);
            T* newB = alloc_traits::allocate(_alloc, newCap);
            try {
                alloc_traits::construct(_alloc, newB + oldSize, std::forward<Args>(args)...);
                try {
                    relocate_into(newB);
                } catch (...) {
                    alloc_traits::destroy(_alloc, newB + oldSize);
                    throw;
                }
            } catch (...) {
                alloc_traits::deallocate(_alloc, newB, newCap);
                throw;
            }
            adopt(newB, oldSize, newCap);  // leaves _e pointing at the new element
        }
        return *(_e++);
    }

    T* _b;        // first element / start of the allocation
    T* _e;        // one past the last live element
    T* _cap;      // one past the end of the allocation
    Alloc _alloc; // allocator used by the generic path
};

37
remake.yaml Normal file
View File

@ -0,0 +1,37 @@
# Remake Build Configuration
# Settings for building this repository's sources into build/app.exe.

# Source folders
src_dirs:
- src
# Include directories (where the sources find "vector.h")
include_dirs:
- include
# Library paths (none configured)
lib_dirs: []
# Output paths: build directory, final executable and build log
build_dir: build
target: build/app.exe
log_file: remake/build.log
# C compiler and flags (C99, common warnings enabled)
cc: gcc
cflags:
- -std=c99
- -Wall
# C++ compiler and flags (C++20, common warnings enabled, -O3 optimisation)
# NOTE(review): .vscode/c_cpp_properties.json sets cppStandard to c++23 while
# the build uses -std=c++20 - confirm which standard is intended.
cxx: g++
cxxflags:
- -std=c++20
- -Wall
- -O3
# Auto-detect these libraries (e.g. "glfw3" or "opengl32"); none listed
auto_libs: []
# Auto-detect headers from these include folders; none listed
auto_includes: []

151
src/test.cpp Normal file
View File

@ -0,0 +1,151 @@
#include <iostream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include "vector.h"
using namespace std;
using namespace std::chrono;
// Number of elements appended per run in the int benchmarks
// (the S benchmarks append N/10 elements).
static constexpr size_t N = 100'000'000;
// Number of timed repetitions that run_avg averages over.
static constexpr int R = 3;
// ANSI escape sequences used to colour the PASS/FAIL report lines.
constexpr const char* COLOR_RESET = "\033[0m";
constexpr const char* COLOR_RED = "\033[31m";
constexpr const char* COLOR_GREEN = "\033[32m";
// Invokes `fn` once and returns how long the call took, in fractional
// milliseconds, measured on the monotonic steady clock.
template<typename F>
double run_bench(F&& fn) {
    const steady_clock::time_point before = steady_clock::now();
    fn();
    const steady_clock::time_point after = steady_clock::now();
    const auto elapsed = duration_cast<duration<double, milli>>(after - before);
    return elapsed.count();
}
// Runs `fn` through run_bench R times and returns the mean duration in
// milliseconds.
template<typename F>
double run_avg(F&& fn) {
    double sum_ms = 0.0;
    int remaining = R;
    while (remaining-- > 0) {
        sum_ms += run_bench(fn);
    }
    return sum_ms / R;
}
// Prints one coloured report line for a benchmark pair.
//   name   - label shown for the test (left-aligned, padded to 25 columns)
//   std_t  - average time of the std::vector variant, in milliseconds
//   fast_t - average time of the Vector variant, in milliseconds
// The line is tagged [PASS] (green) when fast_t < std_t and [FAIL] (red)
// otherwise, and ends with the speed-up factor std_t / fast_t.
void print_test(const std::string& name, double std_t, double fast_t) {
    const double ratio = std_t / fast_t;

    const char* colour = COLOR_RED;
    const char* verdict = "[FAIL]";
    if (fast_t < std_t) {
        colour = COLOR_GREEN;
        verdict = "[PASS]";
    }

    // Verdict tag and test name.
    cout << colour << verdict << COLOR_RESET << ' ';
    cout << std::left << std::setw(25) << name;

    // Both timings, right-aligned with two decimals.
    cout << " | std: " << std::right << std::setw(7) << fixed << setprecision(2) << std_t << "ms";
    cout << " | fast: " << setw(7) << fast_t << "ms";

    // Speed-up factor in the verdict colour.
    cout << " | x" << colour << setw(5) << fixed << setprecision(2) << ratio << COLOR_RESET;
    cout << '\n';
}
// Two-int payload for the "non-trivial" benchmarks. The user-provided copy
// and move constructors make the type non-trivially-copyable.
struct S {
    int x;
    int y;

    // Builds a value from its two components.
    S(int first, int second) : x(first), y(second) {}
    // Copying and moving both just duplicate the two ints.
    S(const S& src) : S(src.x, src.y) {}
    S(S&& src) noexcept : S(src.x, src.y) {}
};
void TestIntPush_NoReserve() {
auto std_fn = []() {
vector<int> v;
for (size_t i = 0; i < N; ++i) v.push_back(int(i));
};
auto fast_fn = []() {
Vector<int> v;
for (size_t i = 0; i < N; ++i) v.push_back(int(i));
};
double t1 = run_avg(std_fn);
double t2 = run_avg(fast_fn);
print_test("IntPush_NoReserve", t1, t2);
}
void TestIntPush_WithReserve() {
auto std_fn = []() {
vector<int> v; v.reserve(N);
for (size_t i = 0; i < N; ++i) v.push_back(int(i));
};
auto fast_fn = []() {
Vector<int> v; v.reserve(N);
for (size_t i = 0; i < N; ++i) v.push_back(int(i));
};
double t1 = run_avg(std_fn);
double t2 = run_avg(fast_fn);
print_test("IntPush_WithReserve", t1, t2);
}
void TestIntEmplace_NoReserve() {
auto std_fn = []() {
vector<int> v;
for (size_t i = 0; i < N; ++i) v.emplace_back(int(i));
};
auto fast_fn = []() {
Vector<int> v;
for (size_t i = 0; i < N; ++i) v.emplace_back(int(i));
};
double t1 = run_avg(std_fn);
double t2 = run_avg(fast_fn);
print_test("IntEmplace_NoReserve", t1, t2);
}
void TestIntEmplace_WithReserve() {
auto std_fn = []() {
vector<int> v; v.reserve(N);
for (size_t i = 0; i < N; ++i) v.emplace_back(int(i));
};
auto fast_fn = []() {
Vector<int> v; v.reserve(N);
for (size_t i = 0; i < N; ++i) v.emplace_back(int(i));
};
double t1 = run_avg(std_fn);
double t2 = run_avg(fast_fn);
print_test("IntEmplace_WithReserve", t1, t2);
}
void TestNonTrivialPush_NoReserve() {
auto std_fn = []() {
vector<S> v;
for (size_t i = 0; i < N/10; ++i) v.push_back(S(int(i), int(i)));
};
auto fast_fn = []() {
Vector<S> v;
for (size_t i = 0; i < N/10; ++i) v.push_back(S(int(i), int(i)));
};
double t1 = run_avg(std_fn);
double t2 = run_avg(fast_fn);
print_test("NonTrivialPush_NoReserve", t1, t2);
}
void TestNonTrivialPush_WithReserve() {
auto std_fn = []() {
vector<S> v; v.reserve(N/10);
for (size_t i = 0; i < N/10; ++i) v.push_back(S(int(i), int(i)));
};
auto fast_fn = []() {
Vector<S> v; v.reserve(N/10);
for (size_t i = 0; i < N/10; ++i) v.push_back(S(int(i), int(i)));
};
double t1 = run_avg(std_fn);
double t2 = run_avg(fast_fn);
print_test("NonTrivialPush_WithReserve", t1, t2);
}
// Entry point: prints a banner with the benchmark parameters, then runs
// every benchmark in a fixed order.
int main() {
    using TestFn = void (*)();
    static constexpr TestFn kTests[] = {
        TestIntPush_NoReserve,
        TestIntPush_WithReserve,
        TestIntEmplace_NoReserve,
        TestIntEmplace_WithReserve,
        TestNonTrivialPush_NoReserve,
        TestNonTrivialPush_WithReserve,
    };

    cout << "\n=== Running Speed Tests (N=" << N << ", runs=" << R << ") ===\n\n";
    for (TestFn test : kTests) {
        test();
    }
    cout << "\n";
    return 0;
}