diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000..90d39a6
--- /dev/null
+++ b/.vscode/c_cpp_properties.json
@@ -0,0 +1,21 @@
+{
+    "configurations": [
+        {
+            "name": "Win32",
+            "includePath": [
+                "${default}",
+                "${workspaceFolder}/**"
+            ],
+            "defines": [
+                "_DEBUG",
+                "UNICODE",
+                "_UNICODE"
+            ],
+            "windowsSdkVersion": "10.0.22621.0",
+            "compilerPath": "C:/msys64/mingw64/bin/g++.exe",
+            "cStandard": "c99",
+            "cppStandard": "c++20"
+        }
+    ],
+    "version": 4
+}
\ No newline at end of file
diff --git a/include/vector.h b/include/vector.h
new file mode 100644
index 0000000..a59d3a9
--- /dev/null
+++ b/include/vector.h
@@ -0,0 +1,260 @@
+// vector.h - minimal std::vector-like container with a realloc-based fast path
+#pragma once
+
+#include <cassert>
+#include <cstddef>      // std::size_t, std::ptrdiff_t, std::max_align_t
+#include <cstdlib>      // realloc, free
+#include <limits>
+#include <memory>
+#include <new>          // std::bad_alloc, placement new
+#include <stdexcept>    // std::out_of_range, std::length_error
+#include <type_traits>
+#include <utility>      // std::move, std::forward, std::swap, std::move_if_noexcept
+
+// GCC/Clang helpers for inlining, cold paths and branch hints
+#if defined(__GNUC__)
+    #define UFV_ALWAYS_INLINE inline __attribute__((always_inline))
+    #define UFV_COLD          __attribute__((cold))
+    #define UFV_UNLIKELY(x)   __builtin_expect(!!(x), 0)
+#else
+    #define UFV_ALWAYS_INLINE inline
+    #define UFV_COLD
+    #define UFV_UNLIKELY(x)   (x)
+#endif
+
+/// Growable contiguous array exposing a subset of the std::vector interface.
+///
+/// Trivially copyable element types stored with the default allocator use
+/// realloc()/free() directly (POD_PATH); every other combination goes through
+/// std::allocator_traits. Iterators are raw pointers and are invalidated by
+/// any operation that changes capacity().
+template<typename T, typename Alloc = std::allocator<T>>
+class Vector {
+    static_assert(!std::is_const_v<T>, "Vector<T> cannot hold const T");
+
+    // The realloc fast path requires elements that may be relocated bytewise,
+    // an alignment that malloc/realloc guarantee, and the default allocator.
+    static constexpr bool POD_PATH =
+        std::is_trivially_copyable_v<T> &&
+        alignof(T) <= alignof(std::max_align_t) &&
+        std::is_same_v<Alloc, std::allocator<T>>;
+
+    using alloc_traits = std::allocator_traits<Alloc>;
+
+public:
+    using value_type      = T;
+    using allocator_type  = Alloc;
+    using size_type       = std::size_t;
+    using reference       = T&;
+    using const_reference = const T&;
+    using iterator        = T*;
+    using const_iterator  = const T*;
+
+    Vector() noexcept(std::is_nothrow_default_constructible_v<Alloc>)
+        : _b(nullptr), _e(nullptr), _cap(nullptr), _alloc() {}
+
+    explicit Vector(const Alloc& a) noexcept
+        : _b(nullptr), _e(nullptr), _cap(nullptr), _alloc(a) {}
+
+    /// Deep copy; the implicit member-wise copy would make two objects free
+    /// the same buffer.
+    Vector(const Vector& other)
+        : _b(nullptr), _e(nullptr), _cap(nullptr),
+          _alloc(alloc_traits::select_on_container_copy_construction(other._alloc)) {
+        try {
+            reserve(other.size());
+            for (const T& item : other) construct_at_end(item);
+        } catch (...) {
+            release();  // the destructor does not run if a constructor throws
+            throw;
+        }
+    }
+
+    /// Steals the buffer of `other`, leaving it empty.
+    Vector(Vector&& other) noexcept
+        : _b(other._b), _e(other._e), _cap(other._cap), _alloc(std::move(other._alloc)) {
+        other._b = other._e = other._cap = nullptr;
+    }
+
+    /// Copy- and move-assignment in one: the by-value parameter is built with
+    /// the matching constructor and then swapped in. The allocator travels
+    /// with the storage it allocated.
+    Vector& operator=(Vector other) noexcept {
+        swap(other);
+        return *this;
+    }
+
+    ~Vector() noexcept { release(); }
+
+    void swap(Vector& other) noexcept {
+        using std::swap;
+        swap(_b, other._b);
+        swap(_e, other._e);
+        swap(_cap, other._cap);
+        swap(_alloc, other._alloc);
+    }
+
+    UFV_ALWAYS_INLINE size_type size()     const noexcept { return static_cast<size_type>(_e - _b); }
+    UFV_ALWAYS_INLINE size_type capacity() const noexcept { return static_cast<size_type>(_cap - _b); }
+    UFV_ALWAYS_INLINE bool      empty()    const noexcept { return _b == _e; }
+
+    /// Largest element count whose byte size and pointer differences stay representable.
+    UFV_ALWAYS_INLINE size_type max_size() const noexcept {
+        return static_cast<size_type>((std::numeric_limits<std::ptrdiff_t>::max)()) / sizeof(T);
+    }
+
+    UFV_ALWAYS_INLINE iterator       begin()       noexcept { return _b; }
+    UFV_ALWAYS_INLINE const_iterator begin() const noexcept { return _b; }
+    UFV_ALWAYS_INLINE iterator       end()         noexcept { return _e; }
+    UFV_ALWAYS_INLINE const_iterator end()   const noexcept { return _e; }
+
+    UFV_ALWAYS_INLINE T*       data()       noexcept { return _b; }
+    UFV_ALWAYS_INLINE const T* data() const noexcept { return _b; }
+
+    /// Unchecked element access; `i` must be < size().
+    UFV_ALWAYS_INLINE reference       operator[](size_type i)       noexcept { return _b[i]; }
+    UFV_ALWAYS_INLINE const_reference operator[](size_type i) const noexcept { return _b[i]; }
+
+    /// Checked element access; throws std::out_of_range when `i >= size()`.
+    UFV_ALWAYS_INLINE reference at(size_type i) {
+        if (i >= size()) throw std::out_of_range("Vector::at");
+        return _b[i];
+    }
+    UFV_ALWAYS_INLINE const_reference at(size_type i) const {
+        if (i >= size()) throw std::out_of_range("Vector::at");
+        return _b[i];
+    }
+
+    /// Destroys all elements; capacity() is unchanged.
+    UFV_ALWAYS_INLINE void clear() noexcept {
+        if constexpr (!std::is_trivially_destructible_v<T>) {
+            while (_e != _b) alloc_traits::destroy(_alloc, --_e);
+        } else {
+            _e = _b;
+        }
+    }
+
+    /// Cold path: grows capacity geometrically (doubling) until it is at least
+    /// `minNeeded`. Throws std::length_error when `minNeeded > max_size()`.
+    UFV_COLD void reserve_grow(size_type minNeeded) {
+        const size_type limit = max_size();
+        if (minNeeded > limit) throw std::length_error("Vector::reserve_grow");
+        // Saturate at `limit` instead of letting the doubling wrap around to 0,
+        // which would spin forever in the loop below.
+        const auto doubled = [limit](size_type c) { return c > limit / 2 ? limit : c * 2; };
+        size_type newCap = capacity() ? doubled(capacity()) : 1;
+        while (newCap < minNeeded) newCap = doubled(newCap);
+        reallocate(newCap);
+    }
+
+    /// Ensures capacity() >= n, allocating exactly `n` slots when it has to grow.
+    UFV_ALWAYS_INLINE void reserve(size_type n) {
+        if (n <= capacity()) return;
+        if (n > max_size()) throw std::length_error("Vector::reserve");
+        reallocate(n);
+    }
+
+    /// Releases unused capacity. (reserve(size()) can never shrink anything.)
+    UFV_ALWAYS_INLINE void shrink_to_fit() {
+        if (size() < capacity()) reallocate(size());
+    }
+
+    UFV_ALWAYS_INLINE void push_back(const T& v) {
+        // The slow path copies `v` before reallocating because `v` may refer
+        // to an element of this vector.
+        if (UFV_UNLIKELY(_e == _cap)) { emplace_back_slow(v); return; }
+        construct_at_end(v);
+    }
+
+    UFV_ALWAYS_INLINE void push_back(T&& v) {
+        if (UFV_UNLIKELY(_e == _cap)) { emplace_back_slow(std::move(v)); return; }
+        construct_at_end(std::move(v));
+    }
+
+    template<typename... Args>
+    UFV_ALWAYS_INLINE reference emplace_back(Args&&... args) {
+        if (UFV_UNLIKELY(_e == _cap)) return emplace_back_slow(std::forward<Args>(args)...);
+        construct_at_end(std::forward<Args>(args)...);
+        return _e[-1];
+    }
+
+    UFV_ALWAYS_INLINE void pop_back() noexcept {
+        assert(_e > _b);
+        --_e;
+        if constexpr (!std::is_trivially_destructible_v<T>)
+            alloc_traits::destroy(_alloc, _e);
+    }
+
+private:
+    /// Constructs one element in the first unused slot; requires _e != _cap.
+    template<typename... Args>
+    UFV_ALWAYS_INLINE void construct_at_end(Args&&... args) {
+        if constexpr (POD_PATH) {
+            // Placement-new starts the object's lifetime in the raw malloc'd
+            // slot; assigning through `*_e` would require an existing object.
+            ::new (static_cast<void*>(_e)) T(std::forward<Args>(args)...);
+        } else {
+            alloc_traits::construct(_alloc, _e, std::forward<Args>(args)...);
+        }
+        ++_e;
+    }
+
+    /// Append when the buffer is full. The new value is materialized first so
+    /// arguments that alias existing elements stay valid across the growth.
+    template<typename... Args>
+    UFV_COLD reference emplace_back_slow(Args&&... args) {
+        T value(std::forward<Args>(args)...);
+        reserve_grow(size() + 1);
+        construct_at_end(std::move(value));
+        return _e[-1];
+    }
+
+    /// Moves the elements into a buffer of exactly `newCap` slots
+    /// (`newCap >= size()`); `newCap == 0` frees the buffer. Throws
+    /// std::bad_alloc (or whatever the allocator throws) when out of memory.
+    void reallocate(size_type newCap) {
+        if (newCap == 0) { release(); return; }
+        const size_type count = size();
+        if constexpr (POD_PATH) {
+            // realloc(nullptr, n) behaves like malloc(n); on failure the old
+            // block stays valid, so nothing is lost when we throw.
+            void* blk = std::realloc(_b, newCap * sizeof(T));
+            if (!blk) throw std::bad_alloc();
+            _b = static_cast<T*>(blk);
+        } else {
+            T* fresh = alloc_traits::allocate(_alloc, newCap);
+            size_type done = 0;
+            try {
+                // Move only if that cannot throw, otherwise copy, so the old
+                // buffer is still intact when construction fails midway.
+                for (; done < count; ++done)
+                    alloc_traits::construct(_alloc, fresh + done, std::move_if_noexcept(_b[done]));
+            } catch (...) {
+                while (done) alloc_traits::destroy(_alloc, fresh + --done);
+                alloc_traits::deallocate(_alloc, fresh, newCap);
+                throw;
+            }
+            clear();
+            if (_b) alloc_traits::deallocate(_alloc, _b, capacity());
+            _b = fresh;
+        }
+        _e   = _b + count;
+        _cap = _b + newCap;
+    }
+
+    /// Destroys all elements and returns the buffer to its source.
+    void release() noexcept {
+        if constexpr (POD_PATH) {
+            std::free(_b);
+        } else {
+            clear();
+            if (_b) alloc_traits::deallocate(_alloc, _b, capacity());
+        }
+        _b = _e = _cap = nullptr;
+    }
+
+    T*    _b;      // start of storage
+    T*    _e;      // one past the last constructed element
+    T*    _cap;    // one past the end of storage
+    Alloc _alloc;
+};
diff --git a/remake.yaml b/remake.yaml
new file mode 100644
index 0000000..3ab3c9c
--- /dev/null
+++ b/remake.yaml
@@ -0,0 +1,37 @@
+# Remake Build Configuration
+
+# Source folder
+src_dirs:
+  - src
+
+# Include directories
+include_dirs:
+  - include
+
+# Library paths
+lib_dirs: []
+
+# Output paths
+build_dir: build
+target: build/app.exe
+log_file: remake/build.log
+
+
+# C compiler and flags
+cc: gcc
+cflags:
+  - -std=c99
+  - -Wall
+
+# C++ compiler and flags
+cxx: g++
+cxxflags:
+  - -std=c++20
+  - -Wall
+  - -O3
+
+# Auto-detect these libraries (e.g. "glfw3" or "opengl32" )
+auto_libs: []
+
+# Auto-detect headers from these include folders
+auto_includes: []
diff --git a/src/test.cpp b/src/test.cpp
new file mode 100644
index 0000000..f4c0ce8
--- /dev/null
+++ b/src/test.cpp
@@ -0,0 +1,137 @@
+#include <chrono>
+#include <cstddef>
+#include <iomanip>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "vector.h"
+
+using namespace std;
+using namespace std::chrono;
+
+static constexpr size_t N = 100'000'000;
+static constexpr int R = 3;
+
+constexpr const char* COLOR_RESET = "\033[0m";
+constexpr const char* COLOR_RED = "\033[31m";
+constexpr const char* COLOR_GREEN = "\033[32m";
+
+// Written after every timed run so the measured work has an observable result.
+static volatile size_t g_sink = 0;
+
+/// Marks the contents of `v` as observed. Without this the optimizer is free
+/// to discard the fill loop, because the container dies unused.
+template<typename V>
+void keep_alive(const V& v) {
+    if (v.empty()) return;
+#if defined(__GNUC__)
+    // Empty asm that "reads" the buffer: forces the element stores to happen.
+    asm volatile("" : : "r"(&v[0]) : "memory");
+#endif
+    g_sink = g_sink + v.size();
+}
+
+/// Elapsed steady_clock time of one call to `fn`, in milliseconds.
+template<typename F>
+double run_bench(F&& fn) {
+    const auto start = steady_clock::now();
+    fn();
+    const auto end = steady_clock::now();
+    return duration_cast<duration<double, milli>>(end - start).count();
+}
+
+/// Mean of R timed calls to `fn`, in milliseconds.
+template<typename F>
+double run_avg(F&& fn) {
+    double total = 0.0;
+    for (int i = 0; i < R; ++i) {
+        total += run_bench(fn);
+    }
+    return total / R;
+}
+
+/// Prints one result row; PASS means Vector was strictly faster than std::vector.
+void print_test(const std::string& name, double std_t, double fast_t) {
+    // Guard the ratio so a zero measurement cannot print inf/nan.
+    const double speedup = fast_t > 0.0 ? std_t / fast_t : 0.0;
+    const bool passed = fast_t < std_t;
+    const char* status_col = passed ? COLOR_GREEN : COLOR_RED;
+    const char* status_str = passed ? "[PASS]" : "[FAIL]";
+
+    cout << status_col << status_str << COLOR_RESET
+         << ' ' << std::left << std::setw(25) << name
+         << " | std: " << std::right << std::setw(7) << fixed << setprecision(2) << std_t << "ms"
+         << " | fast: " << setw(7) << fast_t << "ms"
+         << " | x" << status_col << setw(5) << fixed << setprecision(2) << speedup << COLOR_RESET
+         << '\n';
+}
+
+// User-provided copy/move constructors make S non-trivially-copyable, so Vector<S> takes the allocator path.
+struct S {
+    int x, y;
+    S(int a, int b): x(a), y(b) {}
+    S(const S& o): x(o.x), y(o.y) {}
+    S(S&& o) noexcept: x(o.x), y(o.y) {}
+};
+
+/// Mean time to append `count` elements to a fresh Vec via `append(v, i)`.
+template<typename Vec, bool Reserve, typename Append>
+double time_fill(size_t count, Append append) {
+    return run_avg([&] {
+        Vec v;
+        if constexpr (Reserve) v.reserve(count);
+        for (size_t i = 0; i < count; ++i) append(v, i);
+        keep_alive(v);
+    });
+}
+
+/// Runs the same fill against both containers and prints the comparison.
+template<typename StdVec, typename FastVec, bool Reserve, typename Append>
+void bench_pair(const std::string& name, size_t count, Append append) {
+    const double t1 = time_fill<StdVec, Reserve>(count, append);
+    const double t2 = time_fill<FastVec, Reserve>(count, append);
+    print_test(name, t1, t2);
+}
+
+void TestIntPush_NoReserve() {
+    bench_pair<vector<int>, Vector<int>, false>("IntPush_NoReserve", N,
+        [](auto& v, size_t i) { v.push_back(int(i)); });
+}
+
+void TestIntPush_WithReserve() {
+    bench_pair<vector<int>, Vector<int>, true>("IntPush_WithReserve", N,
+        [](auto& v, size_t i) { v.push_back(int(i)); });
+}
+
+void TestIntEmplace_NoReserve() {
+    bench_pair<vector<int>, Vector<int>, false>("IntEmplace_NoReserve", N,
+        [](auto& v, size_t i) { v.emplace_back(int(i)); });
+}
+
+void TestIntEmplace_WithReserve() {
+    bench_pair<vector<int>, Vector<int>, true>("IntEmplace_WithReserve", N,
+        [](auto& v, size_t i) { v.emplace_back(int(i)); });
+}
+
+void TestNonTrivialPush_NoReserve() {
+    bench_pair<vector<S>, Vector<S>, false>("NonTrivialPush_NoReserve", N / 10,
+        [](auto& v, size_t i) { v.push_back(S(int(i), int(i))); });
+}
+
+void TestNonTrivialPush_WithReserve() {
+    bench_pair<vector<S>, Vector<S>, true>("NonTrivialPush_WithReserve", N / 10,
+        [](auto& v, size_t i) { v.push_back(S(int(i), int(i))); });
+}
+
+int main() {
+    cout << "\n=== Running Speed Tests (N=" << N << ", runs=" << R << ") ===\n\n";
+    TestIntPush_NoReserve();
+    TestIntPush_WithReserve();
+    TestIntEmplace_NoReserve();
+    TestIntEmplace_WithReserve();
+    TestNonTrivialPush_NoReserve();
+    TestNonTrivialPush_WithReserve();
+    cout << "\n";
+    return 0;
+}