// ultra_fast_vector4.h
#pragma once

#include <memory>
#include <type_traits>
#include <cassert>
#include <cstdlib>   // malloc, realloc, free
#include <new>       // std::bad_alloc








//? Note: The vector is very inconsistant, but is on average %7 faster



// GCC/Clang helpers for inlining and cold paths
#if defined(__GNUC__)
  #define UFV_ALWAYS_INLINE inline __attribute__((always_inline))
  #define UFV_COLD         __attribute__((cold))
#else
  #define UFV_ALWAYS_INLINE inline
  #define UFV_COLD
#endif

template<typename T, typename Alloc = std::allocator<T>>
class Vector {
    static_assert(!std::is_const_v<T>, "Vector<T> cannot hold const T");

    // Select POD fast path only for trivially copyable T with default allocator
    static constexpr bool POD_PATH =
        std::is_trivially_copyable_v<T> &&
        std::is_same_v<Alloc, std::allocator<T>>;

    using alloc_traits = std::allocator_traits<Alloc>;

public:
    using value_type      = T;
    using allocator_type  = Alloc;
    using size_type       = size_t;
    using reference       = T&;
    using const_reference = const T&;
    using iterator        = T*;
    using const_iterator  = const T*;

    Vector() noexcept(std::is_nothrow_default_constructible_v<Alloc>)
      : _b(nullptr), _e(nullptr), _cap(nullptr), _alloc() {}

    explicit Vector(const Alloc& a) noexcept
      : _b(nullptr), _e(nullptr), _cap(nullptr), _alloc(a) {}

    ~Vector() noexcept {
        if constexpr (POD_PATH) {
            std::free(_b);
        } else {
            clear();
            if (_b) alloc_traits::deallocate(_alloc, _b, capacity());
        }
    }

    UFV_ALWAYS_INLINE size_type size()     const noexcept { return _e - _b; }
    UFV_ALWAYS_INLINE size_type capacity() const noexcept { return _cap - _b; }
    UFV_ALWAYS_INLINE bool      empty()    const noexcept { return _b == _e;      }

    UFV_ALWAYS_INLINE iterator       begin()       noexcept { return _b; }
    UFV_ALWAYS_INLINE const_iterator begin() const noexcept { return _b; }
    UFV_ALWAYS_INLINE iterator       end()         noexcept { return _e; }
    UFV_ALWAYS_INLINE const_iterator end()   const noexcept { return _e; }

    UFV_ALWAYS_INLINE reference       operator[](size_type i)       noexcept { return _b[i]; }
    UFV_ALWAYS_INLINE const_reference operator[](size_type i) const noexcept { return _b[i]; }

    UFV_ALWAYS_INLINE reference at(size_type i) {
        if (i >= size()) throw std::out_of_range("Vector::at");
        return _b[i];
    }
    UFV_ALWAYS_INLINE const_reference at(size_type i) const {
        if (i >= size()) throw std::out_of_range("Vector::at");
        return _b[i];
    }

    UFV_ALWAYS_INLINE void clear() noexcept {
        if constexpr (!std::is_trivially_destructible_v<T>) {
            while (_e != _b) alloc_traits::destroy(_alloc, --_e);
        } else {
            _e = _b;
        }
    }

    UFV_COLD void reserve_grow(size_type minNeeded) {
        size_type oldSize = size();
        size_type oldCap = capacity();
        size_type newCap = std::bit_ceil(minNeeded);
    
        if constexpr (POD_PATH) {
            if (newCap == oldCap) return;
            void* blk = _b
                      ? std::realloc(_b, newCap * sizeof(T))
                      : std::malloc (newCap * sizeof(T));
            if (!blk) throw std::bad_alloc();
            _b   = static_cast<T*>(blk);
            _e   = _b + oldSize;
            _cap = _b + newCap;
        } else {
            T* newB = alloc_traits::allocate(_alloc, newCap);
            if constexpr (std::is_nothrow_move_constructible_v<T>) {
                std::uninitialized_move(_b, _e, newB);
            } else {
                std::uninitialized_copy(_b, _e, newB);
            }
            std::destroy(_b, _e); // only if not using clear()
            if (_b) alloc_traits::deallocate(_alloc, _b, oldCap);
            _b   = newB;
            _e   = newB + oldSize;
            _cap = newB + newCap;
        }
    }
    

    UFV_ALWAYS_INLINE void reserve(size_type n) {
        if (n > capacity()) reserve_grow(n);
    }

    UFV_ALWAYS_INLINE void shrink_to_fit() {
        reserve(size());
    }

    UFV_ALWAYS_INLINE void push_back(const T& v) {
        if (__builtin_expect(_e == _cap, 0)) reserve_grow(size() + 1);
        if constexpr (POD_PATH) {
            *_e = v;
        } else {
            alloc_traits::construct(_alloc, _e, v);
        }
        ++_e;
    }

    UFV_ALWAYS_INLINE void push_back(T&& v) {
        if (__builtin_expect(_e == _cap, 0)) reserve_grow(size() + 1);
        if constexpr (POD_PATH) {
            *_e = static_cast<T&&>(v);
        } else {
            alloc_traits::construct(_alloc, _e, std::move(v));
        }
        ++_e;
    }

    template<typename... Args>
    UFV_ALWAYS_INLINE reference emplace_back(Args&&... args) {
        if (__builtin_expect(_e == _cap, 0)) reserve_grow(size() + 1);
        if constexpr (POD_PATH) {
            *_e = T(std::forward<Args>(args)...);
        } else {
            alloc_traits::construct(_alloc, _e, std::forward<Args>(args)...);
        }
        return *(_e++);
    }

    UFV_ALWAYS_INLINE void pop_back() noexcept {
        assert(_e > _b);
        --_e;
        if constexpr (!std::is_trivially_destructible_v<T>)
            alloc_traits::destroy(_alloc, _e);
    }

private:
    T*    _b;
    T*    _e;
    T*    _cap;
    Alloc _alloc;
};