diff --git a/renderers/win32_gdi/clay_renderer_gdi.c b/renderers/win32_gdi/clay_renderer_gdi.c index 25cfdfd..99ccd6c 100644 --- a/renderers/win32_gdi/clay_renderer_gdi.c +++ b/renderers/win32_gdi/clay_renderer_gdi.c @@ -1,9 +1,262 @@ #include + +// #define USE_INTRINSICS +// #define USE_FAST_SQRT + +#if defined(USE_INTRINSICS) +#include +#endif + #include "../../clay.h" HDC renderer_hdcMem = {0}; HBITMAP renderer_hbmMem = {0}; HANDLE renderer_hOld = {0}; +bool gdi_fabulous = true; + +#define RECTWIDTH(rc) ((rc).right - (rc).left) +#define RECTHEIGHT(rc) ((rc).bottom - (rc).top) + +/*----------------------------------------------------------------------------+ + | Math stuff start | + +----------------------------------------------------------------------------*/ +#if defined(USE_INTRINSICS) +#define sqrtf_impl(x) intrin_sqrtf(x) +#elif defined(USE_FAST_SQRT) +#define sqrtf_impl(x) fast_sqrtf(x) +#else +#define sqrtf_impl(x) sqrtf(x) // Fallback to std sqrtf +#endif + +// Use intrinsics +#if defined(USE_INTRINSICS) +inline float intrin_sqrtf(const float f) +{ + __m128 temp = _mm_set_ss(f); + temp = _mm_sqrt_ss(temp); + return _mm_cvtss_f32(temp); +} +#endif // defined(USE_INTRINSICS) + +// Use fast inverse square root +#if defined(USE_FAST_SQRT) +float fast_inv_sqrtf(float number) +{ + const float threehalfs = 1.5f; + + float x2 = number * 0.5f; + float y = number; + + // Evil bit-level hacking + uint32_t i = *(uint32_t*)&y; + i = 0x5f3759df - (i >> 1); // Initial guess for Newton's method + y = *(float*)&i; + + // One iteration of Newton's method + y = y * (threehalfs - (x2 * y * y)); // y = y * (1.5 - 0.5 * x * y^2) + + return y; +} + +// Fast square root approximation using the inverse square root +float fast_sqrtf(float number) +{ + if (number < 0.0f) return 0.0f; // Handle negative input + return number * fast_inv_sqrtf(number); +} +#endif +/*----------------------------------------------------------------------------+ + | Math stuff end | + +----------------------------------------------------------------------------*/ + +static inline Clay_Color ColorBlend(Clay_Color base, Clay_Color overlay, float factor) +{ + Clay_Color blended; + + // Normalize alpha values for multiplications + float base_a = base.a / 255.0f; + float overlay_a = overlay.a / 255.0f; + + overlay_a *= factor; + + float out_a = overlay_a + base_a * (1.0f - overlay_a); + + // Avoid division by zero and fully transparent cases + if (out_a <= 0.0f) + { + return (Clay_Color) { .a = 0, .r = 0, .g = 0, .b = 0 }; + } + + blended.r = (overlay.r * overlay_a + base.r * base_a * (1.0f - overlay_a)) / out_a; + blended.g = (overlay.g * overlay_a + base.g * base_a * (1.0f - overlay_a)) / out_a; + blended.b = (overlay.b * overlay_a + base.b * base_a * (1.0f - overlay_a)) / out_a; + blended.a = out_a * 255.0f; // Denormalize alpha back + + return blended; +} + +static float RoundedRectPixelCoverage(int x, int y, const Clay_CornerRadius radius, int width, int height) { + // Check if the pixel is in one of the four rounded corners + + if (x < radius.topLeft && y < radius.topLeft) { + // Top-left corner + float dx = radius.topLeft - x - 1; + float dy = radius.topLeft - y - 1; + float distance = sqrtf_impl(dx * dx + dy * dy); + if (distance > radius.topLeft) + return 0.0f; + if (distance <= radius.topLeft - 1) + return 1.0f; + return radius.topLeft - distance; + } + else if (x >= width - radius.topRight && y < radius.topRight) { + // Top-right corner + float dx = x - (width - radius.topRight); + float dy = radius.topRight - y - 1; + float distance = sqrtf_impl(dx * dx + dy * dy); + if (distance > radius.topRight) + return 0.0f; + if (distance <= radius.topRight - 1) + return 1.0f; + return radius.topRight - distance; + } + else if (x < radius.bottomLeft && y >= height - radius.bottomLeft) { + // Bottom-left corner + float dx = radius.bottomLeft - x - 1; + float dy = y - (height - radius.bottomLeft); + float distance = sqrtf_impl(dx * dx + dy * dy); + if (distance > radius.bottomLeft) + return 0.0f; + if (distance <= radius.bottomLeft - 1) + return 1.0f; + return radius.bottomLeft - distance; + } + else if (x >= width - radius.bottomRight && y >= height - radius.bottomRight) { + // Bottom-right corner + float dx = x - (width - radius.bottomRight); + float dy = y - (height - radius.bottomRight); + float distance = sqrtf_impl(dx * dx + dy * dy); + if (distance > radius.bottomRight) + return 0.0f; + if (distance <= radius.bottomRight - 1) + return 1.0f; + return radius.bottomRight - distance; + } + else { + // Not in a corner, full coverage + return 1.0f; + } +} + +typedef struct { + HDC hdcMem; + HBITMAP hbmMem; + HBITMAP hbmMemPrev; + void* pBits; + SIZE size; +} HDCSubstitute; + +static void CreateHDCSubstitute(HDCSubstitute* phdcs, HDC hdcSrc, PRECT prc) +{ + if (prc == NULL) + return; + + phdcs->size = (SIZE){ RECTWIDTH(*prc), RECTHEIGHT(*prc) }; + if (phdcs->size.cx <= 0 || phdcs->size.cy <= 0) + return; + + phdcs->hdcMem = CreateCompatibleDC(hdcSrc); + if (phdcs->hdcMem == NULL) + return; + + // Create a 32-bit DIB section for the memory DC + BITMAPINFO bmi = { 0 }; + bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); + bmi.bmiHeader.biWidth = phdcs->size.cx; + bmi.bmiHeader.biHeight = -phdcs->size.cy; // I think it's faster? Probably + bmi.bmiHeader.biPlanes = 1; + bmi.bmiHeader.biBitCount = 32; + bmi.bmiHeader.biCompression = BI_RGB; + + phdcs->pBits = NULL; + + phdcs->hbmMem = CreateDIBSection(phdcs->hdcMem, &bmi, DIB_RGB_COLORS, &phdcs->pBits, NULL, 0); + if (phdcs->hbmMem == NULL) + { + DeleteDC(phdcs->hdcMem); + return; + } + + // Select the DIB section into the memory DC + phdcs->hbmMemPrev = SelectObject(phdcs->hdcMem, phdcs->hbmMem); + + // Copy the content of the target DC to the memory DC + BitBlt(phdcs->hdcMem, 0, 0, phdcs->size.cx, phdcs->size.cy, hdcSrc, prc->left, prc->top, SRCCOPY); +} + +static void DestroyHDCSubstitute(HDCSubstitute* phdcs) +{ + if (phdcs == NULL) + return; + + // Clean up + SelectObject(phdcs->hdcMem, phdcs->hbmMemPrev); + DeleteObject(phdcs->hbmMem); + DeleteDC(phdcs->hdcMem); + + ZeroMemory(phdcs, sizeof(HDCSubstitute)); +} + +static void __Clay_Win32_FillRoundRect(HDC hdc, PRECT prc, Clay_Color color, Clay_CornerRadius radius) +{ + HDCSubstitute substitute = { 0 }; + CreateHDCSubstitute(&substitute, hdc, prc); + + bool has_corner_radius = radius.topLeft || radius.topRight || radius.bottomLeft || radius.bottomRight; + + if (has_corner_radius) + { + // Limit the corner radius to the minimum of half the width and half the height + float max_radius = (float)fmin(substitute.size.cx / 2.0f, substitute.size.cy / 2.0f); + if (radius.topLeft > max_radius) radius.topLeft = max_radius; + if (radius.topRight > max_radius) radius.topRight = max_radius; + if (radius.bottomLeft > max_radius) radius.bottomLeft = max_radius; + if (radius.bottomRight > max_radius) radius.bottomRight = max_radius; + } + + // Iterate over each pixel in the DIB section + uint32_t* pixels = (uint32_t*)substitute.pBits; + for (int y = 0; y < substitute.size.cy; ++y) + { + for (int x = 0; x < substitute.size.cx; ++x) + { + float coverage = 1.0f; + if (has_corner_radius) + coverage = RoundedRectPixelCoverage(x, y, radius, substitute.size.cx, substitute.size.cy); + + if (coverage > 0.0f) + { + uint32_t pixel = pixels[y * substitute.size.cx + x]; + Clay_Color dst_color = { + .r = (float)((pixel >> 16) & 0xFF), // Red + .g = (float)((pixel >> 8) & 0xFF), // Green + .b = (float)(pixel & 0xFF), // Blue + .a = 255.0f // Fully opaque + }; + Clay_Color blended = ColorBlend(dst_color, color, coverage); + + pixels[y * substitute.size.cx + x] = + ((uint32_t)(blended.b) << 0) | + ((uint32_t)(blended.g) << 8) | + ((uint32_t)(blended.r) << 16); + } + } + } + + // Copy the blended content back to the target DC + BitBlt(hdc, prc->left, prc->top, substitute.size.cx, substitute.size.cy, substitute.hdcMem, 0, 0, SRCCOPY); + DestroyHDCSubstitute(&substitute); +} void Clay_Win32_Render(HWND hwnd, Clay_RenderCommandArray renderCommands, HFONT* fonts) { @@ -71,23 +324,37 @@ void Clay_Win32_Render(HWND hwnd, Clay_RenderCommandArray renderCommands, HFONT* r.right = boundingBox.x + boundingBox.width; r.bottom = boundingBox.y + boundingBox.height; - HBRUSH recColor = CreateSolidBrush(RGB(rrd.backgroundColor.r, rrd.backgroundColor.g, rrd.backgroundColor.b)); + bool translucid = rrd.backgroundColor.a > 0.0f && rrd.backgroundColor.a < 255.0f; + bool has_rounded_corners = rrd.cornerRadius.topLeft > 0.0f + || rrd.cornerRadius.topRight > 0.0f + || rrd.cornerRadius.bottomLeft > 0.0f + || rrd.cornerRadius.bottomRight > 0.0f; - if (rrd.cornerRadius.topLeft > 0) + if (gdi_fabulous && (translucid || has_rounded_corners)) { - HRGN roundedRectRgn = CreateRoundRectRgn( - r.left, r.top, r.right + 1, r.bottom + 1, - rrd.cornerRadius.topLeft * 2, rrd.cornerRadius.topLeft * 2); - - FillRgn(renderer_hdcMem, roundedRectRgn, recColor); - DeleteObject(roundedRectRgn); + __Clay_Win32_FillRoundRect(renderer_hdcMem, &r, rrd.backgroundColor, rrd.cornerRadius); } else { - FillRect(renderer_hdcMem, &r, recColor); + HBRUSH recColor = CreateSolidBrush(RGB(rrd.backgroundColor.r, rrd.backgroundColor.g, rrd.backgroundColor.b)); + + if (has_rounded_corners) + { + HRGN roundedRectRgn = CreateRoundRectRgn( + r.left, r.top, r.right + 1, r.bottom + 1, + rrd.cornerRadius.topLeft * 2, rrd.cornerRadius.topLeft * 2); + + FillRgn(renderer_hdcMem, roundedRectRgn, recColor); + DeleteObject(roundedRectRgn); + } + else + { + FillRect(renderer_hdcMem, &r, recColor); + } + + DeleteObject(recColor); } - DeleteObject(recColor); break; }