perf(ikv2): reduce indexed root lookup and write overhead

This commit is contained in:
2026-06-15 22:05:23 -05:00
parent ffe30824a1
commit 8ea2e8990e

View File

@@ -9,11 +9,13 @@
/* One entry of an indexed root: a key, its cached lookup metadata, and the
 * location/contents of its serialized payload. */
typedef struct
{
/* NUL-terminated key string (strlen/strcmp are applied to it). */
char *key;
/* Cached strlen(key); compared before strcmp during lookup. */
uint32_t key_length;
/* Cached ikv2_hash_key(key); selects the bucket and pre-filters candidates. */
uint32_t key_hash;
/* Node type of the entry's value, narrowed to one byte. */
uint8_t type;
/* Offset handed to the file reader together with payload_size; the reader
 * seeks with SEEK_SET, i.e. from the start of the file. */
uint32_t payload_offset;
/* Size of the payload in bytes. */
uint32_t payload_size;
/* Payload bytes when they are held in memory.
 * NOTE(review): ownership differs between call sites — confirm who frees. */
uint8_t *payload_data;
/* Set to true once the entry has been materialized by the lazy loader. */
bool loaded;
/* Index of the next entry in the same hash bucket, or IKV2_INDEX_NONE
 * when this entry ends the chain. */
uint32_t next_in_bucket;
} ikv2_index_entry_t;
typedef enum
@@ -27,9 +29,12 @@ typedef struct
ikv_lazy_state_t base;
ikv2_source_kind_t source_kind;
char *file_path;
FILE *file_handle;
uint8_t *memory_data;
size_t memory_size;
uint32_t entry_count;
uint32_t bucket_count;
uint32_t *bucket_heads;
ikv2_index_entry_t *entries;
} ikv2_lazy_root_t;
@@ -48,6 +53,48 @@ typedef struct
size_t offset;
} ikv2_cursor_t;
/* Sentinel entry index: marks an empty bucket head and the end of a
 * bucket chain (next_in_bucket). */
#define IKV2_INDEX_NONE 0xFFFFFFFFu
/*
 * 32-bit FNV-1a hash of a NUL-terminated string.
 *
 * value: string to hash; NULL is accepted and hashes like the empty string.
 * Returns the hash; for NULL or "" this is the FNV offset basis.
 */
static uint32_t ikv2_hash_key(const char *value)
{
    uint32_t digest = 0x811C9DC5u; /* FNV-1a offset basis (2166136261) */

    if (value == NULL)
        return digest;

    /* Walk the bytes as unsigned so high-bit characters are not sign-extended. */
    for (const unsigned char *cursor = (const unsigned char *)value; *cursor != '\0'; ++cursor)
    {
        digest ^= (uint32_t)*cursor;
        digest *= 0x01000193u; /* FNV prime (16777619) */
    }
    return digest;
}
/*
 * Number of bytes needed to encode `value` as a variable-length integer
 * carrying 7 payload bits per byte.
 *
 * Returns a count in the range 1..5 (zero still takes one byte).
 */
static uint32_t ikv2_varu32_size(uint32_t value)
{
    uint32_t bytes = 0u;

    /* Every byte consumes seven bits; at least one byte is always emitted. */
    do
    {
        ++bytes;
        value >>= 7u;
    } while (value != 0u);

    return bytes;
}
/*
 * Chooses the number of hash buckets for `entry_count` entries.
 *
 * The result is a power of two, at least 64, and — unless the 2^31 cap is
 * hit — no smaller than entry_count * 4 / 3 + 1, which keeps the average
 * chain length below 0.75.
 *
 * The target is computed in 64 bits: in 32-bit arithmetic `entry_count * 4`
 * wraps once entry_count exceeds 0x3FFFFFFF, which would silently yield a
 * tiny table for very large inputs.
 */
static uint32_t ikv2_bucket_count_for_entries(uint32_t entry_count)
{
    const uint64_t wanted = ((uint64_t)entry_count * 4u) / 3u + 1u;
    uint32_t bucket_count = 64u;

    while (bucket_count < wanted)
    {
        /* Doubling past 2^31 would overflow uint32_t; settle for the cap. */
        if (bucket_count > 0x7FFFFFFFu)
            break;
        bucket_count *= 2u;
    }
    return bucket_count;
}
static bool ikv2_buffer_reserve(ikv2_buffer_t *buffer, size_t additional)
{
uint8_t *next = NULL;
@@ -277,13 +324,6 @@ static char *ikv2_file_read_string(FILE *file)
return value;
}
/*
 * qsort-style comparator for ikv2_index_entry_t elements.
 *
 * Orders entries by key using strcmp; an entry whose key is NULL is
 * compared as if its key were the empty string.
 */
static int ikv2_compare_entries(const void *lhs, const void *rhs)
{
    const ikv2_index_entry_t *first = lhs;
    const ikv2_index_entry_t *second = rhs;
    const char *first_key = first->key;
    const char *second_key = second->key;

    if (first_key == NULL)
        first_key = "";
    if (second_key == NULL)
        second_key = "";

    return strcmp(first_key, second_key);
}
static bool ikv2_collect_root_entries(const ikv_node_t *root, ikv2_index_entry_t **out_entries, uint32_t *out_count)
{
ikv2_index_entry_t *entries = NULL;
@@ -311,35 +351,24 @@ static bool ikv2_collect_root_entries(const ikv_node_t *root, ikv2_index_entry_t
IKV_FREE(payload);
for (uint32_t i = 0; i < index; ++i)
{
IKV_FREE(entries[i].key);
IKV_FREE(entries[i].payload_data);
}
IKV_FREE(entries);
return false;
}
entries[index].key = ikv2_strdup(node->key ? node->key : "");
entries[index].key = (char *)(node->key ? node->key : "");
entries[index].key_length = (uint32_t)strlen(entries[index].key);
entries[index].key_hash = ikv2_hash_key(entries[index].key);
entries[index].type = (uint8_t)node->type;
entries[index].payload_data = payload;
entries[index].payload_size = payload_size;
entries[index].loaded = false;
if (!entries[index].key)
{
IKV_FREE(payload);
for (uint32_t i = 0; i < index; ++i)
{
IKV_FREE(entries[i].key);
IKV_FREE(entries[i].payload_data);
}
IKV_FREE(entries);
return false;
}
entries[index].next_in_bucket = IKV2_INDEX_NONE;
++index;
}
}
qsort(entries, count, sizeof(*entries), ikv2_compare_entries);
*out_entries = entries;
*out_count = count;
return true;
@@ -351,6 +380,9 @@ static bool ikv2_build_indexed_binary(const ikv_node_t *root, uint8_t **out_data
uint32_t entry_count = 0u;
uint32_t header_size = 0u;
uint32_t payload_base = 0u;
uint32_t total_size = 0u;
const char *root_key = NULL;
uint32_t root_key_length = 0u;
ikv2_buffer_t buffer = {0};
if (!root || !out_data || !out_size || root->type != IKV_OBJECT)
@@ -363,19 +395,34 @@ static bool ikv2_build_indexed_binary(const ikv_node_t *root, uint8_t **out_data
if (!ikv2_collect_root_entries(root, &entries, &entry_count))
return false;
root_key = (root->key && root->key[0]) ? root->key : "root";
root_key_length = (uint32_t)strlen(root_key);
header_size = 4u + 1u + 4u + 4u;
header_size += 1u + (uint32_t)strlen(root->key && root->key[0] ? root->key : "root");
header_size += 1u;
header_size += ikv2_varu32_size(root_key_length) + root_key_length;
header_size += ikv2_varu32_size(entry_count);
for (uint32_t i = 0; i < entry_count; ++i)
header_size += 1u + (uint32_t)strlen(entries[i].key ? entries[i].key : "");
header_size += ikv2_varu32_size(entries[i].key_length) + entries[i].key_length;
header_size += entry_count * (1u + 4u + 4u);
payload_base = header_size;
total_size = header_size;
for (uint32_t i = 0; i < entry_count; ++i)
total_size += entries[i].payload_size;
buffer.data = total_size ? (uint8_t *)IKV_MALLOC(total_size) : NULL;
buffer.capacity = total_size;
if (total_size > 0u && !buffer.data)
{
for (uint32_t i = 0; i < entry_count; ++i)
IKV_FREE(entries[i].payload_data);
IKV_FREE(entries);
return false;
}
ikv2_buffer_write_bytes(&buffer, "iKv2", 4u);
ikv2_buffer_write_u8(&buffer, (uint8_t)'b');
ikv2_buffer_write_u32le(&buffer, IKV_V2);
ikv2_buffer_write_u32le(&buffer, IKV2_BINARY_FLAGS_INDEXED_ROOT);
ikv2_buffer_write_string(&buffer, (root->key && root->key[0]) ? root->key : "root");
ikv2_buffer_write_string(&buffer, root_key);
ikv2_buffer_write_varu32(&buffer, entry_count);
for (uint32_t i = 0; i < entry_count; ++i)
@@ -397,7 +444,6 @@ static bool ikv2_build_indexed_binary(const ikv_node_t *root, uint8_t **out_data
for (uint32_t i = 0; i < entry_count; ++i)
{
IKV_FREE(entries[i].key);
IKV_FREE(entries[i].payload_data);
}
IKV_FREE(entries);
@@ -422,70 +468,88 @@ static void ikv2_lazy_root_destroy(ikv_lazy_state_t *state)
for (uint32_t i = 0; i < lazy_root->entry_count; ++i)
IKV_FREE(lazy_root->entries[i].key);
if (lazy_root->file_handle)
IKV_FCLOSE(lazy_root->file_handle);
IKV_FREE(lazy_root->bucket_heads);
IKV_FREE(lazy_root->entries);
IKV_FREE(lazy_root->file_path);
IKV_FREE(lazy_root->memory_data);
IKV_FREE(lazy_root);
}
/*
 * Builds the hash-bucket index over lazy_root->entries.
 *
 * Sizes the table with ikv2_bucket_count_for_entries, allocates the bucket
 * head array, marks every bucket empty, then links each entry at the front
 * of the chain selected by key_hash % bucket_count. Each entry's key_hash
 * must already be filled in.
 *
 * Returns false when lazy_root is NULL or the allocation fails; in the
 * latter case bucket_count has already been set and bucket_heads is NULL.
 */
static bool ikv2_build_entry_buckets(ikv2_lazy_root_t *lazy_root)
{
    uint32_t *heads = NULL;
    uint32_t slots = 0u;

    if (lazy_root == NULL)
        return false;

    slots = ikv2_bucket_count_for_entries(lazy_root->entry_count);
    lazy_root->bucket_count = slots;

    heads = (uint32_t *)IKV_MALLOC(sizeof(*heads) * slots);
    lazy_root->bucket_heads = heads;
    if (heads == NULL)
        return false;

    /* Start with every chain empty. */
    for (uint32_t slot = 0u; slot < slots; ++slot)
        heads[slot] = IKV2_INDEX_NONE;

    /* Push each entry onto the head of its bucket's chain. */
    for (uint32_t index = 0u; index < lazy_root->entry_count; ++index)
    {
        ikv2_index_entry_t *entry = &lazy_root->entries[index];
        uint32_t *head = &heads[entry->key_hash % slots];

        entry->next_in_bucket = *head;
        *head = index;
    }

    return true;
}
/*
 * Looks up `key` among the indexed root entries via the hash-bucket chains.
 *
 * lazy_root: root whose bucket index has been built.
 * key:       NUL-terminated key to search for.
 *
 * Returns the matching entry, or NULL when any argument is NULL, the root
 * has no entries or no bucket table, or no entry carries that key.
 */
static ikv2_index_entry_t *ikv2_find_entry(ikv2_lazy_root_t *lazy_root, const char *key)
{
    uint32_t hash = 0u;
    uint32_t bucket = 0u;
    uint32_t entry_index = IKV2_INDEX_NONE;
    size_t key_length = 0u;

    if (!lazy_root || !key || lazy_root->entry_count == 0u ||
        lazy_root->bucket_count == 0u || !lazy_root->bucket_heads)
        return NULL;

    hash = ikv2_hash_key(key);
    key_length = strlen(key);
    bucket = hash % lazy_root->bucket_count;
    entry_index = lazy_root->bucket_heads[bucket];

    while (entry_index != IKV2_INDEX_NONE)
    {
        ikv2_index_entry_t *entry = &lazy_root->entries[entry_index];

        /* Cheap integer checks first; strcmp only runs on a hash+length match.
         * The length is compared at full width so an over-long key cannot
         * alias a short one through truncation. */
        if (entry->key_hash == hash &&
            (size_t)entry->key_length == key_length &&
            strcmp(key, entry->key ? entry->key : "") == 0)
            return entry;

        entry_index = entry->next_in_bucket;
    }

    return NULL;
}
/*
 * Reads one payload from the root's already-open file handle.
 *
 * lazy_root: root whose file_handle is open for reading; the handle is left
 *            open, but its file position is changed by this call.
 * offset:    byte offset of the payload, measured from the start of the file.
 * size:      number of bytes to read.
 * out_data:  receives a newly allocated buffer holding the payload; set to
 *            NULL on failure. The caller is expected to release it with
 *            IKV_FREE.
 *
 * Returns true on success; false on a NULL argument, a missing file handle,
 * a failed seek, an allocation failure, or a short read.
 */
static bool ikv2_read_payload_from_file(ikv2_lazy_root_t *lazy_root, uint32_t offset, uint32_t size, uint8_t **out_data)
{
    uint8_t *data = NULL;

    if (!lazy_root || !lazy_root->file_handle || !out_data)
        return false;

    *out_data = NULL;

    if (IKV_FSEEK(lazy_root->file_handle, (long)offset, SEEK_SET) != 0)
        return false;

    /* Request at least one byte: a zero-sized allocation may legitimately
     * return NULL, which would otherwise be mistaken for out-of-memory and
     * make empty payloads unreadable. */
    data = (uint8_t *)IKV_MALLOC(size > 0u ? size : 1u);
    if (!data)
        return false;

    if (size > 0u && IKV_FREAD(data, 1u, size, lazy_root->file_handle) != size)
    {
        IKV_FREE(data);
        return false;
    }

    *out_data = data;
    return true;
}
@@ -516,7 +580,7 @@ static ikv_node_t *ikv2_lazy_root_load_object_key(ikv_lazy_state_t *state, ikv_n
}
else
{
if (!ikv2_read_payload_from_file(lazy_root->file_path, entry->payload_offset, entry->payload_size, &owned_payload))
if (!ikv2_read_payload_from_file(lazy_root, entry->payload_offset, entry->payload_size, &owned_payload))
return NULL;
payload_data = owned_payload;
}
@@ -530,7 +594,6 @@ static ikv_node_t *ikv2_lazy_root_load_object_key(ikv_lazy_state_t *state, ikv_n
return NULL;
}
entry->loaded = true;
return node;
}
@@ -610,6 +673,9 @@ static ikv_node_t *ikv2_parse_indexed_binary_buffer(uint8_t *buffer, size_t size
ikv_free(root);
return NULL;
}
lazy_root->entries[i].key_length = (uint32_t)strlen(lazy_root->entries[i].key);
lazy_root->entries[i].key_hash = ikv2_hash_key(lazy_root->entries[i].key);
lazy_root->entries[i].next_in_bucket = IKV2_INDEX_NONE;
}
for (uint32_t i = 0; i < entry_count; ++i)
@@ -624,6 +690,13 @@ static ikv_node_t *ikv2_parse_indexed_binary_buffer(uint8_t *buffer, size_t size
}
}
if (!ikv2_build_entry_buckets(lazy_root))
{
ikv2_lazy_root_destroy(&lazy_root->base);
ikv_free(root);
return NULL;
}
lazy_root->source_kind = IKV2_SOURCE_MEMORY;
lazy_root->memory_data = buffer;
lazy_root->memory_size = size;
@@ -758,6 +831,9 @@ static ikv_node_t *ikv2_parse_binary_file(const char *path)
IKV_FCLOSE(file);
return NULL;
}
lazy_root->entries[i].key_length = (uint32_t)strlen(lazy_root->entries[i].key);
lazy_root->entries[i].key_hash = ikv2_hash_key(lazy_root->entries[i].key);
lazy_root->entries[i].next_in_bucket = IKV2_INDEX_NONE;
}
for (uint32_t i = 0; i < entry_count; ++i)
@@ -773,7 +849,15 @@ static ikv_node_t *ikv2_parse_binary_file(const char *path)
}
}
if (!ikv2_build_entry_buckets(lazy_root))
{
ikv2_lazy_root_destroy(&lazy_root->base);
ikv_free(root);
IKV_FCLOSE(file);
return NULL;
}
lazy_root->file_handle = file;
root->lazy_state = &lazy_root->base;
return root;
}