perf(ikv2): reduce indexed root lookup and write overhead
This commit is contained in:
@@ -9,11 +9,13 @@
|
||||
/*
 * One root-level entry of an indexed ikv2 document.
 *
 * The same record is used while writing (entries collected from a root node)
 * and while reading (entries parsed from an index and looked up by key).
 */
typedef struct
{
    /* Entry key text; key_length and key_hash below are derived from it. */
    char *key;
    /* strlen(key), cached so lookups and size computations need not rescan. */
    uint32_t key_length;
    /* ikv2_hash_key(key), cached for bucket selection and fast mismatch. */
    uint32_t key_hash;
    /* Node type tag, stored narrowed to one byte. */
    uint8_t type;
    /* Byte offset handed to the file seek when the payload is read lazily. */
    uint32_t payload_offset;
    /* Payload length in bytes. */
    uint32_t payload_size;
    /* Serialized payload bytes held in memory while building an image. */
    uint8_t *payload_data;
    /* Cleared when the entry is collected; set once its node has been loaded. */
    bool loaded;
    /* Index of the next entry in the same hash bucket, or IKV2_INDEX_NONE. */
    uint32_t next_in_bucket;
} ikv2_index_entry_t;
|
||||
|
||||
typedef enum
|
||||
@@ -27,9 +29,12 @@ typedef struct
|
||||
ikv_lazy_state_t base;
|
||||
ikv2_source_kind_t source_kind;
|
||||
char *file_path;
|
||||
FILE *file_handle;
|
||||
uint8_t *memory_data;
|
||||
size_t memory_size;
|
||||
uint32_t entry_count;
|
||||
uint32_t bucket_count;
|
||||
uint32_t *bucket_heads;
|
||||
ikv2_index_entry_t *entries;
|
||||
} ikv2_lazy_root_t;
|
||||
|
||||
@@ -48,6 +53,48 @@ typedef struct
|
||||
size_t offset;
|
||||
} ikv2_cursor_t;
|
||||
|
||||
#define IKV2_INDEX_NONE 0xFFFFFFFFu
|
||||
|
||||
/*
 * Hash a NUL-terminated key with 32-bit FNV-1a (xor the byte, then multiply
 * by the FNV prime).
 *
 * value  key text; a NULL pointer hashes like the empty string.
 *
 * Returns the 32-bit hash; the empty input yields the FNV offset basis.
 */
static uint32_t ikv2_hash_key(const char *value)
{
    uint32_t digest = 2166136261u;
    const unsigned char *cursor = (const unsigned char *)value;

    if (!cursor)
        return digest;

    for (; *cursor != 0u; ++cursor)
    {
        digest ^= (uint8_t)*cursor;
        digest *= 16777619u;
    }

    return digest;
}
|
||||
|
||||
/*
 * Number of bytes needed to store `value` as a variable-length unsigned
 * integer carrying 7 payload bits per byte.
 *
 * Returns a count between 1 (values below 0x80) and 5 (values needing more
 * than 28 bits).
 */
static uint32_t ikv2_varu32_size(uint32_t value)
{
    uint32_t byte_count;

    /* Every further 7-bit group that is still non-empty costs one more byte. */
    for (byte_count = 1u; value > 0x7Fu; value >>= 7u)
        byte_count++;

    return byte_count;
}
|
||||
|
||||
/*
 * Choose the number of hash buckets for an index holding `entry_count` keys.
 *
 * The count starts at 64 and doubles until it reaches
 * entry_count * 4 / 3 + 1, so the result is always a power of two and the
 * number of entries never exceeds three quarters of the bucket count.
 *
 * The target is computed in 64 bits: the 32-bit product entry_count * 4u
 * wraps once entry_count exceeds 0x3FFFFFFF, which previously made the loop
 * stop at a far too small table. Doubling stops at 0x80000000, the largest
 * power of two a uint32_t can represent.
 */
static uint32_t ikv2_bucket_count_for_entries(uint32_t entry_count)
{
    const uint64_t wanted = ((uint64_t)entry_count * 4u) / 3u + 1u;
    uint32_t bucket_count = 64u;

    while (bucket_count < wanted)
    {
        /* One more doubling would overflow uint32_t. */
        if (bucket_count > 0x7FFFFFFFu)
            break;
        bucket_count *= 2u;
    }

    return bucket_count;
}
|
||||
|
||||
static bool ikv2_buffer_reserve(ikv2_buffer_t *buffer, size_t additional)
|
||||
{
|
||||
uint8_t *next = NULL;
|
||||
@@ -277,13 +324,6 @@ static char *ikv2_file_read_string(FILE *file)
|
||||
return value;
|
||||
}
|
||||
|
||||
static int ikv2_compare_entries(const void *lhs, const void *rhs)
|
||||
{
|
||||
const ikv2_index_entry_t *left = (const ikv2_index_entry_t *)lhs;
|
||||
const ikv2_index_entry_t *right = (const ikv2_index_entry_t *)rhs;
|
||||
return strcmp(left->key ? left->key : "", right->key ? right->key : "");
|
||||
}
|
||||
|
||||
static bool ikv2_collect_root_entries(const ikv_node_t *root, ikv2_index_entry_t **out_entries, uint32_t *out_count)
|
||||
{
|
||||
ikv2_index_entry_t *entries = NULL;
|
||||
@@ -311,35 +351,24 @@ static bool ikv2_collect_root_entries(const ikv_node_t *root, ikv2_index_entry_t
|
||||
IKV_FREE(payload);
|
||||
for (uint32_t i = 0; i < index; ++i)
|
||||
{
|
||||
IKV_FREE(entries[i].key);
|
||||
IKV_FREE(entries[i].payload_data);
|
||||
}
|
||||
IKV_FREE(entries);
|
||||
return false;
|
||||
}
|
||||
|
||||
entries[index].key = ikv2_strdup(node->key ? node->key : "");
|
||||
entries[index].key = (char *)(node->key ? node->key : "");
|
||||
entries[index].key_length = (uint32_t)strlen(entries[index].key);
|
||||
entries[index].key_hash = ikv2_hash_key(entries[index].key);
|
||||
entries[index].type = (uint8_t)node->type;
|
||||
entries[index].payload_data = payload;
|
||||
entries[index].payload_size = payload_size;
|
||||
entries[index].loaded = false;
|
||||
if (!entries[index].key)
|
||||
{
|
||||
IKV_FREE(payload);
|
||||
for (uint32_t i = 0; i < index; ++i)
|
||||
{
|
||||
IKV_FREE(entries[i].key);
|
||||
IKV_FREE(entries[i].payload_data);
|
||||
}
|
||||
IKV_FREE(entries);
|
||||
return false;
|
||||
}
|
||||
entries[index].next_in_bucket = IKV2_INDEX_NONE;
|
||||
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
qsort(entries, count, sizeof(*entries), ikv2_compare_entries);
|
||||
*out_entries = entries;
|
||||
*out_count = count;
|
||||
return true;
|
||||
@@ -351,6 +380,9 @@ static bool ikv2_build_indexed_binary(const ikv_node_t *root, uint8_t **out_data
|
||||
uint32_t entry_count = 0u;
|
||||
uint32_t header_size = 0u;
|
||||
uint32_t payload_base = 0u;
|
||||
uint32_t total_size = 0u;
|
||||
const char *root_key = NULL;
|
||||
uint32_t root_key_length = 0u;
|
||||
ikv2_buffer_t buffer = {0};
|
||||
|
||||
if (!root || !out_data || !out_size || root->type != IKV_OBJECT)
|
||||
@@ -363,19 +395,34 @@ static bool ikv2_build_indexed_binary(const ikv_node_t *root, uint8_t **out_data
|
||||
if (!ikv2_collect_root_entries(root, &entries, &entry_count))
|
||||
return false;
|
||||
|
||||
root_key = (root->key && root->key[0]) ? root->key : "root";
|
||||
root_key_length = (uint32_t)strlen(root_key);
|
||||
header_size = 4u + 1u + 4u + 4u;
|
||||
header_size += 1u + (uint32_t)strlen(root->key && root->key[0] ? root->key : "root");
|
||||
header_size += 1u;
|
||||
header_size += ikv2_varu32_size(root_key_length) + root_key_length;
|
||||
header_size += ikv2_varu32_size(entry_count);
|
||||
for (uint32_t i = 0; i < entry_count; ++i)
|
||||
header_size += 1u + (uint32_t)strlen(entries[i].key ? entries[i].key : "");
|
||||
header_size += ikv2_varu32_size(entries[i].key_length) + entries[i].key_length;
|
||||
header_size += entry_count * (1u + 4u + 4u);
|
||||
payload_base = header_size;
|
||||
total_size = header_size;
|
||||
for (uint32_t i = 0; i < entry_count; ++i)
|
||||
total_size += entries[i].payload_size;
|
||||
|
||||
buffer.data = total_size ? (uint8_t *)IKV_MALLOC(total_size) : NULL;
|
||||
buffer.capacity = total_size;
|
||||
if (total_size > 0u && !buffer.data)
|
||||
{
|
||||
for (uint32_t i = 0; i < entry_count; ++i)
|
||||
IKV_FREE(entries[i].payload_data);
|
||||
IKV_FREE(entries);
|
||||
return false;
|
||||
}
|
||||
|
||||
ikv2_buffer_write_bytes(&buffer, "iKv2", 4u);
|
||||
ikv2_buffer_write_u8(&buffer, (uint8_t)'b');
|
||||
ikv2_buffer_write_u32le(&buffer, IKV_V2);
|
||||
ikv2_buffer_write_u32le(&buffer, IKV2_BINARY_FLAGS_INDEXED_ROOT);
|
||||
ikv2_buffer_write_string(&buffer, (root->key && root->key[0]) ? root->key : "root");
|
||||
ikv2_buffer_write_string(&buffer, root_key);
|
||||
ikv2_buffer_write_varu32(&buffer, entry_count);
|
||||
|
||||
for (uint32_t i = 0; i < entry_count; ++i)
|
||||
@@ -397,7 +444,6 @@ static bool ikv2_build_indexed_binary(const ikv_node_t *root, uint8_t **out_data
|
||||
|
||||
for (uint32_t i = 0; i < entry_count; ++i)
|
||||
{
|
||||
IKV_FREE(entries[i].key);
|
||||
IKV_FREE(entries[i].payload_data);
|
||||
}
|
||||
IKV_FREE(entries);
|
||||
@@ -422,70 +468,88 @@ static void ikv2_lazy_root_destroy(ikv_lazy_state_t *state)
|
||||
|
||||
for (uint32_t i = 0; i < lazy_root->entry_count; ++i)
|
||||
IKV_FREE(lazy_root->entries[i].key);
|
||||
if (lazy_root->file_handle)
|
||||
IKV_FCLOSE(lazy_root->file_handle);
|
||||
IKV_FREE(lazy_root->bucket_heads);
|
||||
IKV_FREE(lazy_root->entries);
|
||||
IKV_FREE(lazy_root->file_path);
|
||||
IKV_FREE(lazy_root->memory_data);
|
||||
IKV_FREE(lazy_root);
|
||||
}
|
||||
|
||||
/*
 * Build the hash-bucket index over lazy_root->entries.
 *
 * Sizes the table with ikv2_bucket_count_for_entries(), allocates the
 * bucket_heads array, marks every bucket empty, then threads each entry onto
 * the front of the chain selected by key_hash % bucket_count. Every entry's
 * key_hash must already be filled in.
 *
 * Returns false when lazy_root is NULL or the allocation fails, true otherwise.
 */
static bool ikv2_build_entry_buckets(ikv2_lazy_root_t *lazy_root)
{
    uint32_t slot = 0u;
    uint32_t entry_index = 0u;

    if (lazy_root == NULL)
        return false;

    lazy_root->bucket_count = ikv2_bucket_count_for_entries(lazy_root->entry_count);
    lazy_root->bucket_heads = (uint32_t *)IKV_MALLOC(sizeof(*lazy_root->bucket_heads) * lazy_root->bucket_count);
    if (lazy_root->bucket_heads == NULL)
        return false;

    /* Start with every chain empty. */
    while (slot < lazy_root->bucket_count)
    {
        lazy_root->bucket_heads[slot] = IKV2_INDEX_NONE;
        ++slot;
    }

    /* Head insertion: the newest entry becomes the head, the old head its successor. */
    while (entry_index < lazy_root->entry_count)
    {
        ikv2_index_entry_t *current = &lazy_root->entries[entry_index];
        uint32_t *head = &lazy_root->bucket_heads[current->key_hash % lazy_root->bucket_count];

        current->next_in_bucket = *head;
        *head = entry_index;
        ++entry_index;
    }

    return true;
}
|
||||
|
||||
/*
 * Look up a root entry by key through the hash-bucket index.
 *
 * The key is hashed with ikv2_hash_key(), the bucket is hash % bucket_count,
 * and the chain starting at bucket_heads[bucket] is followed through
 * next_in_bucket until IKV2_INDEX_NONE. The cached hash and length are
 * compared first so strcmp() only runs on likely matches.
 *
 * lazy_root  index to search; NULL, an empty entry table or a missing bucket
 *            table all yield NULL.
 * key        NUL-terminated key to find; NULL yields NULL.
 *
 * Returns a pointer into lazy_root->entries, or NULL when no entry matches.
 */
static ikv2_index_entry_t *ikv2_find_entry(ikv2_lazy_root_t *lazy_root, const char *key)
{
    uint32_t hash = 0u;
    uint32_t bucket = 0u;
    uint32_t entry_index = IKV2_INDEX_NONE;
    size_t key_length = 0u;

    if (!lazy_root || !key || lazy_root->entry_count == 0u || lazy_root->bucket_count == 0u || !lazy_root->bucket_heads)
        return NULL;

    hash = ikv2_hash_key(key);
    key_length = strlen(key);
    bucket = hash % lazy_root->bucket_count;
    entry_index = lazy_root->bucket_heads[bucket];

    while (entry_index != IKV2_INDEX_NONE)
    {
        ikv2_index_entry_t *entry = &lazy_root->entries[entry_index];

        if (entry->key_hash == hash &&
            entry->key_length == (uint32_t)key_length &&
            strcmp(key, entry->key ? entry->key : "") == 0)
            return entry;

        entry_index = entry->next_in_bucket;
    }

    return NULL;
}
|
||||
|
||||
static bool ikv2_read_payload_from_file(const char *path, uint32_t offset, uint32_t size, uint8_t **out_data)
|
||||
static bool ikv2_read_payload_from_file(ikv2_lazy_root_t *lazy_root, uint32_t offset, uint32_t size, uint8_t **out_data)
|
||||
{
|
||||
FILE *file = NULL;
|
||||
uint8_t *data = NULL;
|
||||
|
||||
if (!path || !out_data)
|
||||
if (!lazy_root || !lazy_root->file_handle || !out_data)
|
||||
return false;
|
||||
|
||||
*out_data = NULL;
|
||||
file = IKV_FOPEN(path, "rb");
|
||||
if (!file)
|
||||
return false;
|
||||
|
||||
if (IKV_FSEEK(file, (long)offset, SEEK_SET) != 0)
|
||||
{
|
||||
IKV_FCLOSE(file);
|
||||
if (IKV_FSEEK(lazy_root->file_handle, (long)offset, SEEK_SET) != 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
data = (uint8_t *)IKV_MALLOC(size);
|
||||
if (!data)
|
||||
{
|
||||
IKV_FCLOSE(file);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (size > 0u && IKV_FREAD(data, 1u, size, file) != size)
|
||||
if (size > 0u && IKV_FREAD(data, 1u, size, lazy_root->file_handle) != size)
|
||||
{
|
||||
IKV_FREE(data);
|
||||
IKV_FCLOSE(file);
|
||||
return false;
|
||||
}
|
||||
|
||||
IKV_FCLOSE(file);
|
||||
*out_data = data;
|
||||
return true;
|
||||
}
|
||||
@@ -516,7 +580,7 @@ static ikv_node_t *ikv2_lazy_root_load_object_key(ikv_lazy_state_t *state, ikv_n
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!ikv2_read_payload_from_file(lazy_root->file_path, entry->payload_offset, entry->payload_size, &owned_payload))
|
||||
if (!ikv2_read_payload_from_file(lazy_root, entry->payload_offset, entry->payload_size, &owned_payload))
|
||||
return NULL;
|
||||
payload_data = owned_payload;
|
||||
}
|
||||
@@ -530,7 +594,6 @@ static ikv_node_t *ikv2_lazy_root_load_object_key(ikv_lazy_state_t *state, ikv_n
|
||||
return NULL;
|
||||
}
|
||||
|
||||
entry->loaded = true;
|
||||
return node;
|
||||
}
|
||||
|
||||
@@ -610,6 +673,9 @@ static ikv_node_t *ikv2_parse_indexed_binary_buffer(uint8_t *buffer, size_t size
|
||||
ikv_free(root);
|
||||
return NULL;
|
||||
}
|
||||
lazy_root->entries[i].key_length = (uint32_t)strlen(lazy_root->entries[i].key);
|
||||
lazy_root->entries[i].key_hash = ikv2_hash_key(lazy_root->entries[i].key);
|
||||
lazy_root->entries[i].next_in_bucket = IKV2_INDEX_NONE;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < entry_count; ++i)
|
||||
@@ -624,6 +690,13 @@ static ikv_node_t *ikv2_parse_indexed_binary_buffer(uint8_t *buffer, size_t size
|
||||
}
|
||||
}
|
||||
|
||||
if (!ikv2_build_entry_buckets(lazy_root))
|
||||
{
|
||||
ikv2_lazy_root_destroy(&lazy_root->base);
|
||||
ikv_free(root);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
lazy_root->source_kind = IKV2_SOURCE_MEMORY;
|
||||
lazy_root->memory_data = buffer;
|
||||
lazy_root->memory_size = size;
|
||||
@@ -758,6 +831,9 @@ static ikv_node_t *ikv2_parse_binary_file(const char *path)
|
||||
IKV_FCLOSE(file);
|
||||
return NULL;
|
||||
}
|
||||
lazy_root->entries[i].key_length = (uint32_t)strlen(lazy_root->entries[i].key);
|
||||
lazy_root->entries[i].key_hash = ikv2_hash_key(lazy_root->entries[i].key);
|
||||
lazy_root->entries[i].next_in_bucket = IKV2_INDEX_NONE;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < entry_count; ++i)
|
||||
@@ -773,7 +849,15 @@ static ikv_node_t *ikv2_parse_binary_file(const char *path)
|
||||
}
|
||||
}
|
||||
|
||||
IKV_FCLOSE(file);
|
||||
if (!ikv2_build_entry_buckets(lazy_root))
|
||||
{
|
||||
ikv2_lazy_root_destroy(&lazy_root->base);
|
||||
ikv_free(root);
|
||||
IKV_FCLOSE(file);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
lazy_root->file_handle = file;
|
||||
root->lazy_state = &lazy_root->base;
|
||||
return root;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user