diff --git a/src/loaders/ikv2.c b/src/loaders/ikv2.c
index a535a0d..b641769 100644
--- a/src/loaders/ikv2.c
+++ b/src/loaders/ikv2.c
@@ -9,11 +9,13 @@
 typedef struct
 {
     char *key;
+    uint32_t key_length;
+    uint32_t key_hash;
     uint8_t type;
     uint32_t payload_offset;
     uint32_t payload_size;
     uint8_t *payload_data;
-    bool loaded;
+    uint32_t next_in_bucket;
 } ikv2_index_entry_t;
 
 typedef enum
@@ -27,9 +29,12 @@ typedef struct
     ikv_lazy_state_t base;
     ikv2_source_kind_t source_kind;
     char *file_path;
+    FILE *file_handle;
     uint8_t *memory_data;
     size_t memory_size;
     uint32_t entry_count;
+    uint32_t bucket_count;
+    uint32_t *bucket_heads;
     ikv2_index_entry_t *entries;
 } ikv2_lazy_root_t;
 
@@ -48,6 +53,57 @@ typedef struct
     size_t offset;
 } ikv2_cursor_t;
 
+/* Sentinel index: marks an empty bucket and the end of a bucket chain. */
+#define IKV2_INDEX_NONE 0xFFFFFFFFu
+
+/* 32-bit FNV-1a hash of a NUL-terminated key; a NULL key hashes like "". */
+static uint32_t ikv2_hash_key(const char *value)
+{
+    uint32_t hash = 2166136261u;
+
+    while (value && *value)
+    {
+        hash ^= (uint8_t)*value++;
+        hash *= 16777619u;
+    }
+
+    return hash;
+}
+
+/* Byte count of value when stored 7 bits per byte (1 for values below 0x80). */
+static uint32_t ikv2_varu32_size(uint32_t value)
+{
+    uint32_t size = 1u;
+
+    while (value >= 0x80u)
+    {
+        value >>= 7u;
+        ++size;
+    }
+
+    return size;
+}
+
+/*
+ * Picks the hash table size for entry_count entries: 64 doubled until it
+ * exceeds entry_count * 4 / 3, capped at 0x80000000.
+ */
+static uint32_t ikv2_bucket_count_for_entries(uint32_t entry_count)
+{
+    /* 64-bit target: entry_count * 4u would wrap in uint32_t past 0x3FFFFFFF. */
+    const uint64_t wanted = ((uint64_t)entry_count * 4u) / 3u + 1u;
+    uint32_t bucket_count = 64u;
+
+    while (bucket_count < wanted)
+    {
+        if (bucket_count > 0x7FFFFFFFu)
+            break;
+        bucket_count *= 2u;
+    }
+
+    return bucket_count;
+}
+
 static bool ikv2_buffer_reserve(ikv2_buffer_t *buffer, size_t additional)
 {
     uint8_t *next = NULL;
@@ -277,13 +333,6 @@ static char *ikv2_file_read_string(FILE *file)
     return value;
 }
 
-static int ikv2_compare_entries(const void *lhs, const void *rhs)
-{
-    const ikv2_index_entry_t *left = (const ikv2_index_entry_t *)lhs;
-    const ikv2_index_entry_t *right = (const ikv2_index_entry_t *)rhs;
-    return strcmp(left->key ? left->key : "", right->key ? right->key : "");
-}
-
 static bool ikv2_collect_root_entries(const ikv_node_t *root, ikv2_index_entry_t **out_entries, uint32_t *out_count)
 {
     ikv2_index_entry_t *entries = NULL;
@@ -311,35 +360,24 @@ static bool ikv2_collect_root_entries(const ikv_node_t *root, ikv2_index_entry_t
                 IKV_FREE(payload);
                 for (uint32_t i = 0; i < index; ++i)
                 {
-                    IKV_FREE(entries[i].key);
                     IKV_FREE(entries[i].payload_data);
                 }
                 IKV_FREE(entries);
                 return false;
             }
 
-            entries[index].key = ikv2_strdup(node->key ? node->key : "");
+            entries[index].key = (char *)(node->key ? node->key : "");
+            entries[index].key_length = (uint32_t)strlen(entries[index].key);
+            entries[index].key_hash = ikv2_hash_key(entries[index].key);
             entries[index].type = (uint8_t)node->type;
             entries[index].payload_data = payload;
             entries[index].payload_size = payload_size;
-            entries[index].loaded = false;
-            if (!entries[index].key)
-            {
-                IKV_FREE(payload);
-                for (uint32_t i = 0; i < index; ++i)
-                {
-                    IKV_FREE(entries[i].key);
-                    IKV_FREE(entries[i].payload_data);
-                }
-                IKV_FREE(entries);
-                return false;
-            }
+            entries[index].next_in_bucket = IKV2_INDEX_NONE;
 
             ++index;
         }
     }
 
-    qsort(entries, count, sizeof(*entries), ikv2_compare_entries);
     *out_entries = entries;
     *out_count = count;
     return true;
@@ -351,6 +389,9 @@ static bool ikv2_build_indexed_binary(const ikv_node_t *root, uint8_t **out_data
     uint32_t entry_count = 0u;
     uint32_t header_size = 0u;
     uint32_t payload_base = 0u;
+    uint32_t total_size = 0u;
+    const char *root_key = NULL;
+    uint32_t root_key_length = 0u;
     ikv2_buffer_t buffer = {0};
 
     if (!root || !out_data || !out_size || root->type != IKV_OBJECT)
@@ -363,19 +404,34 @@ static bool ikv2_build_indexed_binary(const ikv_node_t *root, uint8_t **out_data
     if (!ikv2_collect_root_entries(root, &entries, &entry_count))
         return false;
 
+    root_key = (root->key && root->key[0]) ? root->key : "root";
+    root_key_length = (uint32_t)strlen(root_key);
     header_size = 4u + 1u + 4u + 4u;
-    header_size += 1u + (uint32_t)strlen(root->key && root->key[0] ? root->key : "root");
-    header_size += 1u;
+    header_size += ikv2_varu32_size(root_key_length) + root_key_length;
+    header_size += ikv2_varu32_size(entry_count);
     for (uint32_t i = 0; i < entry_count; ++i)
-        header_size += 1u + (uint32_t)strlen(entries[i].key ? entries[i].key : "");
+        header_size += ikv2_varu32_size(entries[i].key_length) + entries[i].key_length;
     header_size += entry_count * (1u + 4u + 4u);
     payload_base = header_size;
+    total_size = header_size;
+    for (uint32_t i = 0; i < entry_count; ++i)
+        total_size += entries[i].payload_size;
+
+    buffer.data = total_size ? (uint8_t *)IKV_MALLOC(total_size) : NULL;
+    buffer.capacity = total_size;
+    if (total_size > 0u && !buffer.data)
+    {
+        for (uint32_t i = 0; i < entry_count; ++i)
+            IKV_FREE(entries[i].payload_data);
+        IKV_FREE(entries);
+        return false;
+    }
 
     ikv2_buffer_write_bytes(&buffer, "iKv2", 4u);
     ikv2_buffer_write_u8(&buffer, (uint8_t)'b');
     ikv2_buffer_write_u32le(&buffer, IKV_V2);
     ikv2_buffer_write_u32le(&buffer, IKV2_BINARY_FLAGS_INDEXED_ROOT);
-    ikv2_buffer_write_string(&buffer, (root->key && root->key[0]) ? root->key : "root");
+    ikv2_buffer_write_string(&buffer, root_key);
     ikv2_buffer_write_varu32(&buffer, entry_count);
 
     for (uint32_t i = 0; i < entry_count; ++i)
@@ -397,7 +453,6 @@ static bool ikv2_build_indexed_binary(const ikv_node_t *root, uint8_t **out_data
 
     for (uint32_t i = 0; i < entry_count; ++i)
     {
-        IKV_FREE(entries[i].key);
         IKV_FREE(entries[i].payload_data);
     }
     IKV_FREE(entries);
@@ -422,70 +477,110 @@ static void ikv2_lazy_root_destroy(ikv_lazy_state_t *state)
     for (uint32_t i = 0; i < lazy_root->entry_count; ++i)
         IKV_FREE(lazy_root->entries[i].key);
 
+    if (lazy_root->file_handle)
+        IKV_FCLOSE(lazy_root->file_handle);
+    IKV_FREE(lazy_root->bucket_heads);
     IKV_FREE(lazy_root->entries);
     IKV_FREE(lazy_root->file_path);
     IKV_FREE(lazy_root->memory_data);
     IKV_FREE(lazy_root);
 }
 
+/*
+ * Builds the chained hash index over lazy_root->entries from each entry's
+ * key_hash. Returns false on NULL input or when the bucket array cannot be
+ * allocated. Entries are pushed at the head of their chain, so of two entries
+ * with the same key the one with the higher index is found first.
+ */
+static bool ikv2_build_entry_buckets(ikv2_lazy_root_t *lazy_root)
+{
+    if (!lazy_root)
+        return false;
+
+    lazy_root->bucket_count = ikv2_bucket_count_for_entries(lazy_root->entry_count);
+    lazy_root->bucket_heads = NULL;
+    /* Only allocate when the byte size fits in size_t (matters on 32-bit). */
+    if (lazy_root->bucket_count <= SIZE_MAX / sizeof(*lazy_root->bucket_heads))
+        lazy_root->bucket_heads = (uint32_t *)IKV_MALLOC(sizeof(*lazy_root->bucket_heads) * lazy_root->bucket_count);
+    if (!lazy_root->bucket_heads)
+        return false;
+
+    for (uint32_t i = 0; i < lazy_root->bucket_count; ++i)
+        lazy_root->bucket_heads[i] = IKV2_INDEX_NONE;
+
+    for (uint32_t i = 0; i < lazy_root->entry_count; ++i)
+    {
+        uint32_t bucket = lazy_root->entries[i].key_hash % lazy_root->bucket_count;
+        lazy_root->entries[i].next_in_bucket = lazy_root->bucket_heads[bucket];
+        lazy_root->bucket_heads[bucket] = i;
+    }
+
+    return true;
+}
+
+/*
+ * Finds the entry for key by walking its bucket chain. Returns NULL when the
+ * key is absent, an argument is NULL, or no bucket index has been built.
+ */
 static ikv2_index_entry_t *ikv2_find_entry(ikv2_lazy_root_t *lazy_root, const char *key)
 {
-    uint32_t left = 0u;
-    uint32_t right = 0u;
+    uint32_t hash = 0u;
+    uint32_t bucket = 0u;
+    uint32_t entry_index = IKV2_INDEX_NONE;
+    size_t key_length = 0u;
 
-    if (!lazy_root || !key || lazy_root->entry_count == 0u)
+    if (!lazy_root || !key || lazy_root->entry_count == 0u || lazy_root->bucket_count == 0u || !lazy_root->bucket_heads)
         return NULL;
 
-    right = lazy_root->entry_count;
-    while (left < right)
+    hash = ikv2_hash_key(key);
+    key_length = strlen(key);
+    bucket = hash % lazy_root->bucket_count;
+    entry_index = lazy_root->bucket_heads[bucket];
+
+    while (entry_index != IKV2_INDEX_NONE)
     {
-        uint32_t mid = left + (right - left) / 2u;
-        int compare = strcmp(key, lazy_root->entries[mid].key ? lazy_root->entries[mid].key : "");
-        if (compare == 0)
-            return &lazy_root->entries[mid];
-        if (compare < 0)
-            right = mid;
-        else
-            left = mid + 1u;
+        ikv2_index_entry_t *entry = &lazy_root->entries[entry_index];
+        if (entry->key_hash == hash &&
+            entry->key_length == (uint32_t)key_length &&
+            strcmp(key, entry->key ? entry->key : "") == 0)
+            return entry;
+        entry_index = entry->next_in_bucket;
     }
 
     return NULL;
 }
 
+/*
+ * Reads size bytes at offset from lazy_root->file_handle into a freshly
+ * allocated buffer returned through *out_data; on success the caller must
+ * release it. Returns false on bad arguments, seek, allocation or read failure.
+ * NOTE(review): the seek + read pair moves the shared stream position, so
+ * concurrent loads on one root look unsafe without a lock - confirm callers.
+ */
-static bool ikv2_read_payload_from_file(const char *path, uint32_t offset, uint32_t size, uint8_t **out_data)
+static bool ikv2_read_payload_from_file(ikv2_lazy_root_t *lazy_root, uint32_t offset, uint32_t size, uint8_t **out_data)
 {
-    FILE *file = NULL;
     uint8_t *data = NULL;
 
-    if (!path || !out_data)
+    if (!lazy_root || !lazy_root->file_handle || !out_data)
         return false;
 
     *out_data = NULL;
 
-    file = IKV_FOPEN(path, "rb");
-    if (!file)
-        return false;
-    if (IKV_FSEEK(file, (long)offset, SEEK_SET) != 0)
-    {
-        IKV_FCLOSE(file);
+    if (IKV_FSEEK(lazy_root->file_handle, (long)offset, SEEK_SET) != 0)
         return false;
-    }
 
-    data = (uint8_t *)IKV_MALLOC(size);
+    /* Ask for at least one byte: a zero-size request may return NULL (as
+     * malloc(0) can), making an empty payload look like an allocation failure. */
+    data = (uint8_t *)IKV_MALLOC(size ? size : 1u);
     if (!data)
-    {
-        IKV_FCLOSE(file);
         return false;
-    }
 
-    if (size > 0u && IKV_FREAD(data, 1u, size, file) != size)
+    if (size > 0u && IKV_FREAD(data, 1u, size, lazy_root->file_handle) != size)
     {
         IKV_FREE(data);
-        IKV_FCLOSE(file);
         return false;
     }
 
-    IKV_FCLOSE(file);
     *out_data = data;
     return true;
 }
@@ -516,7 +611,7 @@ static ikv_node_t *ikv2_lazy_root_load_object_key(ikv_lazy_state_t *state, ikv_n
     }
     else
     {
-        if (!ikv2_read_payload_from_file(lazy_root->file_path, entry->payload_offset, entry->payload_size, &owned_payload))
+        if (!ikv2_read_payload_from_file(lazy_root, entry->payload_offset, entry->payload_size, &owned_payload))
             return NULL;
         payload_data = owned_payload;
     }
@@ -530,7 +625,6 @@ static ikv_node_t *ikv2_lazy_root_load_object_key(ikv_lazy_state_t *state, ikv_n
         return NULL;
     }
 
-    entry->loaded = true;
     return node;
 }
 
@@ -610,6 +704,9 @@ static ikv_node_t *ikv2_parse_indexed_binary_buffer(uint8_t *buffer, size_t size
             ikv_free(root);
             return NULL;
         }
+        lazy_root->entries[i].key_length = (uint32_t)strlen(lazy_root->entries[i].key);
+        lazy_root->entries[i].key_hash = ikv2_hash_key(lazy_root->entries[i].key);
+        lazy_root->entries[i].next_in_bucket = IKV2_INDEX_NONE;
     }
 
     for (uint32_t i = 0; i < entry_count; ++i)
@@ -624,6 +721,13 @@ static ikv_node_t *ikv2_parse_indexed_binary_buffer(uint8_t *buffer, size_t size
         }
     }
 
+    if (!ikv2_build_entry_buckets(lazy_root))
+    {
+        ikv2_lazy_root_destroy(&lazy_root->base);
+        ikv_free(root);
+        return NULL;
+    }
+
     lazy_root->source_kind = IKV2_SOURCE_MEMORY;
     lazy_root->memory_data = buffer;
     lazy_root->memory_size = size;
@@ -758,6 +862,9 @@ static ikv_node_t *ikv2_parse_binary_file(const char *path)
             IKV_FCLOSE(file);
             return NULL;
         }
+        lazy_root->entries[i].key_length = (uint32_t)strlen(lazy_root->entries[i].key);
+        lazy_root->entries[i].key_hash = ikv2_hash_key(lazy_root->entries[i].key);
+        lazy_root->entries[i].next_in_bucket = IKV2_INDEX_NONE;
     }
 
     for (uint32_t i = 0; i < entry_count; ++i)
@@ -773,7 +880,15 @@ static ikv_node_t *ikv2_parse_binary_file(const char *path)
         }
     }
 
-    IKV_FCLOSE(file);
+    if (!ikv2_build_entry_buckets(lazy_root))
+    {
+        ikv2_lazy_root_destroy(&lazy_root->base);
+        ikv_free(root);
+        IKV_FCLOSE(file);
+        return NULL;
+    }
+
+    lazy_root->file_handle = file;
     root->lazy_state = &lazy_root->base;
     return root;
 }