690 lines
16 KiB
C
690 lines
16 KiB
C
/*
|
|
* Copyright (c) 2016-2017 David Leeds <davidesleeds@gmail.com>
|
|
*
|
|
* Hashmap is free software; you can redistribute it and/or modify
|
|
* it under the terms of the MIT license. See LICENSE for details.
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
|
|
#include "hashmap.h"
|
|
|
|
/*
 * HASHMAP_ASSERT() forwards to assert() unless HASHMAP_NOASSERT is defined,
 * which lets the hashmap's internal checks be compiled out on their own.
 */
#ifndef HASHMAP_NOASSERT
#include <assert.h>
#define HASHMAP_ASSERT(expr)            assert(expr)
#else
/*
 * Expand to a void expression rather than to nothing, so that the macro
 * remains valid wherever an expression is required (the same form the C
 * library uses for assert() when NDEBUG is defined).
 */
#define HASHMAP_ASSERT(expr)            ((void)0)
#endif
|
|
|
|
/*
 * Table sizes must be powers of 2, so that an index can be reduced with a
 * bit mask instead of a modulo operation.
 */
#define HASHMAP_SIZE_MIN                (1 << 5)    /* 32 */
#define HASHMAP_SIZE_DEFAULT            (1 << 8)    /* 256 */

/* Reduce val to an index inside the table by masking with table_size - 1 */
#define HASHMAP_SIZE_MOD(map, val)      ((val) & ((map)->table_size - 1))

/* Probing gives up after table_size / 2 slots */
#define HASHMAP_PROBE_LEN(map)          ((map)->table_size >> 1)

/* Slot visited after index by linear probing (wraps at the table end) */
#define HASHMAP_PROBE_NEXT(map, index)  HASHMAP_SIZE_MOD(map, (index) + 1)

/*
 * Circular "b <= a" test: true when a and b are equal, or when the
 * table_size / 2 bit of (b - a) is set.  For indices inside the table that
 * means (b - a) modulo table_size is at least table_size / 2.
 */
#define HASHMAP_INDEX_LE(map, a, b)     \
    (((a) == (b)) || ((((b) - (a)) & HASHMAP_PROBE_LEN(map)) != 0))
|
|
|
|
|
|
/*
 * One slot of the hash table.  The code in this file treats a slot whose
 * key is NULL as empty.
 */
struct hashmap_entry {
    void *key;                  /* NULL while the slot is unused */
    void *data;                 /* value pointer supplied by the caller */
#ifdef HASHMAP_METRICS
    size_t num_collisions;      /* probe steps taken when the slot was claimed */
#endif
};
|
|
|
|
|
|
/*
 * Enforce a maximum 0.75 load factor: return num_entries plus one third of
 * num_entries (integer division), i.e. the smallest table size that the
 * given number of entries may occupy.
 */
static inline size_t hashmap_table_min_size_calc(size_t num_entries)
{
    const size_t headroom = num_entries / 3;

    return num_entries + headroom;
}
|
|
|
|
/*
|
|
* Calculate the optimal table size, given the specified max number
|
|
* of elements.
|
|
*/
|
|
static size_t hashmap_table_size_calc(size_t num_entries)
|
|
{
|
|
size_t table_size;
|
|
size_t min_size;
|
|
|
|
table_size = hashmap_table_min_size_calc(num_entries);
|
|
|
|
/* Table size is always a power of 2 */
|
|
min_size = HASHMAP_SIZE_MIN;
|
|
while (min_size < table_size) {
|
|
min_size <<= 1;
|
|
}
|
|
return min_size;
|
|
}
|
|
|
|
/*
|
|
* Get a valid hash table index from a key.
|
|
*/
|
|
static inline size_t hashmap_calc_index(const struct hashmap *map,
|
|
const void *key)
|
|
{
|
|
return HASHMAP_SIZE_MOD(map, map->hash(key));
|
|
}
|
|
|
|
/*
|
|
* Return the next populated entry, starting with the specified one.
|
|
* Returns NULL if there are no more valid entries.
|
|
*/
|
|
static struct hashmap_entry *hashmap_entry_get_populated(
|
|
const struct hashmap *map, struct hashmap_entry *entry)
|
|
{
|
|
for (; entry < &map->table[map->table_size]; ++entry) {
|
|
if (entry->key) {
|
|
return entry;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Find the hashmap entry with the specified key, or an empty slot.
|
|
* Returns NULL if the entire table has been searched without finding a match.
|
|
*/
|
|
static struct hashmap_entry *hashmap_entry_find(const struct hashmap *map,
|
|
const void *key, bool find_empty)
|
|
{
|
|
size_t i;
|
|
size_t index;
|
|
size_t probe_len = HASHMAP_PROBE_LEN(map);
|
|
struct hashmap_entry *entry;
|
|
|
|
index = hashmap_calc_index(map, key);
|
|
|
|
/* Linear probing */
|
|
for (i = 0; i < probe_len; ++i) {
|
|
entry = &map->table[index];
|
|
if (!entry->key) {
|
|
if (find_empty) {
|
|
#ifdef HASHMAP_METRICS
|
|
entry->num_collisions = i;
|
|
#endif
|
|
return entry;
|
|
}
|
|
return NULL;
|
|
}
|
|
if (map->key_compare(key, entry->key) == 0) {
|
|
return entry;
|
|
}
|
|
index = HASHMAP_PROBE_NEXT(map, index);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Removes the specified entry and processes the proceeding entries to reduce
|
|
* the load factor and keep the chain continuous. This is a required
|
|
* step for hash maps using linear probing.
|
|
*/
|
|
static void hashmap_entry_remove(struct hashmap *map,
|
|
struct hashmap_entry *removed_entry)
|
|
{
|
|
size_t i;
|
|
#ifdef HASHMAP_METRICS
|
|
size_t removed_i = 0;
|
|
#endif
|
|
size_t index;
|
|
size_t entry_index;
|
|
size_t removed_index = (removed_entry - map->table);
|
|
struct hashmap_entry *entry;
|
|
|
|
/* Free the key */
|
|
if (map->key_free) {
|
|
map->key_free(removed_entry->key);
|
|
}
|
|
--map->num_entries;
|
|
|
|
/* Fill the free slot in the chain */
|
|
index = HASHMAP_PROBE_NEXT(map, removed_index);
|
|
for (i = 1; i < map->table_size; ++i) {
|
|
entry = &map->table[index];
|
|
if (!entry->key) {
|
|
/* Reached end of chain */
|
|
break;
|
|
}
|
|
entry_index = hashmap_calc_index(map, entry->key);
|
|
/* Shift in entries with an index <= to the removed slot */
|
|
if (HASHMAP_INDEX_LE(map, removed_index, entry_index)) {
|
|
#ifdef HASHMAP_METRICS
|
|
entry->num_collisions -= (i - removed_i);
|
|
removed_i = i;
|
|
#endif
|
|
memcpy(removed_entry, entry, sizeof(*removed_entry));
|
|
removed_index = index;
|
|
removed_entry = entry;
|
|
}
|
|
index = HASHMAP_PROBE_NEXT(map, index);
|
|
}
|
|
/* Clear the last removed entry */
|
|
memset(removed_entry, 0, sizeof(*removed_entry));
|
|
}
|
|
|
|
/*
|
|
* Reallocates the hash table to the new size and rehashes all entries.
|
|
* new_size MUST be a power of 2.
|
|
* Returns 0 on success and -1 on allocation or hash function failure.
|
|
*/
|
|
static int hashmap_rehash(struct hashmap *map, size_t new_size)
|
|
{
|
|
size_t old_size;
|
|
struct hashmap_entry *old_table;
|
|
struct hashmap_entry *new_table;
|
|
struct hashmap_entry *entry;
|
|
struct hashmap_entry *new_entry;
|
|
|
|
HASHMAP_ASSERT(new_size >= HASHMAP_SIZE_MIN);
|
|
HASHMAP_ASSERT((new_size & (new_size - 1)) == 0);
|
|
|
|
new_table = (struct hashmap_entry *)calloc(new_size,
|
|
sizeof(struct hashmap_entry));
|
|
if (!new_table) {
|
|
return -1;
|
|
}
|
|
/* Backup old elements in case of rehash failure */
|
|
old_size = map->table_size;
|
|
old_table = map->table;
|
|
map->table_size = new_size;
|
|
map->table = new_table;
|
|
/* Rehash */
|
|
for (entry = old_table; entry < &old_table[old_size]; ++entry) {
|
|
if (!entry->data) {
|
|
/* Only copy entries with data */
|
|
continue;
|
|
}
|
|
new_entry = hashmap_entry_find(map, entry->key, true);
|
|
if (!new_entry) {
|
|
/*
|
|
* The load factor is still too high with the new table
|
|
* size, or a poor hash function was used.
|
|
*/
|
|
goto revert;
|
|
}
|
|
/* Shallow copy (intentionally omits num_collisions) */
|
|
new_entry->key = entry->key;
|
|
new_entry->data = entry->data;
|
|
}
|
|
free(old_table);
|
|
return 0;
|
|
revert:
|
|
map->table_size = old_size;
|
|
map->table = old_table;
|
|
free(new_table);
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* Iterate through all entries and free all keys.
|
|
*/
|
|
static void hashmap_free_keys(struct hashmap *map)
|
|
{
|
|
struct hashmap_iter *iter;
|
|
|
|
if (!map->key_free) {
|
|
return;
|
|
}
|
|
for (iter = hashmap_iter(map); iter;
|
|
iter = hashmap_iter_next(map, iter)) {
|
|
map->key_free((void *)hashmap_iter_get_key(iter));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Initialize an empty hashmap. A hash function and a key comparator are
|
|
* required.
|
|
*
|
|
* hash_func should return an even distribution of numbers between 0
|
|
* and SIZE_MAX varying on the key provided.
|
|
*
|
|
* key_compare_func should return 0 if the keys match, and non-zero otherwise.
|
|
*
|
|
* initial_size is optional, and may be set to the max number of entries
|
|
* expected to be put in the hash table. This is used as a hint to
|
|
* pre-allocate the hash table to the minimum size needed to avoid
|
|
* gratuitous rehashes. If initial_size 0, a default size will be used.
|
|
*/
|
|
int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *),
|
|
int (*key_compare_func)(const void *, const void *),
|
|
size_t initial_size)
|
|
{
|
|
HASHMAP_ASSERT(map != NULL);
|
|
HASHMAP_ASSERT(hash_func != NULL);
|
|
HASHMAP_ASSERT(key_compare_func != NULL);
|
|
|
|
if (!initial_size) {
|
|
initial_size = HASHMAP_SIZE_DEFAULT;
|
|
} else {
|
|
/* Convert init size to valid table size */
|
|
initial_size = hashmap_table_size_calc(initial_size);
|
|
}
|
|
map->table_size_init = initial_size;
|
|
map->table_size = initial_size;
|
|
map->num_entries = 0;
|
|
map->table = (struct hashmap_entry *)calloc(initial_size,
|
|
sizeof(struct hashmap_entry));
|
|
if (!map->table) {
|
|
return -1;
|
|
}
|
|
map->hash = hash_func;
|
|
map->key_compare = key_compare_func;
|
|
map->key_alloc = NULL;
|
|
map->key_free = NULL;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Free the hashmap and all associated memory.
|
|
*/
|
|
void hashmap_destroy(struct hashmap *map)
|
|
{
|
|
if (!map) {
|
|
return;
|
|
}
|
|
hashmap_free_keys(map);
|
|
free(map->table);
|
|
memset(map, 0, sizeof(*map));
|
|
}
|
|
|
|
/*
|
|
* Enable internal memory management of hash keys.
|
|
*/
|
|
void hashmap_set_key_alloc_funcs(struct hashmap *map,
|
|
void *(*key_alloc_func)(const void *),
|
|
void (*key_free_func)(void *))
|
|
{
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
map->key_alloc = key_alloc_func;
|
|
map->key_free = key_free_func;
|
|
}
|
|
|
|
/*
|
|
* Add an entry to the hashmap. If an entry with a matching key already
|
|
* exists and has a data pointer associated with it, the existing data
|
|
* pointer is returned, instead of assigning the new value. Compare
|
|
* the return value with the data passed in to determine if a new entry was
|
|
* created. Returns NULL if memory allocation failed.
|
|
*/
|
|
void *hashmap_put(struct hashmap *map, const void *key, void *data)
|
|
{
|
|
struct hashmap_entry *entry;
|
|
|
|
HASHMAP_ASSERT(map != NULL);
|
|
HASHMAP_ASSERT(key != NULL);
|
|
|
|
/* Rehash with 2x capacity if load factor is approaching 0.75 */
|
|
if (map->table_size <= hashmap_table_min_size_calc(map->num_entries)) {
|
|
hashmap_rehash(map, map->table_size << 1);
|
|
}
|
|
entry = hashmap_entry_find(map, key, true);
|
|
if (!entry) {
|
|
/*
|
|
* Cannot find an empty slot. Either out of memory, or using
|
|
* a poor hash function. Attempt to rehash once to reduce
|
|
* chain length.
|
|
*/
|
|
if (hashmap_rehash(map, map->table_size << 1) < 0) {
|
|
return NULL;
|
|
}
|
|
entry = hashmap_entry_find(map, key, true);
|
|
if (!entry) {
|
|
return NULL;
|
|
}
|
|
}
|
|
if (!entry->key) {
|
|
/* Allocate copy of key to simplify memory management */
|
|
if (map->key_alloc) {
|
|
entry->key = map->key_alloc(key);
|
|
if (!entry->key) {
|
|
return NULL;
|
|
}
|
|
} else {
|
|
entry->key = (void *)key;
|
|
}
|
|
++map->num_entries;
|
|
} else if (entry->data) {
|
|
/* Do not overwrite existing data */
|
|
return entry->data;
|
|
}
|
|
entry->data = data;
|
|
return data;
|
|
}
|
|
|
|
/*
|
|
* Return the data pointer, or NULL if no entry exists.
|
|
*/
|
|
void *hashmap_get(const struct hashmap *map, const void *key)
|
|
{
|
|
struct hashmap_entry *entry;
|
|
|
|
HASHMAP_ASSERT(map != NULL);
|
|
HASHMAP_ASSERT(key != NULL);
|
|
|
|
entry = hashmap_entry_find(map, key, false);
|
|
if (!entry) {
|
|
return NULL;
|
|
}
|
|
return entry->data;
|
|
}
|
|
|
|
/*
|
|
* Remove an entry with the specified key from the map.
|
|
* Returns the data pointer, or NULL, if no entry was found.
|
|
*/
|
|
void *hashmap_remove(struct hashmap *map, const void *key)
|
|
{
|
|
struct hashmap_entry *entry;
|
|
void *data;
|
|
|
|
HASHMAP_ASSERT(map != NULL);
|
|
HASHMAP_ASSERT(key != NULL);
|
|
|
|
entry = hashmap_entry_find(map, key, false);
|
|
if (!entry) {
|
|
return NULL;
|
|
}
|
|
data = entry->data;
|
|
/* Clear the entry and make the chain contiguous */
|
|
hashmap_entry_remove(map, entry);
|
|
return data;
|
|
}
|
|
|
|
/*
|
|
* Remove all entries.
|
|
*/
|
|
void hashmap_clear(struct hashmap *map)
|
|
{
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
hashmap_free_keys(map);
|
|
map->num_entries = 0;
|
|
memset(map->table, 0, sizeof(struct hashmap_entry) * map->table_size);
|
|
}
|
|
|
|
/*
|
|
* Remove all entries and reset the hash table to its initial size.
|
|
*/
|
|
void hashmap_reset(struct hashmap *map)
|
|
{
|
|
struct hashmap_entry *new_table;
|
|
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
hashmap_clear(map);
|
|
if (map->table_size == map->table_size_init) {
|
|
return;
|
|
}
|
|
new_table = (struct hashmap_entry *)realloc(map->table,
|
|
sizeof(struct hashmap_entry) * map->table_size_init);
|
|
if (!new_table) {
|
|
return;
|
|
}
|
|
map->table = new_table;
|
|
map->table_size = map->table_size_init;
|
|
}
|
|
|
|
/*
|
|
* Return the number of entries in the hash map.
|
|
*/
|
|
size_t hashmap_size(const struct hashmap *map)
|
|
{
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
return map->num_entries;
|
|
}
|
|
|
|
/*
|
|
* Get a new hashmap iterator. The iterator is an opaque
|
|
* pointer that may be used with hashmap_iter_*() functions.
|
|
* Hashmap iterators are INVALID after a put or remove operation is performed.
|
|
* hashmap_iter_remove() allows safe removal during iteration.
|
|
*/
|
|
struct hashmap_iter *hashmap_iter(const struct hashmap *map)
|
|
{
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
if (!map->num_entries) {
|
|
return NULL;
|
|
}
|
|
return (struct hashmap_iter *)hashmap_entry_get_populated(map,
|
|
map->table);
|
|
}
|
|
|
|
/*
|
|
* Return an iterator to the next hashmap entry. Returns NULL if there are
|
|
* no more entries.
|
|
*/
|
|
struct hashmap_iter *hashmap_iter_next(const struct hashmap *map,
|
|
const struct hashmap_iter *iter)
|
|
{
|
|
struct hashmap_entry *entry = (struct hashmap_entry *)iter;
|
|
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
if (!iter) {
|
|
return NULL;
|
|
}
|
|
return (struct hashmap_iter *)hashmap_entry_get_populated(map,
|
|
entry + 1);
|
|
}
|
|
|
|
/*
|
|
* Remove the hashmap entry pointed to by this iterator and return an
|
|
* iterator to the next entry. Returns NULL if there are no more entries.
|
|
*/
|
|
struct hashmap_iter *hashmap_iter_remove(struct hashmap *map,
|
|
const struct hashmap_iter *iter)
|
|
{
|
|
struct hashmap_entry *entry = (struct hashmap_entry *)iter;
|
|
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
if (!iter) {
|
|
return NULL;
|
|
}
|
|
if (!entry->key) {
|
|
/* Iterator is invalid, so just return the next valid entry */
|
|
return hashmap_iter_next(map, iter);
|
|
}
|
|
hashmap_entry_remove(map, entry);
|
|
return (struct hashmap_iter *)hashmap_entry_get_populated(map, entry);
|
|
}
|
|
|
|
/*
|
|
* Return the key of the entry pointed to by the iterator.
|
|
*/
|
|
const void *hashmap_iter_get_key(const struct hashmap_iter *iter)
|
|
{
|
|
if (!iter) {
|
|
return NULL;
|
|
}
|
|
return (const void *)((struct hashmap_entry *)iter)->key;
|
|
}
|
|
|
|
/*
|
|
* Return the data of the entry pointed to by the iterator.
|
|
*/
|
|
void *hashmap_iter_get_data(const struct hashmap_iter *iter)
|
|
{
|
|
if (!iter) {
|
|
return NULL;
|
|
}
|
|
return ((struct hashmap_entry *)iter)->data;
|
|
}
|
|
|
|
/*
|
|
* Set the data pointer of the entry pointed to by the iterator.
|
|
*/
|
|
void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data)
|
|
{
|
|
if (!iter) {
|
|
return;
|
|
}
|
|
((struct hashmap_entry *)iter)->data = data;
|
|
}
|
|
|
|
/*
|
|
* Invoke func for each entry in the hashmap. Unlike the hashmap_iter_*()
|
|
* interface, this function supports calls to hashmap_remove() during iteration.
|
|
* However, it is an error to put or remove an entry other than the current one,
|
|
* and doing so will immediately halt iteration and return an error.
|
|
* Iteration is stopped if func returns non-zero. Returns func's return
|
|
* value if it is < 0, otherwise, 0.
|
|
*/
|
|
int hashmap_foreach(const struct hashmap *map,
|
|
int (*func)(const void *, void *, void *), void *arg)
|
|
{
|
|
struct hashmap_entry *entry;
|
|
size_t num_entries;
|
|
const void *key;
|
|
int rc;
|
|
|
|
HASHMAP_ASSERT(map != NULL);
|
|
HASHMAP_ASSERT(func != NULL);
|
|
|
|
entry = map->table;
|
|
for (entry = map->table; entry < &map->table[map->table_size];
|
|
++entry) {
|
|
if (!entry->key) {
|
|
continue;
|
|
}
|
|
num_entries = map->num_entries;
|
|
key = entry->key;
|
|
rc = func(entry->key, entry->data, arg);
|
|
if (rc < 0) {
|
|
return rc;
|
|
}
|
|
if (rc > 0) {
|
|
return 0;
|
|
}
|
|
/* Run this entry again if func() deleted it */
|
|
if (entry->key != key) {
|
|
--entry;
|
|
} else if (num_entries != map->num_entries) {
|
|
/* Stop immediately if func put/removed another entry */
|
|
return -1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Default hash function for string keys.
 * This is an implementation of the well-documented Jenkins one-at-a-time
 * hash function.
 *
 * key must point to a NUL-terminated string.  Each byte is read as an
 * unsigned char, so the result does not depend on whether plain char is
 * signed on the target platform.
 */
size_t hashmap_hash_string(const void *key)
{
    const unsigned char *key_str = (const unsigned char *)key;
    size_t hash = 0;

    for (; *key_str; ++key_str) {
        hash += *key_str;
        hash += (hash << 10);
        hash ^= (hash >> 6);
    }
    /* Final mixing of the accumulated value */
    hash += (hash << 3);
    hash ^= (hash >> 11);
    hash += (hash << 15);
    return hash;
}
|
|
|
|
/*
 * Default key comparator function for string keys.  Both keys are compared
 * with strcmp(), so the result is 0 when the strings are equal.
 */
int hashmap_compare_string(const void *a, const void *b)
{
    const char *lhs = (const char *)a;
    const char *rhs = (const char *)b;

    return strcmp(lhs, rhs);
}
|
|
|
|
/*
 * Default key allocation function for string keys.  Returns a newly
 * allocated copy of the NUL-terminated string key, or NULL if the
 * allocation fails.  Use free() for the key_free_func.
 *
 * The copy is made with malloc() and memcpy() rather than strdup(), which
 * is a POSIX function and is not declared when compiling as strict ISO C.
 */
void *hashmap_alloc_key_string(const void *key)
{
    const char *src = (const char *)key;
    const size_t size = strlen(src) + 1;    /* include the terminator */
    char *copy = (char *)malloc(size);

    if (copy != NULL) {
        memcpy(copy, src, size);
    }
    return copy;
}
|
|
|
|
#ifdef HASHMAP_METRICS
|
|
/*
|
|
* Return the load factor.
|
|
*/
|
|
double hashmap_load_factor(const struct hashmap *map)
|
|
{
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
if (!map->table_size) {
|
|
return 0;
|
|
}
|
|
return (double)map->num_entries / map->table_size;
|
|
}
|
|
|
|
/*
|
|
* Return the average number of collisions per entry.
|
|
*/
|
|
double hashmap_collisions_mean(const struct hashmap *map)
|
|
{
|
|
struct hashmap_entry *entry;
|
|
size_t total_collisions = 0;
|
|
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
if (!map->num_entries) {
|
|
return 0;
|
|
}
|
|
for (entry = map->table; entry < &map->table[map->table_size];
|
|
++entry) {
|
|
if (!entry->key) {
|
|
continue;
|
|
}
|
|
total_collisions += entry->num_collisions;
|
|
}
|
|
return (double)total_collisions / map->num_entries;
|
|
}
|
|
|
|
/*
|
|
* Return the variance between entry collisions. The higher the variance,
|
|
* the more likely the hash function is poor and is resulting in clustering.
|
|
*/
|
|
double hashmap_collisions_variance(const struct hashmap *map)
|
|
{
|
|
struct hashmap_entry *entry;
|
|
double mean_collisions;
|
|
double variance;
|
|
double total_variance = 0;
|
|
|
|
HASHMAP_ASSERT(map != NULL);
|
|
|
|
if (!map->num_entries) {
|
|
return 0;
|
|
}
|
|
mean_collisions = hashmap_collisions_mean(map);
|
|
for (entry = map->table; entry < &map->table[map->table_size];
|
|
++entry) {
|
|
if (!entry->key) {
|
|
continue;
|
|
}
|
|
variance = (double)entry->num_collisions - mean_collisions;
|
|
total_variance += variance * variance;
|
|
}
|
|
return total_variance / map->num_entries;
|
|
}
|
|
#endif
|