diff --git a/source/compiler/CMakeLists.txt b/source/compiler/CMakeLists.txt index a4d48a2..146d8d9 100644 --- a/source/compiler/CMakeLists.txt +++ b/source/compiler/CMakeLists.txt @@ -68,8 +68,9 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) # The Pawn compiler shared library set(PAWNC_SRCS - hashmap/hashmap.c - hashmap/hashmap.h + hashtable/wrap_hashtable.c + hashtable/wrap_hashtable.h + hashtable/hashtable.h libpawnc.c lstring.c lstring.h diff --git a/source/compiler/hashmap/LICENSE b/source/compiler/hashmap/LICENSE deleted file mode 100644 index 2d576cc..0000000 --- a/source/compiler/hashmap/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2016 David Leeds - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/source/compiler/hashmap/hashmap.c b/source/compiler/hashmap/hashmap.c deleted file mode 100644 index d169457..0000000 --- a/source/compiler/hashmap/hashmap.c +++ /dev/null @@ -1,689 +0,0 @@ -/* - * Copyright (c) 2016-2017 David Leeds - * - * Hashmap is free software; you can redistribute it and/or modify - * it under the terms of the MIT license. See LICENSE for details. - */ - -#include -#include -#include -#include - -#include "hashmap.h" - -#ifndef HASHMAP_NOASSERT -#include -#define HASHMAP_ASSERT(expr) assert(expr) -#else -#define HASHMAP_ASSERT(expr) -#endif - -/* Table sizes must be powers of 2 */ -#define HASHMAP_SIZE_MIN (1 << 5) /* 32 */ -#define HASHMAP_SIZE_DEFAULT (1 << 8) /* 256 */ -#define HASHMAP_SIZE_MOD(map, val) ((val) & ((map)->table_size - 1)) - -/* Limit for probing is 1/2 of table_size */ -#define HASHMAP_PROBE_LEN(map) ((map)->table_size >> 1) -/* Return the next linear probe index */ -#define HASHMAP_PROBE_NEXT(map, index) HASHMAP_SIZE_MOD(map, (index) + 1) - -/* Check if index b is less than or equal to index a */ -#define HASHMAP_INDEX_LE(map, a, b) \ - ((a) == (b) || (((b) - (a)) & ((map)->table_size >> 1)) != 0) - - -struct hashmap_entry { - void *key; - void *data; -#ifdef HASHMAP_METRICS - size_t num_collisions; -#endif -}; - - -/* - * Enforce a maximum 0.75 load factor. - */ -static inline size_t hashmap_table_min_size_calc(size_t num_entries) -{ - return num_entries + (num_entries / 3); -} - -/* - * Calculate the optimal table size, given the specified max number - * of elements. - */ -static size_t hashmap_table_size_calc(size_t num_entries) -{ - size_t table_size; - size_t min_size; - - table_size = hashmap_table_min_size_calc(num_entries); - - /* Table size is always a power of 2 */ - min_size = HASHMAP_SIZE_MIN; - while (min_size < table_size) { - min_size <<= 1; - } - return min_size; -} - -/* - * Get a valid hash table index from a key. 
- */ -static inline size_t hashmap_calc_index(const struct hashmap *map, - const void *key) -{ - return HASHMAP_SIZE_MOD(map, map->hash(key)); -} - -/* - * Return the next populated entry, starting with the specified one. - * Returns NULL if there are no more valid entries. - */ -static struct hashmap_entry *hashmap_entry_get_populated( - const struct hashmap *map, struct hashmap_entry *entry) -{ - for (; entry < &map->table[map->table_size]; ++entry) { - if (entry->key) { - return entry; - } - } - return NULL; -} - -/* - * Find the hashmap entry with the specified key, or an empty slot. - * Returns NULL if the entire table has been searched without finding a match. - */ -static struct hashmap_entry *hashmap_entry_find(const struct hashmap *map, - const void *key, bool find_empty) -{ - size_t i; - size_t index; - size_t probe_len = HASHMAP_PROBE_LEN(map); - struct hashmap_entry *entry; - - index = hashmap_calc_index(map, key); - - /* Linear probing */ - for (i = 0; i < probe_len; ++i) { - entry = &map->table[index]; - if (!entry->key) { - if (find_empty) { -#ifdef HASHMAP_METRICS - entry->num_collisions = i; -#endif - return entry; - } - return NULL; - } - if (map->key_compare(key, entry->key) == 0) { - return entry; - } - index = HASHMAP_PROBE_NEXT(map, index); - } - return NULL; -} - -/* - * Removes the specified entry and processes the proceeding entries to reduce - * the load factor and keep the chain continuous. This is a required - * step for hash maps using linear probing. 
- */ -static void hashmap_entry_remove(struct hashmap *map, - struct hashmap_entry *removed_entry) -{ - size_t i; -#ifdef HASHMAP_METRICS - size_t removed_i = 0; -#endif - size_t index; - size_t entry_index; - size_t removed_index = (removed_entry - map->table); - struct hashmap_entry *entry; - - /* Free the key */ - if (map->key_free) { - map->key_free(removed_entry->key); - } - --map->num_entries; - - /* Fill the free slot in the chain */ - index = HASHMAP_PROBE_NEXT(map, removed_index); - for (i = 1; i < map->table_size; ++i) { - entry = &map->table[index]; - if (!entry->key) { - /* Reached end of chain */ - break; - } - entry_index = hashmap_calc_index(map, entry->key); - /* Shift in entries with an index <= to the removed slot */ - if (HASHMAP_INDEX_LE(map, removed_index, entry_index)) { -#ifdef HASHMAP_METRICS - entry->num_collisions -= (i - removed_i); - removed_i = i; -#endif - memcpy(removed_entry, entry, sizeof(*removed_entry)); - removed_index = index; - removed_entry = entry; - } - index = HASHMAP_PROBE_NEXT(map, index); - } - /* Clear the last removed entry */ - memset(removed_entry, 0, sizeof(*removed_entry)); -} - -/* - * Reallocates the hash table to the new size and rehashes all entries. - * new_size MUST be a power of 2. - * Returns 0 on success and -1 on allocation or hash function failure. 
- */ -static int hashmap_rehash(struct hashmap *map, size_t new_size) -{ - size_t old_size; - struct hashmap_entry *old_table; - struct hashmap_entry *new_table; - struct hashmap_entry *entry; - struct hashmap_entry *new_entry; - - HASHMAP_ASSERT(new_size >= HASHMAP_SIZE_MIN); - HASHMAP_ASSERT((new_size & (new_size - 1)) == 0); - - new_table = (struct hashmap_entry *)calloc(new_size, - sizeof(struct hashmap_entry)); - if (!new_table) { - return -1; - } - /* Backup old elements in case of rehash failure */ - old_size = map->table_size; - old_table = map->table; - map->table_size = new_size; - map->table = new_table; - /* Rehash */ - for (entry = old_table; entry < &old_table[old_size]; ++entry) { - if (!entry->data) { - /* Only copy entries with data */ - continue; - } - new_entry = hashmap_entry_find(map, entry->key, true); - if (!new_entry) { - /* - * The load factor is still too high with the new table - * size, or a poor hash function was used. - */ - goto revert; - } - /* Shallow copy (intentionally omits num_collisions) */ - new_entry->key = entry->key; - new_entry->data = entry->data; - } - free(old_table); - return 0; -revert: - map->table_size = old_size; - map->table = old_table; - free(new_table); - return -1; -} - -/* - * Iterate through all entries and free all keys. - */ -static void hashmap_free_keys(struct hashmap *map) -{ - struct hashmap_iter *iter; - - if (!map->key_free) { - return; - } - for (iter = hashmap_iter(map); iter; - iter = hashmap_iter_next(map, iter)) { - map->key_free((void *)hashmap_iter_get_key(iter)); - } -} - -/* - * Initialize an empty hashmap. A hash function and a key comparator are - * required. - * - * hash_func should return an even distribution of numbers between 0 - * and SIZE_MAX varying on the key provided. - * - * key_compare_func should return 0 if the keys match, and non-zero otherwise. - * - * initial_size is optional, and may be set to the max number of entries - * expected to be put in the hash table. 
This is used as a hint to - * pre-allocate the hash table to the minimum size needed to avoid - * gratuitous rehashes. If initial_size 0, a default size will be used. - */ -int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *), - int (*key_compare_func)(const void *, const void *), - size_t initial_size) -{ - HASHMAP_ASSERT(map != NULL); - HASHMAP_ASSERT(hash_func != NULL); - HASHMAP_ASSERT(key_compare_func != NULL); - - if (!initial_size) { - initial_size = HASHMAP_SIZE_DEFAULT; - } else { - /* Convert init size to valid table size */ - initial_size = hashmap_table_size_calc(initial_size); - } - map->table_size_init = initial_size; - map->table_size = initial_size; - map->num_entries = 0; - map->table = (struct hashmap_entry *)calloc(initial_size, - sizeof(struct hashmap_entry)); - if (!map->table) { - return -1; - } - map->hash = hash_func; - map->key_compare = key_compare_func; - map->key_alloc = NULL; - map->key_free = NULL; - return 0; -} - -/* - * Free the hashmap and all associated memory. - */ -void hashmap_destroy(struct hashmap *map) -{ - if (!map) { - return; - } - hashmap_free_keys(map); - free(map->table); - memset(map, 0, sizeof(*map)); -} - -/* - * Enable internal memory management of hash keys. - */ -void hashmap_set_key_alloc_funcs(struct hashmap *map, - void *(*key_alloc_func)(const void *), - void (*key_free_func)(void *)) -{ - HASHMAP_ASSERT(map != NULL); - - map->key_alloc = key_alloc_func; - map->key_free = key_free_func; -} - -/* - * Add an entry to the hashmap. If an entry with a matching key already - * exists and has a data pointer associated with it, the existing data - * pointer is returned, instead of assigning the new value. Compare - * the return value with the data passed in to determine if a new entry was - * created. Returns NULL if memory allocation failed. 
- */ -void *hashmap_put(struct hashmap *map, const void *key, void *data) -{ - struct hashmap_entry *entry; - - HASHMAP_ASSERT(map != NULL); - HASHMAP_ASSERT(key != NULL); - - /* Rehash with 2x capacity if load factor is approaching 0.75 */ - if (map->table_size <= hashmap_table_min_size_calc(map->num_entries)) { - hashmap_rehash(map, map->table_size << 1); - } - entry = hashmap_entry_find(map, key, true); - if (!entry) { - /* - * Cannot find an empty slot. Either out of memory, or using - * a poor hash function. Attempt to rehash once to reduce - * chain length. - */ - if (hashmap_rehash(map, map->table_size << 1) < 0) { - return NULL; - } - entry = hashmap_entry_find(map, key, true); - if (!entry) { - return NULL; - } - } - if (!entry->key) { - /* Allocate copy of key to simplify memory management */ - if (map->key_alloc) { - entry->key = map->key_alloc(key); - if (!entry->key) { - return NULL; - } - } else { - entry->key = (void *)key; - } - ++map->num_entries; - } else if (entry->data) { - /* Do not overwrite existing data */ - return entry->data; - } - entry->data = data; - return data; -} - -/* - * Return the data pointer, or NULL if no entry exists. - */ -void *hashmap_get(const struct hashmap *map, const void *key) -{ - struct hashmap_entry *entry; - - HASHMAP_ASSERT(map != NULL); - HASHMAP_ASSERT(key != NULL); - - entry = hashmap_entry_find(map, key, false); - if (!entry) { - return NULL; - } - return entry->data; -} - -/* - * Remove an entry with the specified key from the map. - * Returns the data pointer, or NULL, if no entry was found. - */ -void *hashmap_remove(struct hashmap *map, const void *key) -{ - struct hashmap_entry *entry; - void *data; - - HASHMAP_ASSERT(map != NULL); - HASHMAP_ASSERT(key != NULL); - - entry = hashmap_entry_find(map, key, false); - if (!entry) { - return NULL; - } - data = entry->data; - /* Clear the entry and make the chain contiguous */ - hashmap_entry_remove(map, entry); - return data; -} - -/* - * Remove all entries. 
- */ -void hashmap_clear(struct hashmap *map) -{ - HASHMAP_ASSERT(map != NULL); - - hashmap_free_keys(map); - map->num_entries = 0; - memset(map->table, 0, sizeof(struct hashmap_entry) * map->table_size); -} - -/* - * Remove all entries and reset the hash table to its initial size. - */ -void hashmap_reset(struct hashmap *map) -{ - struct hashmap_entry *new_table; - - HASHMAP_ASSERT(map != NULL); - - hashmap_clear(map); - if (map->table_size == map->table_size_init) { - return; - } - new_table = (struct hashmap_entry *)realloc(map->table, - sizeof(struct hashmap_entry) * map->table_size_init); - if (!new_table) { - return; - } - map->table = new_table; - map->table_size = map->table_size_init; -} - -/* - * Return the number of entries in the hash map. - */ -size_t hashmap_size(const struct hashmap *map) -{ - HASHMAP_ASSERT(map != NULL); - - return map->num_entries; -} - -/* - * Get a new hashmap iterator. The iterator is an opaque - * pointer that may be used with hashmap_iter_*() functions. - * Hashmap iterators are INVALID after a put or remove operation is performed. - * hashmap_iter_remove() allows safe removal during iteration. - */ -struct hashmap_iter *hashmap_iter(const struct hashmap *map) -{ - HASHMAP_ASSERT(map != NULL); - - if (!map->num_entries) { - return NULL; - } - return (struct hashmap_iter *)hashmap_entry_get_populated(map, - map->table); -} - -/* - * Return an iterator to the next hashmap entry. Returns NULL if there are - * no more entries. - */ -struct hashmap_iter *hashmap_iter_next(const struct hashmap *map, - const struct hashmap_iter *iter) -{ - struct hashmap_entry *entry = (struct hashmap_entry *)iter; - - HASHMAP_ASSERT(map != NULL); - - if (!iter) { - return NULL; - } - return (struct hashmap_iter *)hashmap_entry_get_populated(map, - entry + 1); -} - -/* - * Remove the hashmap entry pointed to by this iterator and return an - * iterator to the next entry. Returns NULL if there are no more entries. 
- */ -struct hashmap_iter *hashmap_iter_remove(struct hashmap *map, - const struct hashmap_iter *iter) -{ - struct hashmap_entry *entry = (struct hashmap_entry *)iter; - - HASHMAP_ASSERT(map != NULL); - - if (!iter) { - return NULL; - } - if (!entry->key) { - /* Iterator is invalid, so just return the next valid entry */ - return hashmap_iter_next(map, iter); - } - hashmap_entry_remove(map, entry); - return (struct hashmap_iter *)hashmap_entry_get_populated(map, entry); -} - -/* - * Return the key of the entry pointed to by the iterator. - */ -const void *hashmap_iter_get_key(const struct hashmap_iter *iter) -{ - if (!iter) { - return NULL; - } - return (const void *)((struct hashmap_entry *)iter)->key; -} - -/* - * Return the data of the entry pointed to by the iterator. - */ -void *hashmap_iter_get_data(const struct hashmap_iter *iter) -{ - if (!iter) { - return NULL; - } - return ((struct hashmap_entry *)iter)->data; -} - -/* - * Set the data pointer of the entry pointed to by the iterator. - */ -void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data) -{ - if (!iter) { - return; - } - ((struct hashmap_entry *)iter)->data = data; -} - -/* - * Invoke func for each entry in the hashmap. Unlike the hashmap_iter_*() - * interface, this function supports calls to hashmap_remove() during iteration. - * However, it is an error to put or remove an entry other than the current one, - * and doing so will immediately halt iteration and return an error. - * Iteration is stopped if func returns non-zero. Returns func's return - * value if it is < 0, otherwise, 0. 
- */ -int hashmap_foreach(const struct hashmap *map, - int (*func)(const void *, void *, void *), void *arg) -{ - struct hashmap_entry *entry; - size_t num_entries; - const void *key; - int rc; - - HASHMAP_ASSERT(map != NULL); - HASHMAP_ASSERT(func != NULL); - - entry = map->table; - for (entry = map->table; entry < &map->table[map->table_size]; - ++entry) { - if (!entry->key) { - continue; - } - num_entries = map->num_entries; - key = entry->key; - rc = func(entry->key, entry->data, arg); - if (rc < 0) { - return rc; - } - if (rc > 0) { - return 0; - } - /* Run this entry again if func() deleted it */ - if (entry->key != key) { - --entry; - } else if (num_entries != map->num_entries) { - /* Stop immediately if func put/removed another entry */ - return -1; - } - } - return 0; -} - -/* - * Default hash function for string keys. - * This is an implementation of the well-documented Jenkins one-at-a-time - * hash function. - */ -size_t hashmap_hash_string(const void *key) -{ - const char *key_str = (const char *)key; - size_t hash = 0; - - for (; *key_str; ++key_str) { - hash += *key_str; - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - -/* - * Default key comparator function for string keys. - */ -int hashmap_compare_string(const void *a, const void *b) -{ - return strcmp((const char *)a, (const char *)b); -} - -/* - * Default key allocation function for string keys. Use free() for the - * key_free_func. - */ -void *hashmap_alloc_key_string(const void *key) -{ - return (void *)strdup((const char *)key); -} - -#ifdef HASHMAP_METRICS -/* - * Return the load factor. - */ -double hashmap_load_factor(const struct hashmap *map) -{ - HASHMAP_ASSERT(map != NULL); - - if (!map->table_size) { - return 0; - } - return (double)map->num_entries / map->table_size; -} - -/* - * Return the average number of collisions per entry. 
- */ -double hashmap_collisions_mean(const struct hashmap *map) -{ - struct hashmap_entry *entry; - size_t total_collisions = 0; - - HASHMAP_ASSERT(map != NULL); - - if (!map->num_entries) { - return 0; - } - for (entry = map->table; entry < &map->table[map->table_size]; - ++entry) { - if (!entry->key) { - continue; - } - total_collisions += entry->num_collisions; - } - return (double)total_collisions / map->num_entries; -} - -/* - * Return the variance between entry collisions. The higher the variance, - * the more likely the hash function is poor and is resulting in clustering. - */ -double hashmap_collisions_variance(const struct hashmap *map) -{ - struct hashmap_entry *entry; - double mean_collisions; - double variance; - double total_variance = 0; - - HASHMAP_ASSERT(map != NULL); - - if (!map->num_entries) { - return 0; - } - mean_collisions = hashmap_collisions_mean(map); - for (entry = map->table; entry < &map->table[map->table_size]; - ++entry) { - if (!entry->key) { - continue; - } - variance = (double)entry->num_collisions - mean_collisions; - total_variance += variance * variance; - } - return total_variance / map->num_entries; -} -#endif diff --git a/source/compiler/hashmap/hashmap.h b/source/compiler/hashmap/hashmap.h deleted file mode 100644 index b4ce16e..0000000 --- a/source/compiler/hashmap/hashmap.h +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2016-2017 David Leeds - * - * Hashmap is free software; you can redistribute it and/or modify - * it under the terms of the MIT license. See LICENSE for details. - */ - -#ifndef __HASHMAP_H__ -#define __HASHMAP_H__ - -/* - * Define HASHMAP_METRICS to compile in performance analysis - * functions for use in assessing hash function performance. - */ -/* #define HASHMAP_METRICS */ - -/* - * Define HASHMAP_NOASSERT to compile out all assertions used internally. 
- */ -/* #define HASHMAP_NOASSERT */ - -/* - * Macros to declare type-specific versions of hashmap_*() functions to - * allow compile-time type checking and avoid the need for type casting. - */ -#define HASHMAP_FUNCS_DECLARE(name, key_type, data_type) \ - data_type *name##_hashmap_put(struct hashmap *map, key_type *key, \ - data_type *data); \ - data_type *name##_hashmap_get(const struct hashmap *map, \ - key_type *key); \ - data_type *name##_hashmap_remove(struct hashmap *map, \ - key_type *key); \ - key_type *name##_hashmap_iter_get_key( \ - const struct hashmap_iter *iter); \ - data_type *name##_hashmap_iter_get_data( \ - const struct hashmap_iter *iter); \ - void name##_hashmap_iter_set_data(const struct hashmap_iter *iter, \ - data_type *data); \ - int name##_hashmap_foreach(const struct hashmap *map, \ - int (*func)(key_type *, data_type *, void *), void *arg); - -#define HASHMAP_FUNCS_CREATE(name, key_type, data_type) \ - data_type *name##_hashmap_put(struct hashmap *map, key_type *key, \ - data_type *data) \ - { \ - return (data_type *)hashmap_put(map, (const void *)key, \ - (void *)data); \ - } \ - data_type *name##_hashmap_get(const struct hashmap *map, \ - key_type *key) \ - { \ - return (data_type *)hashmap_get(map, (const void *)key); \ - } \ - data_type *name##_hashmap_remove(struct hashmap *map, \ - key_type *key) \ - { \ - return (data_type *)hashmap_remove(map, (const void *)key); \ - } \ - key_type *name##_hashmap_iter_get_key( \ - const struct hashmap_iter *iter) \ - { \ - return (key_type *)hashmap_iter_get_key(iter); \ - } \ - data_type *name##_hashmap_iter_get_data( \ - const struct hashmap_iter *iter) \ - { \ - return (data_type *)hashmap_iter_get_data(iter); \ - } \ - void name##_hashmap_iter_set_data(const struct hashmap_iter *iter, \ - data_type *data) \ - { \ - hashmap_iter_set_data(iter, (void *)data); \ - } \ - struct __##name##_hashmap_foreach_state { \ - int (*func)(key_type *, data_type *, void *); \ - void *arg; \ - }; \ - static 
inline int __##name##_hashmap_foreach_callback(const void *key, \ - void *data, void *arg) \ - { \ - struct __##name##_hashmap_foreach_state *s = \ - (struct __##name##_hashmap_foreach_state *)arg; \ - return s->func((key_type *)key, (data_type *)data, s->arg); \ - } \ - int name##_hashmap_foreach(const struct hashmap *map, \ - int (*func)(key_type *, data_type *, void *), void *arg) \ - { \ - struct __##name##_hashmap_foreach_state s = { func, arg }; \ - return hashmap_foreach(map, \ - __##name##_hashmap_foreach_callback, &s); \ - } - - -struct hashmap_iter; -struct hashmap_entry; - -/* - * The hashmap state structure. - */ -struct hashmap { - size_t table_size_init; - size_t table_size; - size_t num_entries; - struct hashmap_entry *table; - size_t (*hash)(const void *); - int (*key_compare)(const void *, const void *); - void *(*key_alloc)(const void *); - void (*key_free)(void *); -}; - -/* - * Initialize an empty hashmap. A hash function and a key comparator are - * required. - * - * hash_func should return an even distribution of numbers between 0 - * and SIZE_MAX varying on the key provided. - * - * key_compare_func should return 0 if the keys match, and non-zero otherwise. - * - * initial_size is optional, and may be set to the max number of entries - * expected to be put in the hash table. This is used as a hint to - * pre-allocate the hash table to the minimum size to avoid gratuitous rehashes. - * If initial_size 0, a default size will be used. - */ -int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *), - int (*key_compare_func)(const void *, const void *), - size_t initial_size); - -/* - * Free the hashmap and all associated memory. - */ -void hashmap_destroy(struct hashmap *map); - -/* - * Enable internal memory allocation and management of hash keys. - */ -void hashmap_set_key_alloc_funcs(struct hashmap *map, - void *(*key_alloc_func)(const void *), - void (*key_free_func)(void *)); - -/* - * Add an entry to the hashmap. 
If an entry with a matching key already - * exists and has a data pointer associated with it, the existing data - * pointer is returned, instead of assigning the new value. Compare - * the return value with the data passed in to determine if a new entry was - * created. Returns NULL if memory allocation failed. - */ -void *hashmap_put(struct hashmap *map, const void *key, void *data); - -/* - * Return the data pointer, or NULL if no entry exists. - */ -void *hashmap_get(const struct hashmap *map, const void *key); - -/* - * Remove an entry with the specified key from the map. - * Returns the data pointer, or NULL, if no entry was found. - */ -void *hashmap_remove(struct hashmap *map, const void *key); - -/* - * Remove all entries. - */ -void hashmap_clear(struct hashmap *map); - -/* - * Remove all entries and reset the hash table to its initial size. - */ -void hashmap_reset(struct hashmap *map); - -/* - * Return the number of entries in the hash map. - */ -size_t hashmap_size(const struct hashmap *map); - -/* - * Get a new hashmap iterator. The iterator is an opaque - * pointer that may be used with hashmap_iter_*() functions. - * Hashmap iterators are INVALID after a put or remove operation is performed. - * hashmap_iter_remove() allows safe removal during iteration. - */ -struct hashmap_iter *hashmap_iter(const struct hashmap *map); - -/* - * Return an iterator to the next hashmap entry. Returns NULL if there are - * no more entries. - */ -struct hashmap_iter *hashmap_iter_next(const struct hashmap *map, - const struct hashmap_iter *iter); - -/* - * Remove the hashmap entry pointed to by this iterator and returns an - * iterator to the next entry. Returns NULL if there are no more entries. - */ -struct hashmap_iter *hashmap_iter_remove(struct hashmap *map, - const struct hashmap_iter *iter); - -/* - * Return the key of the entry pointed to by the iterator. 
- */ -const void *hashmap_iter_get_key(const struct hashmap_iter *iter); - -/* - * Return the data of the entry pointed to by the iterator. - */ -void *hashmap_iter_get_data(const struct hashmap_iter *iter); - -/* - * Set the data pointer of the entry pointed to by the iterator. - */ -void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data); - -/* - * Invoke func for each entry in the hashmap. Unlike the hashmap_iter_*() - * interface, this function supports calls to hashmap_remove() during iteration. - * However, it is an error to put or remove an entry other than the current one, - * and doing so will immediately halt iteration and return an error. - * Iteration is stopped if func returns non-zero. Returns func's return - * value if it is < 0, otherwise, 0. - */ -int hashmap_foreach(const struct hashmap *map, - int (*func)(const void *, void *, void *), void *arg); - -/* - * Default hash function for string keys. - * This is an implementation of the well-documented Jenkins one-at-a-time - * hash function. - */ -size_t hashmap_hash_string(const void *key); - -/* - * Default key comparator function for string keys. - */ -int hashmap_compare_string(const void *a, const void *b); - -/* - * Default key allocation function for string keys. Use free() for the - * key_free_func. - */ -void *hashmap_alloc_key_string(const void *key); - - -#ifdef HASHMAP_METRICS -/* - * Return the load factor. - */ -double hashmap_load_factor(const struct hashmap *map); - -/* - * Return the average number of collisions per entry. - */ -double hashmap_collisions_mean(const struct hashmap *map); - -/* - * Return the variance between entry collisions. The higher the variance, - * the more likely the hash function is poor and is resulting in clustering. 
- */ -double hashmap_collisions_variance(const struct hashmap *map); -#endif - - -#endif /* __HASHMAP_H__ */ - diff --git a/source/compiler/hashtable/hashtable.h b/source/compiler/hashtable/hashtable.h new file mode 100644 index 0000000..5c7474e --- /dev/null +++ b/source/compiler/hashtable/hashtable.h @@ -0,0 +1,742 @@ +/* +------------------------------------------------------------------------------ + Licensing information can be found at the end of the file. +------------------------------------------------------------------------------ + +hashtable.h - v1.1 - Cache efficient hash table implementation for C/C++. + +Do this: + #define HASHTABLE_IMPLEMENTATION +before you include this file in *one* C/C++ file to create the implementation. +*/ + +#ifndef hashtable_h +#define hashtable_h + +#ifndef HASHTABLE_U64 + #define HASHTABLE_U64 unsigned long long +#endif + +typedef struct hashtable_t hashtable_t; + +void hashtable_init( hashtable_t* table, int item_size, int initial_capacity, void* memctx ); +void hashtable_term( hashtable_t* table ); + +int hashtable_insert( hashtable_t* table, HASHTABLE_U64 key, void const* item ); +void hashtable_remove( hashtable_t* table, HASHTABLE_U64 key ); +void hashtable_clear( hashtable_t* table ); + +void* hashtable_find( hashtable_t const* table, HASHTABLE_U64 key ); + +int hashtable_count( hashtable_t const* table ); +void* hashtable_items( hashtable_t const* table ); +HASHTABLE_U64 const* hashtable_keys( hashtable_t const* table ); + +void hashtable_swap( hashtable_t* table, int index_a, int index_b ); + + +#endif /* hashtable_h */ + + +/** + +Example +======= + + #define HASHTABLE_IMPLEMENTATION + #include "hashtable.h" + + #include <stdio.h> // for printf + + int main( int argc, char** argv ) + { + (void) argc, argv; + + // define some example key and value types + typedef struct key_t { int a, b, c; } key_t; + typedef struct value_t + { + char id[ 64 ]; + float x, y, z; + int n[ 250 ]; + } value_t; + + // create a couple of sample 
keys + // (don't bother to fill in the fields for this sample) + key_t* key_a = (key_t*)malloc( sizeof( key_t ) ); + key_t* key_b = (key_t*)malloc( sizeof( key_t ) ); + + hashtable_t table; + hashtable_init( &table, sizeof( value_t ), 256, 0 ); + + { + // values are copied into the table, not stored by pointer + // (don't bother to fill in all the fields for this sample) + value_t value_a = { "Item A" }; + value_t value_b = { "Item B" }; + hashtable_insert( &table, (HASHTABLE_U64)(uintptr_t)key_a, &value_a ); + hashtable_insert( &table, (HASHTABLE_U64)(uintptr_t)key_b, &value_b ); + } + + // find the values by key + value_t* value_a = (value_t*)hashtable_find( &table, (HASHTABLE_U64)(uintptr_t)key_a ); + printf( "First item: %s\n", value_a->id ); + value_t* value_b = (value_t*)hashtable_find( &table, (HASHTABLE_U64)(uintptr_t)key_b ); + printf( "Second item: %s\n", value_b->id ); + + // remove one of the items + hashtable_remove( &table, (HASHTABLE_U64)(uintptr_t)key_a ); + + // it is possible to enumerate keys and values + int count = hashtable_count( &table ); + HASHTABLE_U64 const* keys = hashtable_keys( &table ); + value_t* items = (value_t*)hashtable_items( &table ); + printf( "\nEnumeration:\n" ); + for( int i = 0; i < count; ++i ) + printf( " 0x%X : %s\n", (int) keys[ i ], items[ i ].id ); + + // cleanup + hashtable_term( &table ); + free( key_b ); + free( key_a ); + return 0; + } + + +API Documentation +================= + +hashtable.h is a small library for storing values in a table and access them efficiently by a 64-bit key. It is a +single-header library, and does not need any .lib files or other binaries, or any build scripts. To use it, you just +include hashtable.h to get the API declarations. To get the definitions, you must include hashtable.h from *one* single +C or C++ file, and #define the symbol `HASHTABLE_IMPLEMENTATION` before you do. 
+ +The key value must be unique per entry, and is hashed for efficient lookup using an internal hashing algorithm. This +library does not support custom key types, so typically pointers or handles are used as key values. + +The library is written with efficiency in mind. Data and keys are stored in separate structures, for better cache +coherency, and hash collisions are resolved with open addressing/linear probing using the next available slot, which is +also good for the cache. + + +Customization +------------- +There are a few different things in hashtable.h which are configurable by #defines. Most of the API use the `int` data +type, for integer values where the exact size is not important. However, for some functions, it specifically makes use +of 32 and 64 bit data types. These default to using `unsigned int` and `unsigned long long` by default, but can be +redefined by #defining HASHTABLE_U32 and HASHTABLE_U64 respectively, before including hashtable.h. This is useful if +you, for example, use the types from `<stdint.h>` in the rest of your program, and you want hashtable.h to use +compatible types. In this case, you would include hashtable.h using the following code: + + #define HASHTABLE_U32 uint32_t + #define HASHTABLE_U64 uint64_t + #include "hashtable.h" + +Note that when customizing the data types, you need to use the same definition in every place where you include +hashtable.h, as they affect the declarations as well as the definitions. + +The rest of the customizations only affect the implementation, so will only need to be defined in the file where you +have the #define HASHTABLE_IMPLEMENTATION. + +Note that if all customizations are utilized, hashtable.h will include no external files whatsoever, which might be +useful if you need full control over what code is being built. + + +### size_t + +Internally, the hashtable.h implementation makes use of the standard `size_t` data type. This requires including the +c runtime library header `<stddef.h>`. 
To allow full configurability, and avoid hashtable.h including stddef.h, you can +specify which type hashtable.h should use for its size_t, by #defining HASHTABLE_SIZE_T, like this: + + #define HASHTABLE_IMPLEMENTATION + #define HASHTABLE_SIZE_T uint64_t + #include "hashtable.h" + +If not specified, hashtable.h will by default include stddef.h and use the standard `size_t` type. + + +### Custom memory allocators + +To store the internal data structures, hashtable.h needs to do dynamic allocation by calling `malloc`. Programs might +want to keep track of allocations done, or use custom defined pools to allocate memory from. hashtable.h allows for +specifying custom memory allocation functions for `malloc` and `free`. This is done with the following code: + + #define HASHTABLE_IMPLEMENTATION + #define HASHTABLE_MALLOC( ctx, size ) ( my_custom_malloc( ctx, size ) ) + #define HASHTABLE_FREE( ctx, ptr ) ( my_custom_free( ctx, ptr ) ) + #include "hashtable.h" + +where `my_custom_malloc` and `my_custom_free` are your own memory allocation/deallocation functions. The `ctx` parameter +is an optional parameter of type `void*`. When `hashtable_init` is called, you can pass in a `memctx` parameter, which +can be a pointer to anything you like, and which will be passed through as the `ctx` parameter to every +`HASHTABLE_MALLOC`/`HASHTABLE_FREE` call. For example, if you are doing memory tracking, you can pass a pointer to your +tracking data as `memctx`, and in your custom allocation/deallocation function, you can cast the `ctx` param back to the +right type, and access the tracking data. + +If no custom allocator is defined, hashtable.h will default to `malloc` and `free` from the C runtime library. + + +### Custom assert + +hashtable.h makes use of asserts to report usage errors and failed allocation errors. By default, it makes use of the C +runtime library `assert` macro, which only executes in debug builds. 
However, it allows for substituting with your own +assert function or macro using the following code: + + #define HASHTABLE_IMPLEMENTATION + #define HASHTABLE_ASSERT( condition ) ( my_custom_assert( condition ) ) + #include "hashtable.h" + +Note that if you only want the asserts to trigger in debug builds, you must add a check for this in your custom assert. + + +### Custom C runtime functions + +The library makes use of two additional functions from the C runtime library, and for full flexibility, it allows you +to substitute them for your own. Here's an example: + + #define HASHTABLE_IMPLEMENTATION + #define HASHTABLE_MEMCPY( dst, src, cnt ) ( my_memcpy_func( dst, src, cnt ) ) + #define HASHTABLE_MEMSET( ptr, val, cnt ) ( my_memset_func( ptr, val, cnt ) ) + #include "hashtable.h" + +If no custom function is defined, hashtable.h will default to the C runtime library equivalent. + + +hashtable_init +-------------- + + void hashtable_init( hashtable_t* table, int item_size, int initial_capacity, void* memctx ) + +Initialize a hashtable instance. `item_size` specifies the size, in bytes, of the data type holding a single item stored +in the table. `initial_capacity` is the number of items to allocate storage for initially - capacity will automatically +grow as needed, by reallocating memory. + + +hashtable_term +-------------- + + void hashtable_term( hashtable_t* table ) + +Terminates a hashtable instance, releasing all memory used by it. No further calls to the hashtable API are valid until +the instance is reinitialized by another call to `hashtable_init`. + + +hashtable_insert +---------------- + + int hashtable_insert( hashtable_t* table, HASHTABLE_U64 key, void const* item ) + +Inserts a data item into the hashtable, associating it with the specified key. The item is copied into the hashtable, +rather than just storing the `item` pointer, so the `item` pointer can be safely released after the call to +`hashtable_insert`. 
The value of `key` must be unique - it is not valid to store two items with the same key value. An +assert is triggered if trying to add a key which already exists, which means that if the default assert is used, it will +only be checked in debug builds - in release builds, it is up to the calling code to ensure this doesn't happen, or the +hashtable will be left in an undefined state. + + +hashtable_remove +---------------- + + void hashtable_remove( hashtable_t* table, HASHTABLE_U64 key ) + +Removes the item associated with the specified key, and the instance of the key itself, from the hashtable. If the +specified key could not be found, an assert is triggered. + + +hashtable_clear +--------------- + + void hashtable_clear( hashtable_t* table ) + +Removes all the items stored in the hashtable, without deallocating any of the memory it has allocated. + + +hashtable_find +-------------- + + void* hashtable_find( hashtable_t const* table, HASHTABLE_U64 key ) + +Returns a pointer to the item associated with the specified key, or NULL if the key was not found. The lookup is +designed for efficiency, and for minimizing cache misses. + + +hashtable_count +--------------- + + int hashtable_count( hashtable_t const* table ) + +Returns the number of items currently held in the table. + + +hashtable_items +--------------- + + void* hashtable_items( hashtable_t const* table ) + +Returns a pointer to the items currently held in the table. All items are stored in a contiguous memory block, and you +can get to the next item by moving the pointer `item_size` bytes forward, where `item_size` is the same value as passed +to `hashtable_init`. The easiest way to access items is to cast the return value to the correct type and just index it as +a normal array. It contains as many items as returned by `hashtable_count`.
+ + +hashtable_keys +-------------- + + HASHTABLE_U64 const* hashtable_keys( hashtable_t const* table ) + +Returns a pointer to the keys currently held in the table, in the same order as the items returned from +`hashtable_items`. Can be indexed as an array with as many elements as returned by `hashtable_count`. + + +hashtable_swap +-------------- + + void hashtable_swap( hashtable_t* table, int index_a, int index_b ) + +Swaps the specified item/key pairs, and updates the hash lookup for both. Can be used to re-order the contents, as +retrieved by calling `hashtable_items` and `hashtable_keys`, while keeping the hashing intact. + +*/ + +/* +---------------------- + IMPLEMENTATION +---------------------- +*/ + +#ifndef hashtable_t_h +#define hashtable_t_h + +#ifndef HASHTABLE_U32 + #define HASHTABLE_U32 unsigned int +#endif + +struct hashtable_internal_slot_t + { + HASHTABLE_U32 key_hash; + int item_index; + int base_count; + }; + +struct hashtable_t + { + void* memctx; + int count; + int item_size; + + struct hashtable_internal_slot_t* slots; + int slot_capacity; + + HASHTABLE_U64* items_key; + int* items_slot; + void* items_data; + int item_capacity; + + void* swap_temp; + }; + +#endif /* hashtable_t_h */ + + +#ifdef HASHTABLE_IMPLEMENTATION +#undef HASHTABLE_IMPLEMENTATION + +#ifndef HASHTABLE_SIZE_T + #undef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #undef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #include <stddef.h> + #define HASHTABLE_SIZE_T size_t +#endif + +#ifndef HASHTABLE_ASSERT + #undef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #undef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #include <assert.h> + #define HASHTABLE_ASSERT( x ) assert( x ) +#endif + +#ifndef HASHTABLE_MEMSET + #undef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #undef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #include <string.h> + #define HASHTABLE_MEMSET( ptr, val, cnt ) ( memset( ptr, val, cnt ) )
+#endif + +#ifndef HASHTABLE_MEMCPY + #undef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #undef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #include <string.h> + #define HASHTABLE_MEMCPY( dst, src, cnt ) ( memcpy( dst, src, cnt ) ) +#endif + +#ifndef HASHTABLE_MALLOC + #undef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #undef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #include <stdlib.h> + #define HASHTABLE_MALLOC( ctx, size ) ( malloc( size ) ) + #define HASHTABLE_FREE( ctx, ptr ) ( free( ptr ) ) +#endif + + +static HASHTABLE_U32 hashtable_internal_pow2ceil( HASHTABLE_U32 v ) + { + --v; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + ++v; + v += ( v == 0 ); + return v; + } + + +void hashtable_init( hashtable_t* table, int item_size, int initial_capacity, void* memctx ) + { + initial_capacity = (int)hashtable_internal_pow2ceil( initial_capacity >=0 ? (HASHTABLE_U32) initial_capacity : 32U ); + table->memctx = memctx; + table->count = 0; + table->item_size = item_size; + table->slot_capacity = (int) hashtable_internal_pow2ceil( (HASHTABLE_U32) ( initial_capacity + initial_capacity / 2 ) ); + int slots_size = (int)( table->slot_capacity * sizeof( *table->slots ) ); + table->slots = (struct hashtable_internal_slot_t*) HASHTABLE_MALLOC( table->memctx, (HASHTABLE_SIZE_T) slots_size ); + HASHTABLE_ASSERT( table->slots ); + HASHTABLE_MEMSET( table->slots, 0, (HASHTABLE_SIZE_T) slots_size ); + table->item_capacity = (int) hashtable_internal_pow2ceil( (HASHTABLE_U32) initial_capacity ); + table->items_key = (HASHTABLE_U64*) HASHTABLE_MALLOC( table->memctx, + table->item_capacity * ( sizeof( *table->items_key ) + sizeof( *table->items_slot ) + table->item_size ) + table->item_size ); + HASHTABLE_ASSERT( table->items_key ); + table->items_slot = (int*)( table->items_key + table->item_capacity ); + table->items_data = (void*)( table->items_slot + table->item_capacity ); + table->swap_temp =
(void*)( ( (uintptr_t) table->items_data ) + table->item_size * table->item_capacity ); + } + + +void hashtable_term( hashtable_t* table ) + { + HASHTABLE_FREE( table->memctx, table->items_key ); + HASHTABLE_FREE( table->memctx, table->slots ); + } + + +// from https://gist.github.com/badboy/6267743 +static HASHTABLE_U32 hashtable_internal_calculate_hash( HASHTABLE_U64 key ) + { + key = ( ~key ) + ( key << 18 ); + key = key ^ ( key >> 31 ); + key = key * 21; + key = key ^ ( key >> 11 ); + key = key + ( key << 6 ); + key = key ^ ( key >> 22 ); + HASHTABLE_ASSERT( key ); + return (HASHTABLE_U32) key; + } + + +static int hashtable_internal_find_slot( hashtable_t const* table, HASHTABLE_U64 key ) + { + int const slot_mask = table->slot_capacity - 1; + HASHTABLE_U32 const hash = hashtable_internal_calculate_hash( key ); + + int const base_slot = (int)( hash & (HASHTABLE_U32)slot_mask ); + int base_count = table->slots[ base_slot ].base_count; + int slot = base_slot; + + while( base_count > 0 ) + { + HASHTABLE_U32 slot_hash = table->slots[ slot ].key_hash; + if( slot_hash ) + { + int slot_base = (int)( slot_hash & (HASHTABLE_U32)slot_mask ); + if( slot_base == base_slot ) + { + HASHTABLE_ASSERT( base_count > 0 ); + --base_count; + if( slot_hash == hash && table->items_key[ table->slots[ slot ].item_index ] == key ) + return slot; + } + } + slot = ( slot + 1 ) & slot_mask; + } + + return -1; + } + + +static void hashtable_internal_expand_slots( hashtable_t* table ) + { + int const old_capacity = table->slot_capacity; + struct hashtable_internal_slot_t* old_slots = table->slots; + + table->slot_capacity *= 2; + int const slot_mask = table->slot_capacity - 1; + + int const size = (int)( table->slot_capacity * sizeof( *table->slots ) ); + table->slots = (struct hashtable_internal_slot_t*) HASHTABLE_MALLOC( table->memctx, (HASHTABLE_SIZE_T) size ); + HASHTABLE_ASSERT( table->slots ); + HASHTABLE_MEMSET( table->slots, 0, (HASHTABLE_SIZE_T) size ); + + for( int i = 0; i < 
old_capacity; ++i ) + { + HASHTABLE_U32 const hash = old_slots[ i ].key_hash; + if( hash ) + { + int const base_slot = (int)( hash & (HASHTABLE_U32)slot_mask ); + int slot = base_slot; + while( table->slots[ slot ].key_hash ) + slot = ( slot + 1 ) & slot_mask; + table->slots[ slot ].key_hash = hash; + int item_index = old_slots[ i ].item_index; + table->slots[ slot ].item_index = item_index; + table->items_slot[ item_index ] = slot; + ++table->slots[ base_slot ].base_count; + } + } + + HASHTABLE_FREE( table->memctx, old_slots ); + } + + +static int hashtable_internal_expand_items( hashtable_t* table ) + { + int const new_capacity = table->item_capacity * 2; /* committed to the table only after the allocation succeeds */ + HASHTABLE_U64* const new_items_key = (HASHTABLE_U64*) HASHTABLE_MALLOC( table->memctx, + new_capacity * ( sizeof( *table->items_key ) + sizeof( *table->items_slot ) + table->item_size ) + table->item_size); + if( new_items_key == NULL ) + return 0; + + int* const new_items_slot = (int*)( new_items_key + new_capacity ); + void* const new_items_data = (void*)( new_items_slot + new_capacity ); + void* const new_swap_temp = (void*)( ( (uintptr_t) new_items_data ) + table->item_size * new_capacity ); + + HASHTABLE_MEMCPY( new_items_key, table->items_key, table->count * sizeof( *table->items_key ) ); + HASHTABLE_MEMCPY( new_items_slot, table->items_slot, table->count * sizeof( *table->items_slot ) ); + HASHTABLE_MEMCPY( new_items_data, table->items_data, (HASHTABLE_SIZE_T) table->count * table->item_size ); + + HASHTABLE_FREE( table->memctx, table->items_key ); + table->item_capacity = new_capacity; + table->items_key = new_items_key; + table->items_slot = new_items_slot; + table->items_data = new_items_data; + table->swap_temp = new_swap_temp; + + return 1; + } + + +int hashtable_insert( hashtable_t* table, HASHTABLE_U64 key, void const* item ) + { + HASHTABLE_ASSERT( hashtable_internal_find_slot( table, key ) < 0 ); + + if( table->count >= ( table->slot_capacity - table->slot_capacity / 3 ) ) + hashtable_internal_expand_slots( table ); + + int
const slot_mask = table->slot_capacity - 1; + HASHTABLE_U32 const hash = hashtable_internal_calculate_hash( key ); + + int const base_slot = (int)( hash & (HASHTABLE_U32)slot_mask ); + int base_count = table->slots[ base_slot ].base_count; + int slot = base_slot; + int first_free = slot; + while( base_count ) + { + HASHTABLE_U32 const slot_hash = table->slots[ slot ].key_hash; + if( slot_hash == 0 && table->slots[ first_free ].key_hash != 0 ) first_free = slot; + int slot_base = (int)( slot_hash & (HASHTABLE_U32)slot_mask ); + if( slot_base == base_slot ) + --base_count; + slot = ( slot + 1 ) & slot_mask; + } + + slot = first_free; + while( table->slots[ slot ].key_hash ) + slot = ( slot + 1 ) & slot_mask; + + if( table->count >= table->item_capacity ) + if( !hashtable_internal_expand_items( table ) ) + return 0; + + HASHTABLE_ASSERT( !table->slots[ slot ].key_hash && ( hash & (HASHTABLE_U32) slot_mask ) == (HASHTABLE_U32) base_slot ); + HASHTABLE_ASSERT( hash ); + table->slots[ slot ].key_hash = hash; + table->slots[ slot ].item_index = table->count; + ++table->slots[ base_slot ].base_count; + + + void* dest_item = (void*)( ( (uintptr_t) table->items_data ) + table->count * table->item_size ); + HASHTABLE_MEMCPY( dest_item, item, (HASHTABLE_SIZE_T) table->item_size ); /* go through the customizable copy macro, like every other copy here */ + table->items_key[ table->count ] = key; + table->items_slot[ table->count ] = slot; + ++table->count; + + return 1; + } + + +void hashtable_remove( hashtable_t* table, HASHTABLE_U64 key ) + { + int const slot = hashtable_internal_find_slot( table, key ); + HASHTABLE_ASSERT( slot >= 0 ); + + int const slot_mask = table->slot_capacity - 1; + HASHTABLE_U32 const hash = table->slots[ slot ].key_hash; + int const base_slot = (int)( hash & (HASHTABLE_U32) slot_mask ); + HASHTABLE_ASSERT( hash ); + --table->slots[ base_slot ].base_count; + table->slots[ slot ].key_hash = 0; + + int index = table->slots[ slot ].item_index; + int last_index = table->count - 1; + if( index != last_index ) + { + table->items_key[ index
] = table->items_key[ last_index ]; + table->items_slot[ index ] = table->items_slot[ last_index ]; + void* dst_item = (void*)( ( (uintptr_t) table->items_data ) + index * table->item_size ); + void* src_item = (void*)( ( (uintptr_t) table->items_data ) + last_index * table->item_size ); + HASHTABLE_MEMCPY( dst_item, src_item, (HASHTABLE_SIZE_T) table->item_size ); + table->slots[ table->items_slot[ last_index ] ].item_index = index; + } + --table->count; + } + + +void hashtable_clear( hashtable_t* table ) + { + table->count = 0; + HASHTABLE_MEMSET( table->slots, 0, table->slot_capacity * sizeof( *table->slots ) ); + } + + +void* hashtable_find( hashtable_t const* table, HASHTABLE_U64 key ) + { + int const slot = hashtable_internal_find_slot( table, key ); + if( slot < 0 ) return 0; + + int const index = table->slots[ slot ].item_index; + void* const item = (void*)( ( (uintptr_t) table->items_data ) + index * table->item_size ); + return item; + } + + +int hashtable_count( hashtable_t const* table ) + { + return table->count; + } + + +void* hashtable_items( hashtable_t const* table ) + { + return table->items_data; + } + + +HASHTABLE_U64 const* hashtable_keys( hashtable_t const* table ) + { + return table->items_key; + } + + +void hashtable_swap( hashtable_t* table, int index_a, int index_b ) + { + if( index_a < 0 || index_a >= table->count || index_b < 0 || index_b >= table->count ) return; + + int slot_a = table->items_slot[ index_a ]; + int slot_b = table->items_slot[ index_b ]; + + table->items_slot[ index_a ] = slot_b; + table->items_slot[ index_b ] = slot_a; + + HASHTABLE_U64 temp_key = table->items_key[ index_a ]; + table->items_key[ index_a ] = table->items_key[ index_b ]; + table->items_key[ index_b ] = temp_key; + + void* item_a = (void*)( ( (uintptr_t) table->items_data ) + index_a * table->item_size ); + void* item_b = (void*)( ( (uintptr_t) table->items_data ) + index_b * table->item_size ); + HASHTABLE_MEMCPY( table->swap_temp, item_a, 
table->item_size ); + HASHTABLE_MEMCPY( item_a, item_b, table->item_size ); + HASHTABLE_MEMCPY( item_b, table->swap_temp, table->item_size ); + + table->slots[ slot_a ].item_index = index_b; + table->slots[ slot_b ].item_index = index_a; + } + + +#endif /* HASHTABLE_IMPLEMENTATION */ + +/* + +contributors: + Randy Gaul (hashtable_clear, hashtable_swap ) + +revision history: + 1.1 added hashtable_clear, hashtable_swap + 1.0 first released version + +*/ + +/* +------------------------------------------------------------------------------ + +This software is available under 2 licenses - you may choose the one you like. + +------------------------------------------------------------------------------ + +ALTERNATIVE A - MIT License + +Copyright (c) 2015 Mattias Gustavsson + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +------------------------------------------------------------------------------ + +ALTERNATIVE B - Public Domain (www.unlicense.org) + +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. + +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------------ +*/ diff --git a/source/compiler/hashtable/wrap_hashtable.c b/source/compiler/hashtable/wrap_hashtable.c new file mode 100644 index 0000000..4a6722e --- /dev/null +++ b/source/compiler/hashtable/wrap_hashtable.c @@ -0,0 +1,27 @@ +/* Simple wrapper for a single-file hashtable implementation + * from Mattias Gustavsson. + * + * Copyright (c) Stanislav Gromov, 2018 + * + * This software is provided "as-is", without any express or implied warranty. + * In no event will the authors be held liable for any damages arising from + * the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software in + * a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + + +#include "wrap_hashtable.h" + +#define HASHTABLE_IMPLEMENTATION +#include "hashtable.h" diff --git a/source/compiler/hashtable/wrap_hashtable.h b/source/compiler/hashtable/wrap_hashtable.h new file mode 100644 index 0000000..699b09f --- /dev/null +++ b/source/compiler/hashtable/wrap_hashtable.h @@ -0,0 +1,27 @@ +/* Simple wrapper for a single-file hashtable implementation + * from Mattias Gustavsson. + * + * Copyright (c) Stanislav Gromov, 2018 + * + * This software is provided "as-is", without any express or implied warranty. + * In no event will the authors be held liable for any damages arising from + * the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software in + * a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. 
This notice may not be removed or altered from any source distribution. + */ + + +#include +#define HASHTABLE_SIZE_T size_t +#define HASHTABLE_U64 size_t +#include "hashtable.h" diff --git a/source/compiler/sc.h b/source/compiler/sc.h index 89e08ed..f60a1ad 100644 --- a/source/compiler/sc.h +++ b/source/compiler/sc.h @@ -41,7 +41,7 @@ #else #include #endif -#include "hashmap/hashmap.h" +#include "hashtable/wrap_hashtable.h" #include "../amx/osdefs.h" #include "../amx/amx.h" @@ -127,6 +127,7 @@ typedef struct s_symbol { struct s_symbol *next; struct s_symbol *parent; /* hierarchical types (multi-dimensional arrays) */ struct s_symbol *child; + struct s_symbol *htnext; char name[sNAMEMAX+1]; cell addr; /* address or offset (or value for constant, index for native function) */ cell codeaddr; /* address (in the code segment) where the symbol declaration starts */ @@ -161,12 +162,6 @@ typedef struct s_symbol { char *documentation; /* optional documentation string */ } symbol; -/* new symbol struct for cached global symbols with the same names*/ -typedef struct s_symbol2 { - struct s_symbol *symbol; - struct s_symbol2 *next; -} symbol2; - /* Possible entries for "ident". These are used in the "symbol", "value" * and arginfo structures. Not every constant is valid for every use. @@ -798,7 +793,7 @@ SC_FUNC int state_conflict_id(int listid1,int listid2); #if !defined SC_SKIP_VDECL SC_VDECL symbol loctab; /* local symbol table */ SC_VDECL symbol glbtab; /* global symbol table */ -SC_VDECL struct hashmap symbol_cache_map; +SC_VDECL struct hashtable_t symbol_cache_ht; SC_VDECL symbol *line_sym; SC_VDECL cell *litq; /* the literal queue */ SC_VDECL unsigned char pline[]; /* the line read from the input file */ diff --git a/source/compiler/sc1.c b/source/compiler/sc1.c index fe94880..631345f 100644 --- a/source/compiler/sc1.c +++ b/source/compiler/sc1.c @@ -769,7 +769,7 @@ cleanup: * done (i.e. 
on a fatal error) */ delete_symbols(&glbtab,0,TRUE,TRUE); line_sym=NULL; - hashmap_destroy(&symbol_cache_map); + hashtable_term(&symbol_cache_ht); delete_consttable(&tagname_tab); delete_consttable(&libname_tab); delete_consttable(&sc_automaton_tab); @@ -937,7 +937,7 @@ static void initglobals(void) litq=NULL; /* the literal queue */ glbtab.next=NULL; /* clear global variables/constants table */ loctab.next=NULL; /* " local " / " " */ - hashmap_init(&symbol_cache_map,hashmap_hash_string,hashmap_compare_string,10000); + hashtable_init(&symbol_cache_ht, sizeof(symbol *),(16384/3*2),NULL); /* 16384 slots */ tagname_tab.next=NULL; /* tagname table */ libname_tab.next=NULL; /* library table (#pragma library "..." syntax) */ diff --git a/source/compiler/sc2.c b/source/compiler/sc2.c index dba0a3b..d820654 100644 --- a/source/compiler/sc2.c +++ b/source/compiler/sc2.c @@ -2615,58 +2615,171 @@ SC_FUNC int ishex(char c) return (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F'); } -static void symbol_cache_add(symbol *sym,symbol2 *new_cache_sym) +static uint32_t murmurhash2_aligned(const void *key,int len,uint32_t seed) { - symbol2 *cache_sym; + /* Based on public domain code by Austin Appleby. 
+ * https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp + */ + #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } - if (new_cache_sym==NULL) { - new_cache_sym=(symbol2 *)malloc(sizeof(symbol2)); - if (new_cache_sym==NULL) - error(103); /* insufficient memory */ - new_cache_sym->symbol=sym; - new_cache_sym->next=NULL; - } + const uint32_t m=0x5bd1e995; + const int r=24; + const unsigned char *data=(const unsigned char *)key; + uint32_t h=seed ^ len; + int align=(uint64_t)data & 3; + int sl,sr; - cache_sym=hashmap_get(&symbol_cache_map,sym->name); - if (cache_sym==NULL) { - if (hashmap_put(&symbol_cache_map,sym->name,new_cache_sym)==NULL) - error(103); /* insufficient memory */ + if (align && (len>=4)) { + // Pre-load the temp registers + uint32_t t=0,d=0; + + switch (align) { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } /* switch */ + + t <<= (8*align); + + data += 4-align; + len -= 4-align; + + sl=8*(4-align); + sr=8*align; + + // Mix + while (len>=4) { + uint32_t k; + + d=*(uint32_t *)data; + t=(t >> sr) | (d << sl); + + k=t; + + MIX(h,k,m); + + t=d; + + data += 4; + len -= 4; + } /* while */ + + // Handle leftover data in temp registers + d=0; + if (len>=align) { + uint32_t k; + + switch (align) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + } /* switch */ + + k=(t >> sr) | (d << sl); + MIX(h,k,m); + + data += align; + len -= align; + + //---------- + // Handle tail bytes + switch (len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + } /* switch */ + } else { + switch (len) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: h ^= (t >> sr) | (d << sl); + h *= m; + } /* switch */ + } /* if */ + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; } else { - while(cache_sym->next!=NULL) - cache_sym=cache_sym->next; - cache_sym->next=new_cache_sym; - } + 
while (len>=4) { + uint32_t k=*(uint32_t *)data; + + MIX(h,k,m); + + data += 4; + len -= 4; + } /* while */ + + //---------- + // Handle tail bytes + switch (len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + } /* switch */ + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } /* if */ + + #undef MIX } -static symbol2 *symbol_cache_remove(symbol *sym,int free_cache_sym) -{ - symbol2 *cache_sym; - symbol2 *parent_cache_sym=NULL; +#define namehash(name) \ + (HASHTABLE_U64)murmurhash2_aligned(name,strlen(name),0) - cache_sym=hashmap_get(&symbol_cache_map,sym->name); +static void symbol_cache_add(symbol *sym) +{ + const HASHTABLE_U64 key=namehash(sym->name); + symbol **pcache_sym=(symbol **)hashtable_find(&symbol_cache_ht,key); + symbol *cache_sym; + sym->htnext=NULL; /* drop any stale chain link; rename_symbol() re-adds a symbol that symbol_cache_remove() unlinked without clearing it */ + if (pcache_sym==NULL) { + if (hashtable_insert(&symbol_cache_ht,key,&sym)==0) + error(103); /* insufficient memory */ + return; + } /* if */ + cache_sym=*pcache_sym; + while (cache_sym->htnext!=NULL) + cache_sym=cache_sym->htnext; + cache_sym->htnext=sym; +} + +static void symbol_cache_remove(symbol *sym) +{ + const HASHTABLE_U64 key=namehash(sym->name); + symbol **pcache_sym; + symbol *cache_sym=NULL; + symbol *parent_cache_sym=NULL; + + pcache_sym=(symbol **)hashtable_find(&symbol_cache_ht,key); + if (pcache_sym!=NULL) + cache_sym=*pcache_sym; for ( ;; ) { if (cache_sym==NULL) - return NULL; - if (cache_sym->symbol==sym) + return; + if (cache_sym==sym) break; parent_cache_sym=cache_sym; - cache_sym=cache_sym->next; - } + cache_sym=cache_sym->htnext; + } /* for */ - if (parent_cache_sym!=NULL) { - parent_cache_sym->next=cache_sym->next; + if (parent_cache_sym==NULL) { + if (cache_sym->htnext==NULL) + hashtable_remove(&symbol_cache_ht,key); + else + *pcache_sym=cache_sym->htnext; } else { - hashmap_remove(&symbol_cache_map,sym->name); - if (cache_sym->next!=NULL) - if (hashmap_put(&symbol_cache_map,sym->name,cache_sym->next)==NULL) - error(103); /* insufficient
memory */ - } - if (free_cache_sym) { - free(cache_sym); - return NULL; - } - cache_sym->next=NULL; - return cache_sym; + parent_cache_sym->htnext=cache_sym->htnext; + } /* if */ } /* The local variable table must be searched backwards, so that the deepest @@ -2690,8 +2803,9 @@ static symbol *add_symbol(symbol *root,symbol *entry,int sort) memcpy(newsym,entry,sizeof(symbol)); newsym->next=root->next; root->next=newsym; + newsym->htnext=NULL; if (newsym->vclass==sGLOBAL) - symbol_cache_add(newsym,NULL); + symbol_cache_add(newsym); return newsym; } @@ -2737,7 +2851,7 @@ static void free_symbol(symbol *sym) if (sym->documentation!=NULL) free(sym->documentation); if (sym->vclass==sGLOBAL) - symbol_cache_remove(sym,1); + symbol_cache_remove(sym); free(sym); } @@ -2842,31 +2956,26 @@ SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_ SC_FUNC void rename_symbol(symbol *sym,const char *newname) { - int is_global=(sym->vclass==sGLOBAL); - symbol2 *cache_sym; + const int isglobal=(sym->vclass==sGLOBAL); - if (is_global) - cache_sym=symbol_cache_remove(sym,0); + if (isglobal) + symbol_cache_remove(sym); strcpy(sym->name,newname); - if (is_global && cache_sym!=NULL) - symbol_cache_add(sym,cache_sym); + if (isglobal) + symbol_cache_add(sym); } static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int automaton,int *cmptag) { symbol *firstmatch=NULL; symbol *sym=root->next; - symbol2 *cache_sym=NULL; int count=0; - int is_global=(root==&glbtab); + const int is_global=(root==&glbtab); if (is_global) { - cache_sym=hashmap_get(&symbol_cache_map,name); - if (cache_sym) - sym=cache_sym->symbol; - else - sym=NULL; - } + symbol **pcache_sym=(symbol **)hashtable_find(&symbol_cache_ht,namehash(name)); + sym=(pcache_sym!=NULL) ? 
*pcache_sym : NULL; + } /* if */ while (sym!=NULL) { if ( (is_global || strcmp(name,sym->name)==0) /* check name */ @@ -2891,16 +3000,8 @@ static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int a break; } /* if */ } /* if */ - } /* */ - if (is_global) { - cache_sym=cache_sym->next; - if (cache_sym) - sym=cache_sym->symbol; - else - sym=NULL; - } else { - sym=sym->next; - } + } /* if */ + sym=(is_global) ? sym->htnext : sym->next; } /* while */ if (cmptag!=NULL && firstmatch!=NULL) { if (*cmptag==0) diff --git a/source/compiler/scvars.c b/source/compiler/scvars.c index 5055a15..43de38a 100644 --- a/source/compiler/scvars.c +++ b/source/compiler/scvars.c @@ -33,7 +33,7 @@ */ SC_VDEFINE symbol loctab; /* local symbol table */ SC_VDEFINE symbol glbtab; /* global symbol table */ -SC_VDEFINE struct hashmap symbol_cache_map; +SC_VDEFINE struct hashtable_t symbol_cache_ht; SC_VDEFINE cell *litq; /* the literal queue */ SC_VDEFINE unsigned char pline[sLINEMAX+1]; /* the line read from the input file */ SC_VDEFINE const unsigned char *lptr; /* points to the current position in "pline" */