diff --git a/appveyor.yml b/appveyor.yml index 4b13ef6..f5753ee 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,10 +1,11 @@ version: '{build}' +image: Visual Studio 2015 configuration: - RelWithDebInfo before_build: - - cmake -G "Visual Studio 10 2010" source/compiler -DCPACK_GENERATOR=ZIP + - cmake -G "Visual Studio 14 2015" source/compiler -DCPACK_GENERATOR=ZIP build_script: - cmake --build . --config %CONFIGURATION% diff --git a/source/compiler/CMakeLists.txt b/source/compiler/CMakeLists.txt index e5bbeb5..63a0e39 100644 --- a/source/compiler/CMakeLists.txt +++ b/source/compiler/CMakeLists.txt @@ -60,6 +60,8 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) # The Pawn compiler shared library set(PAWNC_SRCS + hashmap/hashmap.c + hashmap/hashmap.h libpawnc.c lstring.c lstring.h diff --git a/source/compiler/hashmap/LICENSE b/source/compiler/hashmap/LICENSE new file mode 100644 index 0000000..2d576cc --- /dev/null +++ b/source/compiler/hashmap/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 David Leeds + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/source/compiler/hashmap/hashmap.c b/source/compiler/hashmap/hashmap.c
new file mode 100644
index 0000000..d169457
--- /dev/null
+++ b/source/compiler/hashmap/hashmap.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (c) 2016-2017 David Leeds
+ *
+ * Hashmap is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "hashmap.h"
+
+#ifndef HASHMAP_NOASSERT
+#include <assert.h>
+#define HASHMAP_ASSERT(expr) assert(expr)
+#else
+#define HASHMAP_ASSERT(expr)
+#endif
+
+/* Table sizes must be powers of 2 */
+#define HASHMAP_SIZE_MIN (1 << 5) /* 32 */
+#define HASHMAP_SIZE_DEFAULT (1 << 8) /* 256 */
+/* Reduce val to a table index; relies on table_size being a power of 2 */
+#define HASHMAP_SIZE_MOD(map, val) ((val) & ((map)->table_size - 1))
+
+/* Limit for probing is 1/2 of table_size */
+#define HASHMAP_PROBE_LEN(map) ((map)->table_size >> 1)
+/* Return the next linear probe index */
+#define HASHMAP_PROBE_NEXT(map, index) HASHMAP_SIZE_MOD(map, (index) + 1)
+
+/* Check if index b is less than or equal to index a */
+#define HASHMAP_INDEX_LE(map, a, b) \
+    ((a) == (b) || (((b) - (a)) & ((map)->table_size >> 1)) != 0)
+
+
+/*
+ * One slot of the open-addressed table.  A NULL key marks the slot as
+ * empty; data is the caller's value pointer stored by hashmap_put().
+ */
+struct hashmap_entry {
+    void *key;
+    void *data;
+#ifdef HASHMAP_METRICS
+    size_t num_collisions;
+#endif
+};
+
+
+/*
+ * Enforce a maximum 0.75 load factor: returns the smallest table size
+ * (num_entries * 4/3) that keeps num_entries at or below 3/4 of the table.
+ */
+static inline size_t hashmap_table_min_size_calc(size_t num_entries)
+{
+    return num_entries + (num_entries / 3);
+}
+
+/*
+ * Calculate the optimal table size, given the specified max number
+ * of elements.
+ */
+static size_t hashmap_table_size_calc(size_t num_entries)
+{
+    /* Largest value that can still be doubled without wrapping to 0 */
+    const size_t max_doubling = (size_t)-1 >> 1;
+    size_t table_size;
+    size_t min_size;
+
+    table_size = hashmap_table_min_size_calc(num_entries);
+
+    /*
+     * Table size is always a power of 2.  Stop doubling once another
+     * shift would overflow: without this bound min_size becomes 0 for
+     * very large requests and the loop never terminates.
+     */
+    min_size = HASHMAP_SIZE_MIN;
+    while (min_size < table_size && min_size <= max_doubling) {
+        min_size <<= 1;
+    }
+    return min_size;
+}
+
+/*
+ * Get a valid hash table index from a key.
+ * The user-supplied hash is masked down to the current table size.
+ */
+static inline size_t hashmap_calc_index(const struct hashmap *map,
+    const void *key)
+{
+    return HASHMAP_SIZE_MOD(map, map->hash(key));
+}
+
+/*
+ * Return the next populated entry, starting with the specified one.
+ * Returns NULL if there are no more valid entries.
+ * "entry" may point one past the last slot, in which case NULL is returned.
+ */
+static struct hashmap_entry *hashmap_entry_get_populated(
+    const struct hashmap *map, struct hashmap_entry *entry)
+{
+    for (; entry < &map->table[map->table_size]; ++entry) {
+        if (entry->key) {
+            return entry;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Find the hashmap entry with the specified key, or an empty slot.
+ * When find_empty is false, reaching an empty slot means "not found" and
+ * NULL is returned.  When find_empty is true, that empty slot is returned
+ * so the caller can populate it.
+ * Returns NULL if the probe limit (half the table) has been searched
+ * without finding a match.
+ */
+static struct hashmap_entry *hashmap_entry_find(const struct hashmap *map,
+    const void *key, bool find_empty)
+{
+    size_t i;
+    size_t index;
+    size_t probe_len = HASHMAP_PROBE_LEN(map);
+    struct hashmap_entry *entry;
+
+    index = hashmap_calc_index(map, key);
+
+    /* Linear probing */
+    for (i = 0; i < probe_len; ++i) {
+        entry = &map->table[index];
+        if (!entry->key) {
+            if (find_empty) {
+#ifdef HASHMAP_METRICS
+                /* Probe distance from the home slot */
+                entry->num_collisions = i;
+#endif
+                return entry;
+            }
+            return NULL;
+        }
+        if (map->key_compare(key, entry->key) == 0) {
+            return entry;
+        }
+        index = HASHMAP_PROBE_NEXT(map, index);
+    }
+    return NULL;
+}
+
+/*
+ * Removes the specified entry and processes the following entries to reduce
+ * the load factor and keep the chain continuous.  This is a required
+ * step for hash maps using linear probing.
+ */
+/*
+ * map:           the map that owns removed_entry.
+ * removed_entry: pointer to a populated slot inside map->table.
+ * The key is released through map->key_free when one is installed; the
+ * data pointer is not touched, the caller keeps ownership of it.
+ */
+static void hashmap_entry_remove(struct hashmap *map,
+    struct hashmap_entry *removed_entry)
+{
+    size_t i;
+#ifdef HASHMAP_METRICS
+    size_t removed_i = 0;
+#endif
+    size_t index;
+    size_t entry_index;
+    /* Slot number of the hole, derived from the pointer offset */
+    size_t removed_index = (removed_entry - map->table);
+    struct hashmap_entry *entry;
+
+    /* Free the key */
+    if (map->key_free) {
+        map->key_free(removed_entry->key);
+    }
+    --map->num_entries;
+
+    /* Fill the free slot in the chain */
+    index = HASHMAP_PROBE_NEXT(map, removed_index);
+    /* Walk at most table_size - 1 slots, i.e. every slot but the hole */
+    for (i = 1; i < map->table_size; ++i) {
+        entry = &map->table[index];
+        if (!entry->key) {
+            /* Reached end of chain */
+            break;
+        }
+        /* Home slot of this entry, i.e. where its probe sequence starts */
+        entry_index = hashmap_calc_index(map, entry->key);
+        /* Shift in entries with an index <= to the removed slot */
+        if (HASHMAP_INDEX_LE(map, removed_index, entry_index)) {
+#ifdef HASHMAP_METRICS
+            /* The entry moves (i - removed_i) slots closer to home */
+            entry->num_collisions -= (i - removed_i);
+            removed_i = i;
+#endif
+            /* Move the entry into the hole; its old slot is the new hole */
+            memcpy(removed_entry, entry, sizeof(*removed_entry));
+            removed_index = index;
+            removed_entry = entry;
+        }
+        index = HASHMAP_PROBE_NEXT(map, index);
+    }
+    /* Clear the last removed entry */
+    memset(removed_entry, 0, sizeof(*removed_entry));
+}
+
+/*
+ * Reallocates the hash table to the new size and rehashes all entries.
+ * new_size MUST be a power of 2.
+ * Returns 0 on success and -1 on allocation or hash function failure.
+ */
+static int hashmap_rehash(struct hashmap *map, size_t new_size)
+{
+    size_t old_size;
+    struct hashmap_entry *old_table;
+    struct hashmap_entry *new_table;
+    struct hashmap_entry *entry;
+    struct hashmap_entry *new_entry;
+
+    HASHMAP_ASSERT(new_size >= HASHMAP_SIZE_MIN);
+    HASHMAP_ASSERT((new_size & (new_size - 1)) == 0);
+
+    new_table = (struct hashmap_entry *)calloc(new_size,
+        sizeof(struct hashmap_entry));
+    if (!new_table) {
+        return -1;
+    }
+    /* Backup old elements in case of rehash failure */
+    old_size = map->table_size;
+    old_table = map->table;
+    /* Install the new table first so hashmap_entry_find() probes it */
+    map->table_size = new_size;
+    map->table = new_table;
+    /* Rehash */
+    for (entry = old_table; entry < &old_table[old_size]; ++entry) {
+        if (!entry->key) {
+            /*
+             * Empty slot.  Occupancy is decided by the key, not the
+             * data pointer: an entry may legitimately hold NULL data
+             * (see hashmap_put() / hashmap_iter_set_data()), and
+             * skipping it here would silently drop it, leave
+             * num_entries too high and leak an owned key.
+             */
+            continue;
+        }
+        new_entry = hashmap_entry_find(map, entry->key, true);
+        if (!new_entry) {
+            /*
+             * The load factor is still too high with the new table
+             * size, or a poor hash function was used.
+             */
+            goto revert;
+        }
+        /* Shallow copy (intentionally omits num_collisions) */
+        new_entry->key = entry->key;
+        new_entry->data = entry->data;
+    }
+    free(old_table);
+    return 0;
+revert:
+    /* Keys were only copied by pointer, so the old table is still intact */
+    map->table_size = old_size;
+    map->table = old_table;
+    free(new_table);
+    return -1;
+}
+
+/*
+ * Iterate through all entries and free all keys.
+ * Does nothing when no key_free function is installed.  The slots
+ * themselves are left untouched; callers clear or free the table afterwards.
+ */
+static void hashmap_free_keys(struct hashmap *map)
+{
+    struct hashmap_iter *iter;
+
+    if (!map->key_free) {
+        return;
+    }
+    for (iter = hashmap_iter(map); iter;
+        iter = hashmap_iter_next(map, iter)) {
+        map->key_free((void *)hashmap_iter_get_key(iter));
+    }
+}
+
+/*
+ * Initialize an empty hashmap.  A hash function and a key comparator are
+ * required.
+ *
+ * hash_func should return an even distribution of numbers between 0
+ * and SIZE_MAX varying on the key provided.
+ *
+ * key_compare_func should return 0 if the keys match, and non-zero otherwise.
+ *
+ * initial_size is optional, and may be set to the max number of entries
+ * expected to be put in the hash table.
This is used as a hint to
+ * pre-allocate the hash table to the minimum size needed to avoid
+ * gratuitous rehashes.  If initial_size is 0, a default size will be used.
+ *
+ * Returns 0 on success and -1 if the table could not be allocated.  On
+ * failure the map is left fully zeroed, so a later hashmap_destroy() on it
+ * is harmless.
+ */
+int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *),
+    int (*key_compare_func)(const void *, const void *),
+    size_t initial_size)
+{
+    HASHMAP_ASSERT(map != NULL);
+    HASHMAP_ASSERT(hash_func != NULL);
+    HASHMAP_ASSERT(key_compare_func != NULL);
+
+    if (!initial_size) {
+        initial_size = HASHMAP_SIZE_DEFAULT;
+    } else {
+        /* Convert init size to valid table size */
+        initial_size = hashmap_table_size_calc(initial_size);
+    }
+    /* Assign every field before allocating so none is left indeterminate */
+    map->table_size_init = initial_size;
+    map->table_size = initial_size;
+    map->num_entries = 0;
+    map->hash = hash_func;
+    map->key_compare = key_compare_func;
+    map->key_alloc = NULL;
+    map->key_free = NULL;
+    map->table = (struct hashmap_entry *)calloc(initial_size,
+        sizeof(struct hashmap_entry));
+    if (!map->table) {
+        /* Same end state as hashmap_destroy(): no table, no sizes */
+        memset(map, 0, sizeof(*map));
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Free the hashmap and all associated memory.
+ * Keys are released through key_free when one is installed; data pointers
+ * are not freed.  The map structure itself is zeroed, not freed.
+ * A NULL map is ignored.
+ */
+void hashmap_destroy(struct hashmap *map)
+{
+    if (!map) {
+        return;
+    }
+    hashmap_free_keys(map);
+    free(map->table);
+    memset(map, 0, sizeof(*map));
+}
+
+/*
+ * Enable internal memory management of hash keys.
+ * key_alloc_func is called by hashmap_put() to make the stored copy of a
+ * new key; key_free_func is called whenever a stored key is discarded.
+ */
+void hashmap_set_key_alloc_funcs(struct hashmap *map,
+    void *(*key_alloc_func)(const void *),
+    void (*key_free_func)(void *))
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    map->key_alloc = key_alloc_func;
+    map->key_free = key_free_func;
+}
+
+/*
+ * Add an entry to the hashmap.  If an entry with a matching key already
+ * exists and has a data pointer associated with it, the existing data
+ * pointer is returned, instead of assigning the new value.  Compare
+ * the return value with the data passed in to determine if a new entry was
+ * created.  Returns NULL if memory allocation failed.
+ */
+void *hashmap_put(struct hashmap *map, const void *key, void *data)
+{
+    struct hashmap_entry *slot;
+
+    HASHMAP_ASSERT(map != NULL);
+    HASHMAP_ASSERT(key != NULL);
+
+    /*
+     * Grow to 2x capacity once the load factor approaches 0.75.  This is
+     * best effort: if it fails, the insert is still attempted below.
+     */
+    if (hashmap_table_min_size_calc(map->num_entries) >= map->table_size) {
+        hashmap_rehash(map, map->table_size << 1);
+    }
+
+    slot = hashmap_entry_find(map, key, true);
+    if (slot == NULL) {
+        /*
+         * No usable slot within the probe limit: either the earlier
+         * grow failed or the hash function clusters badly.  Try one
+         * more doubling to shorten the chains before giving up.
+         */
+        if (hashmap_rehash(map, map->table_size << 1) < 0) {
+            return NULL;
+        }
+        slot = hashmap_entry_find(map, key, true);
+        if (slot == NULL) {
+            return NULL;
+        }
+    }
+
+    if (slot->key != NULL) {
+        /* Key already present: keep existing data if there is any */
+        if (slot->data != NULL) {
+            return slot->data;
+        }
+        slot->data = data;
+        return data;
+    }
+
+    /* Fresh slot: store an owned copy of the key when key_alloc is set */
+    if (map->key_alloc != NULL) {
+        slot->key = map->key_alloc(key);
+        if (slot->key == NULL) {
+            return NULL;
+        }
+    } else {
+        slot->key = (void *)key;
+    }
+    ++map->num_entries;
+    slot->data = data;
+    return data;
+}
+
+/*
+ * Look up key and return its data pointer, or NULL if no entry exists.
+ */
+void *hashmap_get(const struct hashmap *map, const void *key)
+{
+    struct hashmap_entry *found;
+
+    HASHMAP_ASSERT(map != NULL);
+    HASHMAP_ASSERT(key != NULL);
+
+    found = hashmap_entry_find(map, key, false);
+    return (found != NULL) ? found->data : NULL;
+}
+
+/*
+ * Remove the entry with the specified key from the map.
+ * Returns the data pointer that was stored, or NULL if no entry was found.
+ */
+void *hashmap_remove(struct hashmap *map, const void *key)
+{
+    struct hashmap_entry *found;
+    void *removed_data;
+
+    HASHMAP_ASSERT(map != NULL);
+    HASHMAP_ASSERT(key != NULL);
+
+    found = hashmap_entry_find(map, key, false);
+    if (found == NULL) {
+        return NULL;
+    }
+    /* Read the data first: removal clears or overwrites the slot */
+    removed_data = found->data;
+    hashmap_entry_remove(map, found);
+    return removed_data;
+}
+
+/*
+ * Remove all entries.
+ */
+void hashmap_clear(struct hashmap *map)
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    /* Keys must be released while num_entries still reports them */
+    hashmap_free_keys(map);
+    memset(map->table, 0, map->table_size * sizeof(struct hashmap_entry));
+    map->num_entries = 0;
+}
+
+/*
+ * Remove all entries and shrink the hash table back to its initial size.
+ * If the shrink cannot be performed the (already emptied) table keeps its
+ * current size.
+ */
+void hashmap_reset(struct hashmap *map)
+{
+    struct hashmap_entry *shrunk;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    hashmap_clear(map);
+    if (map->table_size != map->table_size_init) {
+        shrunk = (struct hashmap_entry *)realloc(map->table,
+            sizeof(struct hashmap_entry) * map->table_size_init);
+        if (shrunk != NULL) {
+            map->table = shrunk;
+            map->table_size = map->table_size_init;
+        }
+    }
+}
+
+/*
+ * Return the number of entries currently stored in the hash map.
+ */
+size_t hashmap_size(const struct hashmap *map)
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    return map->num_entries;
+}
+
+/*
+ * Get a new hashmap iterator positioned on the first populated entry, or
+ * NULL when the map is empty.  The iterator is an opaque pointer that may
+ * be used with hashmap_iter_*() functions.
+ * Hashmap iterators are INVALID after a put or remove operation is performed.
+ * hashmap_iter_remove() allows safe removal during iteration.
+ */
+struct hashmap_iter *hashmap_iter(const struct hashmap *map)
+{
+    struct hashmap_entry *first;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    if (map->num_entries == 0) {
+        return NULL;
+    }
+    first = hashmap_entry_get_populated(map, map->table);
+    return (struct hashmap_iter *)first;
+}
+
+/*
+ * Return an iterator to the next populated hashmap entry.  Returns NULL if
+ * iter is NULL or there are no more entries.
+ */
+struct hashmap_iter *hashmap_iter_next(const struct hashmap *map,
+    const struct hashmap_iter *iter)
+{
+    struct hashmap_entry *current = (struct hashmap_entry *)iter;
+    struct hashmap_entry *following;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    if (iter == NULL) {
+        return NULL;
+    }
+    following = hashmap_entry_get_populated(map, current + 1);
+    return (struct hashmap_iter *)following;
+}
+
+/*
+ * Remove the hashmap entry pointed to by this iterator and return an
+ * iterator to the next entry.  Returns NULL if there are no more entries.
+ */
+struct hashmap_iter *hashmap_iter_remove(struct hashmap *map,
+    const struct hashmap_iter *iter)
+{
+    struct hashmap_entry *slot = (struct hashmap_entry *)iter;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    if (iter == NULL) {
+        return NULL;
+    }
+    if (slot->key == NULL) {
+        /* Iterator is invalid, so just return the next valid entry */
+        return hashmap_iter_next(map, iter);
+    }
+    /*
+     * Removal may shift a later entry into this very slot, so the search
+     * for the next populated entry restarts at slot, not at slot + 1.
+     */
+    hashmap_entry_remove(map, slot);
+    return (struct hashmap_iter *)hashmap_entry_get_populated(map, slot);
+}
+
+/*
+ * Return the key of the entry pointed to by the iterator, or NULL for a
+ * NULL iterator.
+ */
+const void *hashmap_iter_get_key(const struct hashmap_iter *iter)
+{
+    return (iter != NULL)
+        ? (const void *)((struct hashmap_entry *)iter)->key
+        : NULL;
+}
+
+/*
+ * Return the data of the entry pointed to by the iterator, or NULL for a
+ * NULL iterator.
+ */
+void *hashmap_iter_get_data(const struct hashmap_iter *iter)
+{
+    return (iter != NULL) ? ((struct hashmap_entry *)iter)->data : NULL;
+}
+
+/*
+ * Set the data pointer of the entry pointed to by the iterator.
+ * A NULL iterator is ignored.
+ */
+void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data)
+{
+    if (iter != NULL) {
+        ((struct hashmap_entry *)iter)->data = data;
+    }
+}
+
+/*
+ * Invoke func for each entry in the hashmap.  Unlike the hashmap_iter_*()
+ * interface, this function supports calls to hashmap_remove() during iteration.
+ * However, it is an error to put or remove an entry other than the current one,
+ * and doing so will immediately halt iteration and return an error.
+ * Iteration is stopped if func returns non-zero.  Returns func's return
+ * value if it is < 0, otherwise, 0.
+ */
+int hashmap_foreach(const struct hashmap *map,
+    int (*func)(const void *, void *, void *), void *arg)
+{
+    struct hashmap_entry *entry;
+    size_t num_entries;
+    const void *key;
+    int rc;
+
+    HASHMAP_ASSERT(map != NULL);
+    HASHMAP_ASSERT(func != NULL);
+
+    entry = map->table;
+    while (entry < &map->table[map->table_size]) {
+        if (!entry->key) {
+            ++entry;
+            continue;
+        }
+        /* Snapshot state so changes made by func() can be detected */
+        num_entries = map->num_entries;
+        key = entry->key;
+        rc = func(entry->key, entry->data, arg);
+        if (rc < 0) {
+            return rc;
+        }
+        if (rc > 0) {
+            return 0;
+        }
+        if (entry->key != key) {
+            /*
+             * func() deleted this entry, and removal may have shifted
+             * another entry into the slot: visit the same slot again.
+             * The pointer is simply not advanced; stepping it back
+             * and forward would form a pointer before the start of
+             * the table when this is slot 0, which is undefined.
+             */
+            continue;
+        }
+        if (num_entries != map->num_entries) {
+            /* Stop immediately if func put/removed another entry */
+            return -1;
+        }
+        ++entry;
+    }
+    return 0;
+}
+
+/*
+ * Default hash function for string keys.
+ * This is an implementation of the well-documented Jenkins one-at-a-time
+ * hash function.  key must be a NUL-terminated string.
+ */
+size_t hashmap_hash_string(const void *key)
+{
+    /*
+     * Read the bytes as unsigned so the result does not depend on whether
+     * plain char is signed on the target platform.
+     */
+    const unsigned char *key_str = (const unsigned char *)key;
+    size_t hash = 0;
+
+    for (; *key_str; ++key_str) {
+        hash += *key_str;
+        hash += (hash << 10);
+        hash ^= (hash >> 6);
+    }
+    hash += (hash << 3);
+    hash ^= (hash >> 11);
+    hash += (hash << 15);
+    return hash;
+}
+
+/*
+ * Default key comparator function for string keys.
+ * Returns 0 when both NUL-terminated strings are equal (strcmp semantics).
+ */
+int hashmap_compare_string(const void *a, const void *b)
+{
+    return strcmp((const char *)a, (const char *)b);
+}
+
+/*
+ * Default key allocation function for string keys.  Use free() for the
+ * key_free_func.  Returns NULL if the copy cannot be allocated.
+ */
+void *hashmap_alloc_key_string(const void *key)
+{
+    return (void *)strdup((const char *)key);
+}
+
+#ifdef HASHMAP_METRICS
+/*
+ * Return the load factor (entries / table slots), or 0 for a map without
+ * a table.
+ */
+double hashmap_load_factor(const struct hashmap *map)
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    if (!map->table_size) {
+        return 0;
+    }
+    return (double)map->num_entries / map->table_size;
+}
+
+/*
+ * Return the average number of collisions per entry.
+ */
+double hashmap_collisions_mean(const struct hashmap *map)
+{
+    struct hashmap_entry *entry;
+    size_t total_collisions = 0;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    /* Avoid dividing by zero on an empty map */
+    if (!map->num_entries) {
+        return 0;
+    }
+    for (entry = map->table; entry < &map->table[map->table_size];
+        ++entry) {
+        if (!entry->key) {
+            continue;
+        }
+        total_collisions += entry->num_collisions;
+    }
+    return (double)total_collisions / map->num_entries;
+}
+
+/*
+ * Return the variance between entry collisions.  The higher the variance,
+ * the more likely the hash function is poor and is resulting in clustering.
+ * Computed as the mean squared deviation from hashmap_collisions_mean();
+ * returns 0 for an empty map.
+ */
+double hashmap_collisions_variance(const struct hashmap *map)
+{
+    struct hashmap_entry *entry;
+    double mean_collisions;
+    double variance;
+    double total_variance = 0;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    if (!map->num_entries) {
+        return 0;
+    }
+    mean_collisions = hashmap_collisions_mean(map);
+    for (entry = map->table; entry < &map->table[map->table_size];
+        ++entry) {
+        if (!entry->key) {
+            continue;
+        }
+        variance = (double)entry->num_collisions - mean_collisions;
+        total_variance += variance * variance;
+    }
+    return total_variance / map->num_entries;
+}
+#endif
diff --git a/source/compiler/hashmap/hashmap.h b/source/compiler/hashmap/hashmap.h
new file mode 100644
index 0000000..b4ce16e
--- /dev/null
+++ b/source/compiler/hashmap/hashmap.h
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2016-2017 David Leeds
+ *
+ * Hashmap is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#ifndef __HASHMAP_H__
+#define __HASHMAP_H__
+
+/* size_t is used throughout this header; keep the header self-contained */
+#include <stddef.h>
+
+/*
+ * Define HASHMAP_METRICS to compile in performance analysis
+ * functions for use in assessing hash function performance.
+ */
+/* #define HASHMAP_METRICS */
+
+/*
+ * Define HASHMAP_NOASSERT to compile out all assertions used internally.
+ */
+/* #define HASHMAP_NOASSERT */
+
+/*
+ * Macros to declare type-specific versions of hashmap_*() functions to
+ * allow compile-time type checking and avoid the need for type casting.
+ *
+ * HASHMAP_FUNCS_DECLARE emits the prototypes only (for use in headers).
+ * Each generated function is named <name>_hashmap_<operation> and takes
+ * key_type * / data_type * where the generic API takes void pointers.
+ */
+#define HASHMAP_FUNCS_DECLARE(name, key_type, data_type) \
+    data_type *name##_hashmap_put(struct hashmap *map, key_type *key, \
+        data_type *data); \
+    data_type *name##_hashmap_get(const struct hashmap *map, \
+        key_type *key); \
+    data_type *name##_hashmap_remove(struct hashmap *map, \
+        key_type *key); \
+    key_type *name##_hashmap_iter_get_key( \
+        const struct hashmap_iter *iter); \
+    data_type *name##_hashmap_iter_get_data( \
+        const struct hashmap_iter *iter); \
+    void name##_hashmap_iter_set_data(const struct hashmap_iter *iter, \
+        data_type *data); \
+    int name##_hashmap_foreach(const struct hashmap *map, \
+        int (*func)(key_type *, data_type *, void *), void *arg);
+
+/*
+ * HASHMAP_FUNCS_CREATE emits the definitions matching
+ * HASHMAP_FUNCS_DECLARE.  Every wrapper casts its arguments and forwards
+ * to the generic hashmap_*() function.  The foreach wrapper packs the typed
+ * callback and its argument into a small state struct and routes the call
+ * through a generated trampoline with the generic callback signature.
+ */
+#define HASHMAP_FUNCS_CREATE(name, key_type, data_type) \
+    data_type *name##_hashmap_put(struct hashmap *map, key_type *key, \
+        data_type *data) \
+    { \
+        return (data_type *)hashmap_put(map, (const void *)key, \
+            (void *)data); \
+    } \
+    data_type *name##_hashmap_get(const struct hashmap *map, \
+        key_type *key) \
+    { \
+        return (data_type *)hashmap_get(map, (const void *)key); \
+    } \
+    data_type *name##_hashmap_remove(struct hashmap *map, \
+        key_type *key) \
+    { \
+        return (data_type *)hashmap_remove(map, (const void *)key); \
+    } \
+    key_type *name##_hashmap_iter_get_key( \
+        const struct hashmap_iter *iter) \
+    { \
+        return (key_type *)hashmap_iter_get_key(iter); \
+    } \
+    data_type *name##_hashmap_iter_get_data( \
+        const struct hashmap_iter *iter) \
+    { \
+        return (data_type *)hashmap_iter_get_data(iter); \
+    } \
+    void name##_hashmap_iter_set_data(const struct hashmap_iter *iter, \
+        data_type *data) \
+    { \
+        hashmap_iter_set_data(iter, (void *)data); \
+    } \
+    struct __##name##_hashmap_foreach_state { \
+        int (*func)(key_type *, data_type *, void *); \
+        void *arg; \
+    }; \
+    static inline int __##name##_hashmap_foreach_callback(const void *key, \
+        void *data, void *arg) \
+    { \
+        struct __##name##_hashmap_foreach_state *s = \
+            (struct __##name##_hashmap_foreach_state *)arg; \
+        return s->func((key_type *)key, (data_type *)data, s->arg); \
+    } \
+    int name##_hashmap_foreach(const struct hashmap *map, \
+        int (*func)(key_type *, data_type *, void *), void *arg) \
+    { \
+        struct __##name##_hashmap_foreach_state s = { func, arg }; \
+        return hashmap_foreach(map, \
+            __##name##_hashmap_foreach_callback, &s); \
+    }
+
+
+/* Opaque to users of this header */
+struct hashmap_iter;
+struct hashmap_entry;
+
+/*
+ * The hashmap state structure.
+ */
+struct hashmap {
+    size_t table_size_init; /* table size chosen by hashmap_init() */
+    size_t table_size; /* current number of slots in table */
+    size_t num_entries; /* number of populated slots */
+    struct hashmap_entry *table; /* the slot array */
+    size_t (*hash)(const void *); /* key hash function */
+    int (*key_compare)(const void *, const void *); /* 0 means keys match */
+    void *(*key_alloc)(const void *); /* optional: copies a key on insert */
+    void (*key_free)(void *); /* optional: releases a stored key */
+};
+
+/*
+ * Initialize an empty hashmap.  A hash function and a key comparator are
+ * required.
+ *
+ * hash_func should return an even distribution of numbers between 0
+ * and SIZE_MAX varying on the key provided.
+ *
+ * key_compare_func should return 0 if the keys match, and non-zero otherwise.
+ *
+ * initial_size is optional, and may be set to the max number of entries
+ * expected to be put in the hash table.  This is used as a hint to
+ * pre-allocate the hash table to the minimum size to avoid gratuitous rehashes.
+ * If initial_size is 0, a default size will be used.
+ */
+int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *),
+    int (*key_compare_func)(const void *, const void *),
+    size_t initial_size);
+
+/*
+ * Free the hashmap and all associated memory.
+ */
+void hashmap_destroy(struct hashmap *map);
+
+/*
+ * Enable internal memory allocation and management of hash keys.
+ */
+void hashmap_set_key_alloc_funcs(struct hashmap *map,
+    void *(*key_alloc_func)(const void *),
+    void (*key_free_func)(void *));
+
+/*
+ * Add an entry to the hashmap.
If an entry with a matching key already + * exists and has a data pointer associated with it, the existing data + * pointer is returned, instead of assigning the new value. Compare + * the return value with the data passed in to determine if a new entry was + * created. Returns NULL if memory allocation failed. + */ +void *hashmap_put(struct hashmap *map, const void *key, void *data); + +/* + * Return the data pointer, or NULL if no entry exists. + */ +void *hashmap_get(const struct hashmap *map, const void *key); + +/* + * Remove an entry with the specified key from the map. + * Returns the data pointer, or NULL, if no entry was found. + */ +void *hashmap_remove(struct hashmap *map, const void *key); + +/* + * Remove all entries. + */ +void hashmap_clear(struct hashmap *map); + +/* + * Remove all entries and reset the hash table to its initial size. + */ +void hashmap_reset(struct hashmap *map); + +/* + * Return the number of entries in the hash map. + */ +size_t hashmap_size(const struct hashmap *map); + +/* + * Get a new hashmap iterator. The iterator is an opaque + * pointer that may be used with hashmap_iter_*() functions. + * Hashmap iterators are INVALID after a put or remove operation is performed. + * hashmap_iter_remove() allows safe removal during iteration. + */ +struct hashmap_iter *hashmap_iter(const struct hashmap *map); + +/* + * Return an iterator to the next hashmap entry. Returns NULL if there are + * no more entries. + */ +struct hashmap_iter *hashmap_iter_next(const struct hashmap *map, + const struct hashmap_iter *iter); + +/* + * Remove the hashmap entry pointed to by this iterator and returns an + * iterator to the next entry. Returns NULL if there are no more entries. + */ +struct hashmap_iter *hashmap_iter_remove(struct hashmap *map, + const struct hashmap_iter *iter); + +/* + * Return the key of the entry pointed to by the iterator. 
+ */ +const void *hashmap_iter_get_key(const struct hashmap_iter *iter); + +/* + * Return the data of the entry pointed to by the iterator. + */ +void *hashmap_iter_get_data(const struct hashmap_iter *iter); + +/* + * Set the data pointer of the entry pointed to by the iterator. + */ +void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data); + +/* + * Invoke func for each entry in the hashmap. Unlike the hashmap_iter_*() + * interface, this function supports calls to hashmap_remove() during iteration. + * However, it is an error to put or remove an entry other than the current one, + * and doing so will immediately halt iteration and return an error. + * Iteration is stopped if func returns non-zero. Returns func's return + * value if it is < 0, otherwise, 0. + */ +int hashmap_foreach(const struct hashmap *map, + int (*func)(const void *, void *, void *), void *arg); + +/* + * Default hash function for string keys. + * This is an implementation of the well-documented Jenkins one-at-a-time + * hash function. + */ +size_t hashmap_hash_string(const void *key); + +/* + * Default key comparator function for string keys. + */ +int hashmap_compare_string(const void *a, const void *b); + +/* + * Default key allocation function for string keys. Use free() for the + * key_free_func. + */ +void *hashmap_alloc_key_string(const void *key); + + +#ifdef HASHMAP_METRICS +/* + * Return the load factor. + */ +double hashmap_load_factor(const struct hashmap *map); + +/* + * Return the average number of collisions per entry. + */ +double hashmap_collisions_mean(const struct hashmap *map); + +/* + * Return the variance between entry collisions. The higher the variance, + * the more likely the hash function is poor and is resulting in clustering. 
+ */ +double hashmap_collisions_variance(const struct hashmap *map); +#endif + + +#endif /* __HASHMAP_H__ */ + diff --git a/source/compiler/sc.h b/source/compiler/sc.h index 9d68b0e..9db453d 100644 --- a/source/compiler/sc.h +++ b/source/compiler/sc.h @@ -41,6 +41,7 @@ #else #include #endif +#include "hashmap/hashmap.h" #include "../amx/osdefs.h" #include "../amx/amx.h" @@ -130,8 +131,8 @@ typedef struct s_constvalue { typedef struct s_symbol { struct s_symbol *next; struct s_symbol *parent; /* hierarchical types (multi-dimensional arrays) */ + struct s_symbol *child; char name[sNAMEMAX+1]; - uint32_t hash; /* value derived from name, for quicker searching */ cell addr; /* address or offset (or value for constant, index for native function) */ cell codeaddr; /* address (in the code segment) where the symbol declaration starts */ char vclass; /* sLOCAL if "addr" refers to a local symbol */ @@ -165,6 +166,12 @@ typedef struct s_symbol { char *documentation; /* optional documentation string */ } symbol; +/* new symbol struct for cached global symbols with the same names*/ +typedef struct s_symbol2 { + struct s_symbol *symbol; + struct s_symbol2 *next; +} symbol2; + /* Possible entries for "ident". These are used in the "symbol", "value" * and arginfo structures. Not every constant is valid for every use. 
@@ -581,7 +588,7 @@ SC_FUNC void delete_symbol(symbol *root,symbol *sym); SC_FUNC void delete_symbols(symbol *root,int level,int del_labels,int delete_functions); SC_FUNC int refer_symbol(symbol *entry,symbol *bywhom); SC_FUNC void markusage(symbol *sym,int usage); -SC_FUNC uint32_t namehash(const char *name); +SC_FUNC void rename_symbol(symbol *sym,const char *newname); SC_FUNC symbol *findglb(const char *name,int filter); SC_FUNC symbol *findloc(const char *name); SC_FUNC symbol *findconst(const char *name,int *matchtag); @@ -610,7 +617,6 @@ SC_FUNC void setline(int chkbounds); SC_FUNC void setfiledirect(char *name); SC_FUNC void setfileconst(char *name); SC_FUNC void setlinedirect(int line); -SC_FUNC void setlineconst(int line); SC_FUNC void setlabel(int index); SC_FUNC void markexpr(optmark type,const char *name,cell offset); SC_FUNC void startfunc(char *fname); @@ -787,6 +793,8 @@ SC_FUNC int state_conflict_id(int listid1,int listid2); #if !defined SC_SKIP_VDECL SC_VDECL symbol loctab; /* local symbol table */ SC_VDECL symbol glbtab; /* global symbol table */ +SC_VDECL struct hashmap symbol_cache_map; +SC_VDECL symbol *line_sym; SC_VDECL cell *litq; /* the literal queue */ SC_VDECL unsigned char pline[]; /* the line read from the input file */ SC_VDECL const unsigned char *lptr;/* points to the current position in "pline" */ diff --git a/source/compiler/sc1.c b/source/compiler/sc1.c index 5822946..4f58bfc 100644 --- a/source/compiler/sc1.c +++ b/source/compiler/sc1.c @@ -765,6 +765,8 @@ cleanup: delete_symbols(&loctab,0,TRUE,TRUE); /* delete local variables if not yet * done (i.e. 
on a fatal error) */ delete_symbols(&glbtab,0,TRUE,TRUE); + line_sym=NULL; + hashmap_destroy(&symbol_cache_map); delete_consttable(&tagname_tab); delete_consttable(&libname_tab); delete_consttable(&sc_automaton_tab); @@ -931,6 +933,7 @@ static void initglobals(void) litq=NULL; /* the literal queue */ glbtab.next=NULL; /* clear global variables/constants table */ loctab.next=NULL; /* " local " / " " */ + hashmap_init(&symbol_cache_map,hashmap_hash_string,hashmap_compare_string,8388608); /* 2^23 */ tagname_tab.next=NULL;/* tagname table */ libname_tab.next=NULL;/* library table (#pragma library "..." syntax) */ @@ -1514,7 +1517,7 @@ static void setconstants(void) add_builtin_constant("__Pawn",VERSION_INT,sGLOBAL,0); add_builtin_constant("__PawnBuild",VERSION_BUILD,sGLOBAL,0); - add_builtin_constant("__line",0,sGLOBAL,0); + line_sym=add_builtin_constant("__line",0,sGLOBAL,0); add_builtin_constant("__compat",pc_compat,sGLOBAL,0); debug=0; @@ -2937,6 +2940,8 @@ static void decl_enum(int vclass,int fstatic) sym->dim.array.length=size; sym->dim.array.level=0; sym->parent=enumsym; + if (enumsym) + enumsym->child=sym; if (fstatic) sym->fnumber=filenum; @@ -3271,8 +3276,7 @@ static int operatoradjust(int opertok,symbol *sym,char *opername,int resulttag) refer_symbol(sym,oldsym->refer[i]); delete_symbol(&glbtab,oldsym); } /* if */ - strcpy(sym->name,tmpname); - sym->hash=namehash(sym->name);/* calculate new hash */ + rename_symbol(sym,tmpname); /* operators should return a value, except the '~' operator */ if (opertok!='~') @@ -3531,6 +3535,8 @@ static void funcstub(int fnative) assert(sym!=NULL); sub=addvariable(symbolname,0,iARRAY,sGLOBAL,tag,dim,numdim,idxtag,0); sub->parent=sym; + if (sym) + sym->child=sub; } /* if */ litidx=0; /* clear the literal pool */ @@ -6522,6 +6528,8 @@ static void doreturn(void) sub=addvariable(curfunc->name,(argcount+3)*sizeof(cell),iREFARRAY,sGLOBAL, curfunc->tag,dim,numdim,idxtag,0); sub->parent=curfunc; + if (curfunc) + curfunc->child=sub; } 
/* if */ /* get the hidden parameter, copy the array (the array is on the heap; * it stays on the heap for the moment, and it is removed -usually- at diff --git a/source/compiler/sc2.c b/source/compiler/sc2.c index 2c9fd1a..233a40a 100644 --- a/source/compiler/sc2.c +++ b/source/compiler/sc2.c @@ -431,7 +431,8 @@ static void readline(unsigned char *line) line+=strlen((char*)line); } /* if */ fline+=1; - setlineconst(fline); + assert(line_sym!=NULL); + line_sym->addr=fline; } while (num>=0 && cont); } @@ -2623,6 +2624,60 @@ SC_FUNC int ishex(char c) return (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F'); } +static void symbol_cache_add(symbol *sym,symbol2 *new_cache_sym) +{ + symbol2 *cache_sym; + + if (new_cache_sym==NULL) { + new_cache_sym=(symbol2 *)malloc(sizeof(symbol2)); + if (new_cache_sym==NULL) + error(103); /* insufficient memory */ + new_cache_sym->symbol=sym; + new_cache_sym->next=NULL; + } + + cache_sym=hashmap_get(&symbol_cache_map,sym->name); + if (cache_sym==NULL) { + if (hashmap_put(&symbol_cache_map,sym->name,new_cache_sym)==NULL) + error(103); /* insufficient memory */ + } else { + while(cache_sym->next!=NULL) + cache_sym=cache_sym->next; + cache_sym->next=new_cache_sym; + } +} + +static symbol2 *symbol_cache_remove(symbol *sym,int free_cache_sym) +{ + symbol2 *cache_sym; + symbol2 *parent_cache_sym=NULL; + + cache_sym=hashmap_get(&symbol_cache_map,sym->name); + for ( ;; ) { + if (cache_sym==NULL) + return NULL; + if (cache_sym->symbol==sym) + break; + parent_cache_sym=cache_sym; + cache_sym=cache_sym->next; + } + + if (parent_cache_sym!=NULL) { + parent_cache_sym->next=cache_sym->next; + } else { + hashmap_remove(&symbol_cache_map,sym->name); + if (cache_sym->next!=NULL) + if (hashmap_put(&symbol_cache_map,sym->name,cache_sym->next)==NULL) + error(103); /* insufficient memory */ + } + if (free_cache_sym) { + free(cache_sym); + return NULL; + } + cache_sym->next=NULL; + return cache_sym; +} + /* The local variable table must be 
searched backwards, so that the deepest * nesting of local variables is searched first. The simplest way to do * this is to insert all new items at the head of the list. @@ -2644,6 +2699,8 @@ static symbol *add_symbol(symbol *root,symbol *entry,int sort) memcpy(newsym,entry,sizeof(symbol)); newsym->next=root->next; root->next=newsym; + if (newsym->vclass==sGLOBAL) + symbol_cache_add(newsym,NULL); return newsym; } @@ -2688,6 +2745,8 @@ static void free_symbol(symbol *sym) free(sym->refer); if (sym->documentation!=NULL) free(sym->documentation); + if (sym->vclass==sGLOBAL) + symbol_cache_remove(sym,1); free(sym); } @@ -2790,28 +2849,36 @@ SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_ } /* while */ } -/* The purpose of the hash is to reduce the frequency of a "name" - * comparison (which is costly). There is little interest in avoiding - * clusters in similar names, which is why this function is plain simple. - */ -SC_FUNC uint32_t namehash(const char *name) +SC_FUNC void rename_symbol(symbol *sym,const char *newname) { - const unsigned char *ptr=(const unsigned char *)name; - int len=strlen(name); - if (len==0) - return 0L; - assert(len<256); - return (len<<24Lu) + (ptr[0]<<16Lu) + (ptr[len-1]<<8Lu) + (ptr[len>>1Lu]); + int is_global=(sym->vclass==sGLOBAL); + symbol2 *cache_sym; + + if (is_global) + cache_sym=symbol_cache_remove(sym,0); + strcpy(sym->name,newname); + if (is_global && cache_sym!=NULL) + symbol_cache_add(sym,cache_sym); } static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int automaton,int *cmptag) { symbol *firstmatch=NULL; symbol *sym=root->next; + symbol2 *cache_sym=NULL; int count=0; - unsigned long hash=namehash(name); + int is_global=(root==&glbtab); + + if (is_global) { + cache_sym=hashmap_get(&symbol_cache_map,name); + if (cache_sym) + sym=cache_sym->symbol; + else + sym=NULL; + } + while (sym!=NULL) { - if (hash==sym->hash && strcmp(name,sym->name)==0 /* check name */ + if ( (is_global 
|| strcmp(name,sym->name)==0) /* check name */ && (sym->parent==NULL || sym->ident==iCONSTEXPR) /* sub-types (hierarchical types) are skipped, except for enum fields */ && (sym->fnumber<0 || sym->fnumber==fnumber)) /* check file number for scope */ { @@ -2834,7 +2901,15 @@ static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int a } /* if */ } /* if */ } /* */ - sym=sym->next; + if (is_global) { + cache_sym=cache_sym->next; + if (cache_sym) + sym=cache_sym->symbol; + else + sym=NULL; + } else { + sym=sym->next; + } } /* while */ if (cmptag!=NULL && firstmatch!=NULL) { if (*cmptag==0) @@ -2847,12 +2922,8 @@ static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int a static symbol *find_symbol_child(const symbol *root,const symbol *sym) { - symbol *ptr=root->next; - while (ptr!=NULL) { - if (ptr->parent==sym) - return ptr; - ptr=ptr->next; - } /* while */ + if (sym->child && sym->child->parent==sym) + return sym->child; return NULL; } @@ -3010,7 +3081,6 @@ SC_FUNC symbol *addsym(const char *name,cell addr,int ident,int vclass,int tag,i /* first fill in the entry */ memset(&entry,0,sizeof entry); strcpy(entry.name,name); - entry.hash=namehash(name); entry.addr=addr; entry.codeaddr=code_idx; entry.vclass=(char)vclass; @@ -3055,6 +3125,8 @@ SC_FUNC symbol *addvariable(const char *name,cell addr,int ident,int vclass,int top->dim.array.level=(short)(numdim-level-1); top->x.tags.index=idxtag[level]; top->parent=parent; + if (parent) + parent->child=top; if (vclass==sLOCAL || vclass==sSTATIC) { top->compound=compound; /* for multiple declaration/shadowing check */ } /* if */ @@ -3088,35 +3160,26 @@ SC_FUNC int getlabel(void) */ SC_FUNC char *itoh(ucell val) { -static char itohstr[30]; - char *ptr; - int i,nibble[16]; /* a 64-bit hexadecimal cell has 16 nibbles */ - int max; + static const char hex[16]= + {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; +#if PAWN_CELL_SIZE==16 + static char itohstr[5]= + 
{'\0','\0','\0','\0','\0'}; + char *ptr=&itohstr[3]; +#elif PAWN_CELL_SIZE==32 + static char itohstr[9]= + {'\0','\0','\0','\0','\0','\0','\0','\0','\0'}; + char *ptr=&itohstr[7]; +#elif PAWN_CELL_SIZE==64 + static char itohstr[17]= + {'\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0'}; + char *ptr=&itohstr[15]; +#else + #error Unsupported cell size +#endif - #if PAWN_CELL_SIZE==16 - max=4; - #elif PAWN_CELL_SIZE==32 - max=8; - #elif PAWN_CELL_SIZE==64 - max=16; - #else - #error Unsupported cell size - #endif - ptr=itohstr; - for (i=0; i>=4; - } /* endfor */ - i=max-1; - while (nibble[i]==0 && i>0) /* search for highest non-zero nibble */ - i-=1; - while (i>=0){ - if (nibble[i]>=10) - *ptr++=(char)('a'+(nibble[i]-10)); - else - *ptr++=(char)('0'+nibble[i]); - i-=1; - } /* while */ - *ptr='\0'; /* and a zero-terminator */ - return itohstr; + do { + *ptr-- = hex[val&(ucell)0x0f]; + } while ((val>>=4)!=0); + return ptr+1; } diff --git a/source/compiler/sc4.c b/source/compiler/sc4.c index c2ea105..33b55fb 100644 --- a/source/compiler/sc4.c +++ b/source/compiler/sc4.c @@ -287,15 +287,6 @@ SC_FUNC void setlinedirect(int line) } /* if */ } -SC_FUNC void setlineconst(int line) -{ - symbol *sym; - - sym=findconst("__line",NULL); - assert(sym!=NULL); - sym->addr=fline; -} - /* setlabel * * Post a code label (specified as a number), on a new line. 
diff --git a/source/compiler/sc7.c b/source/compiler/sc7.c index 5981efc..f8d2c9d 100644 --- a/source/compiler/sc7.c +++ b/source/compiler/sc7.c @@ -1254,6 +1254,7 @@ static void stgopt(char *start,char *end,int (*outputfunc)(char *str)); static char *stgbuf=NULL; static int stgmax=0; /* current size of the staging buffer */ +static int stglen=0; /* current length of the staging buffer */ static char *stgpipe=NULL; static int pipemax=0; /* current size of the stage pipe, a second staging buffer */ @@ -1290,6 +1291,7 @@ SC_FUNC void stgbuffer_cleanup(void) if (stgbuf!=NULL) { free(stgbuf); stgbuf=NULL; + stglen=0; stgmax=0; } /* if */ if (stgpipe!=NULL) { @@ -1323,6 +1325,7 @@ SC_FUNC void stgmark(char mark) if (staging) { CHECK_STGBUFFER(stgidx); stgbuf[stgidx++]=mark; + stglen++; } /* if */ } @@ -1364,10 +1367,12 @@ static int filewrite(char *str) * Global references: stgidx (altered) * stgbuf (altered) * staging (referred to only) + * stglen (altered) */ SC_FUNC void stgwrite(const char *st) { int len; + int st_len; if (staging) { assert(stgidx==0 || stgbuf!=NULL); /* staging buffer must be valid if there is (apparently) something in it */ @@ -1376,17 +1381,21 @@ SC_FUNC void stgwrite(const char *st) while (*st!='\0') { /* copy to staging buffer */ CHECK_STGBUFFER(stgidx); stgbuf[stgidx++]=*st++; + stglen++; } /* while */ CHECK_STGBUFFER(stgidx); stgbuf[stgidx++]='\0'; } else { - len=(stgbuf!=NULL) ? strlen(stgbuf) : 0; - CHECK_STGBUFFER(len+strlen(st)+1); - strcat(stgbuf,st); - len=strlen(stgbuf); + len=(stgbuf!=NULL) ? 
stglen : 0; + st_len=strlen(st); + CHECK_STGBUFFER(len+st_len+1); + memcpy(stgbuf+len,st,st_len+1); + len=len+st_len; + stglen=len; if (len>0 && stgbuf[len-1]=='\n') { filewrite(stgbuf); stgbuf[0]='\0'; + stglen=0; } /* if */ } /* if */ } @@ -1413,6 +1422,7 @@ SC_FUNC void stgout(int index) /* first pass: sub-expressions */ if (sc_status==statWRITE) reordered=stgstring(&stgbuf[index],&stgbuf[stgidx]); + stglen=stgidx-index; stgidx=index; /* second pass: optimize the buffer created in the first pass */ @@ -1562,10 +1572,11 @@ SC_FUNC void stgset(int onoff) /* write any contents that may be put in the buffer by stgwrite() * when "staging" was 0 */ - if (strlen(stgbuf)>0) + if (stglen>0) filewrite(stgbuf); } /* if */ stgbuf[0]='\0'; + stglen=0; } #define MAX_OPT_VARS 5 diff --git a/source/compiler/scvars.c b/source/compiler/scvars.c index 697c04e..0329852 100644 --- a/source/compiler/scvars.c +++ b/source/compiler/scvars.c @@ -33,6 +33,8 @@ */ SC_VDEFINE symbol loctab; /* local symbol table */ SC_VDEFINE symbol glbtab; /* global symbol table */ +SC_VDEFINE struct hashmap symbol_cache_map; +SC_VDEFINE symbol *line_sym=NULL; SC_VDEFINE cell *litq; /* the literal queue */ SC_VDEFINE unsigned char pline[sLINEMAX+1]; /* the line read from the input file */ SC_VDEFINE const unsigned char *lptr; /* points to the current position in "pline" */