Add an AES-based qHash function, inspired by Go's

Change-Id: I09100678ff4443e6be06fffd1481e94089c47799
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
Thiago Macieira 2016-10-28 22:23:36 -07:00
parent f2abfb39d7
commit ed991b7d15

View File

@ -387,8 +387,145 @@ static uint siphash(const uint8_t *in, uint inlen, const uint seed)
}
#endif
#if QT_COMPILER_SUPPORTS_HERE(AES) && QT_COMPILER_SUPPORTS_HERE(SSE4_2) && \
    !(defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__))
// NOTE(review): this implementation is compiled out under ASan/TSan, presumably
// because the tail handling below intentionally issues 16-byte loads that
// extend past the logical end (or before the start) of the input - confirm.
#  define AESHASH

/*
    Hashes the len bytes starting at p, mixing in seed, using the CPU's
    AESENC instruction as a bit scrambler. Returns the low 64 bits (or the low
    32 bits when QT_POINTER_SIZE is not 8) of the final 128-bit state.

    The input is consumed in three stages:
      1. as many whole 32-byte blocks as possible, using two 128-bit states;
      2. one 16-byte block, if at least 16 bytes remain;
      3. a final partial block of 1 to 15 bytes, zero-padded to 16 bytes.

    Both the seed and the length are part of the initial key, so inputs that
    only differ by trailing zero bytes still start from different states.

    This is not encryption and makes no claim to be cryptographically secure.
*/
QT_FUNCTION_TARGET(AES)
static size_t aeshash(const uchar *p, size_t len, size_t seed) Q_DECL_NOTHROW
{
    // Build the 128-bit initial key out of the seed and the length.
    __m128i key;
    if (sizeof(size_t) == 8) {
#ifdef Q_PROCESSOR_X86_64
        // low 64 bits: seed; high 64 bits: seed ^ len
        quint64 seededlen = seed ^ len;
        __m128i mseed = _mm_cvtsi64_si128(seed);
        key = _mm_insert_epi64(mseed, seededlen, 1);
#endif
    } else {
        // 32-bit lane 0: seed; lane 1: low 16 bits of len replicated twice;
        // then the low 64 bits are duplicated into the high 64 bits
        quint32 replicated_len = quint16(len) | (quint32(quint16(len)) << 16);
        __m128i mseed = _mm_cvtsi32_si128(seed);
        key = _mm_insert_epi32(mseed, replicated_len, 1);
        key = _mm_unpacklo_epi64(key, key);
    }

    // This is inspired by the algorithm in the Go language. See:
    // https://github.com/golang/go/blob/894abb5f680c040777f17f9f8ee5a5ab3a03cb94/src/runtime/asm_386.s#L902
    // https://github.com/golang/go/blob/894abb5f680c040777f17f9f8ee5a5ab3a03cb94/src/runtime/asm_amd64.s#L903
    //
    // Even though we're using the AESENC instruction from the CPU, this code
    // is not encryption and this routine makes no claim to be
    // cryptographically secure. We're simply using the instruction that performs
    // the scrambling round (step 3 in [1]) because it's just very good at
    // spreading the bits around.
    //
    // [1] https://en.wikipedia.org/wiki/Advanced_Encryption_Standard#High-level_description_of_the_algorithm

    // hash 16 bytes, running 3 scramble rounds of AES on itself (like label "final1")
    const auto hash16bytes = [](__m128i &state0, __m128i data) QT_FUNCTION_TARGET(AES) {
        state0 = _mm_xor_si128(state0, data);
        state0 = _mm_aesenc_si128(state0, state0);
        state0 = _mm_aesenc_si128(state0, state0);
        state0 = _mm_aesenc_si128(state0, state0);
    };

    __m128i state0 = key;
    auto src = reinterpret_cast<const __m128i *>(p);

    // Short inputs skip straight to the stage that can handle them.
    if (len < 16)
        goto lt16;
    if (len < 32)
        goto lt32;

    // rounds of 32 bytes
    {
        // Make state1 = ~state0:
        __m128i one = _mm_cmpeq_epi64(key, key);    // all bits set
        __m128i state1 = _mm_xor_si128(state0, one);

        // do simplified rounds of 32 bytes: unlike the Go code, we only
        // scramble twice and we keep 256 bits of state
        const auto srcend = src + (len / 32) * 2;   // src counts 16-byte units
        while (src < srcend) {
            const __m128i data0 = _mm_loadu_si128(src);
            const __m128i data1 = _mm_loadu_si128(src + 1);

            // Absorb the input into the state *before* scrambling. The XOR
            // result must land in state0/state1: writing it anywhere else
            // would make the hash independent of these 32 bytes.
            state0 = _mm_xor_si128(data0, state0);
            state1 = _mm_xor_si128(data1, state1);

            state0 = _mm_aesenc_si128(state0, state0);
            state1 = _mm_aesenc_si128(state1, state1);
            state0 = _mm_aesenc_si128(state0, state0);
            state1 = _mm_aesenc_si128(state1, state1);
            src += 2;
        }

        // fold the 256 bits of state back into 128 bits
        state0 = _mm_xor_si128(state0, state1);
    }

    // only the bytes not consumed by the 32-byte rounds remain
    len &= 0x1f;

    // do we still have 16 or more bytes?
    if (len & 0x10) {
lt32:
        __m128i data = _mm_loadu_si128(src);
        hash16bytes(state0, data);
        ++src;
    }
    len &= 0xf;

lt16:
    if (len) {
        // Load the last chunk of data (1 to 15 bytes).
        // We always perform a 16-byte load and zero the bytes we don't care
        // about. The hash of a short string still differs from the hash of a
        // longer one that ends in NUL bytes, because the length is part of
        // the key.
        // WARNING: this may produce valgrind warnings, but it's safe
        __m128i data;

        if (Q_LIKELY(quintptr(src + 1) & 0xff0)) {
            // same page, we definitely can't fault:
            // load all 16 bytes and mask off the bytes past the end of the source
            static const qint8 maskarray[] = {
                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            };
            // starting at offset 15 - len yields len bytes of 0xff followed by zeroes
            __m128i mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(maskarray + 15 - len));
            data = _mm_loadu_si128(src);
            data = _mm_and_si128(data, mask);
        } else {
            // too close to the end of the page, it could fault:
            // load 16 bytes ending at the data end, then shuffle them to the beginning
            static const qint8 shufflecontrol[] = {
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
            };
            // control bytes of -1 make PSHUFB write zero to that position
            __m128i control = _mm_loadu_si128(reinterpret_cast<const __m128i *>(shufflecontrol + 15 - len));
            p = reinterpret_cast<const uchar *>(src - 1);
            data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p + len));
            data = _mm_shuffle_epi8(data, control);
        }

        hash16bytes(state0, data);
    }

    // extract state0
#  if QT_POINTER_SIZE == 8
    return _mm_cvtsi128_si64(state0);
#  else
    return _mm_cvtsi128_si32(state0);
#  endif
}
#endif
size_t qHashBits(const void *p, size_t size, size_t seed) noexcept
{
#ifdef QT_BOOTSTRAPPED
// the seed is always 0 in bootstrapped mode (no seed generation code),
// so help the compiler do dead code elimination
seed = 0;
#endif
#ifdef AESHASH
if (seed && qCpuHasFeature(AES) && qCpuHasFeature(SSE4_2))
return aeshash(reinterpret_cast<const uchar *>(p), size, seed);
#endif
if (size <= QT_POINTER_SIZE)
return murmurhash(p, size, seed);