QByteArray: implement qstricmp with SSE 4.1

Using SSE 4.1 because of the need for PMINUB.

Change-Id: Ib48364abee9f464c96c6fffd152ebd3f8ea7fe94
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
Thiago Macieira 2018-05-14 23:23:24 -07:00 committed by Allan Sandfeld Jensen
parent cd64a96b31
commit 10e5ec2557
2 changed files with 73 additions and 8 deletions

View File

@ -47,6 +47,7 @@
#include "qlocale_p.h"
#include "qlocale_tools_p.h"
#include "private/qnumeric_p.h"
#include "private/qsimd_p.h"
#include "qstringalgorithms_p.h"
#include "qscopedpointer.h"
#include "qbytearray_p.h"
@ -410,14 +411,72 @@ int qstricmp(const char *str1, const char *str2)
{
// Case-insensitive comparison of two NUL-terminated byte strings. Every byte
// is folded through the latin1_lowercased table before being compared. The
// result is the difference between the first pair of folded bytes that
// differ, or 0 when the terminating NUL is reached with no difference.
const uchar *s1 = reinterpret_cast<const uchar *>(str1);
const uchar *s2 = reinterpret_cast<const uchar *>(str2);
// NOTE(review): the statements from "int res;" through "return res;" look
// like the implementation that this change removes; this rendering of the
// diff carries no +/- markers, so confirm against the real patch.
int res;
uchar c;
// A null pointer orders before any non-null string; two nulls compare equal.
if (!s1 || !s2)
return s1 ? 1 : (s2 ? -1 : 0);
// Walk both strings in lock step until the folded bytes differ (res != 0)
// or the NUL terminator has been compared.
for (; !(res = (c = latin1_lowercased[*s1]) - latin1_lowercased[*s2]); s1++, s2++)
if (!c) // strings are equal
break;
return res;
// NOTE(review): from here on is presumably the replacement implementation.
// Null handling is split in two: a null str1 is less than a non-null str2
// and equal to a null str2 ...
if (!s1)
return s2 ? -1 : 0;
// ... and a non-null str1 is greater than a null str2.
if (!s2)
return 1;
// Sentinel returned by innerCompare when it used up its byte budget without
// reaching a verdict. Assuming latin1_lowercased holds uchar values, real
// results lie in [-255, 255], so 256 cannot collide with them.
enum { Incomplete = 256 };
// Index of the next byte to examine in both strings. The lambda captures it
// by reference, so progress made by one call is visible to the next.
qptrdiff offset = 0;
// Scalar comparison starting at the current offset. Examines up to `max`
// bytes, or keeps going until a verdict when `unlimited` is true.
auto innerCompare = [=, &offset](qptrdiff max, bool unlimited) {
// turn the byte count into an absolute end offset
max += offset;
// do/while: at least one byte is compared on every call
do {
uchar c = latin1_lowercased[s1[offset]];
int res = c - latin1_lowercased[s2[offset]];
if (Q_UNLIKELY(res))
return res;
// the folded bytes are equal here, so a zero means both strings ended
if (Q_UNLIKELY(!c))
return 0;
++offset;
} while (unlimited || offset < max);
return int(Incomplete);
};
// The vectorized path is compiled only when the compiler defines __SSE4_1__.
#ifdef __SSE4_1__
// NOTE(review): 4096 is hard-coded; assumes memory pages are at least this
// large on every target that takes this path - TODO confirm.
enum { PageSize = 4096, PageMask = PageSize - 1 };
const __m128i zero = _mm_setzero_si128();
forever {
// Calculate how many bytes we can load until we cross a page boundary
// for either source. This isn't an exact calculation, just something
// very quick.
// (u1 | u2) & PageMask is never smaller than either pointer's own in-page
// offset, so n never exceeds the distance from either pointer to its next
// 4096-byte boundary; it may underestimate that distance.
quintptr u1 = quintptr(s1 + offset);
quintptr u2 = quintptr(s2 + offset);
uint n = PageSize - ((u1 | u2) & PageMask);
qptrdiff maxoffset = offset + n;
// consume whole 16-byte vectors while they fit inside the n-byte window
for ( ; offset + 16 <= maxoffset; offset += sizeof(__m128i)) {
// load 16 bytes from either source
__m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s1 + offset));
__m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s2 + offset));
// compare the two against each other
// (lanes become 0xFF where the bytes are identical, 0x00 where they differ;
// this is an exact comparison, so case-only differences are flagged too)
__m128i cmp = _mm_cmpeq_epi8(a, b);
// find NUL terminators too
// (the unsigned minimum keeps a's byte in identical lanes, so a NUL in a
// yields 0 there; differing lanes are already 0)
cmp = _mm_min_epu8(cmp, a);
// lanes that differed or held a NUL now become 0xFF, all others 0x00
cmp = _mm_cmpeq_epi8(cmp, zero);
// was there any difference or a NUL?
// (one mask bit per byte lane)
uint mask = _mm_movemask_epi8(cmp);
if (mask) {
// yes, find out where
// start: index of the first flagged lane; end: one past the last flagged
// lane, counted from bit 0 of the mask
uint start = qCountTrailingZeroBits(mask);
uint end = sizeof(mask) * 8 - qCountLeadingZeroBits(mask);
Q_ASSUME(end >= start);
// jump to the first flagged byte and let the scalar loop re-examine the
// flagged span using the case-folding table
offset += start;
n = end - start;
break;
}
}
// using SIMD could cause a page fault, so iterate byte by byte
// (n is the flagged span when the loop above ended via break; otherwise it
// is still the window size computed at the top of this iteration)
int res = innerCompare(n, false);
// Incomplete means no verdict yet: go around and recompute the window
if (res != Incomplete)
return res;
}
#endif
// Without __SSE4_1__ this scalar call performs the whole comparison; the -1
// budget is irrelevant because `unlimited` is true.
// NOTE(review): `forever` is presumably Qt's endless-loop macro; if so, the
// loop above only exits through `return` and this line is not reached when
// the SIMD path is compiled in - confirm.
return innerCompare(-1, true);
}
/*! \relates QByteArray

View File

@ -860,6 +860,12 @@ void tst_QByteArray::qstricmp()
}
QCOMPARE(actual, expected);
actual = ::qstricmp("012345679abcd" + str1.toLatin1(), "012345679AbCd" + str2.toLatin1());
if ( actual != 0 ) {
actual = (actual < 0 ? -1 : 1);
}
QCOMPARE(actual, expected);
actual = str1.toLatin1().compare(str2.toLatin1(), Qt::CaseInsensitive);
if ( actual != 0 ) {
actual = (actual < 0 ? -1 : 1);