QStaticByteArrayMatcher: fix searching in 2+GiB haystacks

Add a test (same techniques as for the 4+GiB check in
tst_qcryptographichash).

Takes ~1s to build the 4GiB test data here, and skips
when RAM is too low:

  $ qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork
  [...]
  QDEBUG : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() created dataset in 891 ms
  [...]

  $ (ulimit -v 2000000; qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork)
  ********* Start testing of tst_QByteArrayMatcher *********
  [...]
  SKIP   : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() Could not allocate 4GiB plus a couple hundred bytes of RAM.
     Loc: [/home/marc/Qt/qt5/qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp(242)]
  [...]

Found during 6.3 API review.

[ChangeLog][QtCore][QStaticByteArrayMatcher] Fixed searching in
strings with size > 2GiB (on 64-bit platforms).

Fixes: QTBUG-100118
Pick-to: 6.3
Change-Id: I1df420965673b5555fef2b75e785954cc50b654f
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Marc Mutz 2022-01-21 15:02:08 +01:00
parent 3ec587666f
commit 3d3558dc8f
4 changed files with 106 additions and 14 deletions

View File

@ -62,6 +62,19 @@ int QMetaType::id() const
#if QT_REMOVED_SINCE(6, 3)
#include "qbytearraymatcher.h"
# if QT_POINTER_SIZE != 4
int QStaticByteArrayMatcherBase::indexOfIn(const char *h, uint hl, const char *n, int nl, int from) const noexcept
{
qsizetype r = indexOfIn(h, size_t(hl), n, qsizetype(nl), qsizetype(from));
Q_ASSERT(r == int(r));
return r;
}
# endif // QT_POINTER_SIZE != 4
#include "tools/qcryptographichash.h"
void QCryptographicHash::addData(const QByteArray &data)

View File

@ -395,7 +395,7 @@ qsizetype qFindByteArray(
*/
/*!
\fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const char *haystack, int hlen, int from = 0) const
\fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const
Searches the char string \a haystack, which has length \a hlen, from
byte position \a from (default 0, i.e. from the first byte), for
@ -405,7 +405,7 @@ qsizetype qFindByteArray(
*/
/*!
\fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, int from = 0) const
\fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const
Searches the char string \a haystack, from byte position \a from
(default 0, i.e. from the first byte), for the byte array pattern()
@ -415,7 +415,7 @@ qsizetype qFindByteArray(
*/
/*!
\fn template <uint N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
\fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
Returns the byte array pattern that this byte array matcher will
search for.
@ -426,7 +426,7 @@ qsizetype qFindByteArray(
/*!
\internal
*/
int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept
qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept
{
if (from < 0)
from = 0;
@ -435,12 +435,12 @@ int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const
}
/*!
\fn template <uint N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
\fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
\internal
*/
/*!
\fn template <uint N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
\fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
\since 5.9
\relates QStaticByteArrayMatcher

View File

@ -99,15 +99,20 @@ class QStaticByteArrayMatcherBase
uchar data[256];
} m_skiptable;
protected:
explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, uint n) noexcept
explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, size_t n) noexcept
: m_skiptable(generate(pattern, n)) {}
// compiler-generated copy/move ctors/assignment operators are ok!
// compiler-generated dtor is ok!
#if QT_REMOVED_SINCE(6, 3) && QT_POINTER_SIZE != 4
Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept;
#endif
Q_CORE_EXPORT qsizetype indexOfIn(const char *needle, size_t nlen,
const char *haystack, qsizetype hlen,
qsizetype from) const noexcept;
private:
static constexpr Skiptable generate(const char *pattern, uint n) noexcept
static constexpr Skiptable generate(const char *pattern, size_t n) noexcept
{
const auto uchar_max = (std::numeric_limits<uchar>::max)();
uchar max = n > uchar_max ? uchar_max : uchar(n);
@ -143,7 +148,7 @@ private:
}
};
template <uint N>
template <size_t N>
class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase
{
char m_pattern[N];
@ -153,19 +158,19 @@ public:
explicit constexpr QStaticByteArrayMatcher(const char (&patternToMatch)[N]) noexcept
: QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern()
{
for (uint i = 0; i < N; ++i)
for (size_t i = 0; i < N; ++i)
m_pattern[i] = patternToMatch[i];
}
int indexIn(const QByteArray &haystack, int from = 0) const noexcept
qsizetype indexIn(const QByteArray &haystack, qsizetype from = 0) const noexcept
{ return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); }
int indexIn(const char *haystack, int hlen, int from = 0) const noexcept
qsizetype indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const noexcept
{ return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); }
QByteArray pattern() const { return QByteArray(m_pattern, int(N - 1)); }
QByteArray pattern() const { return QByteArray(m_pattern, qsizetype(N - 1)); }
};
template <uint N>
template <size_t N>
constexpr QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) noexcept
{ return QStaticByteArrayMatcher<N>(pattern); }

View File

@ -31,6 +31,13 @@
#include <qbytearraymatcher.h>
#include <numeric>
#include <string>
#if QT_CONFIG(cxx11_future)
# include <thread>
#endif
// COM interface
#if defined(Q_OS_WIN) && defined(interface)
# undef interface
@ -44,6 +51,7 @@ private slots:
void interface();
void indexIn();
void staticByteArrayMatcher();
void haystacksWithMoreThan4GiBWork();
};
void tst_QByteArrayMatcher::interface()
@ -208,6 +216,72 @@ void tst_QByteArrayMatcher::staticByteArrayMatcher()
}
// Checks that both QByteArrayMatcher and QStaticByteArrayMatcher report the
// correct (untruncated) index for a needle that starts past the 4 GiB mark of
// the haystack. Only runs when QT_POINTER_SIZE > 4; otherwise the test is
// skipped. It is also skipped when the haystack cannot be allocated.
void tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork()
{
#if QT_POINTER_SIZE > 4
// use a large needle to trigger long skips in the Boyer-Moore algorithm
// (to speed up the test)
// NOTE: LONG_STRING_256 is a macro defined outside this function (it is
// #undef'ed right after it).
constexpr std::string_view needle = LONG_STRING_256;
//
// GIVEN: a haystack with more than 4 GiB of data
//
// don't use QByteArray because freeSpaceAtEnd() may break reserve()
// semantics and a realloc is the last thing we need here
std::string large;
QElapsedTimer timer;
timer.start();
constexpr size_t GiB = 1024 * 1024 * 1024;
// One byte more than 4 GiB of padding, so the needle starts at an index that
// does not fit into 32 bits.
constexpr size_t BaseSize = 4 * GiB + 1;
try {
// Reserve the final size up front, then fill with BaseSize NUL bytes and put
// the needle at the very end: its expected index is therefore BaseSize.
large.reserve(BaseSize + needle.size());
large.resize(BaseSize, '\0');
large.append(needle);
} catch (const std::bad_alloc &) {
QSKIP("Could not allocate 4GiB plus a couple hundred bytes of RAM.");
}
QCOMPARE(large.size(), BaseSize + needle.size());
qDebug("created dataset in %lld ms", timer.elapsed());
// With cxx11_future the dynamic-matcher search below runs on a std::thread
// while the static-matcher search runs on this thread. Without it, the
// fallback struct merely stores the callable and runs it inside join(), i.e.
// after the static-matcher search has finished.
# if QT_CONFIG(cxx11_future)
using MaybeThread = std::thread;
# else
struct MaybeThread {
std::function<void()> func;
void join() { func(); }
};
# endif
//
// WHEN: trying to match an occurrence past the 4GiB mark
//
// Both results are assigned before they are read: staticResult in the block
// below, dynamicResult by the callable, which has completed once join() returns.
qsizetype dynamicResult, staticResult;
auto t = MaybeThread{[&]{
QByteArrayMatcher m(needle);
dynamicResult = m.indexIn(large);
}};
{
static_assert(needle == LONG_STRING_256); // need a string literal in the following line:
QStaticByteArrayMatcher m(LONG_STRING_256);
staticResult = m.indexIn(large.data(), large.size());
}
// Wait for (or, in the fallback, run) the dynamic-matcher search before
// looking at dynamicResult.
t.join();
//
// THEN: the result index is not truncated
//
QCOMPARE(staticResult, qsizetype(BaseSize));
QCOMPARE(dynamicResult, qsizetype(BaseSize));
#else
QSKIP("This test is 64-bit only.");
#endif
}
#undef LONG_STRING_256
#undef LONG_STRING_128
#undef LONG_STRING__64