QStaticByteArrayMatcher: fix searching in 2+GiB haystacks

Add a test (same techniques as for the 4+GiB check in
tst_qcryptographichash).

Building the 4GiB test data takes ~1s here, and the test is skipped
when there is not enough RAM:

  $ qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork
  [...]
  QDEBUG : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() created dataset in 891 ms
  [...]

  $ (ulimit -v 2000000; qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork)
  ********* Start testing of tst_QByteArrayMatcher *********
  [...]
  SKIP   : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() Could not allocate 4GiB plus a couple hundred bytes of RAM.
     Loc: [/home/marc/Qt/qt5/qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp(242)]
  [...]

Found during 6.3 API review.

[ChangeLog][QtCore][QStaticByteArrayMatcher] Fixed searching in
strings with size > 2GiB (on 64-bit platforms).

Fixes: QTBUG-100118
Pick-to: 6.3
Change-Id: I1df420965673b5555fef2b75e785954cc50b654f
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Marc Mutz 2022-01-21 15:02:08 +01:00
parent 3ec587666f
commit 3d3558dc8f
4 changed files with 106 additions and 14 deletions

View File

@ -62,6 +62,19 @@ int QMetaType::id() const
#if QT_REMOVED_SINCE(6, 3) #if QT_REMOVED_SINCE(6, 3)
#include "qbytearraymatcher.h"
# if QT_POINTER_SIZE != 4
int QStaticByteArrayMatcherBase::indexOfIn(const char *h, uint hl, const char *n, int nl, int from) const noexcept
{
qsizetype r = indexOfIn(h, size_t(hl), n, qsizetype(nl), qsizetype(from));
Q_ASSERT(r == int(r));
return r;
}
# endif // QT_POINTER_SIZE != 4
#include "tools/qcryptographichash.h" #include "tools/qcryptographichash.h"
void QCryptographicHash::addData(const QByteArray &data) void QCryptographicHash::addData(const QByteArray &data)

View File

@ -395,7 +395,7 @@ qsizetype qFindByteArray(
*/ */
/*! /*!
\fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const char *haystack, int hlen, int from = 0) const \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const
Searches the char string \a haystack, which has length \a hlen, from Searches the char string \a haystack, which has length \a hlen, from
byte position \a from (default 0, i.e. from the first byte), for byte position \a from (default 0, i.e. from the first byte), for
@ -405,7 +405,7 @@ qsizetype qFindByteArray(
*/ */
/*! /*!
\fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, int from = 0) const \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const
Searches the char string \a haystack, from byte position \a from Searches the char string \a haystack, from byte position \a from
(default 0, i.e. from the first byte), for the byte array pattern() (default 0, i.e. from the first byte), for the byte array pattern()
@ -415,7 +415,7 @@ qsizetype qFindByteArray(
*/ */
/*! /*!
\fn template <uint N> QByteArray QStaticByteArrayMatcher<N>::pattern() const \fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
Returns the byte array pattern that this byte array matcher will Returns the byte array pattern that this byte array matcher will
search for. search for.
@ -426,7 +426,7 @@ qsizetype qFindByteArray(
/*! /*!
\internal \internal
*/ */
int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept
{ {
if (from < 0) if (from < 0)
from = 0; from = 0;
@ -435,12 +435,12 @@ int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const
} }
/*! /*!
\fn template <uint N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N]) \fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
\internal \internal
*/ */
/*! /*!
\fn template <uint N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N]) \fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
\since 5.9 \since 5.9
\relates QStaticByteArrayMatcher \relates QStaticByteArrayMatcher

View File

@ -99,15 +99,20 @@ class QStaticByteArrayMatcherBase
uchar data[256]; uchar data[256];
} m_skiptable; } m_skiptable;
protected: protected:
explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, uint n) noexcept explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, size_t n) noexcept
: m_skiptable(generate(pattern, n)) {} : m_skiptable(generate(pattern, n)) {}
// compiler-generated copy/move ctors/assignment operators are ok! // compiler-generated copy/move ctors/assignment operators are ok!
// compiler-generated dtor is ok! // compiler-generated dtor is ok!
#if QT_REMOVED_SINCE(6, 3) && QT_POINTER_SIZE != 4
Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept; Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept;
#endif
Q_CORE_EXPORT qsizetype indexOfIn(const char *needle, size_t nlen,
const char *haystack, qsizetype hlen,
qsizetype from) const noexcept;
private: private:
static constexpr Skiptable generate(const char *pattern, uint n) noexcept static constexpr Skiptable generate(const char *pattern, size_t n) noexcept
{ {
const auto uchar_max = (std::numeric_limits<uchar>::max)(); const auto uchar_max = (std::numeric_limits<uchar>::max)();
uchar max = n > uchar_max ? uchar_max : uchar(n); uchar max = n > uchar_max ? uchar_max : uchar(n);
@ -143,7 +148,7 @@ private:
} }
}; };
template <uint N> template <size_t N>
class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase
{ {
char m_pattern[N]; char m_pattern[N];
@ -153,19 +158,19 @@ public:
explicit constexpr QStaticByteArrayMatcher(const char (&patternToMatch)[N]) noexcept explicit constexpr QStaticByteArrayMatcher(const char (&patternToMatch)[N]) noexcept
: QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern() : QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern()
{ {
for (uint i = 0; i < N; ++i) for (size_t i = 0; i < N; ++i)
m_pattern[i] = patternToMatch[i]; m_pattern[i] = patternToMatch[i];
} }
int indexIn(const QByteArray &haystack, int from = 0) const noexcept qsizetype indexIn(const QByteArray &haystack, qsizetype from = 0) const noexcept
{ return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); } { return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); }
int indexIn(const char *haystack, int hlen, int from = 0) const noexcept qsizetype indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const noexcept
{ return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); } { return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); }
QByteArray pattern() const { return QByteArray(m_pattern, int(N - 1)); } QByteArray pattern() const { return QByteArray(m_pattern, qsizetype(N - 1)); }
}; };
template <uint N> template <size_t N>
constexpr QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) noexcept constexpr QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) noexcept
{ return QStaticByteArrayMatcher<N>(pattern); } { return QStaticByteArrayMatcher<N>(pattern); }

View File

@ -31,6 +31,13 @@
#include <qbytearraymatcher.h> #include <qbytearraymatcher.h>
#include <numeric>
#include <string>
#if QT_CONFIG(cxx11_future)
# include <thread>
#endif
// COM interface // COM interface
#if defined(Q_OS_WIN) && defined(interface) #if defined(Q_OS_WIN) && defined(interface)
# undef interface # undef interface
@ -44,6 +51,7 @@ private slots:
void interface(); void interface();
void indexIn(); void indexIn();
void staticByteArrayMatcher(); void staticByteArrayMatcher();
void haystacksWithMoreThan4GiBWork();
}; };
void tst_QByteArrayMatcher::interface() void tst_QByteArrayMatcher::interface()
@ -208,6 +216,72 @@ void tst_QByteArrayMatcher::staticByteArrayMatcher()
} }
// Verifies that QByteArrayMatcher and QStaticByteArrayMatcher both report an
// untruncated index when the only match starts past the 4 GiB mark of the
// haystack. Runs only when QT_POINTER_SIZE > 4 and is skipped when the
// haystack cannot be allocated.
void tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork()
{
#if QT_POINTER_SIZE > 4
    // A long needle triggers long skips in the Boyer-Moore algorithm, which
    // speeds up the test.
    constexpr std::string_view needle = LONG_STRING_256;

    //
    // GIVEN: a haystack with more than 4 GiB of data
    //

    // std::string instead of QByteArray: freeSpaceAtEnd() may break reserve()
    // semantics, and a reallocation of a buffer this size must be avoided.
    std::string large;

    QElapsedTimer stopwatch;
    stopwatch.start();

    constexpr size_t GiB = 1024 * 1024 * 1024;
    constexpr size_t BaseSize = 4 * GiB + 1;

    try {
        // BaseSize NUL bytes followed by the needle, so the only possible
        // match starts at offset BaseSize.
        large.reserve(BaseSize + needle.size());
        large.resize(BaseSize, '\0');
        large.append(needle);
    } catch (const std::bad_alloc &) {
        QSKIP("Could not allocate 4GiB plus a couple hundred bytes of RAM.");
    }
    QCOMPARE(large.size(), BaseSize + needle.size());
    qDebug("created dataset in %lld ms", stopwatch.elapsed());

    // With thread support the dynamic matcher runs concurrently with the
    // static one; otherwise the stand-in runs the callable inside join().
# if QT_CONFIG(cxx11_future)
    using MaybeThread = std::thread;
# else
    struct MaybeThread {
        std::function<void()> func;
        void join() { func(); }
    };
# endif

    //
    // WHEN: trying to match an occurrence past the 4GiB mark
    //

    qsizetype dynamicResult;
    qsizetype staticResult;

    const auto searchWithDynamicMatcher = [&] {
        QByteArrayMatcher matcher(needle);
        dynamicResult = matcher.indexIn(large);
    };
    auto worker = MaybeThread{searchWithDynamicMatcher};

    {
        // The static matcher's constructor needs a string literal, so make
        // sure the literal used below is the same text as the needle.
        static_assert(needle == LONG_STRING_256);
        QStaticByteArrayMatcher matcher(LONG_STRING_256);
        staticResult = matcher.indexIn(large.data(), large.size());
    }

    // dynamicResult is only valid once the worker has finished.
    worker.join();

    //
    // THEN: the result index is not truncated
    //

    QCOMPARE(staticResult, qsizetype(BaseSize));
    QCOMPARE(dynamicResult, qsizetype(BaseSize));
#else
    QSKIP("This test is 64-bit only.");
#endif
}
#undef LONG_STRING_256 #undef LONG_STRING_256
#undef LONG_STRING_128 #undef LONG_STRING_128
#undef LONG_STRING__64 #undef LONG_STRING__64