QStaticByteArrayMatcher: fix searching in 2+GiB haystacks
Add a test (same techniques as for the 4+GiB check in tst_qcryptographichash).

Takes ~1s to build the 4GiB test data here, and skips when RAM is too low:

    $ qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork
    [...]
    QDEBUG : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() created dataset in 891 ms
    [...]
    $ (ulimit -v 2000000; qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork)
    ********* Start testing of tst_QByteArrayMatcher *********
    [...]
    SKIP : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() Could not allocate 4GiB plus a couple hundred bytes of RAM.
    Loc: [/home/marc/Qt/qt5/qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp(242)]
    [...]

Found during 6.3 API review.

[ChangeLog][QtCore][QStaticByteArrayMatcher] Fixed searching in strings with size > 2GiB (on 64-bit platforms).

Fixes: QTBUG-100118
Pick-to: 6.3
Change-Id: I1df420965673b5555fef2b75e785954cc50b654f
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
3ec587666f
commit
3d3558dc8f
@ -62,6 +62,19 @@ int QMetaType::id() const
|
|||||||
|
|
||||||
#if QT_REMOVED_SINCE(6, 3)
|
#if QT_REMOVED_SINCE(6, 3)
|
||||||
|
|
||||||
|
#include "qbytearraymatcher.h"
|
||||||
|
|
||||||
|
# if QT_POINTER_SIZE != 4
|
||||||
|
|
||||||
|
int QStaticByteArrayMatcherBase::indexOfIn(const char *h, uint hl, const char *n, int nl, int from) const noexcept
|
||||||
|
{
|
||||||
|
qsizetype r = indexOfIn(h, size_t(hl), n, qsizetype(nl), qsizetype(from));
|
||||||
|
Q_ASSERT(r == int(r));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
# endif // QT_POINTER_SIZE != 4
|
||||||
|
|
||||||
#include "tools/qcryptographichash.h"
|
#include "tools/qcryptographichash.h"
|
||||||
|
|
||||||
void QCryptographicHash::addData(const QByteArray &data)
|
void QCryptographicHash::addData(const QByteArray &data)
|
||||||
|
@ -395,7 +395,7 @@ qsizetype qFindByteArray(
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const char *haystack, int hlen, int from = 0) const
|
\fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const
|
||||||
|
|
||||||
Searches the char string \a haystack, which has length \a hlen, from
|
Searches the char string \a haystack, which has length \a hlen, from
|
||||||
byte position \a from (default 0, i.e. from the first byte), for
|
byte position \a from (default 0, i.e. from the first byte), for
|
||||||
@ -405,7 +405,7 @@ qsizetype qFindByteArray(
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, int from = 0) const
|
\fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const
|
||||||
|
|
||||||
Searches the char string \a haystack, from byte position \a from
|
Searches the char string \a haystack, from byte position \a from
|
||||||
(default 0, i.e. from the first byte), for the byte array pattern()
|
(default 0, i.e. from the first byte), for the byte array pattern()
|
||||||
@ -415,7 +415,7 @@ qsizetype qFindByteArray(
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\fn template <uint N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
|
\fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
|
||||||
|
|
||||||
Returns the byte array pattern that this byte array matcher will
|
Returns the byte array pattern that this byte array matcher will
|
||||||
search for.
|
search for.
|
||||||
@ -426,7 +426,7 @@ qsizetype qFindByteArray(
|
|||||||
/*!
|
/*!
|
||||||
\internal
|
\internal
|
||||||
*/
|
*/
|
||||||
int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept
|
qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept
|
||||||
{
|
{
|
||||||
if (from < 0)
|
if (from < 0)
|
||||||
from = 0;
|
from = 0;
|
||||||
@ -435,12 +435,12 @@ int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\fn template <uint N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
|
\fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
|
||||||
\internal
|
\internal
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\fn template <uint N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
|
\fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
|
||||||
\since 5.9
|
\since 5.9
|
||||||
\relates QStaticByteArrayMatcher
|
\relates QStaticByteArrayMatcher
|
||||||
|
|
||||||
|
@ -99,15 +99,20 @@ class QStaticByteArrayMatcherBase
|
|||||||
uchar data[256];
|
uchar data[256];
|
||||||
} m_skiptable;
|
} m_skiptable;
|
||||||
protected:
|
protected:
|
||||||
explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, uint n) noexcept
|
explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, size_t n) noexcept
|
||||||
: m_skiptable(generate(pattern, n)) {}
|
: m_skiptable(generate(pattern, n)) {}
|
||||||
// compiler-generated copy/move ctors/assignment operators are ok!
|
// compiler-generated copy/move ctors/assignment operators are ok!
|
||||||
// compiler-generated dtor is ok!
|
// compiler-generated dtor is ok!
|
||||||
|
|
||||||
|
#if QT_REMOVED_SINCE(6, 3) && QT_POINTER_SIZE != 4
|
||||||
Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept;
|
Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept;
|
||||||
|
#endif
|
||||||
|
Q_CORE_EXPORT qsizetype indexOfIn(const char *needle, size_t nlen,
|
||||||
|
const char *haystack, qsizetype hlen,
|
||||||
|
qsizetype from) const noexcept;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static constexpr Skiptable generate(const char *pattern, uint n) noexcept
|
static constexpr Skiptable generate(const char *pattern, size_t n) noexcept
|
||||||
{
|
{
|
||||||
const auto uchar_max = (std::numeric_limits<uchar>::max)();
|
const auto uchar_max = (std::numeric_limits<uchar>::max)();
|
||||||
uchar max = n > uchar_max ? uchar_max : uchar(n);
|
uchar max = n > uchar_max ? uchar_max : uchar(n);
|
||||||
@ -143,7 +148,7 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <uint N>
|
template <size_t N>
|
||||||
class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase
|
class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase
|
||||||
{
|
{
|
||||||
char m_pattern[N];
|
char m_pattern[N];
|
||||||
@ -153,19 +158,19 @@ public:
|
|||||||
explicit constexpr QStaticByteArrayMatcher(const char (&patternToMatch)[N]) noexcept
|
explicit constexpr QStaticByteArrayMatcher(const char (&patternToMatch)[N]) noexcept
|
||||||
: QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern()
|
: QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern()
|
||||||
{
|
{
|
||||||
for (uint i = 0; i < N; ++i)
|
for (size_t i = 0; i < N; ++i)
|
||||||
m_pattern[i] = patternToMatch[i];
|
m_pattern[i] = patternToMatch[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
int indexIn(const QByteArray &haystack, int from = 0) const noexcept
|
qsizetype indexIn(const QByteArray &haystack, qsizetype from = 0) const noexcept
|
||||||
{ return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); }
|
{ return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); }
|
||||||
int indexIn(const char *haystack, int hlen, int from = 0) const noexcept
|
qsizetype indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const noexcept
|
||||||
{ return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); }
|
{ return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); }
|
||||||
|
|
||||||
QByteArray pattern() const { return QByteArray(m_pattern, int(N - 1)); }
|
QByteArray pattern() const { return QByteArray(m_pattern, qsizetype(N - 1)); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <uint N>
|
template <size_t N>
|
||||||
constexpr QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) noexcept
|
constexpr QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) noexcept
|
||||||
{ return QStaticByteArrayMatcher<N>(pattern); }
|
{ return QStaticByteArrayMatcher<N>(pattern); }
|
||||||
|
|
||||||
|
@ -31,6 +31,13 @@
|
|||||||
|
|
||||||
#include <qbytearraymatcher.h>
|
#include <qbytearraymatcher.h>
|
||||||
|
|
||||||
|
#include <numeric>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#if QT_CONFIG(cxx11_future)
|
||||||
|
# include <thread>
|
||||||
|
#endif
|
||||||
|
|
||||||
// COM interface
|
// COM interface
|
||||||
#if defined(Q_OS_WIN) && defined(interface)
|
#if defined(Q_OS_WIN) && defined(interface)
|
||||||
# undef interface
|
# undef interface
|
||||||
@ -44,6 +51,7 @@ private slots:
|
|||||||
void interface();
|
void interface();
|
||||||
void indexIn();
|
void indexIn();
|
||||||
void staticByteArrayMatcher();
|
void staticByteArrayMatcher();
|
||||||
|
void haystacksWithMoreThan4GiBWork();
|
||||||
};
|
};
|
||||||
|
|
||||||
void tst_QByteArrayMatcher::interface()
|
void tst_QByteArrayMatcher::interface()
|
||||||
@ -208,6 +216,72 @@ void tst_QByteArrayMatcher::staticByteArrayMatcher()
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork()
|
||||||
|
{
|
||||||
|
#if QT_POINTER_SIZE > 4
|
||||||
|
// use a large needle to trigger long skips in the Boyer-Moore algorithm
|
||||||
|
// (to speed up the test)
|
||||||
|
constexpr std::string_view needle = LONG_STRING_256;
|
||||||
|
|
||||||
|
//
|
||||||
|
// GIVEN: a haystack with more than 4 GiB of data
|
||||||
|
//
|
||||||
|
|
||||||
|
// don't use QByteArray because freeSpaceAtEnd() may break reserve()
|
||||||
|
// semantics and a realloc is the last thing we need here
|
||||||
|
std::string large;
|
||||||
|
QElapsedTimer timer;
|
||||||
|
timer.start();
|
||||||
|
constexpr size_t GiB = 1024 * 1024 * 1024;
|
||||||
|
constexpr size_t BaseSize = 4 * GiB + 1;
|
||||||
|
try {
|
||||||
|
large.reserve(BaseSize + needle.size());
|
||||||
|
large.resize(BaseSize, '\0');
|
||||||
|
large.append(needle);
|
||||||
|
} catch (const std::bad_alloc &) {
|
||||||
|
QSKIP("Could not allocate 4GiB plus a couple hundred bytes of RAM.");
|
||||||
|
}
|
||||||
|
QCOMPARE(large.size(), BaseSize + needle.size());
|
||||||
|
qDebug("created dataset in %lld ms", timer.elapsed());
|
||||||
|
|
||||||
|
# if QT_CONFIG(cxx11_future)
|
||||||
|
using MaybeThread = std::thread;
|
||||||
|
# else
|
||||||
|
struct MaybeThread {
|
||||||
|
std::function<void()> func;
|
||||||
|
void join() { func(); }
|
||||||
|
};
|
||||||
|
# endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// WHEN: trying to match an occurrence past the 4GiB mark
|
||||||
|
//
|
||||||
|
|
||||||
|
qsizetype dynamicResult, staticResult;
|
||||||
|
|
||||||
|
auto t = MaybeThread{[&]{
|
||||||
|
QByteArrayMatcher m(needle);
|
||||||
|
dynamicResult = m.indexIn(large);
|
||||||
|
}};
|
||||||
|
{
|
||||||
|
static_assert(needle == LONG_STRING_256); // need a string literal in the following line:
|
||||||
|
QStaticByteArrayMatcher m(LONG_STRING_256);
|
||||||
|
staticResult = m.indexIn(large.data(), large.size());
|
||||||
|
}
|
||||||
|
t.join();
|
||||||
|
|
||||||
|
//
|
||||||
|
// THEN: the result index is not truncated
|
||||||
|
//
|
||||||
|
|
||||||
|
QCOMPARE(staticResult, qsizetype(BaseSize));
|
||||||
|
QCOMPARE(dynamicResult, qsizetype(BaseSize));
|
||||||
|
#else
|
||||||
|
QSKIP("This test is 64-bit only.");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
#undef LONG_STRING_256
|
#undef LONG_STRING_256
|
||||||
#undef LONG_STRING_128
|
#undef LONG_STRING_128
|
||||||
#undef LONG_STRING__64
|
#undef LONG_STRING__64
|
||||||
|
Loading…
x
Reference in New Issue
Block a user