QStaticByteArrayMatcher: fix searching in 2+GiB haystacks
Add a test (same techniques as for the 4+GiB check in tst_qcryptographichash). Takes ~1s to build the 4GiB test data here, and skips when RAM is too low: $ qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork [...] QDEBUG : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() created dataset in 891 ms [...] $ (ulimit -v 2000000; qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork) ********* Start testing of tst_QByteArrayMatcher ********* [...] SKIP : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() Could not allocate 4GiB plus a couple hundred bytes of RAM. Loc: [/home/marc/Qt/qt5/qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp(242)] [...] Found during 6.3 API review. [ChangeLog][QtCore][QStaticByteArrayMatcher] Fixed searching in strings with size > 2GiB (on 64-bit platforms). Fixes: QTBUG-100118 Pick-to: 6.3 Change-Id: I1df420965673b5555fef2b75e785954cc50b654f Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
3ec587666f
commit
3d3558dc8f
@ -62,6 +62,19 @@ int QMetaType::id() const
|
||||
|
||||
#if QT_REMOVED_SINCE(6, 3)
|
||||
|
||||
#include "qbytearraymatcher.h"
|
||||
|
||||
# if QT_POINTER_SIZE != 4
|
||||
|
||||
int QStaticByteArrayMatcherBase::indexOfIn(const char *h, uint hl, const char *n, int nl, int from) const noexcept
|
||||
{
|
||||
qsizetype r = indexOfIn(h, size_t(hl), n, qsizetype(nl), qsizetype(from));
|
||||
Q_ASSERT(r == int(r));
|
||||
return r;
|
||||
}
|
||||
|
||||
# endif // QT_POINTER_SIZE != 4
|
||||
|
||||
#include "tools/qcryptographichash.h"
|
||||
|
||||
void QCryptographicHash::addData(const QByteArray &data)
|
||||
|
@ -395,7 +395,7 @@ qsizetype qFindByteArray(
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const char *haystack, int hlen, int from = 0) const
|
||||
\fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const
|
||||
|
||||
Searches the char string \a haystack, which has length \a hlen, from
|
||||
byte position \a from (default 0, i.e. from the first byte), for
|
||||
@ -405,7 +405,7 @@ qsizetype qFindByteArray(
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, int from = 0) const
|
||||
\fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const
|
||||
|
||||
Searches the char string \a haystack, from byte position \a from
|
||||
(default 0, i.e. from the first byte), for the byte array pattern()
|
||||
@ -415,7 +415,7 @@ qsizetype qFindByteArray(
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn template <uint N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
|
||||
\fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
|
||||
|
||||
Returns the byte array pattern that this byte array matcher will
|
||||
search for.
|
||||
@ -426,7 +426,7 @@ qsizetype qFindByteArray(
|
||||
/*!
|
||||
\internal
|
||||
*/
|
||||
int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept
|
||||
qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept
|
||||
{
|
||||
if (from < 0)
|
||||
from = 0;
|
||||
@ -435,12 +435,12 @@ int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const
|
||||
}
|
||||
|
||||
/*!
|
||||
\fn template <uint N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
|
||||
\fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
|
||||
\internal
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn template <uint N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
|
||||
\fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
|
||||
\since 5.9
|
||||
\relates QStaticByteArrayMatcher
|
||||
|
||||
|
@ -99,15 +99,20 @@ class QStaticByteArrayMatcherBase
|
||||
uchar data[256];
|
||||
} m_skiptable;
|
||||
protected:
|
||||
explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, uint n) noexcept
|
||||
explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, size_t n) noexcept
|
||||
: m_skiptable(generate(pattern, n)) {}
|
||||
// compiler-generated copy/move ctors/assignment operators are ok!
|
||||
// compiler-generated dtor is ok!
|
||||
|
||||
#if QT_REMOVED_SINCE(6, 3) && QT_POINTER_SIZE != 4
|
||||
Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept;
|
||||
#endif
|
||||
Q_CORE_EXPORT qsizetype indexOfIn(const char *needle, size_t nlen,
|
||||
const char *haystack, qsizetype hlen,
|
||||
qsizetype from) const noexcept;
|
||||
|
||||
private:
|
||||
static constexpr Skiptable generate(const char *pattern, uint n) noexcept
|
||||
static constexpr Skiptable generate(const char *pattern, size_t n) noexcept
|
||||
{
|
||||
const auto uchar_max = (std::numeric_limits<uchar>::max)();
|
||||
uchar max = n > uchar_max ? uchar_max : uchar(n);
|
||||
@ -143,7 +148,7 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
template <uint N>
|
||||
template <size_t N>
|
||||
class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase
|
||||
{
|
||||
char m_pattern[N];
|
||||
@ -153,19 +158,19 @@ public:
|
||||
explicit constexpr QStaticByteArrayMatcher(const char (&patternToMatch)[N]) noexcept
|
||||
: QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern()
|
||||
{
|
||||
for (uint i = 0; i < N; ++i)
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
m_pattern[i] = patternToMatch[i];
|
||||
}
|
||||
|
||||
int indexIn(const QByteArray &haystack, int from = 0) const noexcept
|
||||
qsizetype indexIn(const QByteArray &haystack, qsizetype from = 0) const noexcept
|
||||
{ return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); }
|
||||
int indexIn(const char *haystack, int hlen, int from = 0) const noexcept
|
||||
qsizetype indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const noexcept
|
||||
{ return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); }
|
||||
|
||||
QByteArray pattern() const { return QByteArray(m_pattern, int(N - 1)); }
|
||||
QByteArray pattern() const { return QByteArray(m_pattern, qsizetype(N - 1)); }
|
||||
};
|
||||
|
||||
template <uint N>
|
||||
template <size_t N>
|
||||
constexpr QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) noexcept
|
||||
{ return QStaticByteArrayMatcher<N>(pattern); }
|
||||
|
||||
|
@ -31,6 +31,13 @@
|
||||
|
||||
#include <qbytearraymatcher.h>
|
||||
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
|
||||
#if QT_CONFIG(cxx11_future)
|
||||
# include <thread>
|
||||
#endif
|
||||
|
||||
// COM interface
|
||||
#if defined(Q_OS_WIN) && defined(interface)
|
||||
# undef interface
|
||||
@ -44,6 +51,7 @@ private slots:
|
||||
void interface();
|
||||
void indexIn();
|
||||
void staticByteArrayMatcher();
|
||||
void haystacksWithMoreThan4GiBWork();
|
||||
};
|
||||
|
||||
void tst_QByteArrayMatcher::interface()
|
||||
@ -208,6 +216,72 @@ void tst_QByteArrayMatcher::staticByteArrayMatcher()
|
||||
|
||||
}
|
||||
|
||||
void tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork()
|
||||
{
|
||||
#if QT_POINTER_SIZE > 4
|
||||
// use a large needle to trigger long skips in the Boyer-Moore algorithm
|
||||
// (to speed up the test)
|
||||
constexpr std::string_view needle = LONG_STRING_256;
|
||||
|
||||
//
|
||||
// GIVEN: a haystack with more than 4 GiB of data
|
||||
//
|
||||
|
||||
// don't use QByteArray because freeSpaceAtEnd() may break reserve()
|
||||
// semantics and a realloc is the last thing we need here
|
||||
std::string large;
|
||||
QElapsedTimer timer;
|
||||
timer.start();
|
||||
constexpr size_t GiB = 1024 * 1024 * 1024;
|
||||
constexpr size_t BaseSize = 4 * GiB + 1;
|
||||
try {
|
||||
large.reserve(BaseSize + needle.size());
|
||||
large.resize(BaseSize, '\0');
|
||||
large.append(needle);
|
||||
} catch (const std::bad_alloc &) {
|
||||
QSKIP("Could not allocate 4GiB plus a couple hundred bytes of RAM.");
|
||||
}
|
||||
QCOMPARE(large.size(), BaseSize + needle.size());
|
||||
qDebug("created dataset in %lld ms", timer.elapsed());
|
||||
|
||||
# if QT_CONFIG(cxx11_future)
|
||||
using MaybeThread = std::thread;
|
||||
# else
|
||||
struct MaybeThread {
|
||||
std::function<void()> func;
|
||||
void join() { func(); }
|
||||
};
|
||||
# endif
|
||||
|
||||
//
|
||||
// WHEN: trying to match an occurrence past the 4GiB mark
|
||||
//
|
||||
|
||||
qsizetype dynamicResult, staticResult;
|
||||
|
||||
auto t = MaybeThread{[&]{
|
||||
QByteArrayMatcher m(needle);
|
||||
dynamicResult = m.indexIn(large);
|
||||
}};
|
||||
{
|
||||
static_assert(needle == LONG_STRING_256); // need a string literal in the following line:
|
||||
QStaticByteArrayMatcher m(LONG_STRING_256);
|
||||
staticResult = m.indexIn(large.data(), large.size());
|
||||
}
|
||||
t.join();
|
||||
|
||||
//
|
||||
// THEN: the result index is not trucated
|
||||
//
|
||||
|
||||
QCOMPARE(staticResult, qsizetype(BaseSize));
|
||||
QCOMPARE(dynamicResult, qsizetype(BaseSize));
|
||||
#else
|
||||
QSKIP("This test is 64-bit only.");
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#undef LONG_STRING_256
|
||||
#undef LONG_STRING_128
|
||||
#undef LONG_STRING__64
|
||||
|
Loading…
x
Reference in New Issue
Block a user