From a98715c127fb569c21a0402d3c0e76ca0bb75c0d Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Sun, 19 May 2024 16:43:24 -0500 Subject: [PATCH] QByteArrayView: use memmem() to search for substrings Let's just use whatever your C library provides, on the assumption that it is optimized. Because memmem() itself does the memchr() call when the size of the needle is 1, we also skip the 1-char findByteArray() call. memmem() has been available on Linux since glibc 2.0, FreeBSD since 6.0, OpenBSD since 5.4, NetBSD, Apple OSes, etc., and even Solaris. If your OS doesn't have it, you should ask your vendor to add it or consider upgrading to an OS that already has it. The glibc implementation[1] uses hashing for short needles and, for longer ones, the Two Way string search algorithm with what it describes as "a bad character shift table similar to the Boyer-Moore algorithm". The FreeBSD implementation[2] (which its man page says came from MUSL) uses 1-, 2-, 3-, and 4-byte search specializations before falling back to the Two Way search algorithm too. 
[1] https://codebrowser.dev/glibc/glibc/string/memmem.c.html [2] https://github.com/freebsd/freebsd-src/blob/main/lib/libc/string/memmem.c Change-Id: If05cb740b64f42eba21efffd17d101e24528f7fd Reviewed-by: Ahmad Samir --- src/corelib/configure.cmake | 20 ++++++++++++++++++++ src/corelib/global/qconfig-bootstrapped.h | 1 + src/corelib/text/qbytearraymatcher.cpp | 12 ++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/corelib/configure.cmake b/src/corelib/configure.cmake index 7c07aa1c88a..b34086c8ada 100644 --- a/src/corelib/configure.cmake +++ b/src/corelib/configure.cmake @@ -324,6 +324,22 @@ linkat(AT_FDCWD, \"foo\", AT_FDCWD, \"bar\", AT_SYMLINK_FOLLOW); } ") +# memmem +qt_config_compile_test(memmem + LABEL "memmem()" + CODE +#define _APPLE_SAUCE 1 /* Apple doesn't require anything */ +"#define _BSD_SOURCE 1 /* For FreeBSD */ +#define _GNU_SOURCE 1 /* For glibc, Bionic */ +#include + +int main(void) +{ + const void *r = memmem(\"abc\", 3, \"bc\", 2); + (void)r; + return 0; +}") + # memrchr qt_config_compile_test(memrchr LABEL "memrchr()" @@ -569,6 +585,10 @@ qt_feature("std-atomic64" PUBLIC LABEL "64 bit atomic operations" CONDITION WrapAtomic_FOUND ) +qt_feature("memmem" PRIVATE + LABEL "C library function memmem()" + CONDITION TEST_memmem +) qt_feature("memrchr" PRIVATE LABEL "C library function memrchr()" CONDITION TEST_memrchr diff --git a/src/corelib/global/qconfig-bootstrapped.h b/src/corelib/global/qconfig-bootstrapped.h index 0596b9d9f4c..c2fe4661f6c 100644 --- a/src/corelib/global/qconfig-bootstrapped.h +++ b/src/corelib/global/qconfig-bootstrapped.h @@ -72,6 +72,7 @@ # define QT_FEATURE_linkat -1 #endif #define QT_FEATURE_lttng -1 +#define QT_FEATURE_memmem -1 #define QT_FEATURE_memrchr -1 #define QT_NO_QOBJECT #define QT_FEATURE_process -1 diff --git a/src/corelib/text/qbytearraymatcher.cpp b/src/corelib/text/qbytearraymatcher.cpp index a332f035efa..9f27e10f3d5 100644 --- a/src/corelib/text/qbytearraymatcher.cpp +++ 
b/src/corelib/text/qbytearraymatcher.cpp @@ -3,6 +3,11 @@ #include "qbytearraymatcher.h" +#include +#ifndef QT_BOOTSTRAPPED +# include +#endif + #include QT_BEGIN_NAMESPACE @@ -238,8 +243,10 @@ qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByt const auto haystack0 = haystack.data(); const auto l = haystack.size(); const auto sl = needle.size(); +#if !QT_CONFIG(memmem) if (sl == 1) return findByteArray(haystack, from, needle.front()); +#endif if (from < 0) from += l; @@ -250,6 +257,11 @@ qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByt if (!l) return -1; +#if QT_CONFIG(memmem) + auto where = memmem(haystack0 + from, l - from, needle.data(), sl); + return where ? static_cast(where) - haystack0 : -1; +#endif + /* We use the Boyer-Moore algorithm in cases where the overhead for the skip table should pay off, otherwise we use a simple