diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 5f151ffbd67..0e3bcf914f0 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -10996,8 +10996,14 @@ qsizetype QtPrivate::count(QStringView haystack, const QRegularExpression &re) QRegularExpressionMatch match = re.matchView(haystack, index + 1); if (!match.hasMatch()) break; - index = match.capturedStart(); count++; + + // Search again, from the next character after the beginning of this + // capture. If the capture starts with a surrogate pair, both together + // count as "one character". + index = match.capturedStart(); + if (index < len && haystack[index].isHighSurrogate()) + ++index; } return count; } diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp index aec2d2897a4..701a33a2635 100644 --- a/tests/auto/corelib/text/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp @@ -2041,6 +2041,21 @@ void tst_QString::count() QTest::ignoreMessage(QtWarningMsg, ignoreMessagePattern); QCOMPARE(emptyStr.count(QRegularExpression("invalid regex\\")), 0); #endif + + QString nonBmpString = u8"\U00010000\U00010000abc\U00010000"; + QCOMPARE(nonBmpString.count(u"\U00010000"), 3); +#if QT_CONFIG(regularexpression) + QCOMPARE(nonBmpString.count(QRegularExpression(u8"\U00010000")), 3); + QCOMPARE(nonBmpString.count(QRegularExpression(u8"\U00010000a?")), 3); + QCOMPARE(nonBmpString.count(QRegularExpression(u8"\U00010000a")), 1); + QCOMPARE(nonBmpString.count(QRegularExpression(".")), 6); + + // can't search for unpaired surrogates + QTest::ignoreMessage(QtWarningMsg, ignoreMessagePattern); + QCOMPARE(nonBmpString.count(QRegularExpression(QChar(0xd800))), 0); + QTest::ignoreMessage(QtWarningMsg, ignoreMessagePattern); + QCOMPARE(nonBmpString.count(QRegularExpression(QChar(0xdc00))), 0); +#endif } void tst_QString::contains()