From 4c1a82e4cf82ab5c5393f01207625d7793f65f9a Mon Sep 17 00:00:00 2001 From: Giuseppe D'Angelo Date: Tue, 6 Aug 2024 19:07:42 +0200 Subject: [PATCH] QRegularExpression: coalesce consecutive * tokens in wildcards When converting a wildcard into a regexp, convert a series of consecutive '*' tokens into just one '.*' (instead of a series of '.*'). The pattern matched is the same, but we reduce the effects of possible catastrophic backtracking. I'm not actually sure whether PCRE optimizes this case out on its own or not; Perl appears not to. Change-Id: Ia83336391593d56cf6d8332c96649a034a83a15b Fixes: QTBUG-127672 Reviewed-by: Thiago Macieira (cherry picked from commit a041cd35214e57a189aaa1f1f77199ff42303f83) Reviewed-by: Qt Cherry-pick Bot --- src/corelib/text/qregularexpression.cpp | 3 +++ .../tst_qregularexpression.cpp | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp index 3e3bb5a884c..099c0cdfbbc 100644 --- a/src/corelib/text/qregularexpression.cpp +++ b/src/corelib/text/qregularexpression.cpp @@ -1964,6 +1964,9 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil switch (c.unicode()) { case '*': rx += settings.starEscape; + // Coalesce sequences of * + while (i < wclen && wc[i] == u'*') + ++i; break; case '?': rx += settings.questionMarkEscape; diff --git a/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp b/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp index 53a6d83d502..e0294e96103 100644 --- a/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp +++ b/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp @@ -2486,6 +2486,30 @@ void tst_QRegularExpression::wildcard_data() addRow("foo*bar", "foo\nbar", true, true); addRow("foo*bar", "foo\r\nbar", true, true); + addRow("foo**********bar", "foo/fie/baz/bar", false, true); + 
addRow("foo**********bar", "foo bar bar test bar bar bar", true, true); + addRow("foo**********bar", "foo\tbar", true, true); + addRow("foo**********bar", "foo\nbar", true, true); + addRow("foo**********bar", "foo\r\nbar", true, true); + + addRow("foo**********bar", "foo/fie/baz/baz", false, false); + addRow("foo**********bar", "foo bar bar test bar bar baz", false, false); + addRow("foo**********bar", "foo\tbaz", false, false); + addRow("foo**********bar", "foo\nbaz", false, false); + addRow("foo**********bar", "foo\r\nbaz", false, false); + + addRow("foo*****x*****bar", "foo/fie/bax/bar", false, true); + addRow("foo*****x*****bar", "foo bar bax test bar bar bar", true, true); + addRow("foo*****x*****bar", "foo\tbar foo\tbax foo\tbar foo\tbar", true, true); + addRow("foo*****x*****bar", "foo\nx\nbar", true, true); + addRow("foo*****x*****bar", "foo\r\nxbar", true, true); + + addRow("foo*****x*****bar", "foo/fie/baz/bar", false, false); + addRow("foo*****x*****bar", "foo bar baz test bar bar bar", false, false); + addRow("foo*****x*****bar", "foo\tbar foo\tbar foo\tbar foo\tbar", false, false); + addRow("foo*****x*****bar", "foo\nbar", false, false); + addRow("foo*****x*****bar", "foo\r\nbar", false, false); + // different anchor modes addRow("foo", "afoob", false, false, true); addRow("foo", "afoob", true, true, false);