QRegularExpression: coalesce consecutive * tokens in wildcards

When converting a wildcard into a regexp, convert a series of
consecutive '*' tokens into just one '.*' (instead of a series of '.*').
The pattern matched is the same, but we reduce the effects of a
possible catastrophic backtracking. I'm not actually sure whether
PCRE optimizes this case away on its own or not; Perl appears
not to.

Change-Id: Ia83336391593d56cf6d8332c96649a034a83a15b
Fixes: QTBUG-127672
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
(cherry picked from commit a041cd35214e57a189aaa1f1f77199ff42303f83)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
Giuseppe D'Angelo 2024-08-06 19:07:42 +02:00 committed by Qt Cherry-pick Bot
parent a903414898
commit 4c1a82e4cf
2 changed files with 27 additions and 0 deletions

View File

@ -1964,6 +1964,9 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
switch (c.unicode()) {
case '*':
rx += settings.starEscape;
// Coalesce sequences of *
while (i < wclen && wc[i] == u'*')
++i;
break;
case '?':
rx += settings.questionMarkEscape;

View File

@ -2486,6 +2486,30 @@ void tst_QRegularExpression::wildcard_data()
addRow("foo*bar", "foo\nbar", true, true);
addRow("foo*bar", "foo\r\nbar", true, true);
addRow("foo**********bar", "foo/fie/baz/bar", false, true);
addRow("foo**********bar", "foo bar bar test bar bar bar", true, true);
addRow("foo**********bar", "foo\tbar", true, true);
addRow("foo**********bar", "foo\nbar", true, true);
addRow("foo**********bar", "foo\r\nbar", true, true);
addRow("foo**********bar", "foo/fie/baz/baz", false, false);
addRow("foo**********bar", "foo bar bar test bar bar baz", false, false);
addRow("foo**********bar", "foo\tbaz", false, false);
addRow("foo**********bar", "foo\nbaz", false, false);
addRow("foo**********bar", "foo\r\nbaz", false, false);
addRow("foo*****x*****bar", "foo/fie/bax/bar", false, true);
addRow("foo*****x*****bar", "foo bar bax test bar bar bar", true, true);
addRow("foo*****x*****bar", "foo\tbar foo\tbax foo\tbar foo\tbar", true, true);
addRow("foo*****x*****bar", "foo\nx\nbar", true, true);
addRow("foo*****x*****bar", "foo\r\nxbar", true, true);
addRow("foo*****x*****bar", "foo/fie/baz/bar", false, false);
addRow("foo*****x*****bar", "foo bar baz test bar bar bar", false, false);
addRow("foo*****x*****bar", "foo\tbar foo\tbar foo\tbar foo\tbar", false, false);
addRow("foo*****x*****bar", "foo\nbar", false, false);
addRow("foo*****x*****bar", "foo\r\nbar", false, false);
// different anchor modes
addRow("foo", "afoob", false, false, true);
addRow("foo", "afoob", true, true, false);