Add wildcard-to-regexp support to QRegularExpression

This method puts QRegularExpression on par with QRegExp and
allows it to replace that class wherever a wildcard expression can be
set through an API (e.g. QSortFilterProxyModel::setFilterWildcard).

For other use cases, see QTBUG-34052.

[ChangeLog][QRegularExpression] Implemented support for wildcard
patterns.
Warning: QRegularExpression might not give exactly the same result
as QRegExp, because its implementation strictly follows the glob pattern
definition for wildcard expressions.

Change-Id: I5ed4617ca679159430c3d46da3449f6b3100e366
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@qt.io>
This commit is contained in:
Samuel Gaist 2017-11-10 16:48:50 +01:00
parent b1d71ef8ad
commit 6d0044f1dc
6 changed files with 274 additions and 1 deletions

View File

@ -798,6 +798,83 @@ Q_AUTOTEST_EXPORT unsigned int qt_qregularexpression_optimize_after_use_count =
static const unsigned int qt_qregularexpression_optimize_after_use_count = 10;
#endif // QT_BUILD_INTERNAL
namespace QtPrivate {
/*!
    \internal

    Translates the glob-style wildcard pattern \a wildcardString into a
    regular expression pattern string and returns it.

    Translation rules:
    \list
    \li \c{*} becomes \c{.*} and \c{?} becomes \c{.}
    \li the characters \c{$ ( ) + . ^ { | }} are escaped with a backslash
    \li \c{[...]} is copied through as a character class
    \li \c{[!...]} becomes \c{.(?<![...])}: any character, followed by a
        negative look-behind asserting that this character is not in the set
    \li every other character is copied unchanged
    \endlist

    Inside a bracket expression a \c{]} that immediately follows the opening
    \c{[} (or \c{[!}) is a literal member of the set, and each backslash is
    doubled. An unterminated bracket expression is copied as is, so the
    returned pattern contains an unterminated character class.

    No anchors are added to the returned pattern.
*/
QString wildcardToRegularExpression(const QString &wildcardString)
{
    const int wclen = wildcardString.length();
    QString rx;
    int i = 0;
    // True while the look-behind group opened for "[!" still has to be closed.
    bool hasNegativeBracket = false;
    const QChar *wc = wildcardString.unicode();

    while (i < wclen) {
        const QChar c = wc[i++];
        switch (c.unicode()) {
        case '*':
            rx += QLatin1String(".*");
            break;
        case '?':
            rx += QLatin1Char('.');
            break;
        case '$':
        case '(':
        case ')':
        case '+':
        case '.':
        case '^':
        case '{':
        case '|':
        case '}':
            // Regexp metacharacters with no special meaning in a wildcard.
            rx += QLatin1Char('\\');
            rx += c;
            break;
        case '[':
            if (i < wclen && wc[i] == QLatin1Char('!')) {
                // [!abc] / [!a-c]: match one character, then assert through
                // a negative look-behind that it is not part of the set.
                rx += QLatin1String(".(?<![");
                ++i;
                hasNegativeBracket = true;
            } else {
                rx += c;
            }

            // A ']' in first position is a literal member of the set. The
            // check has to look at the input (wc), not at the output (rx).
            if (i < wclen && wc[i] == QLatin1Char(']'))
                rx += wc[i++];

            // Copy the set up to, but not including, the closing ']', which
            // is handled by the ']' case on the next iteration.
            while (i < wclen && wc[i] != QLatin1Char(']')) {
                if (wc[i] == QLatin1Char('\\'))
                    rx += QLatin1Char('\\');
                rx += wc[i++];
            }
            break;
        case ']':
            rx += c;
            // Closes the negative look-behind expression.
            if (hasNegativeBracket) {
                rx += QLatin1Char(')');
                hasNegativeBracket = false;
            }
            break;
        default:
            rx += c;
            break;
        }
    }

    return rx;
}
}
/*!
\internal
*/
@ -1553,6 +1630,47 @@ void QRegularExpression::setPattern(const QString &pattern)
d->pattern = pattern;
}
/*!
\since 5.12
Sets the pattern string of the regular expression to \a wildcard pattern.
The pattern options are left unchanged.
\warning Unlike QRegExp, this implementation follows closely the definition
of wildcard for glob patterns:
\table
\row \li \b{c}
\li Any character represents itself apart from those mentioned
below. Thus \b{c} matches the character \e c.
\row \li \b{?}
\li Matches any single character. It is the same as
\b{.} in full regexps.
\row \li \b{*}
\li Matches zero or more of any characters. It is the
same as \b{.*} in full regexps.
\row \li \b{[abc]}
\li Matches one character given in the bracket.
\row \li \b{[a-c]}
\li Matches one character from the range given in the bracket.
\row \li \b{[!abc]}
\li Matches one character that is not given in the bracket.
\row \li \b{[!a-c]}
\li matches one character that is not from the range given in the
bracket.
\endtable
\note This function generates a regular expression that will act following
the wildcard pattern given. However the content of the regular expression
will not be the same as the one set.
\sa pattern(), setPattern()
*/
void QRegularExpression::setWildcardPattern(const QString &pattern)
{
setPattern(QtPrivate::wildcardToRegularExpression(pattern));
}
/*!
Returns the pattern options for the regular expression.

View File

@ -96,6 +96,7 @@ public:
QString pattern() const;
void setPattern(const QString &pattern);
void setWildcardPattern(const QString &pattern);
bool isValid() const;
int patternErrorOffset() const;

View File

@ -0,0 +1,70 @@
/****************************************************************************
**
** Copyright (C) 2018 Samuel Gaist <samuel.gaist@edeltech.ch>
** Copyright (C) 2018 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QREGULAREXPRESSION_P_H
#define QREGULAREXPRESSION_P_H
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
#include <private/qglobal_p.h>
#include <qregularexpression.h>
#include <qstring.h>
QT_REQUIRE_CONFIG(regularexpression);
QT_BEGIN_NAMESPACE
namespace QtPrivate {
// Returns the regular expression pattern string corresponding to the
// glob-style wildcard pattern \a wildcardString.
QString wildcardToRegularExpression(const QString &wildcardString);
}
QT_END_NAMESPACE
#endif

View File

@ -181,7 +181,9 @@ qtConfig(datetimeparser) {
qtConfig(regularexpression) {
QMAKE_USE_PRIVATE += pcre2
HEADERS += tools/qregularexpression.h
HEADERS += \
tools/qregularexpression.h \
tools/qregularexpression_p.h
SOURCES += tools/qregularexpression.cpp
}

View File

@ -2066,3 +2066,80 @@ void tst_QRegularExpression::QStringAndQStringRefEquivalence()
}
}
}
// Data for wildcard(): a wildcard pattern, a subject string and the offset
// at which the first match is expected to start (-1 when nothing matches).
void tst_QRegularExpression::wildcard_data()
{
    QTest::addColumn<QString>("pattern");
    QTest::addColumn<QString>("string");
    QTest::addColumn<int>("foundIndex");

    auto addRow = [](const char *pattern, const char *string, int foundIndex) {
        // The tag is built from pattern and subject: the same pattern is used
        // with several subjects, and the pattern must not be handed to
        // addRow() as the printf-style format string itself.
        QTest::addRow("%s@%s", pattern, string) << pattern << string << foundIndex;
    };

    addRow("*.html", "test.html", 0);
    addRow("*.html", "test.htm", -1);
    addRow("bar*", "foobarbaz", 3);
    addRow("*", "Qt Rocks!", 0);
    addRow(".html", "test.html", 4);
    addRow(".h", "test.cpp", -1);
    addRow(".???l", "test.html", 4);
    addRow("?", "test.html", 0);
    addRow("?m", "test.html", 6);
    addRow(".h[a-z]ml", "test.html", 4);
    addRow(".h[A-Z]ml", "test.html", -1);
    addRow(".h[A-Z]ml", "test.hTml", 4);
    addRow(".h[!A-Z]ml", "test.hTml", -1);
    addRow(".h[!A-Z]ml", "test.html", 4);
    addRow(".h[!T]ml", "test.hTml", -1);
    addRow(".h[!T]ml", "test.html", 4);
    addRow(".h[!T]m[!L]", "test.htmL", -1);
    addRow(".h[!T]m[!L]", "test.html", 4);
    addRow(".h[][!]", "test.h]ml", 4);
    addRow(".h[][!]", "test.h[ml", 4);
    addRow(".h[][!]", "test.h!ml", 4);
}
void tst_QRegularExpression::wildcard()
{
QFETCH(QString, pattern);
QFETCH(QString, string);
QFETCH(int, foundIndex);
QRegularExpression re;
re.setWildcardPattern(pattern);
if (forceOptimize)
re.optimize();
QRegularExpressionMatch match = re.match(string);
QCOMPARE(match.capturedStart(), foundIndex);
}
// Data for testInvalidWildcard(): a wildcard pattern and whether the regular
// expression generated from it is expected to be valid.
void tst_QRegularExpression::testInvalidWildcard_data()
{
    QTest::addColumn<QString>("pattern");
    QTest::addColumn<bool>("isValid");

    const auto addCase = [](const char *tag, const char *wildcard, bool expectedValid) {
        QTest::newRow(tag) << wildcard << expectedValid;
    };

    // Properly closed, or no bracket expression opened at all.
    addCase("valid []", "[abc]", true);
    addCase("valid ending ]", "abc]", true);

    // Bracket expressions that are never closed.
    addCase("invalid [", "[abc", false);
    addCase("ending [", "abc[", false);
    addCase("ending [^", "abc[^", false);
    addCase("ending [\\", "abc[\\", false);
    addCase("ending []", "abc[]", false);
    addCase("ending [[", "abc[[", false);
}
// Checks that the validity of the regular expression generated from each
// wildcard pattern matches the expectation from testInvalidWildcard_data().
void tst_QRegularExpression::testInvalidWildcard()
{
    QFETCH(QString, pattern);
    QFETCH(bool, isValid);

    QRegularExpression re;
    re.setWildcardPattern(pattern);

    if (forceOptimize)
        re.optimize();

    QCOMPARE(re.isValid(), isValid);
}

View File

@ -69,6 +69,11 @@ private slots:
void JOptionUsage();
void QStringAndQStringRefEquivalence();
void wildcard_data();
void wildcard();
void testInvalidWildcard_data();
void testInvalidWildcard();
private:
void provideRegularExpressions();
};