Replace qt_normalizePathSegments algorithm with QUrl's removeDotsFromPath

Commit 4b1547adc9b195e6acc90471fc48dec7ee0c429d rewrote the latter for
simplicity and performance, so copy it over with the intent of merging the
two together. In this copy, however, we keep the behavior introduced by
commit eaf4438b3511c8380b9b691b656a87a60e342e2, which made the
normalization match the behavior found in browsers, but didn't apply to
QUrl::resolved (that's the next commit)

Task-number: QTBUG-120396
Change-Id: I2670423f6c6a5339771ffffdf15ebad46fd25cb4
Reviewed-by: David Faure <david.faure@kdab.com>
(cherry picked from commit 78e2d24752d8b6652aac89d1e79f245bc88281c6)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
(cherry picked from commit 7359d63209f56c8cb7c747307c0726d70e592927)
This commit is contained in:
Thiago Macieira 2024-09-03 15:48:41 -05:00 committed by Qt Cherry-pick Bot
parent 804b099674
commit 492c646735
3 changed files with 155 additions and 145 deletions

View File

@ -2199,168 +2199,173 @@ bool QDir::match(const QString &filter, const QString &fileName)
/*!
\internal
Returns \a path with redundant directory separators removed,
and "."s and ".."s resolved (as far as possible).
Updates \a path with redundant directory separators removed, and "."s and
".."s resolved (as far as possible). It returns \c false if there were ".."
segments left over that attempted to go up past the root (only applies to
absolute paths), or \c true otherwise.
This method is shared with QUrl, so it doesn't deal with QDir::separator(),
nor does it remove the trailing slash, if any.
When dealing with URLs, we are following section 5.2.4 (Remove dot
segments) from http://www.ietf.org/rfc/rfc3986.txt. URL mode differs from
local path mode in these ways:
1) it can set *path to empty ("." becomes "")
2) directory path outputs end in / ("a/.." becomes "a/" instead of "a")
3) a sequence of "//" is treated as multiple path levels ("a/b//.." becomes
"a/b/" and "a/b//../.." becomes "a/"), which matches the behavior
observed in web browsers.
*/
QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok)
bool qt_normalizePathSegments(QString *path, QDirPrivate::PathNormalizations flags)
{
const bool allowUncPaths = flags.testAnyFlag(QDirPrivate::AllowUncPaths);
const bool isRemote = flags.testAnyFlag(QDirPrivate::RemotePath);
const qsizetype len = name.size();
const qsizetype prefixLength = rootLength(*path, allowUncPaths);
if (ok)
*ok = false;
// RFC 3986 says: "The input buffer is initialized with the now-appended
// path components and the output buffer is initialized to the empty
// string."
const QChar *in = path->constBegin();
if (len == 0)
return name;
qsizetype i = len - 1;
QVarLengthArray<char16_t> outVector(len);
qsizetype used = len;
char16_t *out = outVector.data();
const char16_t *p = reinterpret_cast<const char16_t *>(name.data());
const char16_t *prefix = p;
qsizetype up = 0;
const qsizetype prefixLength = rootLength(name, allowUncPaths);
p += prefixLength;
i -= prefixLength;
// replicate trailing slash (i > 0 checks for emptiness of input string p)
// except for remote paths because there can be /../ or /./ ending
if (i > 0 && p[i] == '/' && !isRemote) {
out[--used] = '/';
--i;
// Scan the input for a "." or ".." segment. If there isn't any, we may not
// need to modify this path at all. Also scan for "//" segments, which
// will be normalized if the path is local.
qsizetype i = prefixLength;
qsizetype n = path->size();
for (bool lastWasSlash = true; i < n; ++i) {
if (lastWasSlash && in[i] == u'.') {
if (i + 1 == n || in[i + 1] == u'/')
break;
if (in[i + 1] == u'.' && (i + 2 == n || in[i + 2] == u'/'))
break;
}
if (!isRemote && lastWasSlash && in[i] == u'/' && i > 0) {
// backtrack one, so the algorithm below gobbles up the remaining
// slashes
--i;
break;
}
lastWasSlash = in[i] == u'/';
}
if (i == n)
return true;
auto isDot = [](const char16_t *p, qsizetype i) {
return i > 1 && p[i - 1] == '.' && p[i - 2] == '/';
};
auto isDotDot = [](const char16_t *p, qsizetype i) {
return i > 2 && p[i - 1] == '.' && p[i - 2] == '.' && p[i - 3] == '/';
};
QChar *out = path->data(); // detaches
const QChar *start = out + prefixLength;
const QChar *end = out + path->size();
out += i;
in = out;
while (i >= 0) {
// copy trailing slashes for remote urls
if (p[i] == '/') {
if (isRemote && !up) {
if (isDot(p, i)) {
i -= 2;
// We implement a modified algorithm compared to RFC 3986, for efficiency.
bool ok = true;
do {
#if 0 // to see in the debugger
QString output = QStringView(path->constBegin(), out).toString();
QStringView input(in, end);
#endif
// First, copy the preceding slashes, so we can look at the segment's
// content. If the path is part of a URL, we copy all slashes, otherwise
// just one.
if (in[0] == u'/') {
*out++ = *in++;
while (in < end && in[0] == u'/') {
if (isRemote)
*out++ = *in++;
else
++in;
// Note: we may exit this loop with in == end, in which case we
// *shouldn't* dereference *in. But since we are pointing to a
// detached, non-empty QString, we know there's a u'\0' at the
// end, so dereferencing is safe.
}
}
// Is this path segment either "." or ".."?
enum { Nothing, Dot, DotDot } type = Nothing;
if (in[0] == u'.') {
if (in + 1 == end || in[1] == u'/')
type = Dot;
else if (in[1] == u'.' && (in + 2 == end || in[2] == u'/'))
type = DotDot;
}
if (type == Nothing) {
// If it is neither, then we copy this segment.
while (in < end && in[0] != u'/')
*out++ = *in++;
continue;
}
// Otherwise, we skip it and remove preceding slashes (if
// any, exactly one if part of a URL, all otherwise) from the
// output. If it is "..", we remove the segment before that and
// preceding slashes too in a similar fashion, if they are there.
if (type == DotDot) {
if (Q_UNLIKELY(out == start)) {
// we can't go further up from here, so we "re-root"
// without cleaning this segment
ok = false;
if (!isRemote) {
*out++ = u'.';
*out++ = u'.';
if (in + 2 != end) {
Q_ASSERT(in[2] == u'/');
*out++ = u'/';
++in;
}
start = out;
in += 2;
continue;
}
out[--used] = p[i];
}
--i;
continue;
while (out > start && *--out != u'/')
;
while (!isRemote && out > start && out[-1] == u'/')
--out;
while (out > start && out[-1] != u'/')
--out;
in += 2; // the two dots
} else {
++in; // the one dot
}
// remove current directory
if (p[i] == '.' && (i == 0 || p[i-1] == '/')) {
--i;
continue;
if (out > start) {
// backtrack one or all the slashes (so "/tmp///" -> "/tmp/")
if (out[-1] == u'/' && in != end)
--out;
while (!isRemote && out > start && out[-1] == u'/')
--out;
}
// detect up dir
if (i >= 1 && p[i] == '.' && p[i-1] == '.' && (i < 2 || p[i - 2] == '/')) {
++up;
i -= i >= 2 ? 3 : 2;
if (isRemote) {
// moving up should consider empty path segments too (/path//../ -> /path/)
while (i > 0 && up && p[i] == '/') {
--up;
--i;
}
}
continue;
if (out == start) {
// We've reached the root. Make sure we don't turn a relative path
// to absolute or, in the case of local paths that are already
// absolute, into UNC.
// Note: this will turn ".//a" into "a" even for URLs!
if (in != end && in[0] == u'/')
++in;
while (prefixLength == 0 && in != end && in[0] == u'/')
++in;
}
} while (in < end);
// prepend a slash before copying when not empty
if (!up && used != len && out[used] != '/')
out[--used] = '/';
path->truncate(out - path->constBegin());
if (!isRemote && path->isEmpty())
*path = u"."_s;
// skip or copy
while (i >= 0) {
if (p[i] == '/') {
// copy all slashes as is for remote urls if they are not part of /./ or /../
if (isRemote && !up) {
while (i > 0 && p[i] == '/' && !isDotDot(p, i)) {
// we return false only if the path was absolute
return ok || prefixLength == 0;
}
if (isDot(p, i)) {
i -= 2;
continue;
}
out[--used] = p[i];
--i;
}
// in case of /./, jump over
if (isDot(p, i))
i -= 2;
break;
}
--i;
break;
}
// actual copy
if (!up)
out[--used] = p[i];
--i;
}
// decrement up after copying/skipping
if (up)
--up;
}
// Indicate failure when ".." are left over for an absolute path.
QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok)
{
// temporary compat
QString copy = name;
bool r = qt_normalizePathSegments(&copy, flags);
if (ok)
*ok = prefixLength == 0 || up == 0;
// add remaining '..'
while (up && !isRemote) {
if (used != len && out[used] != '/') // is not empty and there isn't already a '/'
out[--used] = '/';
out[--used] = '.';
out[--used] = '.';
--up;
}
bool isEmpty = used == len;
if (prefixLength) {
if (!isEmpty && out[used] == '/') {
// Even though there is a prefix the out string is a slash. This happens, if the input
// string only consists of a prefix followed by one or more slashes. Just skip the slash.
++used;
}
for (qsizetype i = prefixLength - 1; i >= 0; --i)
out[--used] = prefix[i];
} else {
if (isEmpty) {
// After resolving the input path, the resulting string is empty (e.g. "foo/.."). Return
// a dot in that case.
out[--used] = '.';
} else if (out[used] == '/') {
// After parsing the input string, out only contains a slash. That happens whenever all
// parts are resolved and there is a trailing slash ("./" or "foo/../" for example).
// Prepend a dot to have the correct return value.
out[--used] = '.';
}
}
// If path was not modified return the original value
if (used == 0)
return name;
return QStringView(out + used, len - used).toString();
*ok = r;
return copy;
}
static QString qt_cleanPath(const QString &path, bool *ok)

View File

@ -81,6 +81,7 @@ public:
Q_DECLARE_OPERATORS_FOR_FLAGS(QDirPrivate::PathNormalizations)
Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok = nullptr);
bool qt_normalizePathSegments(QString *path, QDirPrivate::PathNormalizations flags);
QT_END_NAMESPACE

View File

@ -1385,17 +1385,22 @@ void tst_QDir::normalizePathSegments_data()
QTest::newRow("data6") << "/./" << HandleUnc << "/";
QTest::newRow("data7") << "/.." << HandleUnc << "/..";
QTest::newRow("data8") << "/../" << HandleUnc << "/../";
QTest::newRow("/../.") << "/../." << HandleUnc << "/../";
QTest::newRow("/.././") << "/.././" << HandleUnc << "/../";
QTest::newRow("/../..") << "/../.." << HandleUnc << "/../..";
QTest::newRow("data9") << "." << HandleUnc << ".";
QTest::newRow("data10") << "./" << HandleUnc << "./";
QTest::newRow("data10") << "./" << HandleUnc << ".";
QTest::newRow("data11") << "./." << HandleUnc << ".";
QTest::newRow("data12") << "././" << HandleUnc << "./";
QTest::newRow("data12") << "././" << HandleUnc << ".";
QTest::newRow("data13") << ".." << HandleUnc << "..";
QTest::newRow("data14") << "../" << HandleUnc << "../";
QTest::newRow("data15") << "../." << HandleUnc << "..";
QTest::newRow("data15") << "../." << HandleUnc << "../";
QTest::newRow("data16") << ".././" << HandleUnc << "../";
QTest::newRow("data17") << "../.." << HandleUnc << "../..";
QTest::newRow("data18") << "../../" << HandleUnc << "../../";
QTest::newRow("./file1.txt") << "./file1.txt" << HandleUnc << "file1.txt";
QTest::newRow("data19") << ".//file1.txt" << HandleUnc << "file1.txt";
QTest::newRow("/foo/bar//file1.txt") << "/foo/bar//file1.txt" << HandleUnc << "/foo/bar/file1.txt";
QTest::newRow("data20") << "/foo/bar/..//file1.txt" << HandleUnc << "/foo/file1.txt";
QTest::newRow("data21") << "foo/.." << HandleUnc << ".";
QTest::newRow("data22") << "./foo/.." << HandleUnc << ".";
@ -1420,7 +1425,7 @@ void tst_QDir::normalizePathSegments_data()
#else
QTest::newRow("data37") << "c:/." << HandleUnc << "c:";
QTest::newRow("data38") << "c:/.." << HandleUnc << ".";
QTest::newRow("data39") << "c:/../" << HandleUnc << "./";
QTest::newRow("data39") << "c:/../" << HandleUnc << ".";
#endif
QTest::newRow("data40") << "c:/./" << HandleUnc << "c:/";
QTest::newRow("data41") << "foo/../foo/.." << HandleUnc << ".";
@ -1447,10 +1452,9 @@ void tst_QDir::normalizePathSegments()
QFETCH(QString, path);
QFETCH(UncHandling, uncHandling);
QFETCH(QString, expected);
// for QDirPrivate::RemotePath, see tst_QUrl::resolving
QString cleaned = qt_normalizePathSegments(path, uncHandling == HandleUnc ? QDirPrivate::AllowUncPaths : QDirPrivate::DefaultNormalization);
QCOMPARE(cleaned, expected);
if (path == expected)
QVERIFY2(path.isSharedWith(cleaned), "Strings are same but data is not shared");
}
# endif //QT_BUILD_INTERNAL