diff --git a/src/corelib/io/qdir.cpp b/src/corelib/io/qdir.cpp index 3e1c778e98b..46aee5c895a 100644 --- a/src/corelib/io/qdir.cpp +++ b/src/corelib/io/qdir.cpp @@ -2199,168 +2199,173 @@ bool QDir::match(const QString &filter, const QString &fileName) /*! \internal - Returns \a path with redundant directory separators removed, - and "."s and ".."s resolved (as far as possible). + + Updates \a path with redundant directory separators removed, and "."s and + ".."s resolved (as far as possible). It returns \c false if there were ".." + segments left over, attempt to go up past the root (only applies to + absolute paths), or \c true otherwise. This method is shared with QUrl, so it doesn't deal with QDir::separator(), nor does it remove the trailing slash, if any. + + When dealing with URLs, we are following section 5.2.4 (Remove dot + segments) from http://www.ietf.org/rfc/rfc3986.txt. URL mode differs from + from local path mode in these ways: + 1) it can set *path to empty ("." becomes "") + 2) directory path outputs end in / ("a/.." becomes "a/" instead of "a") + 3) a sequence of "//" is treated as multiple path levels ("a/b//.." becomes + "a/b/" and "a/b//../.." becomes "a/"), which matches the behavior + observed in web browsers. */ -QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok) +bool qt_normalizePathSegments(QString *path, QDirPrivate::PathNormalizations flags) { const bool allowUncPaths = flags.testAnyFlag(QDirPrivate::AllowUncPaths); const bool isRemote = flags.testAnyFlag(QDirPrivate::RemotePath); - const qsizetype len = name.size(); + const qsizetype prefixLength = rootLength(*path, allowUncPaths); - if (ok) - *ok = false; + // RFC 3986 says: "The input buffer is initialized with the now-appended + // path components and the output buffer is initialized to the empty + // string." + const QChar *in = path->constBegin(); - if (len == 0) - return name; - - qsizetype i = len - 1; - QVarLengthArray outVector(len); - qsizetype used = len; - char16_t *out = outVector.data(); - const char16_t *p = reinterpret_cast(name.data()); - const char16_t *prefix = p; - qsizetype up = 0; - - const qsizetype prefixLength = rootLength(name, allowUncPaths); - p += prefixLength; - i -= prefixLength; - - // replicate trailing slash (i > 0 checks for emptiness of input string p) - // except for remote paths because there can be /../ or /./ ending - if (i > 0 && p[i] == '/' && !isRemote) { - out[--used] = '/'; - --i; + // Scan the input for a "." or ".." segment. If there isn't any, we may not + // need to modify this path at all. Also scan for "//" segments, which + // will be normalized if the path is local. + qsizetype i = prefixLength; + qsizetype n = path->size(); + for (bool lastWasSlash = true; i < n; ++i) { + if (lastWasSlash && in[i] == u'.') { + if (i + 1 == n || in[i + 1] == u'/') + break; + if (in[i + 1] == u'.' && (i + 2 == n || in[i + 2] == u'/')) + break; + } + if (!isRemote && lastWasSlash && in[i] == u'/' && i > 0) { + // backtrack one, so the algorithm below gobbles up the remaining + // slashes + --i; + break; + } + lastWasSlash = in[i] == u'/'; } + if (i == n) + return true; - auto isDot = [](const char16_t *p, qsizetype i) { - return i > 1 && p[i - 1] == '.' && p[i - 2] == '/'; - }; - auto isDotDot = [](const char16_t *p, qsizetype i) { - return i > 2 && p[i - 1] == '.' && p[i - 2] == '.' && p[i - 3] == '/'; - }; + QChar *out = path->data(); // detaches + const QChar *start = out + prefixLength; + const QChar *end = out + path->size(); + out += i; + in = out; - while (i >= 0) { - // copy trailing slashes for remote urls - if (p[i] == '/') { - if (isRemote && !up) { - if (isDot(p, i)) { - i -= 2; + // We implement a modified algorithm compared to RFC 3986, for efficiency. + bool ok = true; + do { +#if 0 // to see in the debugger + QString output = QStringView(path->constBegin(), out).toString(); + QStringView input(in, end); +#endif + + // First, copy the preceding slashes, so we can look at the segment's + // content. If the path is part of a URL, we copy all slashes, otherwise + // just one. + if (in[0] == u'/') { + *out++ = *in++; + while (in < end && in[0] == u'/') { + if (isRemote) + *out++ = *in++; + else + ++in; + + // Note: we may exit this loop with in == end, in which case we + // *shouldn't* dereference *in. But since we are pointing to a + // detached, non-empty QString, we know there's a u'\0' at the + // end, so dereferencing is safe. + } + } + + // Is this path segment either "." or ".."? + enum { Nothing, Dot, DotDot } type = Nothing; + if (in[0] == u'.') { + if (in + 1 == end || in[1] == u'/') + type = Dot; + else if (in[1] == u'.' && (in + 2 == end || in[2] == u'/')) + type = DotDot; + } + if (type == Nothing) { + // If it is neither, then we copy this segment. + while (in < end && in[0] != u'/') + *out++ = *in++; + continue; + } + + // Otherwise, we skip it and remove preceding slashes (if + // any, exactly one if part of a URL, all otherwise) from the + // output. If it is "..", we remove the segment before that and + // preceding slashes too in a similar fashion, if they are there. + if (type == DotDot) { + if (Q_UNLIKELY(out == start)) { + // we can't go further up from here, so we "re-root" + // without cleaning this segment + ok = false; + if (!isRemote) { + *out++ = u'.'; + *out++ = u'.'; + if (in + 2 != end) { + Q_ASSERT(in[2] == u'/'); + *out++ = u'/'; + ++in; + } + start = out; + in += 2; continue; } - out[--used] = p[i]; } - - --i; - continue; + while (out > start && *--out != u'/') + ; + while (!isRemote && out > start && out[-1] == u'/') + --out; + while (out > start && out[-1] != u'/') + --out; + in += 2; // the two dots + } else { + ++in; // the one dot } - // remove current directory - if (p[i] == '.' && (i == 0 || p[i-1] == '/')) { - --i; - continue; + if (out > start) { + // backtrack one or all the slashes (so "/tmp///" -> "/tmp/") + if (out[-1] == u'/' && in != end) + --out; + while (!isRemote && out > start && out[-1] == u'/') + --out; } - - // detect up dir - if (i >= 1 && p[i] == '.' && p[i-1] == '.' && (i < 2 || p[i - 2] == '/')) { - ++up; - i -= i >= 2 ? 3 : 2; - - if (isRemote) { - // moving up should consider empty path segments too (/path//../ -> /path/) - while (i > 0 && up && p[i] == '/') { - --up; - --i; - } - } - continue; + if (out == start) { + // We've reached the root. Make sure we don't turn a relative path + // to absolute or, in the case of local paths that are already + // absolute, into UNC. + // Note: this will turn ".//a" into "a" even for URLs! + if (in != end && in[0] == u'/') + ++in; + while (prefixLength == 0 && in != end && in[0] == u'/') + ++in; } + } while (in < end); - // prepend a slash before copying when not empty - if (!up && used != len && out[used] != '/') - out[--used] = '/'; + path->truncate(out - path->constBegin()); + if (!isRemote && path->isEmpty()) + *path = u"."_s; - // skip or copy - while (i >= 0) { - if (p[i] == '/') { - // copy all slashes as is for remote urls if they are not part of /./ or /../ - if (isRemote && !up) { - while (i > 0 && p[i] == '/' && !isDotDot(p, i)) { + // we return false only if the path was absolute + return ok || prefixLength == 0; +} - if (isDot(p, i)) { - i -= 2; - continue; - } - - out[--used] = p[i]; - --i; - } - - // in case of /./, jump over - if (isDot(p, i)) - i -= 2; - - break; - } - - --i; - break; - } - - // actual copy - if (!up) - out[--used] = p[i]; - --i; - } - - // decrement up after copying/skipping - if (up) - --up; - } - - // Indicate failure when ".." are left over for an absolute path. +QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok) +{ + // temporary compat + QString copy = name; + bool r = qt_normalizePathSegments(©, flags); if (ok) - *ok = prefixLength == 0 || up == 0; - - // add remaining '..' - while (up && !isRemote) { - if (used != len && out[used] != '/') // is not empty and there isn't already a '/' - out[--used] = '/'; - out[--used] = '.'; - out[--used] = '.'; - --up; - } - - bool isEmpty = used == len; - - if (prefixLength) { - if (!isEmpty && out[used] == '/') { - // Even though there is a prefix the out string is a slash. This happens, if the input - // string only consists of a prefix followed by one or more slashes. Just skip the slash. - ++used; - } - for (qsizetype i = prefixLength - 1; i >= 0; --i) - out[--used] = prefix[i]; - } else { - if (isEmpty) { - // After resolving the input path, the resulting string is empty (e.g. "foo/.."). Return - // a dot in that case. - out[--used] = '.'; - } else if (out[used] == '/') { - // After parsing the input string, out only contains a slash. That happens whenever all - // parts are resolved and there is a trailing slash ("./" or "foo/../" for example). - // Prepend a dot to have the correct return value. - out[--used] = '.'; - } - } - - // If path was not modified return the original value - if (used == 0) - return name; - return QStringView(out + used, len - used).toString(); + *ok = r; + return copy; } static QString qt_cleanPath(const QString &path, bool *ok) diff --git a/src/corelib/io/qdir_p.h b/src/corelib/io/qdir_p.h index 7dce69c1952..908c1e8ed5b 100644 --- a/src/corelib/io/qdir_p.h +++ b/src/corelib/io/qdir_p.h @@ -81,6 +81,7 @@ public: Q_DECLARE_OPERATORS_FOR_FLAGS(QDirPrivate::PathNormalizations) Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok = nullptr); +bool qt_normalizePathSegments(QString *path, QDirPrivate::PathNormalizations flags); QT_END_NAMESPACE diff --git a/tests/auto/corelib/io/qdir/tst_qdir.cpp b/tests/auto/corelib/io/qdir/tst_qdir.cpp index 7b05a88e811..8dd5ebca3cd 100644 --- a/tests/auto/corelib/io/qdir/tst_qdir.cpp +++ b/tests/auto/corelib/io/qdir/tst_qdir.cpp @@ -1385,17 +1385,22 @@ void tst_QDir::normalizePathSegments_data() QTest::newRow("data6") << "/./" << HandleUnc << "/"; QTest::newRow("data7") << "/.." << HandleUnc << "/.."; QTest::newRow("data8") << "/../" << HandleUnc << "/../"; + QTest::newRow("/../.") << "/../." << HandleUnc << "/../"; + QTest::newRow("/.././") << "/.././" << HandleUnc << "/../"; + QTest::newRow("/../..") << "/../.." << HandleUnc << "/../.."; QTest::newRow("data9") << "." << HandleUnc << "."; - QTest::newRow("data10") << "./" << HandleUnc << "./"; + QTest::newRow("data10") << "./" << HandleUnc << "."; QTest::newRow("data11") << "./." << HandleUnc << "."; - QTest::newRow("data12") << "././" << HandleUnc << "./"; + QTest::newRow("data12") << "././" << HandleUnc << "."; QTest::newRow("data13") << ".." << HandleUnc << ".."; QTest::newRow("data14") << "../" << HandleUnc << "../"; - QTest::newRow("data15") << "../." << HandleUnc << ".."; + QTest::newRow("data15") << "../." << HandleUnc << "../"; QTest::newRow("data16") << ".././" << HandleUnc << "../"; QTest::newRow("data17") << "../.." << HandleUnc << "../.."; QTest::newRow("data18") << "../../" << HandleUnc << "../../"; + QTest::newRow("./file1.txt") << "./file1.txt" << HandleUnc << "file1.txt"; QTest::newRow("data19") << ".//file1.txt" << HandleUnc << "file1.txt"; + QTest::newRow("/foo/bar//file1.txt") << "/foo/bar//file1.txt" << HandleUnc << "/foo/bar/file1.txt"; QTest::newRow("data20") << "/foo/bar/..//file1.txt" << HandleUnc << "/foo/file1.txt"; QTest::newRow("data21") << "foo/.." << HandleUnc << "."; QTest::newRow("data22") << "./foo/.." << HandleUnc << "."; @@ -1420,7 +1425,7 @@ void tst_QDir::normalizePathSegments_data() #else QTest::newRow("data37") << "c:/." << HandleUnc << "c:"; QTest::newRow("data38") << "c:/.." << HandleUnc << "."; - QTest::newRow("data39") << "c:/../" << HandleUnc << "./"; + QTest::newRow("data39") << "c:/../" << HandleUnc << "."; #endif QTest::newRow("data40") << "c:/./" << HandleUnc << "c:/"; QTest::newRow("data41") << "foo/../foo/.." << HandleUnc << "."; @@ -1447,10 +1452,9 @@ void tst_QDir::normalizePathSegments() QFETCH(QString, path); QFETCH(UncHandling, uncHandling); QFETCH(QString, expected); + // for QDirPrivate::RemotePath, see tst_QUrl::resolving QString cleaned = qt_normalizePathSegments(path, uncHandling == HandleUnc ? QDirPrivate::AllowUncPaths : QDirPrivate::DefaultNormalization); QCOMPARE(cleaned, expected); - if (path == expected) - QVERIFY2(path.isSharedWith(cleaned), "Strings are same but data is not shared"); } # endif //QT_BUILD_INTERNAL