From 5e936b60fc921e21b8153a83113886a1de333b57 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 28 Feb 2025 19:28:58 -0300 Subject: [PATCH] QUrl: decode square brackets in fromLocalFile() QUrl has an intentional deviation from RFC 3986 when it comes to those characters, matching the behavior of major browsers: we distinguish the forms '[' and ']' from their encoded forms. However, for local files, they are definitely identical, because there's no such thing as a percent-encoded file name. [ChangeLog][QtCore][QUrl] QUrl::fromLocalFile() will now encode the square bracket characters '[' and ']' to their percent-encoded forms. This will be visible in calls to toString(), toEncoded(), or the encoded form of path(). QUrl's comparison operator will consider the old (created from an encoded URL string) and new forms to be different. Pick-to: 6.8 6.9 Fixes: QTBUG-134073 Change-Id: I9eae083007103e34f73cfffd59a618069eba0e19 Reviewed-by: David Faure --- src/corelib/io/qurl.cpp | 32 ++++++++++++++++++++++++- tests/auto/corelib/io/qurl/tst_qurl.cpp | 23 ++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index b48850bf8ae..a7620e755f5 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -748,6 +748,32 @@ static const ushort * const pathInIsolation = userNameInIsolation + 5; static const ushort * const queryInIsolation = userNameInIsolation + 6; static const ushort * const fragmentInIsolation = userNameInIsolation + 7; +static const ushort localPathFromUser[] = { + // we force-decode some of the gen-delims, because + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + // the gen-delim lines are leave() in qt_urlRecode, so we don't need to + // repeat them if we want to keep them decoded + // decode(':'), // allowed + // decode('@'), // allowed + encode(']'), + encode('['), + // decode('/'), // special and allowed + // decode('?'), // handled by path() and others + 
// decode('#'), // ditto + + // the rest is like pathInIsolation above + decode('"'), + decode('<'), + decode('>'), + decode('^'), + decode('\\'), + decode('|'), + decode('{'), + decode('}'), + + 0 +}; + static const ushort userNameInUserInfo[] = { encode(':'), // 0 decode('@'), // 1 @@ -3360,7 +3386,11 @@ QUrl QUrl::fromLocalFile(const QString &localFile) } url.setScheme(scheme); - url.setPath(deslashified, DecodedMode); + + // not directly using setPath here, as we do a few more transforms + parseDecodedComponent(deslashified); + if (!qt_urlRecode(url.d->path, deslashified, {}, localPathFromUser)) + url.d->path = deslashified; return url; } diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp index ecf5fc1e787..7e44b957623 100644 --- a/tests/auto/corelib/io/qurl/tst_qurl.cpp +++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp @@ -1367,6 +1367,8 @@ void tst_QUrl::toLocalFile_data() QTest::newRow("FILE:/") << QString::fromLatin1("FILE:/a.txt") << QString::fromLatin1("/a.txt"); QTest::newRow("path-delimiter") << QString::fromLatin1("file:///Mambo <%235>.mp3") << QString::fromLatin1("/Mambo <#5>.mp3"); + QTest::newRow("path-brackets-encoded") << u"file:///tmp/%5Btest%5D.txt"_s << u"/tmp/[test].txt"_s; + QTest::newRow("path-brackets-decoded") << u"file:///tmp/[test].txt"_s << u"/tmp/[test].txt"_s; QTest::newRow("path-percent") << QString::fromLatin1("file:///a%25.txt") << QString::fromLatin1("/a%.txt"); QTest::newRow("path-percent-percent") << QString::fromLatin1("file:///a%25%25.txt") << QString::fromLatin1("/a%%.txt"); QTest::newRow("path-percent-a-percent") << QString::fromLatin1("file:///a%25a%25.txt") << QString::fromLatin1("/a%a%.txt"); @@ -1408,6 +1410,16 @@ void tst_QUrl::toLocalFile() QUrl url(theUrl); QCOMPARE(url.toLocalFile(), theFile); QCOMPARE(url.isLocalFile(), !theFile.isEmpty()); + + // set the path to the same (encoded) thing - nothing should change + url.setPath(url.path(QUrl::FullyEncoded), QUrl::TolerantMode); + 
QCOMPARE(url.toLocalFile(), theFile); + QCOMPARE(url.isLocalFile(), !theFile.isEmpty()); + + // QUrl::PrettyDecoded is still URL-encoded and lossless + url.setPath(url.path(QUrl::PrettyDecoded), QUrl::TolerantMode); + QCOMPARE(url.toLocalFile(), theFile); + QCOMPARE(url.isLocalFile(), !theFile.isEmpty()); } void tst_QUrl::fromLocalFile_data() @@ -1421,6 +1433,7 @@ void tst_QUrl::fromLocalFile_data() QTest::newRow("absolute-two-path") << QString::fromLatin1("/a/b.txt") << QString::fromLatin1("file:///a/b.txt") << QString::fromLatin1("/a/b.txt"); QTest::newRow("path-delimiters") << QString::fromLatin1("/Mambo <#5>.mp3") << QString::fromLatin1("file:///Mambo <%235>.mp3") << QString::fromLatin1("/Mambo <#5>.mp3"); + QTest::newRow("path-brackets") << u"/tmp/[test].txt"_s << u"file:///tmp/%5Btest%5D.txt"_s << u"/tmp/[test].txt"_s; // Windows absolute details QTest::newRow("windows-drive") << QString::fromLatin1("c:/a.txt") << QString::fromLatin1("file:///c:/a.txt") << QString::fromLatin1("/c:/a.txt"); @@ -1483,6 +1496,16 @@ void tst_QUrl::fromLocalFile() QCOMPARE(url.toString(QUrl::DecodeReserved), theUrl); QCOMPARE(url.path(), thePath); + + // set the path to the same (encoded) thing - nothing should change + url.setPath(url.path(QUrl::FullyEncoded), QUrl::TolerantMode); + QCOMPARE(url.toString(QUrl::DecodeReserved), theUrl); + QCOMPARE(url.path(), thePath); + + // QUrl::PrettyDecoded is still URL-encoded and lossless + url.setPath(url.path(QUrl::PrettyDecoded), QUrl::TolerantMode); + QCOMPARE(url.toString(QUrl::DecodeReserved), theUrl); + QCOMPARE(url.path(), thePath); } void tst_QUrl::fromLocalFileNormalize_data()