From d064c26d2c1fb1f78013031a83b661ef6f74ce21 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Wed, 2 Apr 2025 21:32:20 -0700 Subject: [PATCH] QUrl: merge parseDecodedMode with qurlrecode.cpp algorithms parseDecodedMode() encoded only the percent ('%') characters ahead of the standard transformation into the internal format. This is a result of the decoded mode being retrofitted on top of the original encoded- only QUrl in commit 53d0624403f7f2ac8fe8364a7c5cd136717d40ed. That was slightly inefficient: if we needed to perform any additional transformations, we could end up allocating two strings. It's probably not a common scenario that the input contains a percent and something we usually encode for the internal format, but it can happen. This commit merges the two functionalities. After all, the recode() function in qurlrecode.cpp has the retryBadEncoding mode. Change-Id: I6b6d6a0f6a3553748154fffd8afb4900087ad13b Reviewed-by: David Faure --- src/corelib/io/qurl.cpp | 132 +++++++++++++++------------------- src/corelib/io/qurl_p.h | 2 + src/corelib/io/qurlrecode.cpp | 18 +++++ 3 files changed, 77 insertions(+), 75 deletions(-) diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index 0492d135d83..8c6fcfd09b6 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -528,13 +528,13 @@ public: // the "end" parameters are like STL iterators: they point to one past the last valid element bool setScheme(const QString &value, qsizetype len, bool doSetError); void setAuthority(const QString &auth, qsizetype from, qsizetype end, QUrl::ParsingMode mode); - template void setUserInfo(String value); - template void setUserName(String value); - template void setPassword(String value); + template void setUserInfo(String value, QUrl::ParsingMode mode); + template void setUserName(String value, QUrl::ParsingMode mode); + template void setPassword(String value, QUrl::ParsingMode mode); bool setHost(const QString &value, qsizetype from, qsizetype end, QUrl::ParsingMode mode); - template void setPath(String value); - template void setQuery(String value); - template void setFragment(String value); + template void setPath(String value, QUrl::ParsingMode mode); + template void setQuery(String value, QUrl::ParsingMode mode); + template void setFragment(String value, QUrl::ParsingMode mode); inline bool hasScheme() const { return sectionIsPresent & Scheme; } inline bool hasAuthority() const { return sectionIsPresent & Authority; } @@ -701,7 +701,8 @@ inline void QUrlPrivate::setError(ErrorCode errorCode, const QString &source, qs // mode in the following way: // - spaces are decoded // - valid UTF-8 sequences are decoded -// - gen-delims that can be unambiguously transformed are decoded +// - gen-delims that can be unambiguously transformed are decoded (exception: +// square brackets in path, query and fragment are left as they were) // - characters controlled by DecodeReserved are often decoded, though this behavior // can change depending on the subjective definition of "pretty" // @@ -807,23 +808,25 @@ static const ushort * const pathInUrl = userNameInUrl + 5; static const ushort * const queryInUrl = userNameInUrl + 6; static const ushort * const fragmentInUrl = userNameInUrl + 6; -static inline void parseDecodedComponent(QString &data, QUrlPrivate::Section section) -{ - data.replace(u'%', "%25"_L1); - if (section != QUrlPrivate::Host) - data.replace(u'[', "%5B"_L1).replace(u']', "%5D"_L1); -} - -static void recodeFromUser(QString &output, const QString &input, const ushort *actions) +static void +recodeFromUser(QString &output, const QString &input, const ushort *actions, QUrl::ParsingMode mode) { output.resize(0); - if (qt_urlRecode(output, input, {}, actions)) - return; - output = input; + qsizetype appended; + if (mode == QUrl::DecodedMode) + appended = qt_encodeFromUser(output, input, actions); + else + appended = qt_urlRecode(output, input, {}, actions); + if (!appended) + output = input; } -static void recodeFromUser(QString &output, QStringView input, const ushort *actions) +static void +recodeFromUser(QString &output, QStringView input, const ushort *actions, QUrl::ParsingMode mode) { + Q_ASSERT_X(mode != QUrl::DecodedMode, "recodeFromUser", + "This function should only be called when parsing encoded components"); + Q_UNUSED(mode); output.resize(0); if (qt_urlRecode(output, input, {}, actions)) return; @@ -1025,6 +1028,8 @@ inline bool QUrlPrivate::setScheme(const QString &value, qsizetype len, bool doS inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsizetype end, QUrl::ParsingMode mode) { + Q_ASSERT_X(mode != QUrl::DecodedMode, "setAuthority", + "This function should only be called when parsing encoded components"); sectionIsPresent &= ~Authority; port = -1; if (from == end && !auth.isNull()) @@ -1034,7 +1039,7 @@ inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsize while (from != end) { qsizetype userInfoIndex = auth.indexOf(u'@', from); if (size_t(userInfoIndex) < size_t(end)) { - setUserInfo(QStringView(auth).sliced(from, userInfoIndex - from)); + setUserInfo(QStringView(auth).sliced(from, userInfoIndex - from), mode); if (mode == QUrl::StrictMode && !validateComponent(UserInfo, auth, from, userInfoIndex)) break; from = userInfoIndex + 1; @@ -1093,48 +1098,50 @@ inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsize port = -1; } -template void QUrlPrivate::setUserInfo(String value) +template void QUrlPrivate::setUserInfo(String value, QUrl::ParsingMode mode) { + Q_ASSERT_X(mode != QUrl::DecodedMode, "setUserInfo", + "This function should only be called when parsing encoded components"); qsizetype delimIndex = value.indexOf(u':'); if (delimIndex < 0) { // no password - setUserName(value); + setUserName(value, mode); password.clear(); sectionIsPresent &= ~Password; } else { - setUserName(value.first(delimIndex)); - setPassword(value.sliced(delimIndex + 1)); + setUserName(value.first(delimIndex), mode); + setPassword(value.sliced(delimIndex + 1), mode); } } -template inline void QUrlPrivate::setUserName(String value) +template inline void QUrlPrivate::setUserName(String value, QUrl::ParsingMode mode) { sectionIsPresent |= UserName; - recodeFromUser(userName, value, userNameInIsolation); + recodeFromUser(userName, value, userNameInIsolation, mode); } -template inline void QUrlPrivate::setPassword(String value) +template inline void QUrlPrivate::setPassword(String value, QUrl::ParsingMode mode) { sectionIsPresent |= Password; - recodeFromUser(password, value, passwordInIsolation); + recodeFromUser(password, value, passwordInIsolation, mode); } -template inline void QUrlPrivate::setPath(String value) +template inline void QUrlPrivate::setPath(String value, QUrl::ParsingMode mode) { // sectionIsPresent |= Path; // not used, save some cycles - recodeFromUser(path, value, pathInIsolation); + recodeFromUser(path, value, pathInIsolation, mode); } -template inline void QUrlPrivate::setFragment(String value) +template inline void QUrlPrivate::setFragment(String value, QUrl::ParsingMode mode) { sectionIsPresent |= Fragment; - recodeFromUser(fragment, value, fragmentInIsolation); + recodeFromUser(fragment, value, fragmentInIsolation, mode); } -template inline void QUrlPrivate::setQuery(String value) +template inline void QUrlPrivate::setQuery(String value, QUrl::ParsingMode mode) { sectionIsPresent |= Query; - recodeFromUser(query, value, queryInIsolation); + recodeFromUser(query, value, queryInIsolation, mode); } // Host handling @@ -1286,6 +1293,8 @@ static const QChar *parseIp6(QString &host, const QChar *begin, const QChar *end inline bool QUrlPrivate::setHost(const QString &value, qsizetype from, qsizetype iend, QUrl::ParsingMode mode) { + Q_ASSERT_X(mode != QUrl::DecodedMode, "setUserInfo", + "This function should only be called when parsing encoded components"); const QChar *begin = value.constData() + from; const QChar *end = value.constData() + iend; @@ -1387,6 +1396,8 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode // relative-part = "//" authority path-abempty // / other path types here + Q_ASSERT_X(parsingMode != QUrl::DecodedMode, "parse", + "This function should only be called when parsing encoded URLs"); sectionIsPresent = 0; flags = 0; clearError(); @@ -1443,7 +1454,7 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode // even if we failed to set the authority properly, let's try to recover pathStart = authorityEnd; - setPath(QStringView(url).sliced(pathStart, hierEnd - pathStart)); + setPath(QStringView(url).sliced(pathStart, hierEnd - pathStart), parsingMode); } else { userName.clear(); password.clear(); @@ -1452,16 +1463,17 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode pathStart = hierStart; if (hierStart < hierEnd) - setPath(QStringView(url).sliced(hierStart, hierEnd - hierStart)); + setPath(QStringView(url).sliced(hierStart, hierEnd - hierStart), parsingMode); else path.clear(); } if (size_t(question) < size_t(hash)) - setQuery(QStringView(url).sliced(question + 1, qMin(hash, len) - question - 1)); + setQuery(QStringView(url).sliced(question + 1, qMin(hash, len) - question - 1), + parsingMode); if (hash != -1) - setFragment(QStringView(url).sliced(hash + 1, len - hash - 1)); + setFragment(QStringView(url).sliced(hash + 1, len - hash - 1), parsingMode); if (error || parsingMode == QUrl::TolerantMode) return; @@ -2048,7 +2060,7 @@ void QUrl::setUserInfo(const QString &userInfo, ParsingMode mode) return; } - d->setUserInfo(trimmed); + d->setUserInfo(trimmed, mode); if (userInfo.isNull()) { // QUrlPrivate::setUserInfo cleared almost everything // but it leaves the UserName bit set @@ -2114,13 +2126,7 @@ void QUrl::setUserName(const QString &userName, ParsingMode mode) detach(); d->clearError(); - QString data = userName; - if (mode == DecodedMode) { - parseDecodedComponent(data, QUrlPrivate::UserName); - mode = TolerantMode; - } - - d->setUserName(data); + d->setUserName(userName, mode); if (userName.isNull()) d->sectionIsPresent &= ~QUrlPrivate::UserName; else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::UserName, userName)) @@ -2177,13 +2183,7 @@ void QUrl::setPassword(const QString &password, ParsingMode mode) detach(); d->clearError(); - QString data = password; - if (mode == DecodedMode) { - parseDecodedComponent(data, QUrlPrivate::Password); - mode = TolerantMode; - } - - d->setPassword(data); + d->setPassword(password, mode); if (password.isNull()) d->sectionIsPresent &= ~QUrlPrivate::Password; else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Password, password)) @@ -2241,7 +2241,7 @@ void QUrl::setHost(const QString &host, ParsingMode mode) QString data = host; if (mode == DecodedMode) { - parseDecodedComponent(data, QUrlPrivate::Host); + data.replace(u'%', "%25"_L1); mode = TolerantMode; } @@ -2364,13 +2364,7 @@ void QUrl::setPath(const QString &path, ParsingMode mode) detach(); d->clearError(); - QString data = path; - if (mode == DecodedMode) { - parseDecodedComponent(data, QUrlPrivate::Path); - mode = TolerantMode; - } - - d->setPath(data); + d->setPath(path, mode); // optimized out, since there is no path delimiter // if (path.isNull()) @@ -2500,13 +2494,7 @@ void QUrl::setQuery(const QString &query, ParsingMode mode) detach(); d->clearError(); - QString data = query; - if (mode == DecodedMode) { - parseDecodedComponent(data, QUrlPrivate::Query); - mode = TolerantMode; - } - - d->setQuery(data); + d->setQuery(query, mode); if (query.isNull()) d->sectionIsPresent &= ~QUrlPrivate::Query; else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Query, query)) @@ -2598,13 +2586,7 @@ void QUrl::setFragment(const QString &fragment, ParsingMode mode) detach(); d->clearError(); - QString data = fragment; - if (mode == DecodedMode) { - parseDecodedComponent(data, QUrlPrivate::Fragment); - mode = TolerantMode; - } - - d->setFragment(data); + d->setFragment(fragment, mode); if (fragment.isNull()) d->sectionIsPresent &= ~QUrlPrivate::Fragment; else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Fragment, fragment)) diff --git a/src/corelib/io/qurl_p.h b/src/corelib/io/qurl_p.h index 9562d1993d6..1d0db9448af 100644 --- a/src/corelib/io/qurl_p.h +++ b/src/corelib/io/qurl_p.h @@ -25,6 +25,8 @@ QT_BEGIN_NAMESPACE extern Q_AUTOTEST_EXPORT qsizetype qt_urlRecode(QString &appendTo, QStringView url, QUrl::ComponentFormattingOptions encoding, const ushort *tableModifications = nullptr); +qsizetype qt_encodeFromUser(QString &appendTo, const QString &input, + const ushort *tableModifications); // in qurlidna.cpp enum AceLeadingDot { AllowLeadingDot, ForbidLeadingDot }; diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index bb531e53811..f4261f8f94c 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -660,4 +660,22 @@ qt_urlRecode(QString &appendTo, QStringView in, reinterpret_cast(in.end()), encoding, actionTable, false); } +qsizetype qt_encodeFromUser(QString &appendTo, const QString &in, const ushort *tableModifications) +{ + uchar actionTable[sizeof defaultActionTable]; + memcpy(actionTable, defaultActionTable, sizeof actionTable); + + // Different defaults to the regular encoded-to-encoded recoding + actionTable['[' - ' '] = EncodeCharacter; + actionTable[']' - ' '] = EncodeCharacter; + + if (tableModifications) { + for (const ushort *p = tableModifications; *p; ++p) + actionTable[uchar(*p) - ' '] = *p >> 8; + } + + return recode(appendTo, reinterpret_cast(in.begin()), + reinterpret_cast(in.end()), {}, actionTable, true); +} + QT_END_NAMESPACE