QUrl: merge parseDecodedMode with qurlrecode.cpp algorithms

parseDecodedComponent() encoded only the percent ('%') characters (plus
square brackets outside the host) ahead of the standard transformation
into the internal format. This is a result of the decoded mode being
retrofitted on top of the original encoded-only QUrl in commit
53d0624403f7f2ac8fe8364a7c5cd136717d40ed.

That was slightly inefficient: if we needed to perform any additional
transformations, we could end up allocating two strings. It's probably
not a common scenario that the input contains a percent and something we
usually encode for the internal format, but it can happen.

This commit merges the two functionalities. After all, the recode()
function in qurlrecode.cpp has the retryBadEncoding mode.

Change-Id: I6b6d6a0f6a3553748154fffd8afb4900087ad13b
Reviewed-by: David Faure <david.faure@kdab.com>
This commit is contained in:
Thiago Macieira 2025-04-02 21:32:20 -07:00
parent 31207b539a
commit d064c26d2c
3 changed files with 77 additions and 75 deletions

View File

@ -528,13 +528,13 @@ public:
// the "end" parameters are like STL iterators: they point to one past the last valid element // the "end" parameters are like STL iterators: they point to one past the last valid element
bool setScheme(const QString &value, qsizetype len, bool doSetError); bool setScheme(const QString &value, qsizetype len, bool doSetError);
void setAuthority(const QString &auth, qsizetype from, qsizetype end, QUrl::ParsingMode mode); void setAuthority(const QString &auth, qsizetype from, qsizetype end, QUrl::ParsingMode mode);
template <typename String> void setUserInfo(String value); template <typename String> void setUserInfo(String value, QUrl::ParsingMode mode);
template <typename String> void setUserName(String value); template <typename String> void setUserName(String value, QUrl::ParsingMode mode);
template <typename String> void setPassword(String value); template <typename String> void setPassword(String value, QUrl::ParsingMode mode);
bool setHost(const QString &value, qsizetype from, qsizetype end, QUrl::ParsingMode mode); bool setHost(const QString &value, qsizetype from, qsizetype end, QUrl::ParsingMode mode);
template <typename String> void setPath(String value); template <typename String> void setPath(String value, QUrl::ParsingMode mode);
template <typename String> void setQuery(String value); template <typename String> void setQuery(String value, QUrl::ParsingMode mode);
template <typename String> void setFragment(String value); template <typename String> void setFragment(String value, QUrl::ParsingMode mode);
inline bool hasScheme() const { return sectionIsPresent & Scheme; } inline bool hasScheme() const { return sectionIsPresent & Scheme; }
inline bool hasAuthority() const { return sectionIsPresent & Authority; } inline bool hasAuthority() const { return sectionIsPresent & Authority; }
@ -701,7 +701,8 @@ inline void QUrlPrivate::setError(ErrorCode errorCode, const QString &source, qs
// mode in the following way: // mode in the following way:
// - spaces are decoded // - spaces are decoded
// - valid UTF-8 sequences are decoded // - valid UTF-8 sequences are decoded
// - gen-delims that can be unambiguously transformed are decoded // - gen-delims that can be unambiguously transformed are decoded (exception:
// square brackets in path, query and fragment are left as they were)
// - characters controlled by DecodeReserved are often decoded, though this behavior // - characters controlled by DecodeReserved are often decoded, though this behavior
// can change depending on the subjective definition of "pretty" // can change depending on the subjective definition of "pretty"
// //
@ -807,23 +808,25 @@ static const ushort * const pathInUrl = userNameInUrl + 5;
static const ushort * const queryInUrl = userNameInUrl + 6; static const ushort * const queryInUrl = userNameInUrl + 6;
static const ushort * const fragmentInUrl = userNameInUrl + 6; static const ushort * const fragmentInUrl = userNameInUrl + 6;
static inline void parseDecodedComponent(QString &data, QUrlPrivate::Section section) static void
{ recodeFromUser(QString &output, const QString &input, const ushort *actions, QUrl::ParsingMode mode)
data.replace(u'%', "%25"_L1);
if (section != QUrlPrivate::Host)
data.replace(u'[', "%5B"_L1).replace(u']', "%5D"_L1);
}
static void recodeFromUser(QString &output, const QString &input, const ushort *actions)
{ {
output.resize(0); output.resize(0);
if (qt_urlRecode(output, input, {}, actions)) qsizetype appended;
return; if (mode == QUrl::DecodedMode)
appended = qt_encodeFromUser(output, input, actions);
else
appended = qt_urlRecode(output, input, {}, actions);
if (!appended)
output = input; output = input;
} }
static void recodeFromUser(QString &output, QStringView input, const ushort *actions) static void
recodeFromUser(QString &output, QStringView input, const ushort *actions, QUrl::ParsingMode mode)
{ {
Q_ASSERT_X(mode != QUrl::DecodedMode, "recodeFromUser",
"This function should only be called when parsing encoded components");
Q_UNUSED(mode);
output.resize(0); output.resize(0);
if (qt_urlRecode(output, input, {}, actions)) if (qt_urlRecode(output, input, {}, actions))
return; return;
@ -1025,6 +1028,8 @@ inline bool QUrlPrivate::setScheme(const QString &value, qsizetype len, bool doS
inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsizetype end, QUrl::ParsingMode mode) inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsizetype end, QUrl::ParsingMode mode)
{ {
Q_ASSERT_X(mode != QUrl::DecodedMode, "setAuthority",
"This function should only be called when parsing encoded components");
sectionIsPresent &= ~Authority; sectionIsPresent &= ~Authority;
port = -1; port = -1;
if (from == end && !auth.isNull()) if (from == end && !auth.isNull())
@ -1034,7 +1039,7 @@ inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsize
while (from != end) { while (from != end) {
qsizetype userInfoIndex = auth.indexOf(u'@', from); qsizetype userInfoIndex = auth.indexOf(u'@', from);
if (size_t(userInfoIndex) < size_t(end)) { if (size_t(userInfoIndex) < size_t(end)) {
setUserInfo(QStringView(auth).sliced(from, userInfoIndex - from)); setUserInfo(QStringView(auth).sliced(from, userInfoIndex - from), mode);
if (mode == QUrl::StrictMode && !validateComponent(UserInfo, auth, from, userInfoIndex)) if (mode == QUrl::StrictMode && !validateComponent(UserInfo, auth, from, userInfoIndex))
break; break;
from = userInfoIndex + 1; from = userInfoIndex + 1;
@ -1093,48 +1098,50 @@ inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsize
port = -1; port = -1;
} }
template <typename String> void QUrlPrivate::setUserInfo(String value) template <typename String> void QUrlPrivate::setUserInfo(String value, QUrl::ParsingMode mode)
{ {
Q_ASSERT_X(mode != QUrl::DecodedMode, "setUserInfo",
"This function should only be called when parsing encoded components");
qsizetype delimIndex = value.indexOf(u':'); qsizetype delimIndex = value.indexOf(u':');
if (delimIndex < 0) { if (delimIndex < 0) {
// no password // no password
setUserName(value); setUserName(value, mode);
password.clear(); password.clear();
sectionIsPresent &= ~Password; sectionIsPresent &= ~Password;
} else { } else {
setUserName(value.first(delimIndex)); setUserName(value.first(delimIndex), mode);
setPassword(value.sliced(delimIndex + 1)); setPassword(value.sliced(delimIndex + 1), mode);
} }
} }
template <typename String> inline void QUrlPrivate::setUserName(String value) template <typename String> inline void QUrlPrivate::setUserName(String value, QUrl::ParsingMode mode)
{ {
sectionIsPresent |= UserName; sectionIsPresent |= UserName;
recodeFromUser(userName, value, userNameInIsolation); recodeFromUser(userName, value, userNameInIsolation, mode);
} }
template <typename String> inline void QUrlPrivate::setPassword(String value) template <typename String> inline void QUrlPrivate::setPassword(String value, QUrl::ParsingMode mode)
{ {
sectionIsPresent |= Password; sectionIsPresent |= Password;
recodeFromUser(password, value, passwordInIsolation); recodeFromUser(password, value, passwordInIsolation, mode);
} }
template <typename String> inline void QUrlPrivate::setPath(String value) template <typename String> inline void QUrlPrivate::setPath(String value, QUrl::ParsingMode mode)
{ {
// sectionIsPresent |= Path; // not used, save some cycles // sectionIsPresent |= Path; // not used, save some cycles
recodeFromUser(path, value, pathInIsolation); recodeFromUser(path, value, pathInIsolation, mode);
} }
template <typename String> inline void QUrlPrivate::setFragment(String value) template <typename String> inline void QUrlPrivate::setFragment(String value, QUrl::ParsingMode mode)
{ {
sectionIsPresent |= Fragment; sectionIsPresent |= Fragment;
recodeFromUser(fragment, value, fragmentInIsolation); recodeFromUser(fragment, value, fragmentInIsolation, mode);
} }
template <typename String> inline void QUrlPrivate::setQuery(String value) template <typename String> inline void QUrlPrivate::setQuery(String value, QUrl::ParsingMode mode)
{ {
sectionIsPresent |= Query; sectionIsPresent |= Query;
recodeFromUser(query, value, queryInIsolation); recodeFromUser(query, value, queryInIsolation, mode);
} }
// Host handling // Host handling
@ -1286,6 +1293,8 @@ static const QChar *parseIp6(QString &host, const QChar *begin, const QChar *end
inline bool inline bool
QUrlPrivate::setHost(const QString &value, qsizetype from, qsizetype iend, QUrl::ParsingMode mode) QUrlPrivate::setHost(const QString &value, qsizetype from, qsizetype iend, QUrl::ParsingMode mode)
{ {
Q_ASSERT_X(mode != QUrl::DecodedMode, "setHost",
"This function should only be called when parsing encoded components");
const QChar *begin = value.constData() + from; const QChar *begin = value.constData() + from;
const QChar *end = value.constData() + iend; const QChar *end = value.constData() + iend;
@ -1387,6 +1396,8 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode
// relative-part = "//" authority path-abempty // relative-part = "//" authority path-abempty
// / other path types here // / other path types here
Q_ASSERT_X(parsingMode != QUrl::DecodedMode, "parse",
"This function should only be called when parsing encoded URLs");
sectionIsPresent = 0; sectionIsPresent = 0;
flags = 0; flags = 0;
clearError(); clearError();
@ -1443,7 +1454,7 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode
// even if we failed to set the authority properly, let's try to recover // even if we failed to set the authority properly, let's try to recover
pathStart = authorityEnd; pathStart = authorityEnd;
setPath(QStringView(url).sliced(pathStart, hierEnd - pathStart)); setPath(QStringView(url).sliced(pathStart, hierEnd - pathStart), parsingMode);
} else { } else {
userName.clear(); userName.clear();
password.clear(); password.clear();
@ -1452,16 +1463,17 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode
pathStart = hierStart; pathStart = hierStart;
if (hierStart < hierEnd) if (hierStart < hierEnd)
setPath(QStringView(url).sliced(hierStart, hierEnd - hierStart)); setPath(QStringView(url).sliced(hierStart, hierEnd - hierStart), parsingMode);
else else
path.clear(); path.clear();
} }
if (size_t(question) < size_t(hash)) if (size_t(question) < size_t(hash))
setQuery(QStringView(url).sliced(question + 1, qMin<size_t>(hash, len) - question - 1)); setQuery(QStringView(url).sliced(question + 1, qMin<size_t>(hash, len) - question - 1),
parsingMode);
if (hash != -1) if (hash != -1)
setFragment(QStringView(url).sliced(hash + 1, len - hash - 1)); setFragment(QStringView(url).sliced(hash + 1, len - hash - 1), parsingMode);
if (error || parsingMode == QUrl::TolerantMode) if (error || parsingMode == QUrl::TolerantMode)
return; return;
@ -2048,7 +2060,7 @@ void QUrl::setUserInfo(const QString &userInfo, ParsingMode mode)
return; return;
} }
d->setUserInfo(trimmed); d->setUserInfo(trimmed, mode);
if (userInfo.isNull()) { if (userInfo.isNull()) {
// QUrlPrivate::setUserInfo cleared almost everything // QUrlPrivate::setUserInfo cleared almost everything
// but it leaves the UserName bit set // but it leaves the UserName bit set
@ -2114,13 +2126,7 @@ void QUrl::setUserName(const QString &userName, ParsingMode mode)
detach(); detach();
d->clearError(); d->clearError();
QString data = userName; d->setUserName(userName, mode);
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::UserName);
mode = TolerantMode;
}
d->setUserName(data);
if (userName.isNull()) if (userName.isNull())
d->sectionIsPresent &= ~QUrlPrivate::UserName; d->sectionIsPresent &= ~QUrlPrivate::UserName;
else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::UserName, userName)) else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::UserName, userName))
@ -2177,13 +2183,7 @@ void QUrl::setPassword(const QString &password, ParsingMode mode)
detach(); detach();
d->clearError(); d->clearError();
QString data = password; d->setPassword(password, mode);
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Password);
mode = TolerantMode;
}
d->setPassword(data);
if (password.isNull()) if (password.isNull())
d->sectionIsPresent &= ~QUrlPrivate::Password; d->sectionIsPresent &= ~QUrlPrivate::Password;
else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Password, password)) else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Password, password))
@ -2241,7 +2241,7 @@ void QUrl::setHost(const QString &host, ParsingMode mode)
QString data = host; QString data = host;
if (mode == DecodedMode) { if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Host); data.replace(u'%', "%25"_L1);
mode = TolerantMode; mode = TolerantMode;
} }
@ -2364,13 +2364,7 @@ void QUrl::setPath(const QString &path, ParsingMode mode)
detach(); detach();
d->clearError(); d->clearError();
QString data = path; d->setPath(path, mode);
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Path);
mode = TolerantMode;
}
d->setPath(data);
// optimized out, since there is no path delimiter // optimized out, since there is no path delimiter
// if (path.isNull()) // if (path.isNull())
@ -2500,13 +2494,7 @@ void QUrl::setQuery(const QString &query, ParsingMode mode)
detach(); detach();
d->clearError(); d->clearError();
QString data = query; d->setQuery(query, mode);
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Query);
mode = TolerantMode;
}
d->setQuery(data);
if (query.isNull()) if (query.isNull())
d->sectionIsPresent &= ~QUrlPrivate::Query; d->sectionIsPresent &= ~QUrlPrivate::Query;
else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Query, query)) else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Query, query))
@ -2598,13 +2586,7 @@ void QUrl::setFragment(const QString &fragment, ParsingMode mode)
detach(); detach();
d->clearError(); d->clearError();
QString data = fragment; d->setFragment(fragment, mode);
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Fragment);
mode = TolerantMode;
}
d->setFragment(data);
if (fragment.isNull()) if (fragment.isNull())
d->sectionIsPresent &= ~QUrlPrivate::Fragment; d->sectionIsPresent &= ~QUrlPrivate::Fragment;
else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Fragment, fragment)) else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Fragment, fragment))

View File

@ -25,6 +25,8 @@ QT_BEGIN_NAMESPACE
extern Q_AUTOTEST_EXPORT qsizetype qt_urlRecode(QString &appendTo, QStringView url, extern Q_AUTOTEST_EXPORT qsizetype qt_urlRecode(QString &appendTo, QStringView url,
QUrl::ComponentFormattingOptions encoding, QUrl::ComponentFormattingOptions encoding,
const ushort *tableModifications = nullptr); const ushort *tableModifications = nullptr);
qsizetype qt_encodeFromUser(QString &appendTo, const QString &input,
const ushort *tableModifications);
// in qurlidna.cpp // in qurlidna.cpp
enum AceLeadingDot { AllowLeadingDot, ForbidLeadingDot }; enum AceLeadingDot { AllowLeadingDot, ForbidLeadingDot };

View File

@ -660,4 +660,22 @@ qt_urlRecode(QString &appendTo, QStringView in,
reinterpret_cast<const char16_t *>(in.end()), encoding, actionTable, false); reinterpret_cast<const char16_t *>(in.end()), encoding, actionTable, false);
} }
// Appends the encoded form of the user-supplied string `in` to `appendTo`.
//
// Works on a private copy of defaultActionTable in which '[' and ']' are
// switched to EncodeCharacter before the caller's adjustments are applied.
//
// `tableModifications` may be null. Otherwise it is a zero-terminated list
// where the low byte of each entry selects the character to adjust and the
// high byte is the action stored for that character.
//
// Returns whatever recode() returns.
// NOTE(review): the trailing `true` is presumably recode()'s retryBadEncoding
// switch (qt_urlRecode() passes false in the same position) - confirm against
// recode() itself.
qsizetype qt_encodeFromUser(QString &appendTo, const QString &in, const ushort *tableModifications)
{
    uchar actionTable[sizeof defaultActionTable];
    memcpy(actionTable, defaultActionTable, sizeof actionTable);

    // Unlike the encoded-to-encoded path, square brackets start out encoded;
    // the caller's table may still override this below.
    actionTable['[' - ' '] = EncodeCharacter;
    actionTable[']' - ' '] = EncodeCharacter;

    if (const ushort *entry = tableModifications) {
        while (*entry) {
            actionTable[uchar(*entry) - ' '] = *entry >> 8;
            ++entry;
        }
    }

    const char16_t *const first = reinterpret_cast<const char16_t *>(in.begin());
    const char16_t *const last = reinterpret_cast<const char16_t *>(in.end());
    return recode(appendTo, first, last, {}, actionTable, true);
}
QT_END_NAMESPACE QT_END_NAMESPACE