QUrl: merge parseDecodedMode with qurlrecode.cpp algorithms

parseDecodedMode() encoded only the percent ('%') characters ahead of
the standard transformation into the internal format. This is a result
of the decoded mode being retrofitted on top of the original encoded-
only QUrl in commit 53d0624403f7f2ac8fe8364a7c5cd136717d40ed.

That was slightly inefficient: if we needed to perform any additional
transformations, we could end up allocating two strings. It's probably
not a common scenario that the input contains a percent and something we
usually encode for the internal format, but it can happen.

This commit merges the two functionalities. After all, the recode()
function in qurlrecode.cpp has the retryBadEncoding mode.

Change-Id: I6b6d6a0f6a3553748154fffd8afb4900087ad13b
Reviewed-by: David Faure <david.faure@kdab.com>
This commit is contained in:
Thiago Macieira 2025-04-02 21:32:20 -07:00
parent 31207b539a
commit d064c26d2c
3 changed files with 77 additions and 75 deletions

View File

@ -528,13 +528,13 @@ public:
// the "end" parameters are like STL iterators: they point to one past the last valid element
bool setScheme(const QString &value, qsizetype len, bool doSetError);
void setAuthority(const QString &auth, qsizetype from, qsizetype end, QUrl::ParsingMode mode);
template <typename String> void setUserInfo(String value);
template <typename String> void setUserName(String value);
template <typename String> void setPassword(String value);
template <typename String> void setUserInfo(String value, QUrl::ParsingMode mode);
template <typename String> void setUserName(String value, QUrl::ParsingMode mode);
template <typename String> void setPassword(String value, QUrl::ParsingMode mode);
bool setHost(const QString &value, qsizetype from, qsizetype end, QUrl::ParsingMode mode);
template <typename String> void setPath(String value);
template <typename String> void setQuery(String value);
template <typename String> void setFragment(String value);
template <typename String> void setPath(String value, QUrl::ParsingMode mode);
template <typename String> void setQuery(String value, QUrl::ParsingMode mode);
template <typename String> void setFragment(String value, QUrl::ParsingMode mode);
inline bool hasScheme() const { return sectionIsPresent & Scheme; }
inline bool hasAuthority() const { return sectionIsPresent & Authority; }
@ -701,7 +701,8 @@ inline void QUrlPrivate::setError(ErrorCode errorCode, const QString &source, qs
// mode in the following way:
// - spaces are decoded
// - valid UTF-8 sequences are decoded
// - gen-delims that can be unambiguously transformed are decoded
// - gen-delims that can be unambiguously transformed are decoded (exception:
// square brackets in path, query and fragment are left as they were)
// - characters controlled by DecodeReserved are often decoded, though this behavior
// can change depending on the subjective definition of "pretty"
//
@ -807,23 +808,25 @@ static const ushort * const pathInUrl = userNameInUrl + 5;
static const ushort * const queryInUrl = userNameInUrl + 6;
static const ushort * const fragmentInUrl = userNameInUrl + 6;
static inline void parseDecodedComponent(QString &data, QUrlPrivate::Section section)
{
data.replace(u'%', "%25"_L1);
if (section != QUrlPrivate::Host)
data.replace(u'[', "%5B"_L1).replace(u']', "%5D"_L1);
}
static void recodeFromUser(QString &output, const QString &input, const ushort *actions)
static void
recodeFromUser(QString &output, const QString &input, const ushort *actions, QUrl::ParsingMode mode)
{
output.resize(0);
if (qt_urlRecode(output, input, {}, actions))
return;
output = input;
qsizetype appended;
if (mode == QUrl::DecodedMode)
appended = qt_encodeFromUser(output, input, actions);
else
appended = qt_urlRecode(output, input, {}, actions);
if (!appended)
output = input;
}
static void recodeFromUser(QString &output, QStringView input, const ushort *actions)
static void
recodeFromUser(QString &output, QStringView input, const ushort *actions, QUrl::ParsingMode mode)
{
Q_ASSERT_X(mode != QUrl::DecodedMode, "recodeFromUser",
"This function should only be called when parsing encoded components");
Q_UNUSED(mode);
output.resize(0);
if (qt_urlRecode(output, input, {}, actions))
return;
@ -1025,6 +1028,8 @@ inline bool QUrlPrivate::setScheme(const QString &value, qsizetype len, bool doS
inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsizetype end, QUrl::ParsingMode mode)
{
Q_ASSERT_X(mode != QUrl::DecodedMode, "setAuthority",
"This function should only be called when parsing encoded components");
sectionIsPresent &= ~Authority;
port = -1;
if (from == end && !auth.isNull())
@ -1034,7 +1039,7 @@ inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsize
while (from != end) {
qsizetype userInfoIndex = auth.indexOf(u'@', from);
if (size_t(userInfoIndex) < size_t(end)) {
setUserInfo(QStringView(auth).sliced(from, userInfoIndex - from));
setUserInfo(QStringView(auth).sliced(from, userInfoIndex - from), mode);
if (mode == QUrl::StrictMode && !validateComponent(UserInfo, auth, from, userInfoIndex))
break;
from = userInfoIndex + 1;
@ -1093,48 +1098,50 @@ inline void QUrlPrivate::setAuthority(const QString &auth, qsizetype from, qsize
port = -1;
}
template <typename String> void QUrlPrivate::setUserInfo(String value)
template <typename String> void QUrlPrivate::setUserInfo(String value, QUrl::ParsingMode mode)
{
Q_ASSERT_X(mode != QUrl::DecodedMode, "setUserInfo",
"This function should only be called when parsing encoded components");
qsizetype delimIndex = value.indexOf(u':');
if (delimIndex < 0) {
// no password
setUserName(value);
setUserName(value, mode);
password.clear();
sectionIsPresent &= ~Password;
} else {
setUserName(value.first(delimIndex));
setPassword(value.sliced(delimIndex + 1));
setUserName(value.first(delimIndex), mode);
setPassword(value.sliced(delimIndex + 1), mode);
}
}
template <typename String> inline void QUrlPrivate::setUserName(String value)
template <typename String> inline void QUrlPrivate::setUserName(String value, QUrl::ParsingMode mode)
{
sectionIsPresent |= UserName;
recodeFromUser(userName, value, userNameInIsolation);
recodeFromUser(userName, value, userNameInIsolation, mode);
}
template <typename String> inline void QUrlPrivate::setPassword(String value)
template <typename String> inline void QUrlPrivate::setPassword(String value, QUrl::ParsingMode mode)
{
sectionIsPresent |= Password;
recodeFromUser(password, value, passwordInIsolation);
recodeFromUser(password, value, passwordInIsolation, mode);
}
template <typename String> inline void QUrlPrivate::setPath(String value)
template <typename String> inline void QUrlPrivate::setPath(String value, QUrl::ParsingMode mode)
{
// sectionIsPresent |= Path; // not used, save some cycles
recodeFromUser(path, value, pathInIsolation);
recodeFromUser(path, value, pathInIsolation, mode);
}
template <typename String> inline void QUrlPrivate::setFragment(String value)
template <typename String> inline void QUrlPrivate::setFragment(String value, QUrl::ParsingMode mode)
{
sectionIsPresent |= Fragment;
recodeFromUser(fragment, value, fragmentInIsolation);
recodeFromUser(fragment, value, fragmentInIsolation, mode);
}
template <typename String> inline void QUrlPrivate::setQuery(String value)
template <typename String> inline void QUrlPrivate::setQuery(String value, QUrl::ParsingMode mode)
{
sectionIsPresent |= Query;
recodeFromUser(query, value, queryInIsolation);
recodeFromUser(query, value, queryInIsolation, mode);
}
// Host handling
@ -1286,6 +1293,8 @@ static const QChar *parseIp6(QString &host, const QChar *begin, const QChar *end
inline bool
QUrlPrivate::setHost(const QString &value, qsizetype from, qsizetype iend, QUrl::ParsingMode mode)
{
Q_ASSERT_X(mode != QUrl::DecodedMode, "setUserInfo",
"This function should only be called when parsing encoded components");
const QChar *begin = value.constData() + from;
const QChar *end = value.constData() + iend;
@ -1387,6 +1396,8 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode
// relative-part = "//" authority path-abempty
// / other path types here
Q_ASSERT_X(parsingMode != QUrl::DecodedMode, "parse",
"This function should only be called when parsing encoded URLs");
sectionIsPresent = 0;
flags = 0;
clearError();
@ -1443,7 +1454,7 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode
// even if we failed to set the authority properly, let's try to recover
pathStart = authorityEnd;
setPath(QStringView(url).sliced(pathStart, hierEnd - pathStart));
setPath(QStringView(url).sliced(pathStart, hierEnd - pathStart), parsingMode);
} else {
userName.clear();
password.clear();
@ -1452,16 +1463,17 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode
pathStart = hierStart;
if (hierStart < hierEnd)
setPath(QStringView(url).sliced(hierStart, hierEnd - hierStart));
setPath(QStringView(url).sliced(hierStart, hierEnd - hierStart), parsingMode);
else
path.clear();
}
if (size_t(question) < size_t(hash))
setQuery(QStringView(url).sliced(question + 1, qMin<size_t>(hash, len) - question - 1));
setQuery(QStringView(url).sliced(question + 1, qMin<size_t>(hash, len) - question - 1),
parsingMode);
if (hash != -1)
setFragment(QStringView(url).sliced(hash + 1, len - hash - 1));
setFragment(QStringView(url).sliced(hash + 1, len - hash - 1), parsingMode);
if (error || parsingMode == QUrl::TolerantMode)
return;
@ -2048,7 +2060,7 @@ void QUrl::setUserInfo(const QString &userInfo, ParsingMode mode)
return;
}
d->setUserInfo(trimmed);
d->setUserInfo(trimmed, mode);
if (userInfo.isNull()) {
// QUrlPrivate::setUserInfo cleared almost everything
// but it leaves the UserName bit set
@ -2114,13 +2126,7 @@ void QUrl::setUserName(const QString &userName, ParsingMode mode)
detach();
d->clearError();
QString data = userName;
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::UserName);
mode = TolerantMode;
}
d->setUserName(data);
d->setUserName(userName, mode);
if (userName.isNull())
d->sectionIsPresent &= ~QUrlPrivate::UserName;
else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::UserName, userName))
@ -2177,13 +2183,7 @@ void QUrl::setPassword(const QString &password, ParsingMode mode)
detach();
d->clearError();
QString data = password;
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Password);
mode = TolerantMode;
}
d->setPassword(data);
d->setPassword(password, mode);
if (password.isNull())
d->sectionIsPresent &= ~QUrlPrivate::Password;
else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Password, password))
@ -2241,7 +2241,7 @@ void QUrl::setHost(const QString &host, ParsingMode mode)
QString data = host;
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Host);
data.replace(u'%', "%25"_L1);
mode = TolerantMode;
}
@ -2364,13 +2364,7 @@ void QUrl::setPath(const QString &path, ParsingMode mode)
detach();
d->clearError();
QString data = path;
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Path);
mode = TolerantMode;
}
d->setPath(data);
d->setPath(path, mode);
// optimized out, since there is no path delimiter
// if (path.isNull())
@ -2500,13 +2494,7 @@ void QUrl::setQuery(const QString &query, ParsingMode mode)
detach();
d->clearError();
QString data = query;
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Query);
mode = TolerantMode;
}
d->setQuery(data);
d->setQuery(query, mode);
if (query.isNull())
d->sectionIsPresent &= ~QUrlPrivate::Query;
else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Query, query))
@ -2598,13 +2586,7 @@ void QUrl::setFragment(const QString &fragment, ParsingMode mode)
detach();
d->clearError();
QString data = fragment;
if (mode == DecodedMode) {
parseDecodedComponent(data, QUrlPrivate::Fragment);
mode = TolerantMode;
}
d->setFragment(data);
d->setFragment(fragment, mode);
if (fragment.isNull())
d->sectionIsPresent &= ~QUrlPrivate::Fragment;
else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Fragment, fragment))

View File

@ -25,6 +25,8 @@ QT_BEGIN_NAMESPACE
extern Q_AUTOTEST_EXPORT qsizetype qt_urlRecode(QString &appendTo, QStringView url,
QUrl::ComponentFormattingOptions encoding,
const ushort *tableModifications = nullptr);
qsizetype qt_encodeFromUser(QString &appendTo, const QString &input,
const ushort *tableModifications);
// in qurlidna.cpp
enum AceLeadingDot { AllowLeadingDot, ForbidLeadingDot };

View File

@ -660,4 +660,22 @@ qt_urlRecode(QString &appendTo, QStringView in,
reinterpret_cast<const char16_t *>(in.end()), encoding, actionTable, false);
}
qsizetype qt_encodeFromUser(QString &appendTo, const QString &in, const ushort *tableModifications)
{
uchar actionTable[sizeof defaultActionTable];
memcpy(actionTable, defaultActionTable, sizeof actionTable);
// Different defaults to the regular encoded-to-encoded recoding
actionTable['[' - ' '] = EncodeCharacter;
actionTable[']' - ' '] = EncodeCharacter;
if (tableModifications) {
for (const ushort *p = tableModifications; *p; ++p)
actionTable[uchar(*p) - ' '] = *p >> 8;
}
return recode(appendTo, reinterpret_cast<const char16_t *>(in.begin()),
reinterpret_cast<const char16_t *>(in.end()), {}, actionTable, true);
}
QT_END_NAMESPACE