QUrl: Implement UTS #46
UTS #46 (https://unicode.org/reports/tr46/) is a successor, published by Unicode, to the IDNA 2003/2008 standards. The current implementation uses nontransitional processing by default. An optional argument is added to QUrl::toAce() and QUrl::fromAce() to allow using transitional processing and to ignore the IDN whitelist. [ChangeLog][QtCore][QUrl] ACE processing is now performed according to the UTS #46 standard based on IDNA 2008 instead of IDNA 2003. Task-number: QTBUG-85371 Change-Id: I46b2e86792bc9699cb6961bae8e283fbff72f874 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
f5360b7c72
commit
4bf3010378
@ -389,6 +389,25 @@
|
|||||||
\sa fromUserInput()
|
\sa fromUserInput()
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\enum QUrl::AceProcessingOption
|
||||||
|
\since 6.3
|
||||||
|
|
||||||
|
The ACE processing options control the way URLs are transformed to and from
|
||||||
|
ASCII-Compatible Encoding.
|
||||||
|
|
||||||
|
\value IgnoreIDNWhitelist Ignore the IDN whitelist when converting URLs
|
||||||
|
to Unicode.
|
||||||
|
\value AceTransitionalProcessing Use transitional processing described in UTS #46.
|
||||||
|
This allows better compatibility with the IDNA 2003
|
||||||
|
specification.
|
||||||
|
|
||||||
|
The default is to use nontransitional processing and to allow non-ASCII
|
||||||
|
characters only inside URLs whose top-level domains are listed in the IDN whitelist.
|
||||||
|
|
||||||
|
\sa toAce(), fromAce(), idnWhitelist()
|
||||||
|
*/
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\fn QUrl::QUrl(QUrl &&other)
|
\fn QUrl::QUrl(QUrl &&other)
|
||||||
|
|
||||||
@ -1177,7 +1196,7 @@ inline void QUrlPrivate::appendHost(QString &appendTo, QUrl::FormattingOptions o
|
|||||||
// this is either an IPv4Address or a reg-name
|
// this is either an IPv4Address or a reg-name
|
||||||
// if it is a reg-name, it is already stored in Unicode form
|
// if it is a reg-name, it is already stored in Unicode form
|
||||||
if (options & QUrl::EncodeUnicode && !(options & 0x4000000))
|
if (options & QUrl::EncodeUnicode && !(options & 0x4000000))
|
||||||
appendTo += qt_ACE_do(host, ToAceOnly, AllowLeadingDot);
|
appendTo += qt_ACE_do(host, ToAceOnly, AllowLeadingDot, {});
|
||||||
else
|
else
|
||||||
appendTo += host;
|
appendTo += host;
|
||||||
}
|
}
|
||||||
@ -1339,7 +1358,7 @@ inline bool QUrlPrivate::setHost(const QString &value, int from, int iend, QUrl:
|
|||||||
// Unicode encoding (some non-ASCII characters case-fold to digits
|
// Unicode encoding (some non-ASCII characters case-fold to digits
|
||||||
// when nameprepping is done)
|
// when nameprepping is done)
|
||||||
//
|
//
|
||||||
// The qt_ACE_do function below applies nameprepping and the STD3 check.
|
// The qt_ACE_do function below does IDNA normalization and the STD3 check.
|
||||||
// That means a Unicode string may become an IPv4 address, but it cannot
|
// That means a Unicode string may become an IPv4 address, but it cannot
|
||||||
// produce a '[' or a '%'.
|
// produce a '[' or a '%'.
|
||||||
|
|
||||||
@ -1358,7 +1377,7 @@ inline bool QUrlPrivate::setHost(const QString &value, int from, int iend, QUrl:
|
|||||||
return setHost(s, 0, s.length(), QUrl::StrictMode);
|
return setHost(s, 0, s.length(), QUrl::StrictMode);
|
||||||
}
|
}
|
||||||
|
|
||||||
s = qt_ACE_do(QStringView(begin, len), NormalizeAce, ForbidLeadingDot);
|
s = qt_ACE_do(value.mid(from, iend - from), NormalizeAce, ForbidLeadingDot, {});
|
||||||
if (s.isEmpty()) {
|
if (s.isEmpty()) {
|
||||||
setError(InvalidRegNameError, value);
|
setError(InvalidRegNameError, value);
|
||||||
return false;
|
return false;
|
||||||
@ -3013,50 +3032,72 @@ QByteArray QUrl::toPercentEncoding(const QString &input, const QByteArray &exclu
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\since 4.2
|
\since 6.3
|
||||||
|
|
||||||
Returns the Unicode form of the given domain name
|
Returns the Unicode form of the given domain name
|
||||||
\a domain, which is encoded in the ASCII-Compatible Encoding (ACE).
|
\a domain, which is encoded in the ASCII-Compatible Encoding (ACE).
|
||||||
|
The output can be customized by passing flags with \a options.
|
||||||
The result of this function is considered equivalent to \a domain.
|
The result of this function is considered equivalent to \a domain.
|
||||||
|
|
||||||
If the value in \a domain cannot be encoded, it will be converted
|
If the value in \a domain cannot be encoded, it will be converted
|
||||||
to QString and returned.
|
to QString and returned.
|
||||||
|
|
||||||
The ASCII Compatible Encoding (ACE) is defined by RFC 3490, RFC 3491
|
The ASCII-Compatible Encoding (ACE) is defined by RFC 3490, RFC 3491
|
||||||
and RFC 3492. It is part of the Internationalizing Domain Names in
|
and RFC 3492 and updated by the Unicode Technical Standard #46. It is part
|
||||||
Applications (IDNA) specification, which allows for domain names
|
of the Internationalizing Domain Names in Applications (IDNA) specification,
|
||||||
(like \c "example.com") to be written using international
|
which allows for domain names (like \c "example.com") to be written using
|
||||||
characters.
|
non-US-ASCII characters.
|
||||||
|
*/
|
||||||
|
QString QUrl::fromAce(const QByteArray &domain, QUrl::AceProcessingOptions options)
|
||||||
|
{
|
||||||
|
return qt_ACE_do(QString::fromLatin1(domain), NormalizeAce,
|
||||||
|
ForbidLeadingDot /*FIXME: make configurable*/, options);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
|
||||||
|
/*!
|
||||||
|
\since 4.2
|
||||||
|
\overload
|
||||||
*/
|
*/
|
||||||
QString QUrl::fromAce(const QByteArray &domain)
|
QString QUrl::fromAce(const QByteArray &domain)
|
||||||
{
|
{
|
||||||
QVarLengthArray<char16_t> buffer;
|
return fromAce(domain, {});
|
||||||
buffer.resize(domain.size());
|
|
||||||
qt_from_latin1(buffer.data(), domain.data(), domain.size());
|
|
||||||
return qt_ACE_do(QStringView{buffer.data(), buffer.size()},
|
|
||||||
NormalizeAce, ForbidLeadingDot /*FIXME: make configurable*/);
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\since 4.2
|
\since 6.3
|
||||||
|
|
||||||
Returns the ASCII-Compatible Encoding of the given domain name \a domain.
|
Returns the ASCII-Compatible Encoding of the given domain name \a domain.
|
||||||
|
The output can be customized by passing flags with \a options.
|
||||||
The result of this function is considered equivalent to \a domain.
|
The result of this function is considered equivalent to \a domain.
|
||||||
|
|
||||||
The ASCII-Compatible Encoding (ACE) is defined by RFC 3490, RFC 3491
|
The ASCII-Compatible Encoding (ACE) is defined by RFC 3490, RFC 3491
|
||||||
and RFC 3492. It is part of the Internationalizing Domain Names in
|
and RFC 3492 and updated by the Unicode Technical Standard #46. It is part
|
||||||
Applications (IDNA) specification, which allows for domain names
|
of the Internationalizing Domain Names in Applications (IDNA) specification,
|
||||||
(like \c "example.com") to be written using international
|
which allows for domain names (like \c "example.com") to be written using
|
||||||
characters.
|
non-US-ASCII characters.
|
||||||
|
|
||||||
This function returns an empty QByteArray if \a domain is not a valid
|
This function returns an empty QByteArray if \a domain is not a valid
|
||||||
hostname. Note, in particular, that IPv6 literals are not valid domain
|
hostname. Note, in particular, that IPv6 literals are not valid domain
|
||||||
names.
|
names.
|
||||||
*/
|
*/
|
||||||
|
QByteArray QUrl::toAce(const QString &domain, AceProcessingOptions options)
|
||||||
|
{
|
||||||
|
return qt_ACE_do(domain, ToAceOnly, ForbidLeadingDot /*FIXME: make configurable*/, options)
|
||||||
|
.toLatin1();
|
||||||
|
}
|
||||||
|
|
||||||
|
#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
|
||||||
|
/*!
|
||||||
|
\since 4.2
|
||||||
|
\overload
|
||||||
|
*/
|
||||||
QByteArray QUrl::toAce(const QString &domain)
|
QByteArray QUrl::toAce(const QString &domain)
|
||||||
{
|
{
|
||||||
return qt_ACE_do(domain, ToAceOnly, ForbidLeadingDot /*FIXME: make configurable*/).toLatin1();
|
return toAce(domain, {});
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\internal
|
\internal
|
||||||
|
@ -281,8 +281,22 @@ public:
|
|||||||
NSURL *toNSURL() const Q_DECL_NS_RETURNS_AUTORELEASED;
|
NSURL *toNSURL() const Q_DECL_NS_RETURNS_AUTORELEASED;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
enum AceProcessingOption : unsigned int {
|
||||||
|
IgnoreIDNWhitelist = 0x1,
|
||||||
|
AceTransitionalProcessing = 0x2,
|
||||||
|
};
|
||||||
|
Q_DECLARE_FLAGS(AceProcessingOptions, AceProcessingOption)
|
||||||
|
|
||||||
|
#if QT_VERSION >= QT_VERSION_CHECK(7, 0, 0)
|
||||||
|
static QString fromAce(const QByteArray &, AceProcessingOptions options = {});
|
||||||
|
static QByteArray toAce(const QString &, AceProcessingOptions options = {});
|
||||||
|
#else
|
||||||
static QString fromAce(const QByteArray &);
|
static QString fromAce(const QByteArray &);
|
||||||
static QByteArray toAce(const QString &);
|
static QByteArray toAce(const QString &);
|
||||||
|
static QString fromAce(const QByteArray &, AceProcessingOptions options);
|
||||||
|
static QByteArray toAce(const QString &, AceProcessingOptions options);
|
||||||
|
#endif
|
||||||
|
|
||||||
static QStringList idnWhitelist();
|
static QStringList idnWhitelist();
|
||||||
static QStringList toStringList(const QList<QUrl> &uris, FormattingOptions options = FormattingOptions(PrettyDecoded));
|
static QStringList toStringList(const QList<QUrl> &uris, FormattingOptions options = FormattingOptions(PrettyDecoded));
|
||||||
static QList<QUrl> fromStringList(const QStringList &uris, ParsingMode mode = TolerantMode);
|
static QList<QUrl> fromStringList(const QStringList &uris, ParsingMode mode = TolerantMode);
|
||||||
@ -302,6 +316,7 @@ public:
|
|||||||
Q_DECLARE_SHARED(QUrl)
|
Q_DECLARE_SHARED(QUrl)
|
||||||
Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::ComponentFormattingOptions)
|
Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::ComponentFormattingOptions)
|
||||||
//Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::FormattingOptions)
|
//Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::FormattingOptions)
|
||||||
|
Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::AceProcessingOptions)
|
||||||
|
|
||||||
#ifndef Q_QDOC
|
#ifndef Q_QDOC
|
||||||
constexpr inline QUrl::FormattingOptions operator|(QUrl::UrlFormattingOption f1, QUrl::UrlFormattingOption f2)
|
constexpr inline QUrl::FormattingOptions operator|(QUrl::UrlFormattingOption f1, QUrl::UrlFormattingOption f2)
|
||||||
|
@ -65,7 +65,8 @@ extern Q_AUTOTEST_EXPORT qsizetype qt_urlRecode(QString &appendTo, QStringView u
|
|||||||
// in qurlidna.cpp
|
// in qurlidna.cpp
|
||||||
enum AceLeadingDot { AllowLeadingDot, ForbidLeadingDot };
|
enum AceLeadingDot { AllowLeadingDot, ForbidLeadingDot };
|
||||||
enum AceOperation { ToAceOnly, NormalizeAce };
|
enum AceOperation { ToAceOnly, NormalizeAce };
|
||||||
extern QString qt_ACE_do(QStringView domain, AceOperation op, AceLeadingDot dot);
|
extern QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot,
|
||||||
|
QUrl::AceProcessingOptions options);
|
||||||
extern Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output);
|
extern Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output);
|
||||||
extern Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc);
|
extern Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc);
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user