QUrl: Improve Punycode overflow handling
Add more overflow checks from the sample code in RFC 3492. Also check if a code point to be inserted into output is in the allowable range for Unicode. Rewrite all overflow checks to use {add,mul}_overflow() functions. Do not try to process any inputs that are too long to be part of a valid domain name label. This fixes a test in tst_qurlinternal. Fixes: QTBUG-95689 Change-Id: Ice0b3cd640d8a688b63a791192ef2fa2f13444be Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> (cherry picked from commit fe9ddbe197d6ce4ff2634415c621c8fd9fe5810a) Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
parent
b4a1265b15
commit
e4c90e81fe
@ -42,6 +42,7 @@
|
|||||||
|
|
||||||
#include <QtCore/qstringlist.h>
|
#include <QtCore/qstringlist.h>
|
||||||
#include <QtCore/private/qstringiterator_p.h>
|
#include <QtCore/private/qstringiterator_p.h>
|
||||||
|
#include <QtCore/private/qnumeric_p.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
@ -57,7 +58,6 @@
|
|||||||
QT_BEGIN_NAMESPACE
|
QT_BEGIN_NAMESPACE
|
||||||
|
|
||||||
// needed by the punycode encoder/decoder
|
// needed by the punycode encoder/decoder
|
||||||
#define Q_MAXINT ((uint)((uint)(-1)>>1))
|
|
||||||
static const uint base = 36;
|
static const uint base = 36;
|
||||||
static const uint tmin = 1;
|
static const uint tmin = 1;
|
||||||
static const uint tmax = 26;
|
static const uint tmax = 26;
|
||||||
@ -66,6 +66,8 @@ static const uint damp = 700;
|
|||||||
static const uint initial_bias = 72;
|
static const uint initial_bias = 72;
|
||||||
static const uint initial_n = 128;
|
static const uint initial_n = 128;
|
||||||
|
|
||||||
|
static constexpr unsigned MaxDomainLabelLength = 63;
|
||||||
|
|
||||||
struct NameprepCaseFoldingEntry {
|
struct NameprepCaseFoldingEntry {
|
||||||
char32_t uc;
|
char32_t uc;
|
||||||
char16_t mapping[4];
|
char16_t mapping[4];
|
||||||
@ -2132,7 +2134,7 @@ static const QChar *qt_find_nonstd3(QStringView in, Qt::CaseSensitivity cs)
|
|||||||
const QChar * const uc = in.data();
|
const QChar * const uc = in.data();
|
||||||
const qsizetype len = in.size();
|
const qsizetype len = in.size();
|
||||||
|
|
||||||
if (len > 63)
|
if (len > MaxDomainLabelLength)
|
||||||
return uc;
|
return uc;
|
||||||
|
|
||||||
for (qsizetype i = 0; i < len; ++i) {
|
for (qsizetype i = 0; i < len; ++i) {
|
||||||
@ -2219,6 +2221,12 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
|
|||||||
uint delta = 0;
|
uint delta = 0;
|
||||||
uint bias = initial_bias;
|
uint bias = initial_bias;
|
||||||
|
|
||||||
|
// Do not try to encode strings that certainly will result in output
|
||||||
|
// that is longer than allowable domain name label length. Note that
|
||||||
|
// non-BMP codepoints are encoded as two QChars.
|
||||||
|
if (in.length() > MaxDomainLabelLength * 2)
|
||||||
|
return;
|
||||||
|
|
||||||
int outLen = output->length();
|
int outLen = output->length();
|
||||||
output->resize(outLen + in.length());
|
output->resize(outLen + in.length());
|
||||||
|
|
||||||
@ -2262,39 +2270,37 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
|
|||||||
// while there are still unprocessed non-basic code points left in
|
// while there are still unprocessed non-basic code points left in
|
||||||
// the input string...
|
// the input string...
|
||||||
while (h < inputLength) {
|
while (h < inputLength) {
|
||||||
// find the character in the input string with the lowest
|
// find the character in the input string with the lowest unprocessed value.
|
||||||
// unicode value.
|
uint m = std::numeric_limits<uint>::max();
|
||||||
uint m = Q_MAXINT;
|
|
||||||
for (QStringIterator iter(in); iter.hasNext();) {
|
for (QStringIterator iter(in); iter.hasNext();) {
|
||||||
auto c = iter.nextUnchecked();
|
auto c = iter.nextUnchecked();
|
||||||
|
static_assert(std::numeric_limits<decltype(m)>::max()
|
||||||
|
>= std::numeric_limits<decltype(c)>::max(),
|
||||||
|
"Punycode uint should be able to cover all codepoints");
|
||||||
if (c >= n && c < m)
|
if (c >= n && c < m)
|
||||||
m = c;
|
m = c;
|
||||||
}
|
}
|
||||||
|
|
||||||
// reject out-of-bounds unicode characters
|
// delta = delta + (m - n) * (h + 1), fail on overflow
|
||||||
if (m - n > (Q_MAXINT - delta) / (h + 1)) {
|
uint tmp;
|
||||||
|
if (mul_overflow<uint>(m - n, h + 1, &tmp) || add_overflow<uint>(delta, tmp, &delta)) {
|
||||||
output->truncate(outLen);
|
output->truncate(outLen);
|
||||||
return; // punycode_overflow
|
return; // punycode_overflow
|
||||||
}
|
}
|
||||||
|
|
||||||
delta += (m - n) * (h + 1);
|
|
||||||
n = m;
|
n = m;
|
||||||
|
|
||||||
for (QStringIterator iter(in); iter.hasNext();) {
|
for (QStringIterator iter(in); iter.hasNext();) {
|
||||||
auto c = iter.nextUnchecked();
|
auto c = iter.nextUnchecked();
|
||||||
|
|
||||||
// increase delta until we reach the character with the
|
// increase delta until we reach the character processed in this iteration;
|
||||||
// lowest unicode code. fail if delta overflows.
|
// fail if delta overflows.
|
||||||
if (c < n) {
|
if (c < n) {
|
||||||
++delta;
|
if (add_overflow<uint>(delta, 1, &delta)) {
|
||||||
if (!delta) {
|
|
||||||
output->truncate(outLen);
|
output->truncate(outLen);
|
||||||
return; // punycode_overflow
|
return; // punycode_overflow
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if j is the index of the character with the lowest
|
|
||||||
// unicode code...
|
|
||||||
if (c == n) {
|
if (c == n) {
|
||||||
appendEncode(output, delta, bias);
|
appendEncode(output, delta, bias);
|
||||||
|
|
||||||
@ -2319,6 +2325,12 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
|
|||||||
uint i = 0;
|
uint i = 0;
|
||||||
uint bias = initial_bias;
|
uint bias = initial_bias;
|
||||||
|
|
||||||
|
// Do not try to decode strings longer than allowable for a domain label.
|
||||||
|
// Non-ASCII strings are not allowed here anyway, so there is no need
|
||||||
|
// to account for surrogates.
|
||||||
|
if (pc.length() > MaxDomainLabelLength)
|
||||||
|
return QString();
|
||||||
|
|
||||||
// strip any ACE prefix
|
// strip any ACE prefix
|
||||||
int start = pc.startsWith(QLatin1String("xn--")) ? 4 : 0;
|
int start = pc.startsWith(QLatin1String("xn--")) ? 4 : 0;
|
||||||
if (!start)
|
if (!start)
|
||||||
@ -2352,31 +2364,48 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
|
|||||||
else if (digit - 97 < 26) digit -= 97;
|
else if (digit - 97 < 26) digit -= 97;
|
||||||
else digit = base;
|
else digit = base;
|
||||||
|
|
||||||
// reject out of range digits
|
// Fail if the code point has no digit value
|
||||||
if (digit >= base || digit > (Q_MAXINT - i) / w)
|
if (digit >= base)
|
||||||
return QStringLiteral("");
|
return QString();
|
||||||
|
|
||||||
i += (digit * w);
|
// i = i + digit * w, fail on overflow
|
||||||
|
uint tmp;
|
||||||
|
if (mul_overflow<uint>(digit, w, &tmp) || add_overflow<uint>(i, tmp, &i))
|
||||||
|
return QString();
|
||||||
|
|
||||||
// detect threshold to stop reading delta digits
|
// detect threshold to stop reading delta digits
|
||||||
uint t;
|
uint t;
|
||||||
if (k <= bias) t = tmin;
|
if (k <= bias) t = tmin;
|
||||||
else if (k >= bias + tmax) t = tmax;
|
else if (k >= bias + tmax) t = tmax;
|
||||||
else t = k - bias;
|
else t = k - bias;
|
||||||
|
|
||||||
if (digit < t) break;
|
if (digit < t) break;
|
||||||
|
|
||||||
w *= (base - t);
|
// w = w * (base - t), fail on overflow
|
||||||
|
if (mul_overflow<uint>(w, base - t, &w))
|
||||||
|
return QString();
|
||||||
}
|
}
|
||||||
|
|
||||||
// find new bias and calculate the next non-basic code
|
// find new bias and calculate the next non-basic code
|
||||||
// character.
|
// character.
|
||||||
uint outputLength = static_cast<uint>(output.length());
|
uint outputLength = static_cast<uint>(output.length());
|
||||||
bias = adapt(i - oldi, outputLength + 1, oldi == 0);
|
bias = adapt(i - oldi, outputLength + 1, oldi == 0);
|
||||||
n += i / (outputLength + 1);
|
|
||||||
|
// n = n + i div (length(output) + 1), fail on overflow
|
||||||
|
if (add_overflow<uint>(n, i / (outputLength + 1), &n))
|
||||||
|
return QString();
|
||||||
|
|
||||||
// allow the deltas to wrap around
|
// allow the deltas to wrap around
|
||||||
i %= (outputLength + 1);
|
i %= (outputLength + 1);
|
||||||
|
|
||||||
|
// if n is a basic code point then fail; this should not happen with
|
||||||
|
// correct implementation of Punycode, but check just n case.
|
||||||
|
if (n < initial_n) {
|
||||||
|
// Don't use Q_ASSERT() to avoid possibility of DoS
|
||||||
|
qWarning("Attempt to insert a basic codepoint. Unhandled overflow?");
|
||||||
|
return QString();
|
||||||
|
}
|
||||||
|
|
||||||
// Surrogates should normally be rejected later by other IDNA code.
|
// Surrogates should normally be rejected later by other IDNA code.
|
||||||
// But because of Qt's use of UTF-16 to represent strings the
|
// But because of Qt's use of UTF-16 to represent strings the
|
||||||
// IDNA code is not able to distinguish characters represented as pairs
|
// IDNA code is not able to distinguish characters represented as pairs
|
||||||
@ -2385,7 +2414,10 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
|
|||||||
//
|
//
|
||||||
// Allowing surrogates would lead to non-unique (after normalization)
|
// Allowing surrogates would lead to non-unique (after normalization)
|
||||||
// encoding of strings with non-BMP characters.
|
// encoding of strings with non-BMP characters.
|
||||||
if (QChar::isSurrogate(n))
|
//
|
||||||
|
// Punycode that encodes characters outside the Unicode range is also
|
||||||
|
// invalid and is rejected here.
|
||||||
|
if (QChar::isSurrogate(n) || n > QChar::LastValidCodePoint)
|
||||||
return QString();
|
return QString();
|
||||||
|
|
||||||
// insert the character n at position i
|
// insert the character n at position i
|
||||||
|
@ -703,8 +703,6 @@ void tst_QUrlInternal::ace_testsuite()
|
|||||||
|
|
||||||
QString domain = in + suffix;
|
QString domain = in + suffix;
|
||||||
QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix);
|
QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix);
|
||||||
QEXPECT_FAIL("punycode-overflow-2", "QTBUG-95689: Missing oweflow check in punycode decoder",
|
|
||||||
Abort);
|
|
||||||
if (fromace != ".")
|
if (fromace != ".")
|
||||||
QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix);
|
QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix);
|
||||||
QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix);
|
QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user