Relax QHttpHeaders value field checks to allow UTF-8
UTF-8 doesn't per se contain illegal characters from an HTTP header RFC point of view. UTF-8 is the dominant character encoding outside of ASCII, and is used directly at least with HTTP multipart headers.

[ChangeLog][QtNetwork][QHttpHeaders] Allows UTF-8 in header values now.

Task-number: QTBUG-125985
Pick-to: 6.7
Change-Id: I2d8d8f00855c96c1d2ba190f2e27e2d48fcd4975
Reviewed-by: Marc Mutz <marc.mutz@qt.io>
(cherry picked from commit f39b39b8c72b5c3428396d5e74cf15cafd5bbc42)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
parent
41009cade5
commit
34f6210bb6
@ -988,50 +988,6 @@ QDebug operator<<(QDebug debug, const QHttpHeaders &headers)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// A clarification on string encoding:
|
|
||||||
// Setters and getters only accept names and values that are Latin-1 representable:
|
|
||||||
// Either they are directly ASCII/Latin-1, or if they are UTF-X, they only use first 256
|
|
||||||
// of the unicode points. For example using a '€' (U+20AC) in value would yield a warning
|
|
||||||
// and the call is ignored.
|
|
||||||
// Furthermore the 'name' has more strict rules than the 'value'
|
|
||||||
|
|
||||||
// TODO FIXME REMOVEME once this is merged:
|
|
||||||
// https://codereview.qt-project.org/c/qt/qtbase/+/508829
|
|
||||||
// Returns true if every character encoded in the UTF-8 view s lies in the
// Latin-1 range (U+0000..U+00FF), and false otherwise.
//
// Accepted input consists solely of single-octet US-ASCII characters and
// two-octet sequences whose leader is 0xC2 or 0xC3 (code points
// U+0080..U+00FF). Overlong two-octet encodings of US-ASCII, leaders of
// longer sequences, stray continuation octets, and a leader that is not
// followed by exactly one continuation octet all yield false.
// An empty view yields true.
static bool isUtf8Latin1Representable(QUtf8StringView s) noexcept
|
|
||||||
{
|
|
||||||
// Latin-1 (L1) encoded as UTF-8 takes one of at most two forms:
|
|
||||||
// - 0b0XXX'XXXX - US-ASCII
|
|
||||||
// - 0b1100'00XX 0b10XX'XXXX - at most 8 non-zero LSB bits allowed in L1
|
|
||||||
bool inMultibyte = false;
|
|
||||||
for (unsigned char c : s) {
|
|
||||||
if (c < 128) { // US-ASCII
|
|
||||||
if (inMultibyte)
|
|
||||||
return false; // invalid sequence
|
|
||||||
} else {
|
|
||||||
// decode as UTF-8:
|
|
||||||
if ((c & 0b1110'0000) == 0b1100'0000) { // two-octet UTF-8 leader
|
|
||||||
if (inMultibyte)
|
|
||||||
return false; // invalid sequence
|
|
||||||
inMultibyte = true;
|
|
||||||
const auto bits_7_to_11 = c & 0b0001'1111;
|
|
||||||
if (bits_7_to_11 < 0b10)
|
|
||||||
return false; // invalid sequence (US-ASCII encoded in two octets)
|
|
||||||
if (bits_7_to_11 > 0b11) // more than the two LSB
|
|
||||||
return false; // outside L1
|
|
||||||
} else if ((c & 0b1100'0000) == 0b1000'0000) { // trailing UTF-8 octet
|
|
||||||
if (!inMultibyte)
|
|
||||||
return false; // invalid sequence
|
|
||||||
inMultibyte = false; // only one continuation allowed
|
|
||||||
} else {
|
|
||||||
return false; // invalid sequence or outside of L1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (inMultibyte)
|
|
||||||
return false; // invalid sequence: premature end
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static constexpr auto isValidHttpHeaderNameChar = [](uchar c) noexcept
|
static constexpr auto isValidHttpHeaderNameChar = [](uchar c) noexcept
|
||||||
{
|
{
|
||||||
// RFC 9110 Chapters "5.1 Field Names" and "5.6.2 Tokens"
|
// RFC 9110 Chapters "5.1 Field Names" and "5.6.2 Tokens"
|
||||||
@ -1106,8 +1062,10 @@ static bool headerValueValidImpl(QLatin1StringView value) noexcept
|
|||||||
|
|
||||||
static bool headerValueValidImpl(QUtf8StringView value) noexcept
|
static bool headerValueValidImpl(QUtf8StringView value) noexcept
|
||||||
{
|
{
|
||||||
if (!isUtf8Latin1Representable(value)) // TODO FIXME see the function
|
// UTF-8 byte sequences are also used as values directly
|
||||||
return false;
|
// => allow them as such. UTF-8 byte sequences for characters
|
||||||
|
// outside of ASCII should all fit into obs-text (>= 0x80)
|
||||||
|
// (see isValidHttpHeaderValueChar)
|
||||||
return std::all_of(value.begin(), value.end(), isValidHttpHeaderValueChar);
|
return std::all_of(value.begin(), value.end(), isValidHttpHeaderValueChar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -436,9 +436,6 @@ void tst_QHttpHeaders::headerValueField()
|
|||||||
QRegularExpression re("HTTP header value contained illegal character*");
|
QRegularExpression re("HTTP header value contained illegal character*");
|
||||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x08" "bar"); // BS
|
TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x08" "bar"); // BS
|
||||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x1B" "bar"); // ESC
|
TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x1B" "bar"); // ESC
|
||||||
// Disallowed UTF-8 characters
|
|
||||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u8"€");
|
|
||||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u8"𝒜𝒴𝟘𝟡𐎀𐎜𐒀𐒐𝓐𝓩𝔸𝔹𝕀𝕁𝕌𝕍𓂀𓂁𓃀𓃁𓇋𓇌𓉐𓉑𓋴𓋵𓎡𓎢𓎣𓏏");
|
|
||||||
// Disallowed UTF-16 characters
|
// Disallowed UTF-16 characters
|
||||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"€");
|
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"€");
|
||||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"𝒜𝒴𝟘𝟡𐎀𐎜𐒀𐒐𝓐𝓩𝔸𝔹𝕀𝕁𝕌𝕍𓂀𓂁𓃀𓃁𓇋𓇌𓉐𓉑𓋴𓋵𓎡𓎢𓎣𓏏");
|
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"𝒜𝒴𝟘𝟡𐎀𐎜𐒀𐒐𝓐𝓩𝔸𝔹𝕀𝕁𝕌𝕍𓂀𓂁𓃀𓃁𓇋𓇌𓉐𓉑𓋴𓋵𓎡𓎢𓎣𓏏");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user