Relax QHttpHeaders value field checks to allow UTF-8
UTF-8 doesn't per se contain illegal characters from an HTTP header RFC point of view. UTF-8 is the dominant character encoding outside of ASCII, and is used directly at least with HTTP multipart headers.

[ChangeLog][QtNetwork][QHttpHeaders] Allows UTF-8 in header values now.

Task-number: QTBUG-125985
Pick-to: 6.7
Change-Id: I2d8d8f00855c96c1d2ba190f2e27e2d48fcd4975
Reviewed-by: Marc Mutz <marc.mutz@qt.io>
(cherry picked from commit f39b39b8c72b5c3428396d5e74cf15cafd5bbc42)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
parent
41009cade5
commit
34f6210bb6
@ -988,50 +988,6 @@ QDebug operator<<(QDebug debug, const QHttpHeaders &headers)
|
||||
}
|
||||
#endif
|
||||
|
||||
// A clarification on string encoding:
|
||||
// Setters and getters only accept names and values that are Latin-1 representable:
|
||||
// Either they are directly ASCII/Latin-1, or if they are UTF-X, they only use the first 256
|
||||
// of the Unicode code points. For example using a '€' (U+20AC) in value would yield a warning
|
||||
// and the call is ignored.
|
||||
// Furthermore the 'name' has more strict rules than the 'value'
|
||||
|
||||
// TODO FIXME REMOVEME once this is merged:
|
||||
// https://codereview.qt-project.org/c/qt/qtbase/+/508829
|
||||
// Returns true if the UTF-8 encoded string `s` only contains characters that
// can be represented in Latin-1 (code points U+0000..U+00FF), false otherwise.
//
// In UTF-8, such characters take one of exactly two shapes:
//   - a single octet below 0x80 (US-ASCII), or
//   - a leader octet 0xC2 or 0xC3 followed by exactly one continuation
//     octet of the form 0b10XX'XXXX (U+0080..U+00FF).
// Everything else is rejected: leaders 0xC0/0xC1 (US-ASCII encoded in two
// octets), leaders above 0xC3 (outside Latin-1), longer sequences, stray
// continuation octets, and a sequence that ends right after a leader.
static bool isUtf8Latin1Representable(QUtf8StringView s) noexcept
{
    // True while the previous octet was a two-octet leader that still
    // needs its single continuation octet.
    bool expectTrailer = false;
    for (unsigned char octet : s) {
        if (expectTrailer) {
            const bool isTrailer = (octet & 0xC0) == 0x80;
            if (!isTrailer)
                return false; // invalid sequence: leader not followed by a continuation
            expectTrailer = false; // only one continuation allowed
            continue;
        }
        if (octet < 0x80)
            continue; // US-ASCII
        // The only acceptable non-ASCII start is a two-octet leader whose
        // payload bits are 0b00010 or 0b00011, i.e. exactly 0xC2 or 0xC3.
        if (octet != 0xC2 && octet != 0xC3)
            return false; // invalid sequence or outside of L1
        expectTrailer = true;
    }
    // A pending leader at the end is a premature end of sequence.
    return !expectTrailer;
}
|
||||
|
||||
static constexpr auto isValidHttpHeaderNameChar = [](uchar c) noexcept
|
||||
{
|
||||
// RFC 9110 Chapters "5.1 Field Names" and "5.6.2 Tokens"
|
||||
@ -1106,8 +1062,10 @@ static bool headerValueValidImpl(QLatin1StringView value) noexcept
|
||||
|
||||
// Checks whether `value`, given as UTF-8, is acceptable as an HTTP header
// field value. Returns true if every octet of `value` passes
// isValidHttpHeaderValueChar, false otherwise.
//
// No Latin-1 representability pre-check is done here: such a check would
// reject any character beyond U+00FF (for example '€'), which contradicts
// the intent of accepting UTF-8 in header values.
static bool headerValueValidImpl(QUtf8StringView value) noexcept
{
    // UTF-8 byte sequences are also used as values directly
    // => allow them as such. UTF-8 byte sequences for characters
    // outside of ASCII should all fit into obs-text (>= 0x80)
    // (see isValidHttpHeaderValueChar)
    return std::all_of(value.begin(), value.end(), isValidHttpHeaderValueChar);
}
|
||||
|
||||
|
@ -436,9 +436,6 @@ void tst_QHttpHeaders::headerValueField()
|
||||
QRegularExpression re("HTTP header value contained illegal character*");
|
||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x08" "bar"); // BS
|
||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x1B" "bar"); // ESC
|
||||
// Disallowed UTF-8 characters
|
||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u8"€");
|
||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u8"𝒜𝒴𝟘𝟡𐎀𐎜𐒀𐒐𝓐𝓩𝔸𝔹𝕀𝕁𝕌𝕍𓂀𓂁𓃀𓃁𓇋𓇌𓉐𓉑𓋴𓋵𓎡𓎢𓎣𓏏");
|
||||
// Disallowed UTF-16 characters
|
||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"€");
|
||||
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"𝒜𝒴𝟘𝟡𐎀𐎜𐒀𐒐𝓐𝓩𝔸𝔹𝕀𝕁𝕌𝕍𓂀𓂁𓃀𓃁𓇋𓇌𓉐𓉑𓋴𓋵𓎡𓎢𓎣𓏏");
|
||||
|
Loading…
x
Reference in New Issue
Block a user