QFormDataBuilder: add options

There are (at least) three RFCs, all non-obsolete, purporting to guard
the formatting of multipart/form-data filename parameters, and they
all disagree: RFC 6266, RFC 7578 and RFC 8187. There is also
considerable implementation divergence.

So, to not have to hard-code any one of the different strategies, add
Options controlling the formatting.

Found to be required in implementation review.

Task-number: QTBUG-125985
Change-Id: Ibc82ff8a3460580ee70eafcaf9b88de36751940d
Reviewed-by: Marc Mutz <marc.mutz@qt.io>
Reviewed-by: Mate Barany <mate.barany@qt.io>
(cherry picked from commit 49ab4f156b7d1a0593f25680d0b841d2dc4c348e)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
Marc Mutz 2024-06-18 18:22:53 +02:00 committed by Qt Cherry-pick Bot
parent 03a95f5f20
commit 678c839575
3 changed files with 185 additions and 42 deletions

View File

@ -34,7 +34,7 @@ class QFormDataPartBuilderPrivate
{
public:
explicit QFormDataPartBuilderPrivate(QAnyStringView name);
QHttpPart build();
QHttpPart build(QFormDataBuilder::Options options);
QString m_name;
QByteArray m_mimeType;
@ -260,10 +260,12 @@ QFormDataPartBuilder QFormDataPartBuilder::setHeaders(const QHttpHeaders &header
header.
*/
QHttpPart QFormDataPartBuilderPrivate::build()
QHttpPart QFormDataPartBuilderPrivate::build(QFormDataBuilder::Options options)
{
QHttpPart httpPart;
using Opt = QFormDataBuilder::Option;
QByteArray headerValue;
headerValue += "form-data; name=\"";
@ -271,18 +273,39 @@ QHttpPart QFormDataPartBuilderPrivate::build()
headerValue += "\"";
if (!m_originalBodyName.isNull()) {
const bool utf8 = !QtPrivate::isAscii(m_originalBodyName);
const auto enc = utf8 ? m_originalBodyName.toUtf8() : m_originalBodyName.toLatin1();
enum class Encoding { ASCII, Latin1, Utf8 } encoding = Encoding::ASCII;
for (QChar c : std::as_const(m_originalBodyName)) {
if (c > u'\xff') {
encoding = Encoding::Utf8;
break;
} else if (c > u'\x7f') {
encoding = Encoding::Latin1;
}
}
QByteArray enc;
if ((options & Opt::PreferLatin1EncodedFilename) && encoding != Encoding::Utf8)
enc = m_originalBodyName.toLatin1();
else
enc = m_originalBodyName.toUtf8();
headerValue += "; filename=\"";
if (options & Opt::UseRfc7578PercentEncodedFilename)
headerValue += enc.toPercentEncoding();
else
escapeNameAndAppend(headerValue, enc);
headerValue += "\"";
if (utf8) {
if (encoding != Encoding::ASCII && !(options & Opt::OmitRfc8187EncodedFilename)) {
// For 'filename*' production see
// https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.1
// For providing both filename and filename* parameters see
// https://datatracker.ietf.org/doc/html/rfc6266#section-4.3 and
// https://datatracker.ietf.org/doc/html/rfc8187#section-4.2
headerValue += "; filename*=UTF-8''" + enc.toPercentEncoding();
if ((options & Opt::PreferLatin1EncodedFilename) && encoding == Encoding::Latin1)
headerValue += "; filename*=ISO-8859-1''";
else
headerValue += "; filename*=UTF-8''";
headerValue += enc.toPercentEncoding();
}
}
@ -366,6 +389,56 @@ const QFormDataPartBuilderPrivate* QFormDataPartBuilder::d_func() const
return &d->parts[m_index];
}
/*!
\enum QFormDataBuilder::Option
Options controlling buildMultiPart().
Several current RFCs disagree on how, exactly, to format
\c{multipart/form-data}. Instead of hard-coding any one RFC, these options
give you control over which RFC to follow.
\value Default The default values, designed to maximize interoperability in
general. All options named below are off.
\value OmitRfc8187EncodedFilename
When a body-part's file-name contains non-US-ASCII characters,
\l{https://datatracker.ietf.org/doc/html/rfc6266#section-4.3}
{RFC 6266 Section 4.3} suggests to use
\l{https://datatracker.ietf.org/doc/html/rfc8187}{RFC 8187}-style
encoding (\c{filename*=utf-8''...}). The more recent
\l{https://datatracker.ietf.org/doc/html/rfc7578#section-4.2}
{RFC 7578 Section 4.2}, however, bans the use of that mechanism.
Both RFCs are current as of this writing, so this option allows you
to choose which one to follow. The default is to include the
RFC 8187-encoded \c{filename*} alongside the unencoded \c{filename},
as suggested by RFC 6266.
\value UseRfc7578PercentEncodedFilename
When a body-part's file-name contains non-US-ASCII characters,
\l{https://datatracker.ietf.org/doc/html/rfc7578#section-4.2}
{RFC 7578 Section 4.2} suggests to use percent-encoding of the octets
of the UTF-8-encoded file-name. It goes on to note that many
implementations, however, do \e{not} percent-encode the UTF-8-encoded
file-name, but just emit "raw" UTF-8 (with \c{"} and \c{\} escaped
using \c{\}). This is the default of QFormDataBuilder, too.
\value PreferLatin1EncodedFilename
\l{https://datatracker.ietf.org/doc/html/rfc5987#section-3.2}
{RFC 5987 Section 3.2} required recipients to support ISO-8859-1
("Latin-1") encoding. When a body-part's file-name contains
non-US-ASCII characters that, however, fit into Latin-1, this option
prefers to use ISO-8859-1 encoding over UTF-8. The more recent
\{https://datatracker.ietf.org/doc/html/rfc8187#appendix-A}{RFC 8187}
no longer requires ISO-8859-1 support, so the default is to send all
non-US-ASCII file-names in UTF-8 encoding instead.
\value StrictRfc7578
This option combines other options to select strict
\l{https://datatracker.ietf.org/doc/html/rfc7578}{RFC 7578}
compliance.
*/
/*!
Constructs an empty QFormDataBuilder object.
*/
@ -419,19 +492,20 @@ QFormDataPartBuilder QFormDataBuilder::part(QAnyStringView name)
}
/*!
Constructs and returns a pointer to a QHttpMultipart object.
Constructs and returns a pointer to a QHttpMultipart object constructed
according to \a options.
\sa QHttpMultiPart
*/
std::unique_ptr<QHttpMultiPart> QFormDataBuilder::buildMultiPart()
std::unique_ptr<QHttpMultiPart> QFormDataBuilder::buildMultiPart(Options options)
{
Q_D(QFormDataBuilder);
auto multiPart = std::make_unique<QHttpMultiPart>(QHttpMultiPart::FormDataType);
for (auto &part : d->parts)
multiPart->append(part.build());
multiPart->append(part.build(options));
return multiPart;
}

View File

@ -9,6 +9,7 @@
#include <QtNetwork/qhttpmultipart.h>
#include <QtCore/qbytearray.h>
#include <QtCore/qflags.h>
#include <QtCore/qiodevice.h>
#include <QtCore/qstring.h>
@ -72,6 +73,16 @@ Q_DECLARE_SHARED(QFormDataPartBuilder)
class QFormDataBuilder
{
public:
enum class Option {
Default = 0x00,
OmitRfc8187EncodedFilename = 0x01,
UseRfc7578PercentEncodedFilename = 0x02,
PreferLatin1EncodedFilename = 0x04,
StrictRfc7578 = OmitRfc8187EncodedFilename | UseRfc7578PercentEncodedFilename,
};
Q_DECLARE_FLAGS(Options, Option)
Q_NETWORK_EXPORT QFormDataBuilder();
QFormDataBuilder(QFormDataBuilder &&other) noexcept : d_ptr(std::exchange(other.d_ptr, nullptr)) {}
@ -84,13 +95,14 @@ public:
Q_NETWORK_EXPORT ~QFormDataBuilder();
Q_NETWORK_EXPORT QFormDataPartBuilder part(QAnyStringView name);
Q_NETWORK_EXPORT std::unique_ptr<QHttpMultiPart> buildMultiPart();
Q_NETWORK_EXPORT std::unique_ptr<QHttpMultiPart> buildMultiPart(Options options = {});
private:
QFormDataBuilderPrivate *d_ptr;
Q_DECLARE_PRIVATE(QFormDataBuilder)
Q_DISABLE_COPY(QFormDataBuilder)
};
Q_DECLARE_OPERATORS_FOR_FLAGS(QFormDataBuilder::Options)
Q_DECLARE_SHARED(QFormDataBuilder)

View File

@ -18,6 +18,7 @@
#endif
#include <QtCore/qxpfunctional.h>
#include <string>
#include <type_traits>
using namespace Qt::StringLiterals;
@ -25,14 +26,15 @@ using namespace Qt::StringLiterals;
const auto CRLF = "\r\n"_ba;
Q_NEVER_INLINE static QByteArray
serialized_impl([[maybe_unused]] qxp::function_ref<QFormDataBuilder &(QFormDataBuilder &)> operations)
serialized_impl([[maybe_unused]] qxp::function_ref<QFormDataBuilder &(QFormDataBuilder &)> operations,
QFormDataBuilder::Options options = QFormDataBuilder::Option::Default)
{
#if defined(QT_UNDEFINED_SANITIZER) && !defined(QT_BUILD_INTERNAL)
QSKIP("This test requires -developer-build when --sanitize=undefined is active.");
#else
QFormDataBuilder builder;
const std::unique_ptr<QHttpMultiPart> mp = operations(builder).buildMultiPart();
const std::unique_ptr<QHttpMultiPart> mp = operations(builder).buildMultiPart(options);
auto *device = QHttpMultiPartPrivate::get(mp.get())->device;
QVERIFY(device->open(QIODeviceBase::ReadOnly));
@ -41,13 +43,14 @@ serialized_impl([[maybe_unused]] qxp::function_ref<QFormDataBuilder &(QFormDataB
}
template <typename Callable>
static QByteArray serialized(Callable operation)
static QByteArray serialized(Callable operation,
QFormDataBuilder::Options options = QFormDataBuilder::Option::Default)
{
if constexpr (std::is_void_v<std::invoke_result_t<Callable&, QFormDataBuilder&>>) {
return serialized_impl([&](auto &builder) {
operation(builder);
return std::ref(builder);
});
}, options);
} else {
return serialized_impl(std::move(operation));
}
@ -66,8 +69,8 @@ private Q_SLOTS:
void escapesBackslashAndQuotesInFilenameAndName_data();
void escapesBackslashAndQuotesInFilenameAndName();
void picksUtf8FilenameEncodingIfAsciiDontSuffice_data();
void picksUtf8FilenameEncodingIfAsciiDontSuffice();
void filenameEncoding_data();
void filenameEncoding();
void setHeadersDoesNotAffectHeaderFieldsManagedByBuilder_data();
void setHeadersDoesNotAffectHeaderFieldsManagedByBuilder();
@ -221,54 +224,108 @@ void tst_QFormDataBuilder::escapesBackslashAndQuotesInFilenameAndName()
QVERIFY(msg.contains(expected_content_disposition_data));
}
void tst_QFormDataBuilder::picksUtf8FilenameEncodingIfAsciiDontSuffice_data()
void tst_QFormDataBuilder::filenameEncoding_data()
{
static const auto contentType = "text/plain"_ba;
using Opts = QFormDataBuilder::Options;
using Opt = QFormDataBuilder::Option;
QTest::addColumn<QLatin1StringView>("name_data");
QTest::addColumn<QAnyStringView>("body_name_data");
QTest::addColumn<QByteArray>("expected_content_type_data");
QTest::addColumn<QByteArray>("expected_content_disposition_data");
QTest::addColumn<QByteArray>("content_disposition_must_not_contain_data");
QTest::addColumn<QFormDataBuilder::Options>("filename_options");
QTest::newRow("latin1-ascii") << "text"_L1 << QAnyStringView("rfc3252.txt"_L1) << "text/plain"_ba
auto addAsciiTestRows = [] (const std::string &rowName, Opts opts) {
QTest::newRow((rowName + "-L1").c_str())
<< "text"_L1 << QAnyStringView("rfc3252.txt"_L1) << contentType
<< R"(form-data; name="text"; filename="rfc3252.txt")"_ba
<< "filename*"_ba;
QTest::newRow("u8-ascii") << "text"_L1 << QAnyStringView(u8"rfc3252.txt") << "text/plain"_ba
<< "filename*"_ba << opts;
QTest::newRow((rowName + "-U8").c_str())
<< "text"_L1 << QAnyStringView(u8"rfc3252.txt") << contentType
<< R"(form-data; name="text"; filename="rfc3252.txt")"_ba
<< "filename*"_ba;
QTest::newRow("u-ascii") << "text"_L1 << QAnyStringView(u"rfc3252.txt") << "text/plain"_ba
<< "filename*"_ba << opts;
QTest::newRow((rowName + "-U").c_str())
<< "text"_L1 << QAnyStringView(u"rfc3252.txt") << contentType
<< R"(form-data; name="text"; filename="rfc3252.txt")"_ba
<< "filename*"_ba;
<< "filename*"_ba << opts;
};
addAsciiTestRows("default-ascii", Opt::Default);
addAsciiTestRows("omit-rfc8187-ascii", Opt::OmitRfc8187EncodedFilename);
addAsciiTestRows("use-rfc7578-ascii", Opt::UseRfc7578PercentEncodedFilename);
addAsciiTestRows("strict-rfc7578-ascii", Opt::StrictRfc7578);
addAsciiTestRows("prefer-latin1-ascii", Opt::PreferLatin1EncodedFilename);
auto addLatin1TestRows = [] (const std::string &rowName, const QByteArray &resultFilename,
const QByteArray &mustNotContain, Opts opts) {
// 0xF6 is 'ö', use hex value with Latin-1 to avoid interpretation as UTF-8 (0xC3 0xB6)
QTest::newRow("latin1-latin") << "text"_L1 << QAnyStringView("sz\xF6veg.txt"_L1) << "text/plain"_ba
<< R"(form-data; name="text"; filename="szöveg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba
<< ""_ba;
QTest::newRow("u8-latin") << "text"_L1 << QAnyStringView(u8"szöveg.txt") << "text/plain"_ba
<< R"(form-data; name="text"; filename="szöveg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba
<< ""_ba;
QTest::newRow("u-latin") << "text"_L1 << QAnyStringView(u"szöveg.txt") << "text/plain"_ba
<< R"(form-data; name="text"; filename="szöveg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba
<< ""_ba;
QTest::newRow((rowName + "-L1").c_str())
<< "text"_L1 << QAnyStringView("sz\xF6veg.txt"_L1) << contentType
<< resultFilename << mustNotContain << opts;
QTest::newRow((rowName + "-U8").c_str())
<< "text"_L1 << QAnyStringView(u8"szöveg.txt") << contentType
<< resultFilename << mustNotContain << opts;
QTest::newRow((rowName + "-U").c_str())
<< "text"_L1 << QAnyStringView(u"szöveg.txt") << contentType
<< resultFilename << mustNotContain << opts;
};
addLatin1TestRows("default-latin1",
R"(form-data; name="text"; filename="szöveg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba,
""_ba, Opt::Default);
addLatin1TestRows("omit-rfc8187-latin1",
R"(form-data; name="text"; filename="szöveg.txt")"_ba,
"filename*"_ba, Opt::OmitRfc8187EncodedFilename);
addLatin1TestRows("use-rfc7578-latin1",
R"(form-data; name="text"; filename="sz%C3%B6veg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba,
""_ba, Opt::UseRfc7578PercentEncodedFilename);
addLatin1TestRows("strict-rfc7578-latin1",
R"(form-data; name="text"; filename="sz%C3%B6veg.txt")"_ba,
"filename*"_ba, Opt::StrictRfc7578);
addLatin1TestRows("prefer-latin1-latin1",
"form-data; name=\"text\"; filename=\"sz\xF6veg.txt\"; filename*=ISO-8859-1''sz%F6veg.txt"_ba,
""_ba, Opt::PreferLatin1EncodedFilename);
QTest::newRow("u8-u8") << "text"_L1 << QAnyStringView(u8"テキスト.txt") << "text/plain"_ba
<< R"(form-data; name="text"; filename=".txt"; filename*=UTF-8''%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt)"_ba
<< ""_ba;
auto addUtf8TestRows = [] (const std::string &rowName, const QByteArray &resultFilename,
const QByteArray &mustNotContain, Opts opts) {
QTest::newRow((rowName + "-U8").c_str())
<< "text"_L1 << QAnyStringView(u8"テキスト.txt") << contentType
<< resultFilename << mustNotContain << opts;
};
addUtf8TestRows("default-utf8",
R"(form-data; name="text"; filename=".txt"; filename*=UTF-8''%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt)"_ba,
""_ba, Opt::Default);
addUtf8TestRows("omit-rfc8187-utf8",
R"(form-data; name="text"; filename=".txt")"_ba,
"filename*"_ba, Opt::OmitRfc8187EncodedFilename);
addUtf8TestRows("use-rfc7578-utf8",
R"(form-data; name="text"; filename="%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt"; filename*=UTF-8''%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt)"_ba,
""_ba, Opt::UseRfc7578PercentEncodedFilename);
addUtf8TestRows("strict-rfc7578-utf8",
R"(form-data; name="text"; filename="%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt")"_ba,
"filename*"_ba, Opt::StrictRfc7578);
addUtf8TestRows("strict-rfc7578-utf8",
R"(form-data; name="text"; filename="%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt")"_ba,
"filename*"_ba, Opt::StrictRfc7578);
addUtf8TestRows("prefer-latin1-utf8",
R"(form-data; name="text"; filename=".txt"; filename*=UTF-8''%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt)"_ba,
""_ba, Opt::PreferLatin1EncodedFilename);
}
void tst_QFormDataBuilder::picksUtf8FilenameEncodingIfAsciiDontSuffice()
void tst_QFormDataBuilder::filenameEncoding()
{
QFETCH(const QLatin1StringView, name_data);
QFETCH(const QAnyStringView, body_name_data);
QFETCH(const QByteArray, expected_content_type_data);
QFETCH(const QByteArray, expected_content_disposition_data);
QFETCH(const QByteArray, content_disposition_must_not_contain_data);
QFETCH(const QFormDataBuilder::Options, filename_options);
QBuffer buff;
QVERIFY(buff.open(QIODevice::ReadOnly));
const auto msg = serialized([&](auto &builder) {
builder.part(name_data).setBodyDevice(&buff, body_name_data);
});
}, filename_options);
QVERIFY2(msg.contains(expected_content_type_data),
"content-type not found : " + expected_content_type_data);