From 678c839575d2624995e1d6b9e7a054972e68ea9b Mon Sep 17 00:00:00 2001 From: Marc Mutz Date: Tue, 18 Jun 2024 18:22:53 +0200 Subject: [PATCH] QFormDataBuilder: add options There are (at least) three RFCs, all non-obsolete, purporting to guard the formatting of multipart/form-data filename parameters, and they all disagree: RFC 6266, RFC 7578 and RFC 8187. There is also considerable implementation divergence. So, to not have to hard-code any one of the different strategies, add Options controlling the formatting. Found to be required in implementation review. Task-number: QTBUG-125985 Change-Id: Ibc82ff8a3460580ee70eafcaf9b88de36751940d Reviewed-by: Marc Mutz Reviewed-by: Mate Barany (cherry picked from commit 49ab4f156b7d1a0593f25680d0b841d2dc4c348e) Reviewed-by: Qt Cherry-pick Bot --- src/network/access/qformdatabuilder.cpp | 94 ++++++++++++-- src/network/access/qformdatabuilder.h | 14 ++- .../qformdatabuilder/tst_qformdatabuilder.cpp | 119 +++++++++++++----- 3 files changed, 185 insertions(+), 42 deletions(-) diff --git a/src/network/access/qformdatabuilder.cpp b/src/network/access/qformdatabuilder.cpp index 9f5cc683d0f..6ab8d669a35 100644 --- a/src/network/access/qformdatabuilder.cpp +++ b/src/network/access/qformdatabuilder.cpp @@ -34,7 +34,7 @@ class QFormDataPartBuilderPrivate { public: explicit QFormDataPartBuilderPrivate(QAnyStringView name); - QHttpPart build(); + QHttpPart build(QFormDataBuilder::Options options); QString m_name; QByteArray m_mimeType; @@ -260,10 +260,12 @@ QFormDataPartBuilder QFormDataPartBuilder::setHeaders(const QHttpHeaders &header header. */ -QHttpPart QFormDataPartBuilderPrivate::build() +QHttpPart QFormDataPartBuilderPrivate::build(QFormDataBuilder::Options options) { QHttpPart httpPart; + using Opt = QFormDataBuilder::Option; + QByteArray headerValue; headerValue += "form-data; name=\""; @@ -271,18 +273,39 @@ QHttpPart QFormDataPartBuilderPrivate::build() headerValue += "\""; if (!m_originalBodyName.isNull()) { - const bool utf8 = !QtPrivate::isAscii(m_originalBodyName); - const auto enc = utf8 ? m_originalBodyName.toUtf8() : m_originalBodyName.toLatin1(); + + enum class Encoding { ASCII, Latin1, Utf8 } encoding = Encoding::ASCII; + for (QChar c : std::as_const(m_originalBodyName)) { + if (c > u'\xff') { + encoding = Encoding::Utf8; + break; + } else if (c > u'\x7f') { + encoding = Encoding::Latin1; + } + } + QByteArray enc; + if ((options & Opt::PreferLatin1EncodedFilename) && encoding != Encoding::Utf8) + enc = m_originalBodyName.toLatin1(); + else + enc = m_originalBodyName.toUtf8(); + headerValue += "; filename=\""; - escapeNameAndAppend(headerValue, enc); + if (options & Opt::UseRfc7578PercentEncodedFilename) + headerValue += enc.toPercentEncoding(); + else + escapeNameAndAppend(headerValue, enc); headerValue += "\""; - if (utf8) { + if (encoding != Encoding::ASCII && !(options & Opt::OmitRfc8187EncodedFilename)) { // For 'filename*' production see // https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.1 // For providing both filename and filename* parameters see // https://datatracker.ietf.org/doc/html/rfc6266#section-4.3 and // https://datatracker.ietf.org/doc/html/rfc8187#section-4.2 - headerValue += "; filename*=UTF-8''" + enc.toPercentEncoding(); + if ((options & Opt::PreferLatin1EncodedFilename) && encoding == Encoding::Latin1) + headerValue += "; filename*=ISO-8859-1''"; + else + headerValue += "; filename*=UTF-8''"; + headerValue += enc.toPercentEncoding(); } } @@ -366,6 +389,56 @@ const QFormDataPartBuilderPrivate* QFormDataPartBuilder::d_func() const return &d->parts[m_index]; } +/*! + \enum QFormDataBuilder::Option + + Options controlling buildMultiPart(). + + Several current RFCs disagree on how, exactly, to format + \c{multipart/form-data}. Instead of hard-coding any one RFC, these options + give you control over which RFC to follow. + + \value Default The default values, designed to maximize interoperability in + general. All options named below are off. + + \value OmitRfc8187EncodedFilename + When a body-part's file-name contains non-US-ASCII characters, + \l{https://datatracker.ietf.org/doc/html/rfc6266#section-4.3} + {RFC 6266 Section 4.3} suggests to use + \l{https://datatracker.ietf.org/doc/html/rfc8187}{RFC 8187}-style + encoding (\c{filename*=utf-8''...}). The more recent + \l{https://datatracker.ietf.org/doc/html/rfc7578#section-4.2} + {RFC 7578 Section 4.2}, however, bans the use of that mechanism. + Both RFCs are current as of this writing, so this option allows you + to choose which one to follow. The default is to include the + RFC 8187-encoded \c{filename*} alongside the unencoded \c{filename}, + as suggested by RFC 6266. + + \value UseRfc7578PercentEncodedFilename + When a body-part's file-name contains non-US-ASCII characters, + \l{https://datatracker.ietf.org/doc/html/rfc7578#section-4.2} + {RFC 7578 Section 4.2} suggests to use percent-encoding of the octets + of the UTF-8-encoded file-name. It goes on to note that many + implementations, however, do \e{not} percent-encode the UTF-8-encoded + file-name, but just emit "raw" UTF-8 (with \c{"} and \c{\} escaped + using \c{\}). This is the default of QFormDataBuilder, too. + + \value PreferLatin1EncodedFilename + \l{https://datatracker.ietf.org/doc/html/rfc5987#section-3.2} + {RFC 5987 Section 3.2} required recipients to support ISO-8859-1 + ("Latin-1") encoding. When a body-part's file-name contains + non-US-ASCII characters that, however, fit into Latin-1, this option + prefers to use ISO-8859-1 encoding over UTF-8. The more recent + \{https://datatracker.ietf.org/doc/html/rfc8187#appendix-A}{RFC 8187} + no longer requires ISO-8859-1 support, so the default is to send all + non-US-ASCII file-names in UTF-8 encoding instead. + + \value StrictRfc7578 + This option combines other options to select strict + \l{https://datatracker.ietf.org/doc/html/rfc7578}{RFC 7578} + compliance. +*/ + /*! Constructs an empty QFormDataBuilder object. */ @@ -419,19 +492,20 @@ QFormDataPartBuilder QFormDataBuilder::part(QAnyStringView name) } /*! - Constructs and returns a pointer to a QHttpMultipart object. + Constructs and returns a pointer to a QHttpMultipart object constructed + according to \a options. \sa QHttpMultiPart */ -std::unique_ptr QFormDataBuilder::buildMultiPart() +std::unique_ptr QFormDataBuilder::buildMultiPart(Options options) { Q_D(QFormDataBuilder); auto multiPart = std::make_unique(QHttpMultiPart::FormDataType); for (auto &part : d->parts) - multiPart->append(part.build()); + multiPart->append(part.build(options)); return multiPart; } diff --git a/src/network/access/qformdatabuilder.h b/src/network/access/qformdatabuilder.h index f1f148147f6..908381d649e 100644 --- a/src/network/access/qformdatabuilder.h +++ b/src/network/access/qformdatabuilder.h @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -72,6 +73,16 @@ Q_DECLARE_SHARED(QFormDataPartBuilder) class QFormDataBuilder { public: + enum class Option { + Default = 0x00, + OmitRfc8187EncodedFilename = 0x01, + UseRfc7578PercentEncodedFilename = 0x02, + PreferLatin1EncodedFilename = 0x04, + + StrictRfc7578 = OmitRfc8187EncodedFilename | UseRfc7578PercentEncodedFilename, + }; + Q_DECLARE_FLAGS(Options, Option) + Q_NETWORK_EXPORT QFormDataBuilder(); QFormDataBuilder(QFormDataBuilder &&other) noexcept : d_ptr(std::exchange(other.d_ptr, nullptr)) {} @@ -84,13 +95,14 @@ public: Q_NETWORK_EXPORT ~QFormDataBuilder(); Q_NETWORK_EXPORT QFormDataPartBuilder part(QAnyStringView name); - Q_NETWORK_EXPORT std::unique_ptr buildMultiPart(); + Q_NETWORK_EXPORT std::unique_ptr buildMultiPart(Options options = {}); private: QFormDataBuilderPrivate *d_ptr; Q_DECLARE_PRIVATE(QFormDataBuilder) Q_DISABLE_COPY(QFormDataBuilder) }; +Q_DECLARE_OPERATORS_FOR_FLAGS(QFormDataBuilder::Options) Q_DECLARE_SHARED(QFormDataBuilder) diff --git a/tests/auto/network/access/qformdatabuilder/tst_qformdatabuilder.cpp b/tests/auto/network/access/qformdatabuilder/tst_qformdatabuilder.cpp index 8cd2665d5ff..5789319f70f 100644 --- a/tests/auto/network/access/qformdatabuilder/tst_qformdatabuilder.cpp +++ b/tests/auto/network/access/qformdatabuilder/tst_qformdatabuilder.cpp @@ -18,6 +18,7 @@ #endif #include +#include #include using namespace Qt::StringLiterals; @@ -25,14 +26,15 @@ using namespace Qt::StringLiterals; const auto CRLF = "\r\n"_ba; Q_NEVER_INLINE static QByteArray -serialized_impl([[maybe_unused]] qxp::function_ref operations) +serialized_impl([[maybe_unused]] qxp::function_ref operations, + QFormDataBuilder::Options options = QFormDataBuilder::Option::Default) { #if defined(QT_UNDEFINED_SANITIZER) && !defined(QT_BUILD_INTERNAL) QSKIP("This test requires -developer-build when --sanitize=undefined is active."); #else QFormDataBuilder builder; - const std::unique_ptr mp = operations(builder).buildMultiPart(); + const std::unique_ptr mp = operations(builder).buildMultiPart(options); auto *device = QHttpMultiPartPrivate::get(mp.get())->device; QVERIFY(device->open(QIODeviceBase::ReadOnly)); @@ -41,13 +43,14 @@ serialized_impl([[maybe_unused]] qxp::function_ref -static QByteArray serialized(Callable operation) +static QByteArray serialized(Callable operation, + QFormDataBuilder::Options options = QFormDataBuilder::Option::Default) { if constexpr (std::is_void_v>) { return serialized_impl([&](auto &builder) { operation(builder); return std::ref(builder); - }); + }, options); } else { return serialized_impl(std::move(operation)); } @@ -66,8 +69,8 @@ private Q_SLOTS: void escapesBackslashAndQuotesInFilenameAndName_data(); void escapesBackslashAndQuotesInFilenameAndName(); - void picksUtf8FilenameEncodingIfAsciiDontSuffice_data(); - void picksUtf8FilenameEncodingIfAsciiDontSuffice(); + void filenameEncoding_data(); + void filenameEncoding(); void setHeadersDoesNotAffectHeaderFieldsManagedByBuilder_data(); void setHeadersDoesNotAffectHeaderFieldsManagedByBuilder(); @@ -221,54 +224,108 @@ void tst_QFormDataBuilder::escapesBackslashAndQuotesInFilenameAndName() QVERIFY(msg.contains(expected_content_disposition_data)); } -void tst_QFormDataBuilder::picksUtf8FilenameEncodingIfAsciiDontSuffice_data() +void tst_QFormDataBuilder::filenameEncoding_data() { + static const auto contentType = "text/plain"_ba; + using Opts = QFormDataBuilder::Options; + using Opt = QFormDataBuilder::Option; QTest::addColumn("name_data"); QTest::addColumn("body_name_data"); QTest::addColumn("expected_content_type_data"); QTest::addColumn("expected_content_disposition_data"); QTest::addColumn("content_disposition_must_not_contain_data"); + QTest::addColumn("filename_options"); - QTest::newRow("latin1-ascii") << "text"_L1 << QAnyStringView("rfc3252.txt"_L1) << "text/plain"_ba - << R"(form-data; name="text"; filename="rfc3252.txt")"_ba - << "filename*"_ba; - QTest::newRow("u8-ascii") << "text"_L1 << QAnyStringView(u8"rfc3252.txt") << "text/plain"_ba - << R"(form-data; name="text"; filename="rfc3252.txt")"_ba - << "filename*"_ba; - QTest::newRow("u-ascii") << "text"_L1 << QAnyStringView(u"rfc3252.txt") << "text/plain"_ba - << R"(form-data; name="text"; filename="rfc3252.txt")"_ba - << "filename*"_ba; + auto addAsciiTestRows = [] (const std::string &rowName, Opts opts) { + QTest::newRow((rowName + "-L1").c_str()) + << "text"_L1 << QAnyStringView("rfc3252.txt"_L1) << contentType + << R"(form-data; name="text"; filename="rfc3252.txt")"_ba + << "filename*"_ba << opts; + QTest::newRow((rowName + "-U8").c_str()) + << "text"_L1 << QAnyStringView(u8"rfc3252.txt") << contentType + << R"(form-data; name="text"; filename="rfc3252.txt")"_ba + << "filename*"_ba << opts; + QTest::newRow((rowName + "-U").c_str()) + << "text"_L1 << QAnyStringView(u"rfc3252.txt") << contentType + << R"(form-data; name="text"; filename="rfc3252.txt")"_ba + << "filename*"_ba << opts; + }; + addAsciiTestRows("default-ascii", Opt::Default); + addAsciiTestRows("omit-rfc8187-ascii", Opt::OmitRfc8187EncodedFilename); + addAsciiTestRows("use-rfc7578-ascii", Opt::UseRfc7578PercentEncodedFilename); + addAsciiTestRows("strict-rfc7578-ascii", Opt::StrictRfc7578); + addAsciiTestRows("prefer-latin1-ascii", Opt::PreferLatin1EncodedFilename); - // 0xF6 is 'ö', use hex value with Latin-1 to avoid interpretation as UTF-8 (0xC3 0xB6) - QTest::newRow("latin1-latin") << "text"_L1 << QAnyStringView("sz\xF6veg.txt"_L1) << "text/plain"_ba - << R"(form-data; name="text"; filename="szöveg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba - << ""_ba; - QTest::newRow("u8-latin") << "text"_L1 << QAnyStringView(u8"szöveg.txt") << "text/plain"_ba - << R"(form-data; name="text"; filename="szöveg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba - << ""_ba; - QTest::newRow("u-latin") << "text"_L1 << QAnyStringView(u"szöveg.txt") << "text/plain"_ba - << R"(form-data; name="text"; filename="szöveg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba - << ""_ba; + auto addLatin1TestRows = [] (const std::string &rowName, const QByteArray &resultFilename, + const QByteArray &mustNotContain, Opts opts) { + // 0xF6 is 'ö', use hex value with Latin-1 to avoid interpretation as UTF-8 (0xC3 0xB6) + QTest::newRow((rowName + "-L1").c_str()) + << "text"_L1 << QAnyStringView("sz\xF6veg.txt"_L1) << contentType + << resultFilename << mustNotContain << opts; + QTest::newRow((rowName + "-U8").c_str()) + << "text"_L1 << QAnyStringView(u8"szöveg.txt") << contentType + << resultFilename << mustNotContain << opts; + QTest::newRow((rowName + "-U").c_str()) + << "text"_L1 << QAnyStringView(u"szöveg.txt") << contentType + << resultFilename << mustNotContain << opts; + }; + addLatin1TestRows("default-latin1", + R"(form-data; name="text"; filename="szöveg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba, + ""_ba, Opt::Default); + addLatin1TestRows("omit-rfc8187-latin1", + R"(form-data; name="text"; filename="szöveg.txt")"_ba, + "filename*"_ba, Opt::OmitRfc8187EncodedFilename); + addLatin1TestRows("use-rfc7578-latin1", + R"(form-data; name="text"; filename="sz%C3%B6veg.txt"; filename*=UTF-8''sz%C3%B6veg.txt)"_ba, + ""_ba, Opt::UseRfc7578PercentEncodedFilename); + addLatin1TestRows("strict-rfc7578-latin1", + R"(form-data; name="text"; filename="sz%C3%B6veg.txt")"_ba, + "filename*"_ba, Opt::StrictRfc7578); + addLatin1TestRows("prefer-latin1-latin1", + "form-data; name=\"text\"; filename=\"sz\xF6veg.txt\"; filename*=ISO-8859-1''sz%F6veg.txt"_ba, + ""_ba, Opt::PreferLatin1EncodedFilename); - QTest::newRow("u8-u8") << "text"_L1 << QAnyStringView(u8"テキスト.txt") << "text/plain"_ba - << R"(form-data; name="text"; filename="テキスト.txt"; filename*=UTF-8''%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt)"_ba - << ""_ba; + auto addUtf8TestRows = [] (const std::string &rowName, const QByteArray &resultFilename, + const QByteArray &mustNotContain, Opts opts) { + QTest::newRow((rowName + "-U8").c_str()) + << "text"_L1 << QAnyStringView(u8"テキスト.txt") << contentType + << resultFilename << mustNotContain << opts; + }; + addUtf8TestRows("default-utf8", + R"(form-data; name="text"; filename="テキスト.txt"; filename*=UTF-8''%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt)"_ba, + ""_ba, Opt::Default); + addUtf8TestRows("omit-rfc8187-utf8", + R"(form-data; name="text"; filename="テキスト.txt")"_ba, + "filename*"_ba, Opt::OmitRfc8187EncodedFilename); + addUtf8TestRows("use-rfc7578-utf8", + R"(form-data; name="text"; filename="%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt"; filename*=UTF-8''%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt)"_ba, + ""_ba, Opt::UseRfc7578PercentEncodedFilename); + addUtf8TestRows("strict-rfc7578-utf8", + R"(form-data; name="text"; filename="%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt")"_ba, + "filename*"_ba, Opt::StrictRfc7578); + addUtf8TestRows("strict-rfc7578-utf8", + R"(form-data; name="text"; filename="%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt")"_ba, + "filename*"_ba, Opt::StrictRfc7578); + addUtf8TestRows("prefer-latin1-utf8", + R"(form-data; name="text"; filename="テキスト.txt"; filename*=UTF-8''%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88.txt)"_ba, + ""_ba, Opt::PreferLatin1EncodedFilename); } -void tst_QFormDataBuilder::picksUtf8FilenameEncodingIfAsciiDontSuffice() +void tst_QFormDataBuilder::filenameEncoding() { QFETCH(const QLatin1StringView, name_data); QFETCH(const QAnyStringView, body_name_data); QFETCH(const QByteArray, expected_content_type_data); QFETCH(const QByteArray, expected_content_disposition_data); QFETCH(const QByteArray, content_disposition_must_not_contain_data); + QFETCH(const QFormDataBuilder::Options, filename_options); QBuffer buff; QVERIFY(buff.open(QIODevice::ReadOnly)); const auto msg = serialized([&](auto &builder) { builder.part(name_data).setBodyDevice(&buff, body_name_data); - }); + }, filename_options); QVERIFY2(msg.contains(expected_content_type_data), "content-type not found : " + expected_content_type_data);