diff --git a/src/corelib/io/qdataurl.cpp b/src/corelib/io/qdataurl.cpp index ef468f2ea16..eca94443375 100644 --- a/src/corelib/io/qdataurl.cpp +++ b/src/corelib/io/qdataurl.cpp @@ -17,6 +17,15 @@ using namespace Qt::Literals; */ Q_CORE_EXPORT bool qDecodeDataUrl(const QUrl &uri, QString &mimeType, QByteArray &payload) { + /* https://www.rfc-editor.org/rfc/rfc2397.html + + data:[<mediatype>][;base64],<data> + dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + mediatype := [ type "/" subtype ] *( ";" parameter ) + data := *urlchar + parameter := attribute "=" value + */ + if (uri.scheme() != "data"_L1 || !uri.host().isEmpty()) return false; @@ -48,20 +57,36 @@ Q_CORE_EXPORT bool qDecodeDataUrl(const QUrl &uri, QString &mimeType, QByteArray data.chop(base64.size()); } - QLatin1StringView textPlain; + QLatin1StringView mime; + QLatin1StringView charsetParam; constexpr auto charset = "charset"_L1; - if (data.startsWith(charset, Qt::CaseInsensitive)) { - QLatin1StringView copy = data.sliced(charset.size()); - while (copy.startsWith(u' ')) - copy.slice(1); - if (copy.startsWith(u'=')) - textPlain = "text/plain;"_L1; + bool first = true; + for (auto part : qTokenize(data, u';', Qt::SkipEmptyParts)) { + part = part.trimmed(); + if (first) { + if (part.contains(u'/')) + mime = part; + first = false; + } + // Minimal changes, e.g. 
if it's "charset=;" or "charset;" without + // an encoding, leave it as-is + if (part.startsWith(charset, Qt::CaseInsensitive)) + charsetParam = part; + + if (!mime.isEmpty() && !charsetParam.isEmpty()) + break; } - if (!data.isEmpty()) - mimeType = textPlain + data.trimmed(); + if (mime.isEmpty()) { + mime = "text/plain"_L1; + if (charsetParam.isEmpty()) + charsetParam = "charset=US-ASCII"_L1; + } + if (!charsetParam.isEmpty()) + mimeType = mime + u';' + charsetParam; else - mimeType = QStringLiteral("text/plain;charset=US-ASCII"); + mimeType = mime; + return true; } diff --git a/tests/auto/corelib/io/qdataurl/CMakeLists.txt b/tests/auto/corelib/io/qdataurl/CMakeLists.txt index 4157db90d83..654c1ca2c89 100644 --- a/tests/auto/corelib/io/qdataurl/CMakeLists.txt +++ b/tests/auto/corelib/io/qdataurl/CMakeLists.txt @@ -16,4 +16,5 @@ qt_internal_add_test(tst_qdataurl tst_qdataurl.cpp LIBRARIES Qt::CorePrivate + TESTDATA "arrow-down-16.png" ) diff --git a/tests/auto/corelib/io/qdataurl/arrow-down-16.png b/tests/auto/corelib/io/qdataurl/arrow-down-16.png new file mode 100644 index 00000000000..dc88e13a885 Binary files /dev/null and b/tests/auto/corelib/io/qdataurl/arrow-down-16.png differ diff --git a/tests/auto/corelib/io/qdataurl/tst_qdataurl.cpp b/tests/auto/corelib/io/qdataurl/tst_qdataurl.cpp index 9c2276d99fa..2936e20d15d 100644 --- a/tests/auto/corelib/io/qdataurl/tst_qdataurl.cpp +++ b/tests/auto/corelib/io/qdataurl/tst_qdataurl.cpp @@ -32,10 +32,14 @@ void tst_QDataUrl::decode_data() row("malformed-host2", "data://text/plain;charset=ISO-8859-1", false); row("malformed-host3", "data://test.com/,", false); row("malformed-no-comma", "data:text/plain", false); + + constexpr auto defaultMimeType = "text/plain;charset=US-ASCII"_L1; + row("emptyData-default-mimetype", "data:,", true, "text/plain;charset=US-ASCII"_L1, ""); row("emptyData-only-charset", "data:charset=ISO-8859-1,", true, "text/plain;charset=ISO-8859-1"_L1, ""); + row("alreadyPercentageEncoded", 
"data:text/plain,%E2%88%9A", true, "text/plain"_L1, QByteArray::fromPercentEncoding("%E2%88%9A")); row("everythingIsCaseInsensitive", "Data:texT/PlaiN;charSet=iSo-8859-1;Base64,SGVsbG8=", true, @@ -43,7 +47,28 @@ void tst_QDataUrl::decode_data() row("spacesAroundCharset", "data:%20charset%20%20=%20UTF-8,Hello", true, "text/plain;charset = UTF-8"_L1, QByteArrayLiteral("Hello")); row("prematureCharsetEnd", "data:charset,", true, - "charset", ""); // nonsense result, but don't crash + "text/plain;charset"_L1, ""); // nonsense result, but don't crash + row("prematureCharsetEnd-no-encoding", "data:charset=,", true, + "text/plain;charset="_L1, ""); // nonsense result, but don't crash + row("charset-value-as-quoted-string", "data:charset=%22UTF-8%22,Hello", true, + "text/plain;charset=\"UTF-8\""_L1, "Hello"_ba); + + row("extraparameter-before-charset", "data:;extraparameter=foo;charset=ISO-8859-1,Hello", true, + "text/plain;charset=ISO-8859-1", "Hello"); + row("extraparameter-after-charset", "data:charset=ISO-8859-1;extraparameter=foo,Hello", true, + "text/plain;charset=ISO-8859-1", "Hello"); + row("content-type-parsing-slash-in-mimetype", + "data:charset=UTF-8;alternate=\"application/octet-stream\";bar=baz,", true, + "text/plain;charset=UTF-8", ""); + row("not-real-charset", "data:incharsetter=true,", true, defaultMimeType, ""); + + QString path = QFINDTESTDATA("arrow-down-16.png"); + QFile img(path); + QVERIFY(img.open(QFile::ReadOnly)); + QByteArray imageData = img.readAll(); + QByteArray base64 = imageData.toBase64(); + row("image-png", "data:image/png;base64," + base64, true, + "image/png", imageData); } void tst_QDataUrl::decode()