From bdb713b1b7eb0b47c249b3d7e5edea17ccb39724 Mon Sep 17 00:00:00 2001
From: Thiago Macieira
Date: Thu, 15 Feb 2024 15:28:58 -0800
Subject: [PATCH] QXmlStreamWriter: fix attempts to write bad QStrings

We weren't doing the decoding from UTF-16 to UTF-32, so weren't catching
invalid code sequences.

[ChangeLog][QtCore][QXmlStreamWriter] The class now rejects writing UTF-8
and UTF-16 invalid input (improper code unit sequences).

Task-number: QTBUG-122241
Pick-to: 6.6 6.5
Change-Id: I83dda2d36c904517b3c0fffd17b42d17c637fdc4
Reviewed-by: Mate Barany
(cherry picked from commit c25541e9ac4c7021c69c600906e5ca8c80f18401)
Reviewed-by: Qt Cherry-pick Bot
---
 src/corelib/serialization/qxmlstream.cpp      |  8 ++++++--
 .../qxmlstream/tst_qxmlstream.cpp             | 20 +++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp
index 4061ebf97b8..c802720dd8f 100644
--- a/src/corelib/serialization/qxmlstream.cpp
+++ b/src/corelib/serialization/qxmlstream.cpp
@@ -22,6 +22,7 @@
 #include "qxmlstream_p.h"
 #include "qxmlstreamparser_p.h"
 #include <private/qstringconverter_p.h>
+#include <private/qstringiterator_p.h>
 
 QT_BEGIN_NAMESPACE
 
@@ -2971,9 +2972,12 @@ void QXmlStreamWriterPrivate::writeEscaped(QAnyStringView s, bool escapeWhitespa
         }
     };
     struct NextUtf16 {
-        char32_t operator()(const QChar *&it, const QChar *) const
+        char32_t operator()(const QChar *&it, const QChar *end) const
         {
-            return (it++)->unicode();
+            QStringIterator decoder(it, end);
+            char32_t result = decoder.next(u'\0');
+            it = decoder.position();
+            return result;
         }
     };
 
diff --git a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
index 2ac0f0a9769..e9a6f46d041 100644
--- a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
+++ b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
@@ -576,6 +576,8 @@ private slots:
     void writeAttribute() const;
     void writeBadCharactersUtf8_data() const;
     void writeBadCharactersUtf8() const;
+    void writeBadCharactersUtf16_data() const;
+    void writeBadCharactersUtf16() const;
     void entitiesAndWhitespace_1() const;
     void entitiesAndWhitespace_2() const;
     void testFalsePrematureError() const;
@@ -1505,6 +1507,24 @@ void tst_QXmlStream::writeBadCharactersUtf8() const
     QVERIFY(writer.hasError());
 }
 
+void tst_QXmlStream::writeBadCharactersUtf16_data() const
+{
+    QTest::addColumn<QString>("input");
+    QTest::addRow("low-surrogate") << u"\xdc00"_s;
+    QTest::addRow("high-surrogate") << u"\xd800"_s;
+    QTest::addRow("inverted-surrogate-pair") << u"\xdc00\xd800"_s;
+    QTest::addRow("high-surrogate+non-surrogate") << u"\xd800z"_s;
+}
+
+void tst_QXmlStream::writeBadCharactersUtf16() const
+{
+    QFETCH(QString, input);
+    QString target;
+    QXmlStreamWriter writer(&target);
+    writer.writeTextElement("a", input);
+    QVERIFY(writer.hasError());
+}
+
 void tst_QXmlStream::entitiesAndWhitespace_1() const
 {
     QXmlStreamReader reader(QLatin1String("&extEnt;"));