From ec8f043ec514d9556f5e8a1c98b99fa562e813a9 Mon Sep 17 00:00:00 2001 From: Ivan Solovev Date: Mon, 31 Mar 2025 16:52:20 +0200 Subject: [PATCH] QXmlStreamReader: fix addData() unnecessary conversion to UTF-8 The addData(QASV) overload was unconditionally converting UTF-16 and Latin1 data to UTF-8. However, if we already started reading the XML document, and we know that its encoding is UTF-16 or Latin1, then we know for sure that the new data has to be added as-is. Amends 6bc227a06a0d1392d220aa79ddb1cdc145d4f76e. [ChangeLog][QtCore][QXmlStreamReader] Fixed a bug when addData(QAnyStringView) was incorrectly recoding UTF-16 and Latin1 data to UTF-8, thus potentially mangling it. Fixes: QTBUG-135129 Pick-to: 6.8 6.5 Change-Id: Ie1171a5e5596b72a6f160031a4c5a9df3baae4fd Reviewed-by: Thiago Macieira (cherry picked from commit b6b725aef59390f403a1a39f49d1318c48f13c07) Reviewed-by: Qt Cherry-pick Bot --- src/corelib/serialization/qxmlstream.cpp | 23 +++++++++++++++++++ .../qxmlstream/tst_qxmlstream.cpp | 8 ++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp index 5d55c183a2c..a01e98469de 100644 --- a/src/corelib/serialization/qxmlstream.cpp +++ b/src/corelib/serialization/qxmlstream.cpp @@ -553,6 +553,15 @@ QIODevice *QXmlStreamReader::device() const \sa readNext(), clear() */ +static bool isDecoderForEncoding(const QStringDecoder &dec, QStringDecoder::Encoding enc) +{ + if (!dec.isValid()) + return false; + + const QAnyStringView nameView{dec.name()}; + return !nameView.empty() && nameView == QStringDecoder::nameForEncoding(enc); +} + /*! Adds more \a data for the reader to read. This function does nothing if the reader has a device(). 
@@ -567,11 +576,25 @@ void QXmlStreamReader::addData(QAnyStringView data) Q_D(QXmlStreamReader); data.visit([this, d](auto data) { if constexpr (std::is_same_v<decltype(data), QStringView>) { + if (d->lockEncoding && isDecoderForEncoding(d->decoder, QStringDecoder::Utf16)) { + // We already expect the data in the proper encoding, no need + // to recode the data. + addDataImpl(QByteArray{reinterpret_cast<const char *>(data.utf16()), + data.size() * 2}); + return; + } + // keep the pre-existing behavior d->lockEncoding = true; if (!d->decoder.isValid()) d->decoder = QStringDecoder(QStringDecoder::Utf8); addDataImpl(data.toUtf8()); } else if constexpr (std::is_same_v<decltype(data), QLatin1StringView>) { + if (d->lockEncoding && isDecoderForEncoding(d->decoder, QStringDecoder::Latin1)) { + // We already expect the data in the proper encoding, no need + // to recode the data. + addDataImpl(QByteArray{data.data(), data.size()}); + return; + } // Conversion to a QString is required, to avoid breaking // pre-existing (before porting to QAnyStringView) behavior. d->lockEncoding = true; diff --git a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp index 76e53874480..79c804332e2 100644 --- a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp +++ b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp @@ -1275,7 +1275,6 @@ void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding_data() QStringConverter::Utf16, u"M\u00E5rten"_s); row("l1+utf8", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"), QStringConverter::Utf8, QString::fromUtf8("M\xC3\xA5rten")); - // Even this fails, because we internally convert the second L1 to UTF-8! 
row("l1+l1", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"), QStringConverter::Latin1, QString::fromLatin1("M\xE5rten")); @@ -1317,11 +1316,14 @@ void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding() default: Q_UNREACHABLE(); } - QEXPECT_FAIL("utf16+utf16", "QTBUG-135129: Parser expected UTF-16, but got UTF-8", Abort); QVERIFY(reader.readNextStartElement()); // a text = reader.readElementText(); - QEXPECT_FAIL("", "Parser expects the data in the initial encoding, but we convert to UTF-8", + QEXPECT_FAIL("l1+utf16", + "Parser expects the data in the initial encoding, but we convert to UTF-8", + Continue); + QEXPECT_FAIL("l1+utf8", + "Parser expects the data in the initial encoding, but we convert to UTF-8", Continue); QCOMPARE(text, expectedNextElementText); }