QXmlStreamReader: fix addData() unnecessary conversion to UTF-8

The addData(QASV) overload was unconditionally converting UTF-16 and
Latin1 data to UTF-8.
However, if we have already started reading the XML document, and we
know that its encoding is UTF-16 or Latin1, then we know for sure that
the new data has to be added as-is.

Amends 6bc227a06a0d1392d220aa79ddb1cdc145d4f76e.

[ChangeLog][QtCore][QXmlStreamReader] Fixed a bug where
addData(QAnyStringView) incorrectly recoded UTF-16 and Latin1
data to UTF-8, thus potentially mangling it.

Fixes: QTBUG-135129
Pick-to: 6.8 6.5
Change-Id: Ie1171a5e5596b72a6f160031a4c5a9df3baae4fd
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
(cherry picked from commit b6b725aef59390f403a1a39f49d1318c48f13c07)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
Ivan Solovev 2025-03-31 16:52:20 +02:00 committed by Qt Cherry-pick Bot
parent a65a730ebf
commit ec8f043ec5
2 changed files with 28 additions and 3 deletions

View File

@ -553,6 +553,15 @@ QIODevice *QXmlStreamReader::device() const
\sa readNext(), clear()
*/
static bool isDecoderForEncoding(const QStringDecoder &dec, QStringDecoder::Encoding enc)
{
if (!dec.isValid())
return false;
const QAnyStringView nameView{dec.name()};
return !nameView.empty() && nameView == QStringDecoder::nameForEncoding(enc);
}
/*!
Adds more \a data for the reader to read. This function does
nothing if the reader has a device().
@ -567,11 +576,25 @@ void QXmlStreamReader::addData(QAnyStringView data)
Q_D(QXmlStreamReader);
data.visit([this, d](auto data) {
if constexpr (std::is_same_v<decltype(data), QStringView>) {
if (d->lockEncoding && isDecoderForEncoding(d->decoder, QStringDecoder::Utf16)) {
// We already expect the data in the proper encoding, no need
// to recode the data.
addDataImpl(QByteArray{reinterpret_cast<const char *>(data.utf16()),
data.size() * 2});
return;
}
// keep the pre-existing behavior
d->lockEncoding = true;
if (!d->decoder.isValid())
d->decoder = QStringDecoder(QStringDecoder::Utf8);
addDataImpl(data.toUtf8());
} else if constexpr (std::is_same_v<decltype(data), QLatin1StringView>) {
if (d->lockEncoding && isDecoderForEncoding(d->decoder, QStringDecoder::Latin1)) {
// We already expect the data in the proper encoding, no need
// to recode the data.
addDataImpl(QByteArray{data.data(), data.size()});
return;
}
// Conversion to a QString is required, to avoid breaking
// pre-existing (before porting to QAnyStringView) behavior.
d->lockEncoding = true;

View File

@ -1275,7 +1275,6 @@ void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding_data()
QStringConverter::Utf16, u"M\u00E5rten"_s);
row("l1+utf8", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"),
QStringConverter::Utf8, QString::fromUtf8("M\xC3\xA5rten"));
// Even this fails, because we internally convert the second L1 to UTF-8!
row("l1+l1", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"),
QStringConverter::Latin1, QString::fromLatin1("M\xE5rten"));
@ -1317,11 +1316,14 @@ void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding()
default:
Q_UNREACHABLE();
}
QEXPECT_FAIL("utf16+utf16", "QTBUG-135129: Parser expected UTF-16, but got UTF-8", Abort);
QVERIFY(reader.readNextStartElement()); // a
text = reader.readElementText();
QEXPECT_FAIL("", "Parser expects the data in the initial encoding, but we convert to UTF-8",
QEXPECT_FAIL("l1+utf16",
"Parser expects the data in the initial encoding, but we convert to UTF-8",
Continue);
QEXPECT_FAIL("l1+utf8",
"Parser expects the data in the initial encoding, but we convert to UTF-8",
Continue);
QCOMPARE(text, expectedNextElementText);
}