QXmlStreamReader: fix addData() unnecessary conversion to UTF-8
The addData(QASV) overload was unconditionally converting UTF-16 and Latin1 data to UTF-8. However, if we have already started reading the XML document, and we know that its encoding is UTF-16 or Latin1, then we know for sure that the new data has to be added as-is. Amends 6bc227a06a0d1392d220aa79ddb1cdc145d4f76e. [ChangeLog][QtCore][QXmlStreamReader] Fixed a bug where addData(QAnyStringView) was incorrectly recoding UTF-16 and Latin1 data to UTF-8, thus potentially mangling it. Fixes: QTBUG-135129 Pick-to: 6.8 6.5 Change-Id: Ie1171a5e5596b72a6f160031a4c5a9df3baae4fd Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> (cherry picked from commit b6b725aef59390f403a1a39f49d1318c48f13c07) Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
parent
a65a730ebf
commit
ec8f043ec5
@ -553,6 +553,15 @@ QIODevice *QXmlStreamReader::device() const
|
|||||||
\sa readNext(), clear()
|
\sa readNext(), clear()
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Reports whether the decoder \a dec decodes the encoding \a enc.
// Returns false when \a dec is not valid. Otherwise the match is made by
// name: the decoder's name() must be non-empty and compare equal to
// QStringDecoder::nameForEncoding(enc).
static bool isDecoderForEncoding(const QStringDecoder &dec, QStringDecoder::Encoding enc)
|
||||||
|
{
|
||||||
|
if (!dec.isValid())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const QAnyStringView nameView{dec.name()};
|
||||||
|
return !nameView.empty() && nameView == QStringDecoder::nameForEncoding(enc);
|
||||||
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Adds more \a data for the reader to read. This function does
|
Adds more \a data for the reader to read. This function does
|
||||||
nothing if the reader has a device().
|
nothing if the reader has a device().
|
||||||
@ -567,11 +576,25 @@ void QXmlStreamReader::addData(QAnyStringView data)
|
|||||||
Q_D(QXmlStreamReader);
|
Q_D(QXmlStreamReader);
|
||||||
data.visit([this, d](auto data) {
|
data.visit([this, d](auto data) {
|
||||||
if constexpr (std::is_same_v<decltype(data), QStringView>) {
|
if constexpr (std::is_same_v<decltype(data), QStringView>) {
|
||||||
|
if (d->lockEncoding && isDecoderForEncoding(d->decoder, QStringDecoder::Utf16)) {
|
||||||
|
// We already expect the data in the proper encoding, no need
|
||||||
|
// to recode the data.
|
||||||
|
addDataImpl(QByteArray{reinterpret_cast<const char *>(data.utf16()),
|
||||||
|
data.size() * 2});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// keep the pre-existing behavior
|
||||||
d->lockEncoding = true;
|
d->lockEncoding = true;
|
||||||
if (!d->decoder.isValid())
|
if (!d->decoder.isValid())
|
||||||
d->decoder = QStringDecoder(QStringDecoder::Utf8);
|
d->decoder = QStringDecoder(QStringDecoder::Utf8);
|
||||||
addDataImpl(data.toUtf8());
|
addDataImpl(data.toUtf8());
|
||||||
} else if constexpr (std::is_same_v<decltype(data), QLatin1StringView>) {
|
} else if constexpr (std::is_same_v<decltype(data), QLatin1StringView>) {
|
||||||
|
if (d->lockEncoding && isDecoderForEncoding(d->decoder, QStringDecoder::Latin1)) {
|
||||||
|
// We already expect the data in the proper encoding, no need
|
||||||
|
// to recode the data.
|
||||||
|
addDataImpl(QByteArray{data.data(), data.size()});
|
||||||
|
return;
|
||||||
|
}
|
||||||
// Conversion to a QString is required, to avoid breaking
|
// Conversion to a QString is required, to avoid breaking
|
||||||
// pre-existing (before porting to QAnyStringView) behavior.
|
// pre-existing (before porting to QAnyStringView) behavior.
|
||||||
d->lockEncoding = true;
|
d->lockEncoding = true;
|
||||||
|
@ -1275,7 +1275,6 @@ void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding_data()
|
|||||||
QStringConverter::Utf16, u"M\u00E5rten"_s);
|
QStringConverter::Utf16, u"M\u00E5rten"_s);
|
||||||
row("l1+utf8", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"),
|
row("l1+utf8", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"),
|
||||||
QStringConverter::Utf8, QString::fromUtf8("M\xC3\xA5rten"));
|
QStringConverter::Utf8, QString::fromUtf8("M\xC3\xA5rten"));
|
||||||
// Even this fails, because we internally convert the second L1 to UTF-8!
|
|
||||||
row("l1+l1", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"),
|
row("l1+l1", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"),
|
||||||
QStringConverter::Latin1, QString::fromLatin1("M\xE5rten"));
|
QStringConverter::Latin1, QString::fromLatin1("M\xE5rten"));
|
||||||
|
|
||||||
@ -1317,11 +1316,14 @@ void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding()
|
|||||||
default:
|
default:
|
||||||
Q_UNREACHABLE();
|
Q_UNREACHABLE();
|
||||||
}
|
}
|
||||||
QEXPECT_FAIL("utf16+utf16", "QTBUG-135129: Parser expected UTF-16, but got UTF-8", Abort);
|
|
||||||
QVERIFY(reader.readNextStartElement()); // a
|
QVERIFY(reader.readNextStartElement()); // a
|
||||||
text = reader.readElementText();
|
text = reader.readElementText();
|
||||||
|
|
||||||
QEXPECT_FAIL("", "Parser expects the data in the initial encoding, but we convert to UTF-8",
|
QEXPECT_FAIL("l1+utf16",
|
||||||
|
"Parser expects the data in the initial encoding, but we convert to UTF-8",
|
||||||
|
Continue);
|
||||||
|
QEXPECT_FAIL("l1+utf8",
|
||||||
|
"Parser expects the data in the initial encoding, but we convert to UTF-8",
|
||||||
Continue);
|
Continue);
|
||||||
QCOMPARE(text, expectedNextElementText);
|
QCOMPARE(text, expectedNextElementText);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user