QXmlStreamReader: check appending data with unexpected encoding
If we add the first part of the XML document using a QByteArray (QBA) overload of the constructor or of the addData() method, the encoding of the document will be extracted from the "encoding" attribute of the XML prolog. As a result, if the encoding is different from UTF-8, appending data with the QAnyStringView (QASV) overload of addData() will always result in reading the data incorrectly, because the implementation converts the input data to UTF-8.

This test explicitly highlights this behavior.

In some cases the current behavior is probably fine. However, cases like appending Latin1 to a Latin1-encoded document or appending UTF-16 to a UTF-16-encoded document should just work instead of doing the unwanted conversion to UTF-8 and failing.

Task-number: QTBUG-124636
Task-number: QTBUG-135129
Pick-to: 6.9 6.8 6.5
Change-Id: Idf0571083e56032145478631538f09d251cb1022
Reviewed-by: Marc Mutz <marc.mutz@qt.io>
This commit is contained in:
parent
633278fe1c
commit
046b8523f2
@ -571,6 +571,8 @@ private slots:
|
||||
void readFromQBufferInvalid() const;
|
||||
void readFromLatin1String() const;
|
||||
void readLatin1Document() const;
|
||||
void appendToRawDocumentWithNonUtf8Encoding_data();
|
||||
void appendToRawDocumentWithNonUtf8Encoding();
|
||||
void readNextStartElement() const;
|
||||
void readElementText() const;
|
||||
void readElementText_data() const;
|
||||
@ -1251,6 +1253,79 @@ void tst_QXmlStream::readLatin1Document() const
|
||||
}
|
||||
}
|
||||
|
||||
void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding_data()
|
||||
{
|
||||
QTest::addColumn<QByteArray>("rawDocumentStart");
|
||||
QTest::addColumn<QString>("expectedFirstElementText");
|
||||
QTest::addColumn<QString>("nextData");
|
||||
QTest::addColumn<QStringConverter::Encoding>("nextEncoding");
|
||||
QTest::addColumn<QString>("expectedNextElementText");
|
||||
|
||||
auto row = [](const char *name, const QByteArray &encoding,
|
||||
const QByteArray &firstData, const QString &expectedFirstString,
|
||||
QStringConverter::Encoding nextEncoding, const QString &nextString) {
|
||||
const QByteArray docStart = "<?xml version=\"1.0\" encoding=\"" + encoding
|
||||
+ "\"?><foo><a>" + firstData + "</a>";
|
||||
const QString nextElement = u"<a>"_s + nextString + u"</a>"_s;
|
||||
QTest::newRow(name) << docStart << expectedFirstString << nextElement
|
||||
<< nextEncoding << nextString;
|
||||
};
|
||||
|
||||
row("l1+utf16", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"),
|
||||
QStringConverter::Utf16, u"M\u00E5rten"_s);
|
||||
row("l1+utf8", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"),
|
||||
QStringConverter::Utf8, QString::fromUtf8("M\xC3\xA5rten"));
|
||||
// Even this fails, because we internally convert the second L1 to UTF-8!
|
||||
row("l1+l1", "iso-8859-1"_ba, "M\xE5rten"_ba, QString::fromLatin1("M\xE5rten"),
|
||||
QStringConverter::Latin1, QString::fromLatin1("M\xE5rten"));
|
||||
|
||||
const QString utf16Str = u"<?xml version=\"1.0\" encoding=\"utf-16\"?>"
|
||||
"<foo><a>M\u00E5rten</a>"_s;
|
||||
const QByteArray utf16Data{reinterpret_cast<const char *>(utf16Str.utf16()),
|
||||
utf16Str.size() * 2};
|
||||
|
||||
QTest::newRow("utf16+utf16") << utf16Data << u"M\u00E5rten"_s
|
||||
<< u"<a>M\u00E5rten</a>"_s
|
||||
<< QStringConverter::Utf16
|
||||
<< u"M\u00E5rten"_s;
|
||||
}
|
||||
|
||||
void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding()
|
||||
{
|
||||
QFETCH(const QByteArray, rawDocumentStart);
|
||||
QFETCH(const QString, expectedFirstElementText);
|
||||
QFETCH(const QString, nextData);
|
||||
QFETCH(const QStringConverter::Encoding, nextEncoding);
|
||||
QFETCH(const QString, expectedNextElementText);
|
||||
|
||||
QXmlStreamReader reader(rawDocumentStart);
|
||||
QVERIFY(reader.readNextStartElement()); // foo
|
||||
QVERIFY(reader.readNextStartElement()); // a
|
||||
QString text = reader.readElementText();
|
||||
QCOMPARE(text, expectedFirstElementText);
|
||||
|
||||
switch (nextEncoding) {
|
||||
case QStringConverter::Utf16:
|
||||
reader.addData(nextData);
|
||||
break;
|
||||
case QStringConverter::Utf8:
|
||||
reader.addData(QUtf8StringView{nextData.toUtf8()});
|
||||
break;
|
||||
case QStringConverter::Latin1:
|
||||
reader.addData(QLatin1StringView{nextData.toLatin1()});
|
||||
break;
|
||||
default:
|
||||
Q_UNREACHABLE();
|
||||
}
|
||||
QEXPECT_FAIL("utf16+utf16", "QTBUG-135129: Parser expected UTF-16, but got UTF-8", Abort);
|
||||
QVERIFY(reader.readNextStartElement()); // a
|
||||
text = reader.readElementText();
|
||||
|
||||
QEXPECT_FAIL("", "Parser expects the data in the initial encoding, but we convert to UTF-8",
|
||||
Continue);
|
||||
QCOMPARE(text, expectedNextElementText);
|
||||
}
|
||||
|
||||
void tst_QXmlStream::readNextStartElement() const
|
||||
{
|
||||
QLatin1String in("<?xml version=\"1.0\"?><A><!-- blah --><B><C/></B><B attr=\"value\"/>text</A>");
|
||||
|
Loading…
x
Reference in New Issue
Block a user