Port QDomDocument to QXmlStreamReader

Reimplement QDomDocument using QXmlStreamReader and switch to the new
implementation starting from Qt 6.

The changes in the behavior are reflected in tests: some test cases
which were marked as "expected to fail" are now passing.

Task-number: QTBUG-76178
Change-Id: I5ace2f13c036a9a778de922b47a1ce35957ce5f6
Reviewed-by: Kai Koehne <kai.koehne@qt.io>
This commit is contained in:
Sona Kurazyan 2019-10-25 12:05:27 +02:00
parent f0443984b8
commit ccc2133c64
7 changed files with 444 additions and 6 deletions

View File

@ -59,6 +59,7 @@
#include <qvariant.h>
#include <qshareddata.h>
#include <qdebug.h>
#include <qxmlstream.h>
#include <stdio.h>
QT_BEGIN_NAMESPACE
@ -5734,6 +5735,34 @@ bool QDomDocumentPrivate::setContent(QXmlInputSource *source, QXmlReader *reader
return true;
}
// Rebuilds this document from the XML delivered by \a reader.
//
// The existing content is cleared and the implementation and doctype nodes are
// recreated before anything is parsed, so the previous content is gone even
// when this function fails.
//
// \a namespaceProcessing is forwarded to the QDomParser that builds the tree.
// On a parse failure the message, line and column reported by the parser are
// written through \a errorMsg, \a errorLine and \a errorColumn (each may be
// null). A null \a reader only produces a warning and returns false; the error
// out-parameters are not written in that case.
//
// Returns true if the whole document was parsed, false otherwise.
bool QDomDocumentPrivate::setContent(QXmlStreamReader *reader, bool namespaceProcessing,
                                     QString *errorMsg, int *errorLine, int *errorColumn)
{
    clear();
    impl = new QDomImplementationPrivate;
    type = new QDomDocumentTypePrivate(this, this);
    // NOTE(review): presumably balances the reference the new doctype node
    // starts with, now that `type` holds it -- confirm against the other
    // setContent() overloads.
    type->ref.deref();

    if (!reader) {
        qWarning("Failed to set content, XML reader is not initialized");
        return false;
    }

    QDomParser domParser(this, reader, namespaceProcessing);
    if (!domParser.parse()) {
        // Fetch the error tuple once instead of once per out-parameter; every
        // call hands back a fresh copy that includes the message string.
        const auto error = domParser.errorInfo();
        if (errorMsg)
            *errorMsg = std::get<0>(error);
        if (errorLine)
            *errorLine = std::get<1>(error);
        if (errorColumn)
            *errorColumn = std::get<2>(error);
        return false;
    }

    return true;
}
QDomNodePrivate* QDomDocumentPrivate::cloneNode(bool deep)
{
QDomNodePrivate *p = new QDomDocumentPrivate(this, deep);
@ -6153,9 +6182,16 @@ bool QDomDocument::setContent(const QString& text, bool namespaceProcessing, QSt
{
// Lazily create the private document the first time content is set.
if (!impl)
impl = new QDomDocumentPrivate();
// Builds against Qt versions before 6.0 keep the QXmlInputSource-based path;
// later versions parse the text through QXmlStreamReader instead.
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QXmlInputSource source;
source.setData(text);
return IMPL->setContent(&source, namespaceProcessing, errorMsg, errorLine, errorColumn);
#else
QXmlStreamReader streamReader(text);
// The flag is applied to the reader itself and also forwarded to the private
// setContent() overload.
streamReader.setNamespaceProcessing(namespaceProcessing);
return IMPL->setContent(&streamReader, namespaceProcessing, errorMsg, errorLine, errorColumn);
#endif
}
/*!
@ -6215,10 +6251,17 @@ bool QDomDocument::setContent(const QByteArray &data, bool namespaceProcessing,
{
// Lazily create the private document the first time content is set.
if (!impl)
impl = new QDomDocumentPrivate();
// Builds against Qt versions before 6.0 wrap the bytes in a QBuffer and parse
// them through QXmlInputSource; later versions hand the byte array directly to
// QXmlStreamReader.
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QBuffer buf;
buf.setData(data);
QXmlInputSource source(&buf);
return IMPL->setContent(&source, namespaceProcessing, errorMsg, errorLine, errorColumn);
#else
QXmlStreamReader streamReader(data);
// The flag is applied to the reader itself and also forwarded to the private
// setContent() overload.
streamReader.setNamespaceProcessing(namespaceProcessing);
return IMPL->setContent(&streamReader, namespaceProcessing, errorMsg, errorLine, errorColumn);
#endif
}
/*!
@ -6231,8 +6274,15 @@ bool QDomDocument::setContent(QIODevice* dev, bool namespaceProcessing, QString
{
// Lazily create the private document the first time content is set.
if (!impl)
impl = new QDomDocumentPrivate();
// Builds against Qt versions before 6.0 read the device through
// QXmlInputSource; later versions read it through QXmlStreamReader.
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QXmlInputSource source(dev);
return IMPL->setContent(&source, namespaceProcessing, errorMsg, errorLine, errorColumn);
#else
// NOTE(review): nothing in this branch opens dev -- confirm whether
// QXmlStreamReader needs an already-open device and whether the
// QXmlInputSource path behaved differently for closed devices.
QXmlStreamReader streamReader(dev);
streamReader.setNamespaceProcessing(namespaceProcessing);
return IMPL->setContent(&streamReader, namespaceProcessing, errorMsg, errorLine, errorColumn);
#endif
}
/*!
@ -6314,6 +6364,33 @@ bool QDomDocument::setContent(QXmlInputSource *source, QXmlReader *reader, QStri
return IMPL->setContent(source, reader, nullptr, errorMsg, errorLine, errorColumn);
}
/*!
\overload
\since 5.15
This function reads the XML document from the QXmlStreamReader \a reader
and parses it, replacing the previous content of this document. Returns
\c true if the content was successfully parsed; otherwise returns \c false.
If \a namespaceProcessing is \c true, the parser recognizes namespaces in the XML
file and sets the prefix name, local name and namespace URI to appropriate values.
If \a namespaceProcessing is \c false, the parser does no namespace processing when
it reads the XML file.
\note This overload forwards \a namespaceProcessing to the DOM parser only; it
does not change the namespace processing setting of \a reader itself.
If a parse error occurs, the error message is placed in \c{*}\a{errorMsg}, the line
number in \c{*}\a{errorLine} and the column number in \c{*}\a{errorColumn} (unless
the associated pointer is \c nullptr).
If \a reader is \c nullptr, a warning is printed and \c false is returned; the
error output arguments are not written in that case.
\sa QXmlStreamReader
*/
bool QDomDocument::setContent(QXmlStreamReader *reader, bool namespaceProcessing, QString *errorMsg,
int *errorLine, int *errorColumn)
{
// Lazily create the private document the first time content is set.
if (!impl)
impl = new QDomDocumentPrivate();
return IMPL->setContent(reader, namespaceProcessing, errorMsg, errorLine, errorColumn);
}
/*!
Converts the parsed document back to its textual representation.

View File

@ -91,6 +91,7 @@ class QDomNode;
class QDomEntity;
class QDomNotation;
class QDomCharacterData;
class QXmlStreamReader;
class Q_XML_EXPORT QDomImplementation
{
@ -343,6 +344,8 @@ public:
bool setContent(const QString& text, QString *errorMsg=nullptr, int *errorLine=nullptr, int *errorColumn=nullptr );
bool setContent(QIODevice* dev, QString *errorMsg=nullptr, int *errorLine=nullptr, int *errorColumn=nullptr );
bool setContent(QXmlInputSource *source, QXmlReader *reader, QString *errorMsg=nullptr, int *errorLine=nullptr, int *errorColumn=nullptr );
bool setContent(QXmlStreamReader *reader, bool namespaceProcessing, QString *errorMsg = nullptr,
int *errorLine = nullptr, int *errorColumn = nullptr);
// Qt extensions
QString toString(int = 1) const;

View File

@ -465,6 +465,8 @@ public:
int *errorLine, int *errorColumn);
bool setContent(QXmlInputSource *source, QXmlReader *reader, QXmlSimpleReader *simpleReader,
QString *errorMsg, int *errorLine, int *errorColumn);
bool setContent(QXmlStreamReader *reader, bool namespaceProcessing, QString *errorMsg,
int *errorLine, int *errorColumn);
// Attributes
QDomDocumentTypePrivate *doctype() { return type.data(); }

View File

@ -39,6 +39,7 @@
#include "qdomhelpers_p.h"
#include "qdom_p.h"
#include "qxmlstream.h"
#include "private/qxml_p.h"
QT_BEGIN_NAMESPACE
@ -166,6 +167,18 @@ QDomBuilder::ErrorInfo QDomHandler::errorInfo() const
*
**************************************************************/
// Returns the column number currently reported by the stream reader,
// converted to int.
int QDomDocumentLocator::column() const
{
    Q_ASSERT(reader);

    const auto currentColumn = reader->columnNumber();
    return static_cast<int>(currentColumn);
}
// Returns the line number currently reported by the stream reader,
// converted to int.
int QDomDocumentLocator::line() const
{
    Q_ASSERT(reader);

    const auto currentLine = reader->lineNumber();
    return static_cast<int>(currentLine);
}
void QSAXDocumentLocator::setLocator(QXmlLocator *l)
{
locator = l;
@ -252,6 +265,44 @@ bool QDomBuilder::startElement(const QString &nsURI, const QString &qName,
return true;
}
// Converts \a stringRef to a QString while preserving null-ness.
//
// QStringRef::toString() may turn a null reference into an empty (but
// non-null) QString. The DOM implementation treats null and empty strings
// differently, so a null reference is mapped to a null QString explicitly to
// keep the existing behavior.
inline QString stringRefToString(const QStringRef &stringRef)
{
    if (stringRef.isNull())
        return QString();

    return stringRef.toString();
}
bool QDomBuilder::startElement(const QString &nsURI, const QString &qName,
const QXmlStreamAttributes &atts)
{
QDomNodePrivate *n =
nsProcessing ? doc->createElementNS(nsURI, qName) : doc->createElement(qName);
if (!n)
return false;
n->setLocation(locator->line(), locator->column());
node->appendChild(n);
node = n;
// attributes
for (const auto &attr : atts) {
auto domElement = static_cast<QDomElementPrivate *>(node);
if (nsProcessing) {
domElement->setAttributeNS(stringRefToString(attr.namespaceUri()),
stringRefToString(attr.qualifiedName()),
stringRefToString(attr.value()));
} else {
domElement->setAttribute(stringRefToString(attr.qualifiedName()),
stringRefToString(attr.value()));
}
}
return true;
}
bool QDomBuilder::endElement()
{
if (!node || node == doc)
@ -368,4 +419,225 @@ bool QDomBuilder::notationDecl(const QString &name, const QString &publicId,
return true;
}
/**************************************************************
*
* QDomParser
*
**************************************************************/
// Sets up a parser that reads from \a r and builds its nodes into \a d.
// The builder is handed the address of this parser's own locator, which reports
// positions from the same reader.
QDomParser::QDomParser(QDomDocumentPrivate *d, QXmlStreamReader *r, bool namespaceProcessing)
    : reader(r),
      locator(r),
      domBuilder(d, &locator, namespaceProcessing)
{
}
// Parses the prolog first and, only if that succeeded, the document body.
// Returns true when both stages succeed.
bool QDomParser::parse()
{
    if (!parseProlog())
        return false;

    return parseBody();
}
// Returns the error information recorded by the DOM builder.
QDomBuilder::ErrorInfo QDomParser::errorInfo() const
{
    auto recorded = domBuilder.error();
    return recorded;
}
bool QDomParser::parseProlog()
{
Q_ASSERT(reader);
bool foundDtd = false;
while (!reader->atEnd()) {
reader->readNext();
if (reader->hasError()) {
domBuilder.fatalError(reader->errorString());
return false;
}
switch (reader->tokenType()) {
case QXmlStreamReader::StartDocument:
if (!reader->documentVersion().isEmpty()) {
QString value(QLatin1String("version='"));
value += reader->documentVersion();
value += QLatin1Char('\'');
if (!reader->documentEncoding().isEmpty()) {
value += QLatin1String(" encoding='");
value += reader->documentEncoding();
value += QLatin1Char('\'');
}
if (reader->isStandaloneDocument()) {
value += QLatin1String(" standalone='yes'");
} else {
// TODO: Add standalone='no', if 'standalone' is specified. With the current
// QXmlStreamReader there is no way to figure out if it was specified or not.
// QXmlStreamReader needs to be modified for handling that case correctly.
}
if (!domBuilder.processingInstruction(QLatin1String("xml"), value)) {
domBuilder.fatalError(
QDomParser::tr("Error occurred while processing XML declaration"));
return false;
}
}
break;
case QXmlStreamReader::DTD:
if (foundDtd) {
domBuilder.fatalError(QDomParser::tr("Multiple DTD sections are not allowed"));
return false;
}
foundDtd = true;
if (!domBuilder.startDTD(stringRefToString(reader->dtdName()),
stringRefToString(reader->dtdPublicId()),
stringRefToString(reader->dtdSystemId()))) {
domBuilder.fatalError(
QDomParser::tr("Error occurred while processing document type declaration"));
return false;
}
if (!parseMarkupDecl())
return false;
break;
case QXmlStreamReader::Comment:
if (!domBuilder.comment(reader->text().toString())) {
domBuilder.fatalError(QDomParser::tr("Error occurred while processing comment"));
return false;
}
break;
case QXmlStreamReader::ProcessingInstruction:
if (!domBuilder.processingInstruction(reader->processingInstructionTarget().toString(),
reader->processingInstructionData().toString())) {
domBuilder.fatalError(
QDomParser::tr("Error occurred while processing a processing instruction"));
return false;
}
break;
default:
// If the token is none of the above, prolog processing is done.
return true;
}
}
return true;
}
// Builds the DOM nodes for the document body.
//
// The reader's current token is handled first and readNext() is only called at
// the end of each iteration, so processing starts with the token the prolog
// pass stopped on. The loop runs until the reader is at the end or reports an
// error. Returns false after reporting a fatal error to the builder, true if
// the body was processed and every opened element was closed.
bool QDomParser::parseBody()
{
Q_ASSERT(reader);
// Qualified names of the elements that are currently open, innermost on top.
std::stack<QStringRef> tagStack;
while (!reader->atEnd() && !reader->hasError()) {
switch (reader->tokenType()) {
case QXmlStreamReader::StartElement:
tagStack.push(reader->qualifiedName());
if (!domBuilder.startElement(stringRefToString(reader->namespaceUri()),
stringRefToString(reader->qualifiedName()),
reader->attributes())) {
domBuilder.fatalError(
QDomParser::tr("Error occurred while processing a start element"));
return false;
}
break;
case QXmlStreamReader::EndElement:
// An end tag must match the innermost open element.
// NOTE(review): the comparison uses qualifiedName() but the message prints
// name() -- confirm which one is meant to be reported.
if (tagStack.empty() || reader->qualifiedName() != tagStack.top()) {
domBuilder.fatalError(
QDomParser::tr("Unexpected end element '%1'").arg(reader->name()));
return false;
}
tagStack.pop();
if (!domBuilder.endElement()) {
domBuilder.fatalError(
QDomParser::tr("Error occurred while processing an end element"));
return false;
}
break;
case QXmlStreamReader::Characters:
if (!reader->isWhitespace()) { // Skip the content consisting of only whitespaces
// Second filter: text that is empty after QString::trimmed() is dropped as
// well. NOTE(review): text().toString() is evaluated here and again below.
if (!reader->text().toString().trimmed().isEmpty()) {
if (!domBuilder.characters(reader->text().toString(), reader->isCDATA())) {
domBuilder.fatalError(QDomParser::tr(
"Error occurred while processing the element content"));
return false;
}
}
}
break;
case QXmlStreamReader::Comment:
if (!domBuilder.comment(reader->text().toString())) {
domBuilder.fatalError(QDomParser::tr("Error occurred while processing comments"));
return false;
}
break;
case QXmlStreamReader::ProcessingInstruction:
if (!domBuilder.processingInstruction(reader->processingInstructionTarget().toString(),
reader->processingInstructionData().toString())) {
domBuilder.fatalError(
QDomParser::tr("Error occurred while processing a processing instruction"));
return false;
}
break;
case QXmlStreamReader::EntityReference:
// The reference is passed to the builder by name as a skipped entity.
if (!domBuilder.skippedEntity(reader->name().toString())) {
domBuilder.fatalError(
QDomParser::tr("Error occurred while processing an entity reference"));
return false;
}
break;
default:
// Any other token kind is rejected in the body.
domBuilder.fatalError(QDomParser::tr("Unexpected token"));
return false;
}
reader->readNext();
}
if (reader->hasError()) {
domBuilder.fatalError(reader->errorString());
// NOTE(review): the purpose of this extra readNext() after the error has been
// reported is not evident from this function -- confirm it is intentional.
reader->readNext();
return false;
}
// Elements still open at this point were never closed.
if (!tagStack.empty()) {
domBuilder.fatalError(QDomParser::tr("Tag mismatch"));
return false;
}
return true;
}
// Passes the entity and notation declarations collected by the reader on to the
// DOM builder. Returns false after reporting a fatal error if the builder
// rejects a declaration, true otherwise.
bool QDomParser::parseMarkupDecl()
{
    Q_ASSERT(reader);

    const auto entityDecls = reader->entityDeclarations();
    for (const auto &entity : entityDecls) {
        // Entity declarations are created only for External Entities. Internal Entities
        // are parsed, and QXmlStreamReader handles the parsing itself and returns the
        // parsed result. So we don't need to do anything for the Internal Entities.
        const bool isInternal = entity.publicId().isEmpty() && entity.systemId().isEmpty();
        if (isInternal)
            continue;

        // External Entity
        const bool declared = domBuilder.unparsedEntityDecl(stringRefToString(entity.name()),
                                                            stringRefToString(entity.publicId()),
                                                            stringRefToString(entity.systemId()),
                                                            stringRefToString(entity.notationName()));
        if (!declared) {
            domBuilder.fatalError(
                    QDomParser::tr("Error occurred while processing entity declaration"));
            return false;
        }
    }

    const auto notationDecls = reader->notationDeclarations();
    for (const auto &notation : notationDecls) {
        const bool declared = domBuilder.notationDecl(stringRefToString(notation.name()),
                                                      stringRefToString(notation.publicId()),
                                                      stringRefToString(notation.systemId()));
        if (!declared) {
            domBuilder.fatalError(
                    QDomParser::tr("Error occurred while processing notation declaration"));
            return false;
        }
    }

    return true;
}
QT_END_NAMESPACE

View File

@ -39,6 +39,7 @@
#ifndef QDOMHELPERS_P_H
#define QDOMHELPERS_P_H
#include <qcoreapplication.h>
#include <qglobal.h>
#include <qxml.h>
@ -57,6 +58,8 @@ QT_BEGIN_NAMESPACE
class QDomDocumentPrivate;
class QDomNodePrivate;
class QXmlStreamReader;
class QXmlStreamAttributes;
/**************************************************************
*
@ -77,6 +80,19 @@ public:
virtual int line() const = 0;
};
// Locator backed by a QXmlStreamReader: line() and column() report the
// position the reader is currently at.
class QDomDocumentLocator : public QXmlDocumentLocator
{
public:
// Stores \a r; the defaulted destructor does not delete it.
QDomDocumentLocator(QXmlStreamReader *r) : reader(r) {}
~QDomDocumentLocator() override = default;
// Both accessors assert that the reader pointer is set.
int column() const override;
int line() const override;
private:
// Reader whose position is reported.
QXmlStreamReader *reader;
};
class QSAXDocumentLocator : public QXmlDocumentLocator
{
public:
@ -105,6 +121,7 @@ public:
bool endDocument();
bool startElement(const QString &nsURI, const QString &qName, const QXmlAttributes &atts);
bool startElement(const QString &nsURI, const QString &qName, const QXmlStreamAttributes &atts);
bool endElement();
bool characters(const QString &characters, bool cdata = false);
bool processingInstruction(const QString &target, const QString &data);
@ -188,6 +205,31 @@ private:
QDomBuilder domBuilder;
};
/**************************************************************
*
* QDomParser
*
**************************************************************/
// Drives a QXmlStreamReader and feeds the tokens it produces to a QDomBuilder.
class QDomParser
{
// Provides tr() for the translated error messages.
Q_DECLARE_TR_FUNCTIONS(QDomParser)
public:
QDomParser(QDomDocumentPrivate *d, QXmlStreamReader *r, bool namespaceProcessing);
// Parses the prolog and then the body; returns true only if both succeed.
bool parse();
// Error information recorded by the builder.
QDomBuilder::ErrorInfo errorInfo() const;
private:
bool parseProlog();
bool parseBody();
bool parseMarkupDecl();
// Declaration order matters: the constructor passes &locator to domBuilder,
// so locator has to be declared (and therefore initialized) before it.
QXmlStreamReader *reader;
QDomDocumentLocator locator;
QDomBuilder domBuilder;
};
QT_END_NAMESPACE
#endif // QDOMHELPERS_P_H

View File

@ -1793,6 +1793,12 @@ void tst_QTextDocument::toHtml()
QCOMPARE(output, expectedOutput);
QDomDocument document;
#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
QEXPECT_FAIL("charfmt-for-list-item",
"The attribute \"style\" is redefined in the generated HTML, which is not valid "
"according to XML standard. The new QDomDocument implementation follows the XML "
"standard.", Continue);
#endif
QVERIFY2(document.setContent(output), "Output was not valid XML");
}

View File

@ -173,6 +173,8 @@ void tst_QDom::setContent_data()
" </b3>\n"
"</a1>\n");
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
// These configurations cannot be supported by the QXmlStreamReader-based implementation
QTest::newRow( "02" ) << doc01
<< QString("http://trolltech.com/xml/features/report-whitespace-only-CharData").split(' ')
<< QStringList()
@ -227,6 +229,7 @@ void tst_QDom::setContent_data()
" <c1/>\n"
" </b3>\n"
"</a1>\n");
#endif
QTest::newRow("05") << QString("<message>\n"
" <body>&lt;b&gt;foo&lt;/b&gt;>]]&gt;</body>\n"
@ -242,6 +245,8 @@ void tst_QDom::setContent()
{
QFETCH( QString, doc );
QDomDocument domDoc;
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QXmlInputSource source;
source.setData( doc );
@ -258,8 +263,11 @@ void tst_QDom::setContent()
reader.setFeature( *it, false );
}
QDomDocument domDoc;
QVERIFY( domDoc.setContent( &source, &reader ) );
#else
QXmlStreamReader reader(doc);
QVERIFY(domDoc.setContent(&reader, true));
#endif
QString eRes;
QTextStream ts( &eRes, QIODevice::WriteOnly );
@ -1475,8 +1483,9 @@ void tst_QDom::normalizeAttributes() const
QDomDocument doc;
QVERIFY(doc.setContent(&buffer, true));
// ### Qt 5: fix this, if we keep QDom at all
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QEXPECT_FAIL("", "The parser doesn't perform Attribute Value Normalization. Fixing that would change behavior.", Continue);
#endif
QCOMPARE(doc.documentElement().attribute(QLatin1String("attribute")), QString::fromLatin1("a a"));
}
@ -1517,9 +1526,10 @@ void tst_QDom::serializeNamespaces() const
"<b:element b:name=''/>"
"</doc>";
QDomDocument doc;
QByteArray ba(input);
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QBuffer buffer(&ba);
QVERIFY(buffer.open(QIODevice::ReadOnly));
QXmlInputSource source(&buffer);
@ -1527,8 +1537,11 @@ void tst_QDom::serializeNamespaces() const
reader.setFeature("http://xml.org/sax/features/namespaces", true);
reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
QDomDocument doc;
QVERIFY(doc.setContent(&source, &reader));
#else
QXmlStreamReader streamReader(input);
QVERIFY(doc.setContent(&streamReader, true));
#endif
const QByteArray serialized(doc.toByteArray());
@ -1552,7 +1565,9 @@ void tst_QDom::flagInvalidNamespaces() const
QDomDocument doc;
// NOTE(review): `true` is passed as the second argument of
// QString::fromLatin1() (its size parameter), not as the namespaceProcessing
// argument of setContent(). Confirm whether
// doc.setContent(QString::fromLatin1(input), true) was intended.
QVERIFY(!doc.setContent(QString::fromLatin1(input, true)));
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QEXPECT_FAIL("", "The parser doesn't flag identical qualified attribute names. Fixing this would change behavior.", Continue);
#endif
QVERIFY(!doc.setContent(QString::fromLatin1(input)));
}
@ -1563,7 +1578,9 @@ void tst_QDom::flagUndeclaredNamespace() const
"<b:element b:name=''/>"
"</a:doc>";
QDomDocument doc;
QByteArray ba(input);
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QBuffer buffer(&ba);
QVERIFY(buffer.open(QIODevice::ReadOnly));
@ -1573,9 +1590,12 @@ void tst_QDom::flagUndeclaredNamespace() const
reader.setFeature("http://xml.org/sax/features/namespaces", true);
reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
QDomDocument doc;
QEXPECT_FAIL("", "The parser doesn't flag not declared prefixes. Fixing this would change behavior.", Continue);
QVERIFY(!doc.setContent(&source, &reader));
#else
QXmlStreamReader streamReader(ba);
QVERIFY(!doc.setContent(&streamReader, true));
#endif
}
void tst_QDom::indentComments() const
@ -1642,7 +1662,9 @@ void tst_QDom::reportDuplicateAttributes() const
QDomDocument dd;
bool isSuccess = dd.setContent(QLatin1String("<test x=\"1\" x=\"2\"/>"));
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QEXPECT_FAIL("", "The parser doesn't flag duplicate attributes. Fixing this would change behavior.", Continue);
#endif
QVERIFY2(!isSuccess, "Duplicate attributes are well-formedness errors, and should be reported as such.");
}
@ -1842,10 +1864,15 @@ void tst_QDom::doubleNamespaceDeclarations() const
QFile file(testFile);
QVERIFY(file.open(QIODevice::ReadOnly));
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QXmlSimpleReader reader;
QXmlInputSource source(&file);
QVERIFY(doc.setContent(&source, &reader));
#else
QXmlStreamReader streamReader(&file);
QVERIFY(doc.setContent(&streamReader, true));
#endif
// tst_QDom relies on a specific QHash ordering, see QTBUG-25071
QString docAsString = doc.toString(0);
@ -1862,11 +1889,15 @@ void tst_QDom::setContentQXmlReaderOverload() const
{
QDomDocument doc;
// NOTE(review): neither branch checks the return value of setContent(); a
// parse failure only shows up through the QCOMPARE below.
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
// NOTE(review): `reader` is declared but not passed to setContent() -- confirm
// which overload this test is meant to exercise.
QXmlSimpleReader reader;
QXmlInputSource data;
data.setData(QByteArray("<e/>"));
doc.setContent(&data, true);
#else
QXmlStreamReader streamReader(QByteArray("<e/>"));
doc.setContent(&streamReader, true);
#endif
QCOMPARE(doc.documentElement().nodeName(), QString::fromLatin1("e"));
}
@ -1961,6 +1992,10 @@ void tst_QDom::setContentWhitespace_data() const
void tst_QDom::taskQTBUG4595_dontAssertWhenDocumentSpecifiesUnknownEncoding() const
{
// QXmlStreamReader fails to read XML documents with unknown encoding. It
// needs to be modified if we want to support this case with the QXmlStreamReader-based
// implementation.
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
QString xmlWithUnknownEncoding("<?xml version='1.0' encoding='unknown-encoding'?>"
"<foo>"
" <bar>How will this sentence be handled?</bar>"
@ -1970,6 +2005,7 @@ void tst_QDom::taskQTBUG4595_dontAssertWhenDocumentSpecifiesUnknownEncoding() co
QString dontAssert = d.toString(); // this should not assert
QVERIFY(true);
#endif
}
void tst_QDom::cloneDTD_QTBUG8398() const