From a4eea312ed7b020b1bb686ec1d619b4d1f85abd4 Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Tue, 28 Apr 2020 15:35:58 +0200 Subject: [PATCH] Correctly parse non BMP char refs in the sax parser Update the auto test accordingly, and at the same time remove all uses of QTextStream (as they aren't required). Change-Id: I71b7cf6a6b54ea59507f27d5d2d04cc5ae5885fc Reviewed-by: Sona Kurazyan --- src/xml/sax/qxml.cpp | 14 ++++++++++++-- .../xml/sax/qxmlsimplereader/parser/main.cpp | 17 +++++------------ .../xml/sax/qxmlsimplereader/parser/parser.cpp | 11 ++++++----- .../qxmlsimplereader/tst_qxmlsimplereader.cpp | 14 ++++---------- .../xmldocs/not-wf/sa/142.xml.ref | Bin 309 -> 311 bytes .../xmldocs/valid/sa/064.xml.ref | Bin 312 -> 316 bytes .../xmldocs/valid/sa/089.xml.ref | Bin 381 -> 378 bytes 7 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/xml/sax/qxml.cpp b/src/xml/sax/qxml.cpp index f5d283d5ac7..e98c86979b5 100644 --- a/src/xml/sax/qxml.cpp +++ b/src/xml/sax/qxml.cpp @@ -7475,7 +7475,12 @@ bool QXmlSimpleReaderPrivate::parseReference() case DoneD: tmp = ref().toUInt(&ok, 10); if (ok) { - stringAddC(QChar(tmp)); + if (tmp > 0xffff) { + stringAddC(QChar::highSurrogate(tmp)); + stringAddC(QChar::lowSurrogate(tmp)); + } else { + stringAddC(QChar(tmp)); + } } else { reportParseError(QLatin1String(XMLERR_ERRORPARSINGREFERENCE)); return false; @@ -7486,7 +7491,12 @@ bool QXmlSimpleReaderPrivate::parseReference() case DoneH: tmp = ref().toUInt(&ok, 16); if (ok) { - stringAddC(QChar(tmp)); + if (tmp > 0xffff) { + stringAddC(QChar::highSurrogate(tmp)); + stringAddC(QChar::lowSurrogate(tmp)); + } else { + stringAddC(QChar(tmp)); + } } else { reportParseError(QLatin1String(XMLERR_ERRORPARSINGREFERENCE)); return false; diff --git a/tests/auto/xml/sax/qxmlsimplereader/parser/main.cpp b/tests/auto/xml/sax/qxmlsimplereader/parser/main.cpp index b5d9fea315c..c5597c6f8eb 100644 --- a/tests/auto/xml/sax/qxmlsimplereader/parser/main.cpp +++ b/tests/auto/xml/sax/qxmlsimplereader/parser/main.cpp @@ -37,7 +37,6 @@ #include "parser.h" -static QTextStream qout(stdout, QIODevice::WriteOnly); static QTextStream qerr(stderr, QIODevice::WriteOnly); static void usage() @@ -79,19 +78,15 @@ int main(int argc, const char *argv[]) if (out_file_name.isEmpty()) out_file_name = file_name + ".ref"; - QFile _out_file; - QTextStream _out_stream; - QTextStream *out_stream; + QFile out_file; if (out_file_name == "-") { - out_stream = &qout; + out_file.open(stdout, QFile::WriteOnly); } else { - _out_file.setFileName(out_file_name); - if (!_out_file.open(QIODevice::WriteOnly | QIODevice::Truncate)) { + out_file.setFileName(out_file_name); + if (!out_file.open(QIODevice::WriteOnly | QIODevice::Truncate)) { qerr << "Could not open " << out_file_name << ": " << strerror(errno) << Qt::endl; return 1; } - _out_stream.setDevice(&_out_file); - out_stream = &_out_stream; } Parser parser; @@ -102,9 +97,7 @@ int main(int argc, const char *argv[]) parser.parseFile(&in_file); - out_stream->setCodec("utf8"); - - *out_stream << parser.result(); + out_file.write(parser.result().toUtf8()); return 0; } diff --git a/tests/auto/xml/sax/qxmlsimplereader/parser/parser.cpp b/tests/auto/xml/sax/qxmlsimplereader/parser/parser.cpp index 44f81019555..24aa9376da8 100644 --- a/tests/auto/xml/sax/qxmlsimplereader/parser/parser.cpp +++ b/tests/auto/xml/sax/qxmlsimplereader/parser/parser.cpp @@ -142,11 +142,12 @@ bool ContentHandler::startElement(const QString &namespaceURI, QString ContentHandler::escapeStr(const QString &s) { QString result = s; - result.replace(QRegularExpression("\""), "\\\""); - result.replace(QRegularExpression("\\"), "\\\\"); - result.replace(QRegularExpression("\n"), "\\n"); - result.replace(QRegularExpression("\r"), "\\r"); - result.replace(QRegularExpression("\t"), "\\t"); + result.replace(QChar(0), "\\0"); + result.replace("\\", "\\\\"); + result.replace("\"", "\\\""); + result.replace("\n", "\\n"); + result.replace("\r", "\\r"); + result.replace("\t", "\\t"); return result; } diff --git a/tests/auto/xml/sax/qxmlsimplereader/tst_qxmlsimplereader.cpp b/tests/auto/xml/sax/qxmlsimplereader/tst_qxmlsimplereader.cpp index cea4e3c8b81..df158cae0ff 100644 --- a/tests/auto/xml/sax/qxmlsimplereader/tst_qxmlsimplereader.cpp +++ b/tests/auto/xml/sax/qxmlsimplereader/tst_qxmlsimplereader.cpp @@ -311,14 +311,12 @@ void tst_QXmlSimpleReader::testGoodXmlFile() QVERIFY(file.open(QIODevice::ReadOnly)); Parser parser; - QEXPECT_FAIL(QFINDTESTDATA("xmldocs/valid/sa/089.xml").toLocal8Bit().constData(), "a form feed character is not accepted in XML", Continue); QVERIFY(parser.parseFile(&file)); QFile ref_file(file_name + ".ref"); QVERIFY(ref_file.open(QIODevice::ReadOnly | QIODevice::Text)); - QTextStream ref_stream(&ref_file); - ref_stream.setCodec("UTF-8"); - QString ref_file_contents = ref_stream.readAll(); + QByteArray data = ref_file.readAll(); + QString ref_file_contents = QString::fromUtf8(data.constData(), data.size()); QCOMPARE(parser.result(), ref_file_contents); } @@ -393,9 +391,7 @@ void tst_QXmlSimpleReader::testBadXmlFile() QFile ref_file(file_name + ".ref"); QVERIFY(ref_file.open(QIODevice::ReadOnly | QIODevice::Text)); - QTextStream ref_stream(&ref_file); - ref_stream.setCodec("UTF-8"); - QString ref_file_contents = ref_stream.readAll(); + QString ref_file_contents = QString::fromUtf8(ref_file.readAll()); QEXPECT_FAIL(QFINDTESTDATA("xmldocs/not-wf/sa/145.xml").toLocal8Bit().constData(), "Surrogate code point 0xD800 should be rejected", Continue); @@ -469,9 +465,7 @@ void tst_QXmlSimpleReader::testIncrementalParsing() QFile ref_file(file_name + ".ref"); QVERIFY(ref_file.open(QIODevice::ReadOnly | QIODevice::Text)); - QTextStream ref_stream(&ref_file); - ref_stream.setCodec("UTF-8"); - QString ref_file_contents = ref_stream.readAll(); + QString ref_file_contents = QString::fromUtf8(ref_file.readAll()); QCOMPARE(parser.result(), ref_file_contents); } diff --git a/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/142.xml.ref b/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/142.xml.ref index 7ce4da6a06aeb1758075e2e3fac4e4e936889eae..0684cfa9434a12fb0780654aea38d3b0b0028278 100644 GIT binary patch delta 26 icmdnWw4G_fGoF|j10_u^1qFrFyp)L(O(&}|3IG6fWC!N} delta 11 Scmdnaw3TVXGe(AqFZ}=;l?1Q= diff --git a/tests/auto/xml/sax/qxmlsimplereader/xmldocs/valid/sa/064.xml.ref b/tests/auto/xml/sax/qxmlsimplereader/xmldocs/valid/sa/064.xml.ref index 579aeb52f649fe003fe4508d7930561a064a5c38..0b806c96a84e8d9513b0b15a9e479b69ba3a7a99 100644 GIT binary patch delta 16 YcmdnNw1;WJGqw*C8XCUzPka#o07Wqf!2kdN delta 12 UcmdnPw1a8FGbV=j6W;^?03x9UtpET3 diff --git a/tests/auto/xml/sax/qxmlsimplereader/xmldocs/valid/sa/089.xml.ref b/tests/auto/xml/sax/qxmlsimplereader/xmldocs/valid/sa/089.xml.ref index 7c68c32286609b9586ed64a0f2113e5dca338f86..f09bc2bd0930aee2cc2f4c575a48db6d74d5ff90 100644 GIT binary patch delta 77 zcmey%^owc23a$?m8XCUz@8A2Sf8yGBmE?@XqQvBq)S_aI3JR%t YDHD&G3TScx87}$BrManjB^sJs0M`>De*gdg delta 107 zcmeyx^p|PE3RZ^q`}e+|xGCN!EwLmq$F-;^zeppsB0067Br`wHwmLaKr!+UuuQWF) zwaC^&M