diff --git a/src/corelib/serialization/qxmlutils.cpp b/src/corelib/serialization/qxmlutils.cpp index 74a0cf0c23d..778e8de72de 100644 --- a/src/corelib/serialization/qxmlutils.cpp +++ b/src/corelib/serialization/qxmlutils.cpp @@ -235,13 +235,16 @@ bool QXmlUtils::isLetter(const QChar c) \sa {http://www.w3.org/TR/REC-xml/#NT-Char}, {Extensible Markup Language (XML) 1.0 (Fourth Edition), [2] Char} */ -bool QXmlUtils::isChar(const QChar c) +bool QXmlUtils::isChar(const char32_t c) { - return (c.unicode() >= 0x0020 && c.unicode() <= 0xD7FF) - || c.unicode() == 0x0009 - || c.unicode() == 0x000A - || c.unicode() == 0x000D - || (c.unicode() >= 0xE000 && c.unicode() <= 0xFFFD); + // The valid range is defined by https://www.w3.org/TR/REC-xml/#NT-Char as following: + // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + return (c >= 0x0020 && c <= 0xD7FF) + || c == 0x0009 + || c == 0x000A + || c == 0x000D + || (c >= 0xE000 && c <= 0xFFFD) + || (c >= 0x10000 && c <= 0x10FFFF); } /*! diff --git a/src/corelib/serialization/qxmlutils_p.h b/src/corelib/serialization/qxmlutils_p.h index 2e709e8323c..0ad17589799 100644 --- a/src/corelib/serialization/qxmlutils_p.h +++ b/src/corelib/serialization/qxmlutils_p.h @@ -33,7 +33,7 @@ class Q_CORE_EXPORT QXmlUtils { public: static bool isEncName(QStringView encName); - static bool isChar(const QChar c); + static bool isChar(const char32_t c); static bool isNameChar(const QChar c); static bool isLetter(const QChar c); static bool isNCName(QStringView ncName); diff --git a/src/xml/dom/qdom.cpp b/src/xml/dom/qdom.cpp index 486894019c7..e0e9a465ef5 100644 --- a/src/xml/dom/qdom.cpp +++ b/src/xml/dom/qdom.cpp @@ -22,7 +22,7 @@ #include #include #include - +#include #include #include @@ -156,10 +156,11 @@ static QString fixedCharData(const QString &data, bool *ok) } QString result; - for (int i = 0; i < data.size(); ++i) { - QChar c = data.at(i); + QStringIterator it(data); + while (it.hasNext()) { + const char32_t c = it.next(QChar::Null); if (QXmlUtils::isChar(c)) { - result.append(c); + result.append(QChar::fromUcs4(c)); } else if (QDomImplementationPrivate::invalidDataPolicy == QDomImplementation::ReturnNullNode) { *ok = false; return QString(); diff --git a/tests/auto/xml/dom/qdom/tst_qdom.cpp b/tests/auto/xml/dom/qdom/tst_qdom.cpp index e1c2b12ab5e..f05020f61ca 100644 --- a/tests/auto/xml/dom/qdom/tst_qdom.cpp +++ b/tests/auto/xml/dom/qdom/tst_qdom.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,7 @@ private slots: void invalidQualifiedName(); void invalidCharData_data(); void invalidCharData(); + void nonBMPCharacters(); void roundTripAttributes() const; void roundTripCDATA() const; @@ -1342,6 +1344,10 @@ void tst_QDom::invalidCharData_data() QTest::newRow( "fSupplementary Plane: 𝄞 😂 🀄 🀶 🃪 🃋"_qs; + + QString errorMsg; + QDomDocument doc; + doc.setContent(input, &errorMsg); + QVERIFY(errorMsg.isEmpty()); + QCOMPARE(doc.toString(-1), input); +} + void tst_QDom::roundTripAttributes() const { /* Create an attribute via the QDom API with weird whitespace content. */