Remove QTextCodec dependency in the old SAX parser

Port QXmlInputSource from QTextCodec/QTextDecoder to QStringDecoder, so that this code is cleaned up as well and QTextCodec can be removed from Qt Core.

Change-Id: I2b5b821b039ce2c024ec3cb7338a1a9becdd2157
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-04-30 15:06:27 +02:00 · 2020-04-30 15:06:27 +02:00 · 1a6bf6c549
commit 1a6bf6c549
parent b88720eb82
3 changed files with 23 additions and 143 deletions
--- a/src/xml/sax/qxml.cpp
+++ b/src/xml/sax/qxml.cpp
@ -244,9 +244,7 @@ public:
    int pos;
    int length;
    bool nextReturnedEndOfData;
-#if QT_CONFIG(textcodec)
-    QTextDecoder *encMapper;
-#endif
+    QStringDecoder toUnicode;

    QByteArray encodingDeclBytes;
    QString encodingDeclChars;
@ -1090,9 +1088,6 @@ void QXmlInputSource::init()
        d->inputStream = nullptr;

        setData(QString());
-#if QT_CONFIG(textcodec)
-        d->encMapper = nullptr;
-#endif
        d->nextReturnedEndOfData = true; // first call to next() will call fetchData()

        d->encodingDeclBytes.clear();
@ -1136,9 +1131,6 @@ QXmlInputSource::QXmlInputSource(QIODevice *dev)
 QXmlInputSource::~QXmlInputSource()
 {
    // ### close the input device.
-#if QT_CONFIG(textcodec)
-    delete d->encMapper;
-#endif
    delete d;
 }

@ -1356,77 +1348,47 @@ QString QXmlInputSource::fromRawData(const QByteArray &data, bool beginning)
 {
    if (data.size() == 0)
        return QString();
-    if (beginning) {
-        delete d->encMapper;
-        d->encMapper = nullptr;
-    }

-    int mib = 106; // UTF-8
+    if (beginning)
+        d->toUnicode = QStringDecoder();

    // This is the initial UTF codec we will read the encoding declaration with
-    if (d->encMapper == nullptr) {
+    if (!d->toUnicode.isValid()) {
        d->encodingDeclBytes.clear();
        d->encodingDeclChars.clear();
        d->lookingForEncodingDecl = true;

-        // look for byte order mark and read the first 5 characters
-        if (data.size() >= 4) {
-            uchar ch1 = data.at(0);
-            uchar ch2 = data.at(1);
-            uchar ch3 = data.at(2);
-            uchar ch4 = data.at(3);
-
-            if ((ch1 == 0 && ch2 == 0 && ch3 == 0xfe && ch4 == 0xff) ||
-                (ch1 == 0xff && ch2 == 0xfe && ch3 == 0 && ch4 == 0))
-                mib = 1017; // UTF-32 with byte order mark
-            else if (ch1 == 0x3c && ch2 == 0x00 && ch3 == 0x00 && ch4 == 0x00)
-                mib = 1019; // UTF-32LE
-            else if (ch1 == 0x00 && ch2 == 0x00 && ch3 == 0x00 && ch4 == 0x3c)
-                mib = 1018; // UTF-32BE
+        auto encoding = QStringConverter::encodingForData(data.constData(), data.size(), char16_t('<'));
+        if (encoding) {
+            d->lookingForEncodingDecl = false;
+            d->toUnicode = QStringDecoder(*encoding);
+        } else {
+            d->toUnicode = QStringDecoder(QStringDecoder::Utf8);
        }
-        if (mib == 106 && data.size() >= 2) {
-            uchar ch1 = data.at(0);
-            uchar ch2 = data.at(1);
-
-            if ((ch1 == 0xfe && ch2 == 0xff) || (ch1 == 0xff && ch2 == 0xfe))
-                mib = 1015; // UTF-16 with byte order mark
-            else if (ch1 == 0x3c && ch2 == 0x00)
-                mib = 1014; // UTF-16LE
-            else if (ch1 == 0x00 && ch2 == 0x3c)
-                mib = 1013; // UTF-16BE
-        }
-
-        QTextCodec *codec = QTextCodec::codecForMib(mib);
-        Q_ASSERT(codec);
-
-        d->encMapper = codec->makeDecoder();
    }

-    QString input = d->encMapper->toUnicode(data.constData(), data.size());
+    QString input = d->toUnicode(data.constData(), data.size());

    if (d->lookingForEncodingDecl) {
        d->encodingDeclChars += input;

        bool needMoreText;
-        QString encoding = extractEncodingDecl(d->encodingDeclChars, &needMoreText);
+        QByteArray encoding = extractEncodingDecl(d->encodingDeclChars, &needMoreText).toLatin1();

        if (!encoding.isEmpty()) {
-            if (QTextCodec *codec = QTextCodec::codecForName(std::move(encoding).toLatin1())) {
-                /* If the encoding is the same, we don't have to do toUnicode() all over again. */
-                if(codec->mibEnum() != mib) {
-                    delete d->encMapper;
-                    d->encMapper = codec->makeDecoder();
+            auto e = QStringDecoder::encodingForData(encoding.constData(), encoding.size());
+            if (e && *e != QStringDecoder::Utf8) {
+                d->toUnicode = QStringDecoder(*e);

-                    /* The variable input can potentially be large, so we deallocate
-                     * it before calling toUnicode() in order to avoid having two
-                     * large QStrings in memory simultaneously. */
-                    input.clear();
+                /* The variable input can potentially be large, so we deallocate
+                 * it before calling toUnicode() in order to avoid having two
+                 * large QStrings in memory simultaneously. */
+                input.clear();

-                    // prime the decoder with the data so far
-                    d->encMapper->toUnicode(d->encodingDeclBytes.constData(), d->encodingDeclBytes.size());
-                    // now feed it the new data
-                    input = d->encMapper->toUnicode(data.constData(), data.size());
-                }
+                // prime the decoder with the data so far
+                d->toUnicode(d->encodingDeclBytes.constData(), d->encodingDeclBytes.size());
+                // now feed it the new data
+                input = d->toUnicode(data.constData(), data.size());
            }
        }

--- a/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_euc-jp.xml
+++ b/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_euc-jp.xml
@ -1,78 +0,0 @@
-<?xml version="1.0" encoding="euc-jp"?>
-<!DOCTYPE 週報 SYSTEM "weekly-euc-jp.dtd">
-<!-- 週報サンプル -->
-<週報>
-  <年月週>
-    <年度>1997</年度>
-    <月度>1</月度>
-    <週>1</週>
-  </年月週>
-
-  <氏名>
-    <氏>山田</氏>
-    <名>太郎</名>
-  </氏名>
-
-  <業務報告リスト>
-    <業務報告>
-      <業務名>XMLエディターの作成</業務名>
-      <業務コード>X3355-23</業務コード>
-      <工数管理>
-        <見積もり工数>1600</見積もり工数>
-        <実績工数>320</実績工数>
-        <当月見積もり工数>160</当月見積もり工数>
-        <当月実績工数>24</当月実績工数>
-      </工数管理>
-      <予定項目リスト>
-        <予定項目>
-          <P>XMLエディターの基本仕様の作成</P>
-        </予定項目>
-      </予定項目リスト>
-      <実施事項リスト>
-        <実施事項>
-          <P>XMLエディターの基本仕様の作成</P>
-        </実施事項>
-        <実施事項>
-          <P>競合他社製品の機能調査</P>
-        </実施事項>
-      </実施事項リスト>
-      <上長への要請事項リスト>
-        <上長への要請事項>
-          <P>特になし</P>
-        </上長への要請事項>
-      </上長への要請事項リスト>
-      <問題点対策>
-        <P>XMLとは何かわからない。</P>
-      </問題点対策>
-    </業務報告>
-
-    <業務報告>
-      <業務名>検索エンジンの開発</業務名>
-      <業務コード>S8821-76</業務コード>
-      <工数管理>
-        <見積もり工数>120</見積もり工数>
-        <実績工数>6</実績工数>
-        <当月見積もり工数>32</当月見積もり工数>
-        <当月実績工数>2</当月実績工数>
-      </工数管理>
-      <予定項目リスト>
-        <予定項目>
-          <P><A href="http://www.goo.ne.jp">goo</A>の機能を調べてみる</P>
-        </予定項目>
-      </予定項目リスト>
-      <実施事項リスト>
-        <実施事項>
-          <P>更に、どういう検索エンジンがあるか調査する</P>
-        </実施事項>
-      </実施事項リスト>
-      <上長への要請事項リスト>
-        <上長への要請事項>
-          <P>開発をするのはめんどうなので、Yahoo!を買収して下さい。</P>
-        </上長への要請事項>
-      </上長への要請事項リスト>
-      <問題点対策>
-        <P>検索エンジンで車を走らせることができない。（要調査）</P>
-      </問題点対策>
-    </業務報告>
-  </業務報告リスト>
-</週報>
--- a/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_iso-2022-jp.xml.ref
+++ b/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_iso-2022-jp.xml.ref
@ -1,4 +0,0 @@
-setDocumentLocator(locator={columnNumber=1, lineNumber=1})
-startDocument()
-   processingInstruction(target="xml", data="version='1.0' encoding='iso-2022-jp'")
-   fatalError(exception={columnNumber=11, lineNumber=2, publicId="", systemId="", message="letter is expected"})