diff --git a/src/xml/sax/qxml.cpp b/src/xml/sax/qxml.cpp index 1898f4fbe8a..d1af7e42daf 100644 --- a/src/xml/sax/qxml.cpp +++ b/src/xml/sax/qxml.cpp @@ -244,9 +244,7 @@ public: int pos; int length; bool nextReturnedEndOfData; -#if QT_CONFIG(textcodec) - QTextDecoder *encMapper; -#endif + QStringDecoder toUnicode; QByteArray encodingDeclBytes; QString encodingDeclChars; @@ -1090,9 +1088,6 @@ void QXmlInputSource::init() d->inputStream = nullptr; setData(QString()); -#if QT_CONFIG(textcodec) - d->encMapper = nullptr; -#endif d->nextReturnedEndOfData = true; // first call to next() will call fetchData() d->encodingDeclBytes.clear(); @@ -1136,9 +1131,6 @@ QXmlInputSource::QXmlInputSource(QIODevice *dev) QXmlInputSource::~QXmlInputSource() { // ### close the input device. -#if QT_CONFIG(textcodec) - delete d->encMapper; -#endif delete d; } @@ -1356,77 +1348,47 @@ QString QXmlInputSource::fromRawData(const QByteArray &data, bool beginning) { if (data.size() == 0) return QString(); - if (beginning) { - delete d->encMapper; - d->encMapper = nullptr; - } - int mib = 106; // UTF-8 + if (beginning) + d->toUnicode = QStringDecoder(); // This is the initial UTF codec we will read the encoding declaration with - if (d->encMapper == nullptr) { + if (!d->toUnicode.isValid()) { d->encodingDeclBytes.clear(); d->encodingDeclChars.clear(); d->lookingForEncodingDecl = true; - // look for byte order mark and read the first 5 characters - if (data.size() >= 4) { - uchar ch1 = data.at(0); - uchar ch2 = data.at(1); - uchar ch3 = data.at(2); - uchar ch4 = data.at(3); - - if ((ch1 == 0 && ch2 == 0 && ch3 == 0xfe && ch4 == 0xff) || - (ch1 == 0xff && ch2 == 0xfe && ch3 == 0 && ch4 == 0)) - mib = 1017; // UTF-32 with byte order mark - else if (ch1 == 0x3c && ch2 == 0x00 && ch3 == 0x00 && ch4 == 0x00) - mib = 1019; // UTF-32LE - else if (ch1 == 0x00 && ch2 == 0x00 && ch3 == 0x00 && ch4 == 0x3c) - mib = 1018; // UTF-32BE + auto encoding = QStringConverter::encodingForData(data.constData(), data.size(), char16_t('<')); + if (encoding) { + d->lookingForEncodingDecl = false; + d->toUnicode = QStringDecoder(*encoding); + } else { + d->toUnicode = QStringDecoder(QStringDecoder::Utf8); } - if (mib == 106 && data.size() >= 2) { - uchar ch1 = data.at(0); - uchar ch2 = data.at(1); - - if ((ch1 == 0xfe && ch2 == 0xff) || (ch1 == 0xff && ch2 == 0xfe)) - mib = 1015; // UTF-16 with byte order mark - else if (ch1 == 0x3c && ch2 == 0x00) - mib = 1014; // UTF-16LE - else if (ch1 == 0x00 && ch2 == 0x3c) - mib = 1013; // UTF-16BE - } - - QTextCodec *codec = QTextCodec::codecForMib(mib); - Q_ASSERT(codec); - - d->encMapper = codec->makeDecoder(); } - QString input = d->encMapper->toUnicode(data.constData(), data.size()); + QString input = d->toUnicode(data.constData(), data.size()); if (d->lookingForEncodingDecl) { d->encodingDeclChars += input; bool needMoreText; - QString encoding = extractEncodingDecl(d->encodingDeclChars, &needMoreText); + QByteArray encoding = extractEncodingDecl(d->encodingDeclChars, &needMoreText).toLatin1(); if (!encoding.isEmpty()) { - if (QTextCodec *codec = QTextCodec::codecForName(std::move(encoding).toLatin1())) { - /* If the encoding is the same, we don't have to do toUnicode() all over again. */ - if(codec->mibEnum() != mib) { - delete d->encMapper; - d->encMapper = codec->makeDecoder(); + auto e = QStringDecoder::encodingForData(encoding.constData(), encoding.size()); + if (e && *e != QStringDecoder::Utf8) { + d->toUnicode = QStringDecoder(*e); - /* The variable input can potentially be large, so we deallocate - * it before calling toUnicode() in order to avoid having two - * large QStrings in memory simultaneously. */ - input.clear(); + /* The variable input can potentially be large, so we deallocate + * it before calling toUnicode() in order to avoid having two + * large QStrings in memory simultaneously. */ + input.clear(); - // prime the decoder with the data so far - d->encMapper->toUnicode(d->encodingDeclBytes.constData(), d->encodingDeclBytes.size()); - // now feed it the new data - input = d->encMapper->toUnicode(data.constData(), data.size()); - } + // prime the decoder with the data so far + d->toUnicode(d->encodingDeclBytes.constData(), d->encodingDeclBytes.size()); + // now feed it the new data + input = d->toUnicode(data.constData(), data.size()); } } diff --git a/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_euc-jp.xml b/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_euc-jp.xml deleted file mode 100644 index 887ab5ec30d..00000000000 --- a/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_euc-jp.xml +++ /dev/null @@ -1,78 +0,0 @@ - - - -<週報> - <年月週> - <年度>1997 - <月度>1 - <週>1 - - - <氏名> - <氏>山田 - <名>太郎 - - - <業務報告リスト> - <業務報告> - <業務名>XMLエディターの作成 - <業務コード>X3355-23 - <工数管理> - <見積もり工数>1600 - <実績工数>320 - <当月見積もり工数>160 - <当月実績工数>24 - - <予定項目リスト> - <予定項目> -

XMLエディターの基本仕様の作成

- - - <実施事項リスト> - <実施事項> -

XMLエディターの基本仕様の作成

- - <実施事項> -

競合他社製品の機能調査

- - - <上長への要請事項リスト> - <上長への要請事項> -

特になし

- - - <問題点対策> -

XMLとは何かわからない。

- - - - <業務報告> - <業務名>検索エンジンの開発 - <業務コード>S8821-76 - <工数管理> - <見積もり工数>120 - <実績工数>6 - <当月見積もり工数>32 - <当月実績工数>2 - - <予定項目リスト> - <予定項目> -

gooの機能を調べてみる

- - - <実施事項リスト> - <実施事項> -

更に、どういう検索エンジンがあるか調査する

- - - <上長への要請事項リスト> - <上長への要請事項> -

開発をするのはめんどうなので、Yahoo!を買収して下さい。

- - - <問題点対策> -

検索エンジンで車を走らせることができない。(要調査)

- - - - diff --git a/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_iso-2022-jp.xml.ref b/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_iso-2022-jp.xml.ref deleted file mode 100644 index 28dcdd98c4f..00000000000 --- a/tests/auto/xml/sax/qxmlsimplereader/encodings/doc_iso-2022-jp.xml.ref +++ /dev/null @@ -1,4 +0,0 @@ -setDocumentLocator(locator={columnNumber=1, lineNumber=1}) -startDocument() - processingInstruction(target="xml", data="version='1.0' encoding='iso-2022-jp'") - fatalError(exception={columnNumber=11, lineNumber=2, publicId="", systemId="", message="letter is expected"})