Add QStringDecoder::decoderForHtml()
Now that QStringConverter can handle non-UTF encodings through ICU, add a way to get a decoder for arbitrary HTML code. As opposed to QStringConverter::encodingForHtml(), this method will also try to create a valid string decoder for non-Unicode codecs. Pick-to: 6.4 Change-Id: I343584da1b114396c744f482d9b433c9cedcc511 Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io> Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
d531c4b65d
commit
9c1f3b6d4d
@ -127,10 +127,9 @@ QVariant QMimeDataPrivate::retrieveTypedData(const QString &format, QMetaType ty
|
|||||||
if (ba.isNull())
|
if (ba.isNull())
|
||||||
return QVariant();
|
return QVariant();
|
||||||
if (format == "text/html"_L1) {
|
if (format == "text/html"_L1) {
|
||||||
auto encoding = QStringConverter::encodingForHtml(ba);
|
QStringDecoder decoder = QStringDecoder::decoderForHtml(ba);
|
||||||
if (encoding) {
|
if (decoder.isValid()) {
|
||||||
QStringDecoder toUtf16(*encoding);
|
return QString(decoder(ba));
|
||||||
return QString(toUtf16(ba));
|
|
||||||
}
|
}
|
||||||
// fall back to utf8
|
// fall back to utf8
|
||||||
}
|
}
|
||||||
|
@ -2049,20 +2049,8 @@ QStringConverter::encodingForData(QByteArrayView data, char16_t expectedFirstCha
|
|||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
static QByteArray parseHtmlMetaForEncoding(QByteArrayView data)
|
||||||
Tries to determine the encoding of the HTML in \a data by looking at leading byte
|
|
||||||
order marks or a charset specifier in the HTML meta tag. If the optional is empty,
|
|
||||||
the encoding specified is not supported by QStringConverter. If no encoding is
|
|
||||||
detected, the method returns Utf8.
|
|
||||||
*/
|
|
||||||
std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByteArrayView data)
|
|
||||||
{
|
{
|
||||||
// determine charset
|
|
||||||
auto encoding = encodingForData(data);
|
|
||||||
if (encoding)
|
|
||||||
// trust the initial BOM
|
|
||||||
return encoding;
|
|
||||||
|
|
||||||
static constexpr auto metaSearcher = qMakeStaticByteArrayMatcher("meta ");
|
static constexpr auto metaSearcher = qMakeStaticByteArrayMatcher("meta ");
|
||||||
static constexpr auto charsetSearcher = qMakeStaticByteArrayMatcher("charset=");
|
static constexpr auto charsetSearcher = qMakeStaticByteArrayMatcher("charset=");
|
||||||
|
|
||||||
@ -2089,14 +2077,62 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByt
|
|||||||
if (name == "unicode") // QTBUG-41998, ICU will return UTF-16.
|
if (name == "unicode") // QTBUG-41998, ICU will return UTF-16.
|
||||||
name = QByteArrayLiteral("UTF-8");
|
name = QByteArrayLiteral("UTF-8");
|
||||||
if (!name.isEmpty())
|
if (!name.isEmpty())
|
||||||
return encodingForName(name);
|
return name;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return QByteArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Tries to determine the encoding of the HTML in \a data by looking at leading byte
|
||||||
|
order marks or a charset specifier in the HTML meta tag. If the optional is empty,
|
||||||
|
the encoding specified is not supported by QStringConverter. If no encoding is
|
||||||
|
detected, the method returns Utf8.
|
||||||
|
|
||||||
|
\sa QStringDecoder::decoderForHtml()
|
||||||
|
*/
|
||||||
|
std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByteArrayView data)
|
||||||
|
{
|
||||||
|
// determine charset
|
||||||
|
std::optional<QStringConverter::Encoding> encoding = encodingForData(data);
|
||||||
|
if (encoding)
|
||||||
|
// trust the initial BOM
|
||||||
|
return encoding;
|
||||||
|
|
||||||
|
QByteArray encodingTag = parseHtmlMetaForEncoding(data);
|
||||||
|
if (!encodingTag.isEmpty())
|
||||||
|
return encodingForName(encodingTag);
|
||||||
|
|
||||||
return Utf8;
|
return Utf8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Tries to determine the encoding of the HTML in \a data by looking at leading byte
|
||||||
|
order marks or a charset specifier in the HTML meta tag and returns a QStringDecoder
|
||||||
|
matching the encoding. If the returned decoder is not valid,
|
||||||
|
the encoding specified is not supported by QStringConverter. If no encoding is
|
||||||
|
detected, the method returns a decoder for Utf8.
|
||||||
|
|
||||||
|
\sa isValid()
|
||||||
|
*/
|
||||||
|
QStringDecoder QStringDecoder::decoderForHtml(QByteArrayView data)
|
||||||
|
{
|
||||||
|
// determine charset
|
||||||
|
std::optional<QStringConverter::Encoding> encoding = encodingForData(data);
|
||||||
|
if (encoding)
|
||||||
|
// trust the initial BOM
|
||||||
|
return QStringDecoder(encoding.value());
|
||||||
|
|
||||||
|
QByteArray encodingTag = parseHtmlMetaForEncoding(data);
|
||||||
|
if (!encodingTag.isEmpty())
|
||||||
|
return QStringDecoder(encodingTag);
|
||||||
|
|
||||||
|
return QStringDecoder(Utf8);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Returns the canonical name for encoding \a e.
|
Returns the canonical name for encoding \a e.
|
||||||
*/
|
*/
|
||||||
|
@ -140,6 +140,9 @@ public:
|
|||||||
}
|
}
|
||||||
return iface->toUtf16(out, ba, &state);
|
return iface->toUtf16(out, ba, &state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Q_CORE_EXPORT static QStringDecoder decoderForHtml(QByteArrayView data);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
QString decodeAsString(QByteArrayView in)
|
QString decodeAsString(QByteArrayView in)
|
||||||
{
|
{
|
||||||
|
@ -288,12 +288,11 @@ void QTextBrowserPrivate::setSource(const QUrl &url, QTextDocument::ResourceType
|
|||||||
} else if (data.userType() == QMetaType::QByteArray) {
|
} else if (data.userType() == QMetaType::QByteArray) {
|
||||||
QByteArray ba = data.toByteArray();
|
QByteArray ba = data.toByteArray();
|
||||||
if (type == QTextDocument::HtmlResource) {
|
if (type == QTextDocument::HtmlResource) {
|
||||||
auto encoding = QStringConverter::encodingForHtml(ba);
|
auto decoder = QStringDecoder::decoderForHtml(ba);
|
||||||
if (!encoding)
|
if (!decoder.isValid())
|
||||||
// fall back to utf8
|
// fall back to utf8
|
||||||
encoding = QStringDecoder::Utf8;
|
decoder = QStringDecoder(QStringDecoder::Utf8);
|
||||||
QStringDecoder toUtf16(*encoding);
|
txt = decoder(ba);
|
||||||
txt = toUtf16(ba);
|
|
||||||
} else {
|
} else {
|
||||||
txt = QString::fromUtf8(ba);
|
txt = QString::fromUtf8(ba);
|
||||||
}
|
}
|
||||||
|
@ -2256,65 +2256,102 @@ void tst_QStringConverter::encodingForHtml_data()
|
|||||||
{
|
{
|
||||||
QTest::addColumn<QByteArray>("html");
|
QTest::addColumn<QByteArray>("html");
|
||||||
QTest::addColumn<std::optional<QStringConverter::Encoding>>("encoding");
|
QTest::addColumn<std::optional<QStringConverter::Encoding>>("encoding");
|
||||||
|
QTest::addColumn<QByteArray>("name"); // ICU name if we have ICU support
|
||||||
|
|
||||||
QByteArray html = "<html><head></head><body>blah</body></html>";
|
QByteArray html = "<html><head></head><body>blah</body></html>";
|
||||||
QTest::newRow("no charset") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("no charset") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-15\" /></head></html>";
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-15\" /></head></html>";
|
||||||
QTest::newRow("latin 15") << html << std::optional<QStringConverter::Encoding>();
|
QTest::newRow("latin 15") << html << std::optional<QStringConverter::Encoding>() << QByteArray("ISO-8859-15");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=SJIS\" /></head></html>";
|
||||||
|
QTest::newRow("sjis") << html << std::optional<QStringConverter::Encoding>() << QByteArray("Shift_JIS");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-2022-JP\" /></head></html>";
|
||||||
|
QTest::newRow("ISO-2022-JP") << html << std::optional<QStringConverter::Encoding>() << QByteArray("ISO-2022-JP");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-2022\" /></head></html>";
|
||||||
|
QTest::newRow("ISO-2022") << html << std::optional<QStringConverter::Encoding>() << QByteArray("ISO-2022-JP");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=GB2312\" /></head></html>";
|
||||||
|
QTest::newRow("GB2312") << html << std::optional<QStringConverter::Encoding>() << QByteArray("GB2312");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=Big5\" /></head></html>";
|
||||||
|
QTest::newRow("Big5") << html << std::optional<QStringConverter::Encoding>() << QByteArray("Big5");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=GB18030\" /></head></html>";
|
||||||
|
QTest::newRow("GB18030") << html << std::optional<QStringConverter::Encoding>() << QByteArray("GB18030");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=GB2312-HKSCS\" /></head></html>";
|
||||||
|
QTest::newRow("GB2312-HKSCS") << html << std::optional<QStringConverter::Encoding>() << QByteArray("GB2312-HKSCS");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=Big5-HKSCS\" /></head></html>";
|
||||||
|
QTest::newRow("Big5-HKSCS") << html << std::optional<QStringConverter::Encoding>() << QByteArray("Big5-HKSCS");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=EucJP\" /></head></html>";
|
||||||
|
QTest::newRow("EucJP") << html << std::optional<QStringConverter::Encoding>() << QByteArray("EUC-JP");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=EucKR\" /></head></html>";
|
||||||
|
QTest::newRow("EucKR") << html << std::optional<QStringConverter::Encoding>() << QByteArray("EUC-KR");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=KOI8-R\" /></head></html>";
|
||||||
|
QTest::newRow("KOI8-R") << html << std::optional<QStringConverter::Encoding>() << QByteArray("KOI8-R");
|
||||||
|
|
||||||
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=KOI8-U\" /></head></html>";
|
||||||
|
QTest::newRow("KOI8-U") << html << std::optional<QStringConverter::Encoding>() << QByteArray("KOI8-U");
|
||||||
|
|
||||||
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-1\" /></head></html>";
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-1\" /></head></html>";
|
||||||
QTest::newRow("latin 1") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1);
|
QTest::newRow("latin 1") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1) << QByteArray("ISO-8859-1");
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta charset=\"ISO_8859-1:1987\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>";
|
html = "<!DOCTYPE html><html><head><meta charset=\"ISO_8859-1:1987\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>";
|
||||||
QTest::newRow("latin 1 (#2)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1);
|
QTest::newRow("latin 1 (#2)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1) << QByteArray("ISO-8859-1");
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta charset=\"utf-8\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>";
|
html = "<!DOCTYPE html><html><head><meta charset=\"utf-8\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>";
|
||||||
QTest::newRow("UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8\"><title>Test</title></head>";
|
html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8\"><title>Test</title></head>";
|
||||||
QTest::newRow("UTF-8 (#2)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("UTF-8 (#2)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8/></head></html>";
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8/></head></html>";
|
||||||
QTest::newRow("UTF-8, no quotes") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("UTF-8, no quotes") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset='UTF-8'/></head></html>";
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset='UTF-8'/></head></html>";
|
||||||
QTest::newRow("UTF-8, single quotes") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("UTF-8, single quotes") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta charset=utf-8><title>Test</title></head>";
|
html = "<!DOCTYPE html><html><head><meta charset=utf-8><title>Test</title></head>";
|
||||||
QTest::newRow("UTF-8, > terminator") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("UTF-8, > terminator") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta charset= utf-8 ><title>Test</title></head>";
|
html = "<!DOCTYPE html><html><head><meta charset= utf-8 ><title>Test</title></head>";
|
||||||
QTest::newRow("UTF-8, > terminator with spaces") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("UTF-8, > terminator with spaces") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
// Test invalid charsets.
|
// Test invalid charsets.
|
||||||
html = "<!DOCTYPE html><html><head><meta charset= utf/8 ><title>Test</title></head>";
|
html = "<!DOCTYPE html><html><head><meta charset= utf/8 ><title>Test</title></head>";
|
||||||
QTest::newRow("utf/8") << html << std::optional<QStringConverter::Encoding>();
|
QTest::newRow("utf/8") << html << std::optional<QStringConverter::Encoding>() << QByteArray();
|
||||||
|
|
||||||
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=invalid-foo\" /></head></html>";
|
html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=invalid-foo\" /></head></html>";
|
||||||
QTest::newRow("invalid charset, no default") << html << std::optional<QStringConverter::Encoding>();
|
QTest::newRow("invalid charset, no default") << html << std::optional<QStringConverter::Encoding>() << QByteArray("UTF-8");
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"";
|
html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"";
|
||||||
html.prepend(QByteArray().fill(' ', 512 - html.size()));
|
html.prepend(QByteArray().fill(' ', 512 - html.size()));
|
||||||
QTest::newRow("invalid charset (large header)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("invalid charset (large header)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8";
|
html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8";
|
||||||
QTest::newRow("invalid charset (no closing double quote)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("invalid charset (no closing double quote)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset='utf-8";
|
html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset='utf-8";
|
||||||
QTest::newRow("invalid charset (no closing single quote)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("invalid charset (no closing single quote)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta charset=utf-8 foo=bar><title>Test</title></head>";
|
html = "<!DOCTYPE html><html><head><meta charset=utf-8 foo=bar><title>Test</title></head>";
|
||||||
QTest::newRow("invalid (space terminator)") << html << std::optional<QStringConverter::Encoding>();
|
QTest::newRow("invalid (space terminator)") << html << std::optional<QStringConverter::Encoding>() << QByteArray();
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta charset=\" utf' 8 /><title>Test</title></head>";
|
html = "<!DOCTYPE html><html><head><meta charset=\" utf' 8 /><title>Test</title></head>";
|
||||||
QTest::newRow("invalid charset, early terminator (')") << html << std::optional<QStringConverter::Encoding>();
|
QTest::newRow("invalid charset, early terminator (')") << html << std::optional<QStringConverter::Encoding>() << QByteArray();
|
||||||
|
|
||||||
const char src[] = { char(0xff), char(0xfe), char(0x7a), char(0x03), 0, 0 };
|
const char src[] = { char(0xff), char(0xfe), char(0x7a), char(0x03), 0, 0 };
|
||||||
html = src;
|
html = src;
|
||||||
QTest::newRow("greek text UTF-16LE") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf16LE);
|
QTest::newRow("greek text UTF-16LE") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf16LE) << QByteArray("UTF-16LE");
|
||||||
|
|
||||||
|
|
||||||
html = "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"><span style=\"color: rgb(0, 0, 0); font-family: "
|
html = "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"><span style=\"color: rgb(0, 0, 0); font-family: "
|
||||||
@ -2322,19 +2359,26 @@ void tst_QStringConverter::encodingForHtml_data()
|
|||||||
"line-height: normal; orphans: auto; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: "
|
"line-height: normal; orphans: auto; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: "
|
||||||
"auto; word-spacing: 0px; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; display: inline !important; float: "
|
"auto; word-spacing: 0px; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; display: inline !important; float: "
|
||||||
"none;\">ͻ</span>\000";
|
"none;\">ͻ</span>\000";
|
||||||
QTest::newRow("greek text UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("greek text UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
|
|
||||||
html = "<!DOCTYPE html><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=unicode\">"
|
html = "<!DOCTYPE html><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=unicode\">"
|
||||||
"<head/><body><p>bla</p></body></html>"; // QTBUG-41998, ICU will return UTF-16.
|
"<head/><body><p>bla</p></body></html>"; // QTBUG-41998, ICU will return UTF-16.
|
||||||
QTest::newRow("legacy unicode UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
|
QTest::newRow("legacy unicode UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8");
|
||||||
}
|
}
|
||||||
|
|
||||||
void tst_QStringConverter::encodingForHtml()
|
void tst_QStringConverter::encodingForHtml()
|
||||||
{
|
{
|
||||||
QFETCH(QByteArray, html);
|
QFETCH(QByteArray, html);
|
||||||
QFETCH(std::optional<QStringConverter::Encoding>, encoding);
|
QFETCH(std::optional<QStringConverter::Encoding>, encoding);
|
||||||
|
QFETCH(QByteArray, name);
|
||||||
|
|
||||||
QCOMPARE(QStringConverter::encodingForHtml(html), encoding);
|
QCOMPARE(QStringConverter::encodingForHtml(html), encoding);
|
||||||
|
|
||||||
|
QStringDecoder decoder = QStringDecoder::decoderForHtml(html);
|
||||||
|
if (encoding || // we should have a valid decoder independent of ICU support
|
||||||
|
decoder.isValid()) { // we got a valid decoder through ICU
|
||||||
|
QCOMPARE(decoder.name(), name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class LoadAndConvert: public QRunnable
|
class LoadAndConvert: public QRunnable
|
||||||
|
Loading…
x
Reference in New Issue
Block a user