Always encode HTML as utf-8

When converting a text document to HTML, always convert it to UTF-8,
as required by the HTML standard. As a consequence, the optional
encoding parameter is removed.

Change-Id: I0bd2fc9df2d06734e1c5b8053b964fbfbb6881e1
Reviewed-by: Simon Hausmann <simon.hausmann@qt.io>
This commit is contained in:
Lars Knoll 2020-04-07 10:49:17 +02:00
parent a23cb5cd06
commit 50916edd9d
8 changed files with 23 additions and 41 deletions

View File

@ -2202,7 +2202,7 @@ static QStringList resolvedFontFamilies(const QTextCharFormat &format)
perfect, especially for complex documents, due to the limitations
of HTML.
*/
QString QTextHtmlExporter::toHtml(const QByteArray &encoding, ExportMode mode)
QString QTextHtmlExporter::toHtml(ExportMode mode)
{
html = QLatin1String("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" "
"\"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
@ -2211,8 +2211,7 @@ QString QTextHtmlExporter::toHtml(const QByteArray &encoding, ExportMode mode)
fragmentMarkers = (mode == ExportFragment);
if (!encoding.isEmpty())
html += QString::fromLatin1("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%1\" />").arg(QString::fromLatin1(encoding));
html += QString::fromLatin1("<meta charset=\"utf-8\" />");
QString title = doc->metaInformation(QTextDocument::DocumentTitle);
if (!title.isEmpty())
@ -3239,23 +3238,17 @@ void QTextHtmlExporter::emitFrameStyle(const QTextFrameFormat &format, FrameType
/*!
Returns a string containing an HTML representation of the document.
The \a encoding parameter specifies the value for the charset attribute
in the html header. For example if 'utf-8' is specified then the
beginning of the generated html will look like this:
\snippet code/src_gui_text_qtextdocument.cpp 0
If no encoding is specified then no such meta information is generated.
The generated HTML declares its encoding as UTF-8 in its header.
If you later on convert the returned html string into a byte array for
transmission over a network or when saving to disk you should specify
the encoding you're going to use for the conversion to a byte array here.
transmission over a network or when saving to disk, you should use
QString::toUtf8() so that the resulting QByteArray matches the declared encoding.
\sa {Supported HTML Subset}
*/
#ifndef QT_NO_TEXTHTMLPARSER
QString QTextDocument::toHtml(const QByteArray &encoding) const
QString QTextDocument::toHtml() const
{
return QTextHtmlExporter(this).toHtml(encoding);
return QTextHtmlExporter(this).toHtml();
}
#endif // QT_NO_TEXTHTMLPARSER

View File

@ -147,7 +147,7 @@ public:
QString metaInformation(MetaInformation info) const;
#ifndef QT_NO_TEXTHTMLPARSER
QString toHtml(const QByteArray &encoding = QByteArray()) const;
QString toHtml() const;
void setHtml(const QString &html);
#endif

View File

@ -404,7 +404,7 @@ public:
ExportFragment
};
QString toHtml(const QByteArray &encoding, ExportMode mode = ExportEntireDocument);
QString toHtml(ExportMode mode = ExportEntireDocument);
private:
enum StyleMode { EmitStyleTag, OmitStyleTag };

View File

@ -376,17 +376,16 @@ QString QTextDocumentFragment::toPlainText() const
/*!
\since 4.2
Returns the contents of the document fragment as HTML,
using the specified \a encoding (e.g., "UTF-8", "ISO 8859-1").
Returns the contents of the document fragment as HTML.
\sa toPlainText(), QTextDocument::toHtml(), QTextCodec
\sa toPlainText(), QTextDocument::toHtml()
*/
QString QTextDocumentFragment::toHtml(const QByteArray &encoding) const
QString QTextDocumentFragment::toHtml() const
{
if (!d)
return QString();
return QTextHtmlExporter(d->doc).toHtml(encoding, QTextHtmlExporter::ExportFragment);
return QTextHtmlExporter(d->doc).toHtml(QTextHtmlExporter::ExportFragment);
}
#endif // QT_NO_TEXTHTMLPARSER
@ -1268,12 +1267,6 @@ void QTextHtmlImporter::appendBlock(const QTextBlockFormat &format, QTextCharFor
*/
#ifndef QT_NO_TEXTHTMLPARSER
QTextDocumentFragment QTextDocumentFragment::fromHtml(const QString &html)
{
return fromHtml(html, nullptr);
}
/*!
\fn QTextDocumentFragment QTextDocumentFragment::fromHtml(const QString &text, const QTextDocument *resourceProvider)
\since 4.2

View File

@ -65,13 +65,12 @@ public:
QString toPlainText() const;
#ifndef QT_NO_TEXTHTMLPARSER
QString toHtml(const QByteArray &encoding = QByteArray()) const;
QString toHtml() const;
#endif // QT_NO_TEXTHTMLPARSER
static QTextDocumentFragment fromPlainText(const QString &plainText);
#ifndef QT_NO_TEXTHTMLPARSER
static QTextDocumentFragment fromHtml(const QString &html);
static QTextDocumentFragment fromHtml(const QString &html, const QTextDocument *resourceProvider);
static QTextDocumentFragment fromHtml(const QString &html, const QTextDocument *resourceProvider = nullptr);
#endif // QT_NO_TEXTHTMLPARSER
private:

View File

@ -276,11 +276,7 @@ bool QTextDocumentWriter::write(const QTextDocument *document)
qWarning("QTextDocumentWriter::write: the device cannot be opened for writing");
return false;
}
QTextStream ts(d->device);
#if QT_CONFIG(textcodec)
ts.setCodec("utf-8");
ts << document->toHtml("utf-8");
#endif
d->device->write(document->toHtml().toUtf8());
d->device->close();
return true;
}

View File

@ -3424,7 +3424,7 @@ void QTextEditMimeData::setup() const
{
QTextEditMimeData *that = const_cast<QTextEditMimeData *>(this);
#ifndef QT_NO_TEXTHTMLPARSER
that->setData(QLatin1String("text/html"), fragment.toHtml("utf-8").toUtf8());
that->setData(QLatin1String("text/html"), fragment.toHtml().toUtf8());
#endif
#ifndef QT_NO_TEXTODFWRITER
{

View File

@ -283,7 +283,7 @@ void tst_QTextDocument::init()
htmlHead = QString("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" "
"\"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><meta charset=\"utf-8\" /><style type=\"text/css\">\n"
"p, li { white-space: pre-wrap; }\n"
"</style></head>"
"<body style=\" font-family:'%1'; font-size:%2; font-weight:%3; font-style:%4;\">\n");
@ -1848,7 +1848,7 @@ void tst_QTextDocument::toHtmlBodyBgColor()
QString expectedHtml("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" "
"\"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><meta charset=\"utf-8\" /><style type=\"text/css\">\n"
"p, li { white-space: pre-wrap; }\n"
"</style></head>"
"<body style=\" font-family:'%1'; font-size:%2; font-weight:%3; font-style:%4;\""
@ -1877,7 +1877,7 @@ void tst_QTextDocument::toHtmlBodyBgColorRgba()
QString expectedHtml("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" "
"\"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><meta charset=\"utf-8\" /><style type=\"text/css\">\n"
"p, li { white-space: pre-wrap; }\n"
"</style></head>"
"<body style=\" font-family:'%1'; font-size:%2; font-weight:%3; font-style:%4;\""
@ -1905,7 +1905,7 @@ void tst_QTextDocument::toHtmlBodyBgColorTransparent()
QString expectedHtml("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" "
"\"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><meta charset=\"utf-8\" /><style type=\"text/css\">\n"
"p, li { white-space: pre-wrap; }\n"
"</style></head>"
"<body style=\" font-family:'%1'; font-size:%2; font-weight:%3; font-style:%4;\""
@ -2324,6 +2324,7 @@ void tst_QTextDocument::clonePreservesFormatsWhenEmpty()
QCOMPARE(cloneCursor.blockCharFormat().fontWeight(), charFormat.fontWeight());
QCOMPARE(cloneCursor.blockFormat().alignment(), blockFormat.alignment());
QCOMPARE(cloneCursor.blockFormat().indent(), blockFormat.indent());
delete clone;
}
void tst_QTextDocument::blockCount()
@ -2730,7 +2731,7 @@ void tst_QTextDocument::backgroundImage_checkExpectedHtml(const QTextDocument &d
{
QString expectedHtml("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" "
"\"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><meta charset=\"utf-8\" /><style type=\"text/css\">\n"
"p, li { white-space: pre-wrap; }\n"
"</style></head>"
"<body style=\" font-family:'%1'; font-size:%2; font-weight:%3; font-style:%4;\">\n"