QTextMarkdownImporter: Fix heap-buffer-overflow

After finding the end marker `---`, the code expected more characters
beyond: typically at least a trailing newline. But QStringView::sliced()
crashes if asked for a substring that starts at or beyond the end.

Now it's restructured into a separate splitFrontMatter() function, and
we're stricter, tolerating only `---\n` or `---\r\n` as marker lines.
So the code is easier to prove correct, and we don't need to check
characters between the end of the marker and the end of the line
(to allow inadvertent whitespace, for example). If the markers are
not valid, the Markdown parser will see them as thematic breaks,
as it would have done if we were not extracting the Front Matter
beforehand.

Amends e10c9b5c0f8f194a79ce12dcf9b6b5cb19976942 and
bffddc6a993c4b6b64922e8d327bdf32e0d4975a

Credit to OSS-Fuzz which found this as issue 42533775.

[ChangeLog][QtGui][Text] Fixed a heap buffer overflow in
QTextMarkdownImporter. The first marker for Front Matter
must begin at the first character of a Markdown document,
and both markers must be exactly ---\n or ---\r\n.

Done-with: Marc Mutz <marc.mutz@qt.io>
Fixes: QTBUG-135284
Pick-to: dev 6.9 6.8
Change-Id: I66412d21ecc0c4eabde443d70865ed2abad86d89
Reviewed-by: Marc Mutz <marc.mutz@qt.io>
This commit is contained in:
Shawn Rutledge 2025-03-27 15:17:21 +01:00
parent b5e47fa433
commit 2598674694
7 changed files with 84 additions and 17 deletions

View File

@ -27,7 +27,8 @@ Q_STATIC_LOGGING_CATEGORY(lcMD, "qt.text.markdown")
static const QChar qtmi_Newline = u'\n';
static const QChar qtmi_Space = u' ';
static constexpr auto markerString() noexcept { return "---"_L1; }
static constexpr auto lfMarkerString() noexcept { return "---\n"_L1; }
static constexpr auto crlfMarkerString() noexcept { return "---r\n"_L1; }
// TODO maybe eliminate the margins after all views recognize BlockQuoteLevel, CSS can format it, etc.
static const int qtmi_BlockQuoteIndent =
@ -119,6 +120,47 @@ QTextMarkdownImporter::QTextMarkdownImporter(QTextDocument *doc, QTextDocument::
{
}
/*! \internal
Split any Front Matter from the Markdown document \a md.
Returns a pair of QStringViews: if \a md begins with qualifying Front Matter
(according to the specification at https://jekyllrb.com/docs/front-matter/ ),
put it into the \c frontMatter view, omitting both markers; and put the remaining
Markdown into \c rest. If no Front Matter is found, return all of \a md in \c rest.
*/
static auto splitFrontMatter(QStringView md)
{
struct R {
QStringView frontMatter, rest;
explicit operator bool() const noexcept { return !frontMatter.isEmpty(); }
};
const auto NotFound = R{{}, md};
/* Front Matter must start with '---\n' or '---\r\n' on the very first line,
and Front Matter must end with another such line.
If that is not the case, we return NotFound: then the whole document is
to be passed on to the Markdown parser, in which '---\n' is interpreted
as a "thematic break" (like <hr/> in HTML). */
QLatin1StringView marker;
if (md.startsWith(lfMarkerString()))
marker = lfMarkerString();
else if (md.startsWith(crlfMarkerString()))
marker = crlfMarkerString();
else
return NotFound;
const auto frontMatterStart = marker.size();
const auto endMarkerPos = md.indexOf(marker, frontMatterStart);
if (endMarkerPos < 0 || md[endMarkerPos - 1] != QChar::LineFeed)
return NotFound;
Q_ASSERT(frontMatterStart < md.size());
Q_ASSERT(endMarkerPos < md.size());
const auto frontMatter = md.sliced(frontMatterStart, endMarkerPos - frontMatterStart);
return R{frontMatter, md.sliced(endMarkerPos + marker.size())};
}
void QTextMarkdownImporter::import(const QString &markdown)
{
MD_PARSER callbacks = {
@ -143,21 +185,14 @@ void QTextMarkdownImporter::import(const QString &markdown)
qCDebug(lcMD) << "default font" << defaultFont << "mono font" << m_monoFont;
QStringView md = markdown;
if (m_features.testFlag(QTextMarkdownImporter::FeatureFrontMatter) && md.startsWith(markerString())) {
qsizetype endMarkerPos = md.indexOf(markerString(), markerString().size() + 1);
if (endMarkerPos > 4) {
qsizetype firstLinePos = 4; // first line of yaml
while (md.at(firstLinePos) == '\n'_L1 || md.at(firstLinePos) == '\r'_L1)
++firstLinePos;
auto frontMatter = md.sliced(firstLinePos, endMarkerPos - firstLinePos);
firstLinePos = endMarkerPos + 4; // first line of markdown after yaml
while (md.size() > firstLinePos && (md.at(firstLinePos) == '\n'_L1 || md.at(firstLinePos) == '\r'_L1))
++firstLinePos;
md = md.sliced(firstLinePos);
doc->setMetaInformation(QTextDocument::FrontMatter, frontMatter.toString());
qCDebug(lcMD) << "extracted FrontMatter: size" << frontMatter.size();
if (m_features.testFlag(QTextMarkdownImporter::FeatureFrontMatter)) {
if (const auto split = splitFrontMatter(md)) {
doc->setMetaInformation(QTextDocument::FrontMatter, split.frontMatter.toString());
qCDebug(lcMD) << "extracted FrontMatter: size" << split.frontMatter.size();
md = split.rest;
}
}
const auto mdUtf8 = md.toUtf8();
m_cursor.beginEditBlock();
md_parse(mdUtf8.constData(), MD_SIZE(mdUtf8.size()), &callbacks, this);

View File

@ -0,0 +1,3 @@
---
name: "Pluto"---
Pluto may not be a planet. And this document does not contain Front Matter.

View File

@ -0,0 +1,5 @@
---
name: "Sloppy"
---
This document has trailing whitespace after its second Front Matter marker.
Therefore the marker does not qualify, and the document does not have Front Matter.

View File

@ -0,0 +1,4 @@
---
name: "Aborted YAML"
description: "The ending marker does not end with a newline, so it's invalid."
---

View File

@ -0,0 +1 @@
--- ---

View File

@ -0,0 +1,10 @@
---
name: "Venus"
discoverer: "Galileo Galilei"
title: "A description of the planet Venus"
keywords:
- planets
- solar system
- astronomy
---
*Venus* is the second planet from the Sun, orbiting it every 224.7 Earth days.

View File

@ -548,6 +548,7 @@ void tst_QTextMarkdownImporter::pathological_data()
QTest::addColumn<QString>("warning");
QTest::newRow("fuzz20450") << "attempted to insert into a list that no longer exists";
QTest::newRow("fuzz20580") << "";
QTest::newRow("oss-fuzz-42533775") << ""; // caused a heap-buffer-overflow
}
void tst_QTextMarkdownImporter::pathological() // avoid crashing on crazy input
@ -644,15 +645,21 @@ void tst_QTextMarkdownImporter::fencedCodeBlocks()
void tst_QTextMarkdownImporter::frontMatter_data()
{
QTest::addColumn<QString>("inputFile");
QTest::addColumn<int>("expectedFrontMatterSize");
QTest::addColumn<int>("expectedBlockCount");
QTest::newRow("yaml + markdown") << QFINDTESTDATA("data/yaml.md") << 1;
QTest::newRow("yaml only") << QFINDTESTDATA("data/yaml-only.md") << 0;
QTest::newRow("yaml + markdown") << QFINDTESTDATA("data/yaml.md") << 140 << 1;
QTest::newRow("yaml + markdown with CRLFs") << QFINDTESTDATA("data/yaml-crlf.md") << 140 << 1;
QTest::newRow("yaml only") << QFINDTESTDATA("data/yaml-only.md") << 59 << 0;
QTest::newRow("malformed 1") << QFINDTESTDATA("data/front-marker-malformed1.md") << 0 << 1;
QTest::newRow("malformed 2") << QFINDTESTDATA("data/front-marker-malformed2.md") << 0 << 2;
QTest::newRow("malformed 3") << QFINDTESTDATA("data/front-marker-malformed3.md") << 0 << 1;
}
void tst_QTextMarkdownImporter::frontMatter()
{
QFETCH(QString, inputFile);
QFETCH(int, expectedFrontMatterSize);
QFETCH(int, expectedBlockCount);
QFile f(inputFile);
@ -672,7 +679,9 @@ void tst_QTextMarkdownImporter::frontMatter()
++blockCount;
}
QCOMPARE(blockCount, expectedBlockCount); // yaml is not part of the markdown text
QCOMPARE(doc.metaInformation(QTextDocument::FrontMatter), yaml); // without fences
if (expectedFrontMatterSize)
QCOMPARE(doc.metaInformation(QTextDocument::FrontMatter), yaml); // without fences
QCOMPARE(doc.metaInformation(QTextDocument::FrontMatter).size(), expectedFrontMatterSize);
}
void tst_QTextMarkdownImporter::toRawText_data()