Extract and re-write "front matter" in markdown documents
It's increasingly common for YAML to be used as metadata in front of markdown documents. md4c does not handle this, so we need to remove it ahead of time, lest md4c misinterpret it as heading text or so.

The --- fences are expected to be consistent regardless of the format of what's between them, and the yaml (or whatever) parser does not need to see them. So we remove them while reading, and QTextMarkdownWriter writes them around the front matter if there is any.

If your application needs to parse this "front matter", just call qtd->metaInformation(QTextDocument::FrontMatter).toUtf8() and feed that to some parser that you've linked in, such as yaml-cpp.

Since YAML is used with GitHub Docs, we consider this feature to be part of the GitHub dialect: https://docs.github.com/en/contributing/writing-for-github-docs/using-yaml-frontmatter

[ChangeLog][QtGui][Text] Markdown "front matter" (usually YAML) is now extracted during parsing (GitHub dialect) and can be retrieved from QTextDocument::metaInformation(FrontMatter). QTextMarkdownWriter also writes front matter (if any) to the output.

Fixes: QTBUG-120722
Change-Id: I220ddcd2b94c99453853643516ca7a36bb2bcd6f
Reviewed-by: Axel Spoerl <axel.spoerl@qt.io>
This commit is contained in:
parent
216af5d7f9
commit
bffddc6a99
@ -1159,6 +1159,8 @@ QString QTextDocument::metaInformation(MetaInformation info) const
|
||||
return d->url;
|
||||
case CssMedia:
|
||||
return d->cssMedia;
|
||||
case FrontMatter:
|
||||
return d->frontMatter;
|
||||
}
|
||||
return QString();
|
||||
}
|
||||
@ -1182,6 +1184,9 @@ void QTextDocument::setMetaInformation(MetaInformation info, const QString &stri
|
||||
case CssMedia:
|
||||
d->cssMedia = string;
|
||||
break;
|
||||
case FrontMatter:
|
||||
d->frontMatter = string;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1327,6 +1332,10 @@ void QTextDocument::setHtml(const QString &html)
|
||||
\value CssMedia This value is used to select the corresponding '@media'
|
||||
rule, if any, from a specified CSS stylesheet when setHtml()
|
||||
is called. This enum value has been introduced in Qt 6.3.
|
||||
\value FrontMatter This value is used to select header material, if any was
|
||||
extracted during parsing of the source file (currently
|
||||
only from Markdown format). This enum value has been
|
||||
introduced in Qt 6.8.
|
||||
|
||||
\sa metaInformation(), setMetaInformation(), setHtml()
|
||||
*/
|
||||
|
@ -105,7 +105,8 @@ public:
|
||||
enum MetaInformation {
|
||||
DocumentTitle,
|
||||
DocumentUrl,
|
||||
CssMedia
|
||||
CssMedia,
|
||||
FrontMatter,
|
||||
};
|
||||
void setMetaInformation(MetaInformation info, const QString &);
|
||||
QString metaInformation(MetaInformation info) const;
|
||||
@ -119,7 +120,7 @@ public:
|
||||
enum MarkdownFeature {
|
||||
MarkdownNoHTML = 0x0020 | 0x0040,
|
||||
MarkdownDialectCommonMark = 0,
|
||||
MarkdownDialectGitHub = 0x0004 | 0x0008 | 0x0400 | 0x0100 | 0x0200 | 0x0800 | 0x4000
|
||||
MarkdownDialectGitHub = 0x0004 | 0x0008 | 0x0400 | 0x0100 | 0x0200 | 0x0800 | 0x4000 | 0x100000
|
||||
};
|
||||
Q_DECLARE_FLAGS(MarkdownFeatures, MarkdownFeature)
|
||||
Q_FLAG(MarkdownFeatures)
|
||||
|
@ -356,6 +356,7 @@ public:
|
||||
QString title;
|
||||
QString url;
|
||||
QString cssMedia;
|
||||
QString frontMatter;
|
||||
qreal indentWidth;
|
||||
qreal documentMargin;
|
||||
QUrl baseUrl;
|
||||
|
@ -46,7 +46,8 @@ static_assert(int(QTextMarkdownImporter::FeaturePermissiveAutoLinks) == MD_FLAG_
|
||||
static_assert(int(QTextMarkdownImporter::FeatureTasklists) == MD_FLAG_TASKLISTS);
|
||||
static_assert(int(QTextMarkdownImporter::FeatureNoHTML) == MD_FLAG_NOHTML);
|
||||
static_assert(int(QTextMarkdownImporter::DialectCommonMark) == MD_DIALECT_COMMONMARK);
|
||||
static_assert(int(QTextMarkdownImporter::DialectGitHub) == (MD_DIALECT_GITHUB | MD_FLAG_UNDERLINE));
|
||||
static_assert(int(QTextMarkdownImporter::DialectGitHub) ==
|
||||
(MD_DIALECT_GITHUB | MD_FLAG_UNDERLINE | QTextMarkdownImporter::FeatureFrontMatter));
|
||||
|
||||
// --------------------------------------------------------
|
||||
// MD4C callback function wrappers
|
||||
@ -139,6 +140,21 @@ void QTextMarkdownImporter::import(const QString &markdown)
|
||||
m_monoFont.setPixelSize(defaultFont.pixelSize());
|
||||
qCDebug(lcMD) << "default font" << defaultFont << "mono font" << m_monoFont;
|
||||
QByteArray md = markdown.toUtf8();
|
||||
// Front matter handling (only when FeatureFrontMatter is enabled): the input
// starts with a "---" fence, followed by metadata (usually YAML) and a closing
// "---" fence. md4c does not handle this, so the fenced section is removed
// from the buffer before parsing and the text between the fences is stored
// as QTextDocument::FrontMatter.
if (md.startsWith("---") && m_features.testFlag(QTextMarkdownImporter::FeatureFrontMatter)) {
    const qsizetype endMarkerPos = md.indexOf("---", 4);
    if (endMarkerPos > 4) {
        // Advance past consecutive line breaks, never reading beyond the end
        // of the buffer (the document may end at or right after the fence).
        const auto skipLineBreaks = [&md](qsizetype pos) {
            while (pos < md.size() && (md.at(pos) == '\n' || md.at(pos) == '\r'))
                ++pos;
            return pos;
        };

        const qsizetype yamlBegin = skipLineBreaks(4); // first line of yaml
        const QByteArray frontMatter = md.sliced(yamlBegin, endMarkerPos - yamlBegin);

        // First line of markdown after yaml: skip the closing fence plus one
        // character for its line break, clamped in case the input stops
        // directly after the fence.
        qsizetype bodyBegin = endMarkerPos + 4;
        if (bodyBegin > md.size())
            bodyBegin = md.size();
        md.remove(0, skipLineBreaks(bodyBegin));

        doc->setMetaInformation(QTextDocument::FrontMatter, QString::fromUtf8(frontMatter));
        qCDebug(lcMD) << "extracted FrontMatter: size" << frontMatter.size();
    }
}
|
||||
m_cursor.beginEditBlock();
|
||||
md_parse(md.constData(), MD_SIZE(md.size()), &callbacks, this);
|
||||
m_cursor.endEditBlock();
|
||||
|
@ -46,6 +46,7 @@ public:
|
||||
FeaturePermissiveWWWAutoLinks = 0x0400,
|
||||
FeatureTasklists = 0x0800,
|
||||
FeatureUnderline = 0x4000,
|
||||
FeatureFrontMatter = 0x100000, // Qt feature, not yet in MD4C
|
||||
// composite flags
|
||||
FeaturePermissiveAutoLinks = FeaturePermissiveMailAutoLinks
|
||||
| FeaturePermissiveURLAutoLinks | FeaturePermissiveWWWAutoLinks,
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "qtexttable.h"
|
||||
#include "qtextcursor.h"
|
||||
#include "qtextimagehandler_p.h"
|
||||
#include "qtextmarkdownimporter_p.h"
|
||||
#include "qloggingcategory.h"
|
||||
#if QT_CONFIG(itemmodel)
|
||||
#include "qabstractitemmodel.h"
|
||||
@ -38,6 +39,7 @@ QTextMarkdownWriter::QTextMarkdownWriter(QTextStream &stream, QTextDocument::Mar
|
||||
|
||||
bool QTextMarkdownWriter::writeAll(const QTextDocument *document)
|
||||
{
|
||||
writeFrontMatter(document->metaInformation(QTextDocument::FrontMatter));
|
||||
writeFrame(document->rootFrame());
|
||||
return true;
|
||||
}
|
||||
@ -76,6 +78,17 @@ void QTextMarkdownWriter::writeTable(const QAbstractItemModel *table)
|
||||
}
|
||||
#endif
|
||||
|
||||
void QTextMarkdownWriter::writeFrontMatter(const QString &fm)
|
||||
{
|
||||
if (fm.isEmpty() || !m_features.testFlag(static_cast<QTextDocument::MarkdownFeature>(
|
||||
QTextMarkdownImporter::FeatureFrontMatter)))
|
||||
return;
|
||||
m_stream << "---\n"_L1 << fm;
|
||||
if (!fm.endsWith(qtmw_Newline))
|
||||
m_stream << qtmw_Newline;
|
||||
m_stream << "---\n"_L1;
|
||||
}
|
||||
|
||||
void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
|
||||
{
|
||||
Q_ASSERT(frame);
|
||||
|
@ -36,6 +36,7 @@ public:
|
||||
|
||||
int writeBlock(const QTextBlock &block, bool table, bool ignoreFormat, bool ignoreEmpty);
|
||||
void writeFrame(const QTextFrame *frame);
|
||||
void writeFrontMatter(const QString &fm);
|
||||
|
||||
private:
|
||||
struct ListInfo {
|
||||
|
11
tests/auto/gui/text/qtextmarkdownimporter/data/yaml.md
Normal file
11
tests/auto/gui/text/qtextmarkdownimporter/data/yaml.md
Normal file
@ -0,0 +1,11 @@
|
||||
---
|
||||
name: "Venus"
|
||||
discoverer: "Galileo Galilei"
|
||||
title: "A description of the planet Venus"
|
||||
keywords:
|
||||
- planets
|
||||
- solar system
|
||||
- astronomy
|
||||
---
|
||||
*Venus* is the second planet from the Sun, orbiting it every 224.7 Earth days.
|
||||
|
@ -43,6 +43,7 @@ private slots:
|
||||
void pathological();
|
||||
void fencedCodeBlocks_data();
|
||||
void fencedCodeBlocks();
|
||||
void frontMatter();
|
||||
|
||||
private:
|
||||
bool isMainFontFixed();
|
||||
@ -595,5 +596,27 @@ void tst_QTextMarkdownImporter::fencedCodeBlocks()
|
||||
QCOMPARE(doc.toMarkdown(), rewrite);
|
||||
}
|
||||
|
||||
void tst_QTextMarkdownImporter::frontMatter()
|
||||
{
|
||||
QFile f(QFINDTESTDATA("data/yaml.md"));
|
||||
QVERIFY(f.open(QFile::ReadOnly | QIODevice::Text));
|
||||
QString md = QString::fromUtf8(f.readAll());
|
||||
f.close();
|
||||
const int yamlBegin = md.indexOf("name:");
|
||||
const int yamlEnd = md.indexOf("---", yamlBegin);
|
||||
const QString yaml = md.sliced(yamlBegin, yamlEnd - yamlBegin);
|
||||
|
||||
QTextDocument doc;
|
||||
QTextMarkdownImporter(&doc, QTextMarkdownImporter::DialectGitHub).import(md);
|
||||
int blockCount = 0;
|
||||
for (QTextFrame::iterator iterator = doc.rootFrame()->begin(); !iterator.atEnd(); ++iterator) {
|
||||
// Check whether the block is text or a horizontal rule
|
||||
if (!iterator.currentBlock().text().isEmpty())
|
||||
++blockCount;
|
||||
}
|
||||
QCOMPARE(blockCount, 1); // yaml is not part of the markdown text
|
||||
QCOMPARE(doc.metaInformation(QTextDocument::FrontMatter), yaml); // without fences
|
||||
}
|
||||
|
||||
QTEST_MAIN(tst_QTextMarkdownImporter)
|
||||
#include "tst_qtextmarkdownimporter.moc"
|
||||
|
11
tests/auto/gui/text/qtextmarkdownwriter/data/yaml.md
Normal file
11
tests/auto/gui/text/qtextmarkdownwriter/data/yaml.md
Normal file
@ -0,0 +1,11 @@
|
||||
---
|
||||
name: "Venus"
|
||||
discoverer: "Galileo Galilei"
|
||||
title: "A description of the planet Venus"
|
||||
keywords:
|
||||
- planets
|
||||
- solar system
|
||||
- astronomy
|
||||
---
|
||||
*Venus* is the second planet from the Sun, orbiting it every 224.7 Earth days.
|
||||
|
@ -36,6 +36,7 @@ private slots:
|
||||
void testWriteNestedNumericLists();
|
||||
void testWriteNumericListWithStart();
|
||||
void testWriteTable();
|
||||
void frontMatter();
|
||||
void rewriteDocument_data();
|
||||
void rewriteDocument();
|
||||
void fromHtml_data();
|
||||
@ -525,6 +526,16 @@ void tst_QTextMarkdownWriter::testWriteTable()
|
||||
QCOMPARE(md, expected);
|
||||
}
|
||||
|
||||
void tst_QTextMarkdownWriter::frontMatter()
|
||||
{
|
||||
QTextCursor cursor(document);
|
||||
cursor.insertText("bar");
|
||||
document->setMetaInformation(QTextDocument::FrontMatter, "foo");
|
||||
|
||||
const QString output = documentToUnixMarkdown();
|
||||
QCOMPARE(output, "---\nfoo\n---\nbar\n\n");
|
||||
}
|
||||
|
||||
void tst_QTextMarkdownWriter::rewriteDocument_data()
|
||||
{
|
||||
QTest::addColumn<QString>("inputFile");
|
||||
@ -535,6 +546,7 @@ void tst_QTextMarkdownWriter::rewriteDocument_data()
|
||||
QTest::newRow("word wrap") << "wordWrap.md";
|
||||
QTest::newRow("links") << "links.md";
|
||||
QTest::newRow("lists and code blocks") << "listsAndCodeBlocks.md";
|
||||
QTest::newRow("front matter") << "yaml.md";
|
||||
}
|
||||
|
||||
void tst_QTextMarkdownWriter::rewriteDocument()
|
||||
|
Loading…
x
Reference in New Issue
Block a user