diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp
index 3c05fed6a69..1acee65db69 100644
--- a/src/gui/text/qtextmarkdownwriter.cpp
+++ b/src/gui/text/qtextmarkdownwriter.cpp
@@ -301,25 +301,20 @@ static void maybeEscapeFirstChar(QString &s)
}
/*! \internal
- Escape unescaped backslashes. Then escape any special character that stands
+ Escape all backslashes. Then escape any special character that stands
alone or prefixes a "word", including the \c < that starts an HTML tag.
https://spec.commonmark.org/0.31.2/#backslash-escapes
*/
static void escapeSpecialCharacters(QString &s)
{
- static const QRegularExpression backslashRe(uR"([^\\]\\)"_s);
static const QRegularExpression spaceRe(uR"(\s+)"_s);
static const QRegularExpression specialRe(uR"([= 0) {
- if (int j = s.indexOf(backslashRe, i); j >= 0) {
- ++j; // we found some char before the backslash that needs escaping
- if (s.size() == j + 1 || s.at(j + 1) != qtmw_Backslash)
- s.insert(j, qtmw_Backslash);
- i = j + 3;
- }
- if (int j = s.indexOf(specialRe, i); j >= 0 && (j == 0 || s.at(j - 1) != u'\\')) {
+ if (int j = s.indexOf(specialRe, i); j >= 0) {
s.insert(j, qtmw_Backslash);
i = j + 3;
}
diff --git a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp
index 0c889947a83..c0aa37286af 100644
--- a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp
+++ b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp
@@ -43,6 +43,8 @@ private slots:
void pathological();
void fencedCodeBlocks_data();
void fencedCodeBlocks();
+ void toRawText_data();
+ void toRawText();
private:
bool isMainFontFixed();
@@ -536,6 +538,10 @@ void tst_QTextMarkdownImporter::fencedCodeBlocks_data()
<< "```pseudocode\nprint('hello world\\n')\n```\n"
<< 1 << 0 << "pseudocode" << "`"
<< "```pseudocode\nprint('hello world\\n')\n```\n\n";
+ QTest::newRow("backtick fence with punctuated language")
+ << "```html+js\n
blah\n```\n"
+ << 1 << 0 << "html+js" << "`"
+ << "```html+js\nblah\n```\n\n";
QTest::newRow("tilde fence with language")
<< "~~~pseudocode\nprint('hello world\\n')\n~~~\n"
<< 1 << 0 << "pseudocode" << "~"
@@ -595,5 +601,101 @@ void tst_QTextMarkdownImporter::fencedCodeBlocks()
QCOMPARE(doc.toMarkdown(), rewrite);
}
+void tst_QTextMarkdownImporter::toRawText_data()
+{
+ QTest::addColumn("input");
+ QTest::addColumn("expectedRawText");
+
+ // tests to verify that fixing QTBUG-122083 is safe
+ // https://spec.commonmark.org/0.31.2/#example-12
+ QTest::newRow("punctuation backslash escapes") <<
+ R"(\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~)" <<
+ R"(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)";
+ // https://spec.commonmark.org/0.31.2/#example-13
+ QTest::newRow("literal backslashes") <<
+ QString::fromUtf16(uR"(\→\A\a\ \3\φ\«)") <<
+ QString::fromUtf16(uR"(\→\A\a\ \3\φ\«)");
+ // https://spec.commonmark.org/0.31.2/#example-14
+ QTest::newRow("escape to avoid em") <<
+ R"(\*not emphasized*)" <<
+ R"(*not emphasized*)";
+ QTest::newRow("escape to avoid html") <<
+ R"(\
not a tag)" <<
+ R"(
not a tag)";
+ QTest::newRow("escape to avoid link") <<
+ R"(\[not a link](/foo))" <<
+ R"([not a link](/foo))";
+ QTest::newRow("escape to avoid mono") <<
+ R"(\`not code`)" <<
+ R"(`not code`)";
+ QTest::newRow("escape to avoid num list") <<
+ R"(1\. not a list)" <<
+ R"(1. not a list)";
+ QTest::newRow("escape to avoid list") <<
+ R"(\* not a list)" <<
+ R"(* not a list)";
+ QTest::newRow("escape to avoid heading") <<
+ R"(\# not a heading)" <<
+ R"(# not a heading)";
+ QTest::newRow("escape to avoid reflink") <<
+ R"(\[foo]: /url "not a reference")" <<
+ R"([foo]: /url "not a reference")";
+ QTest::newRow("escape to avoid entity") <<
+ R"(\ö not a character entity)" <<
+ R"(ö not a character entity)";
+ // https://spec.commonmark.org/0.31.2/#example-15
+ QTest::newRow("escape backslash only") <<
+ R"(\\*emphasis*)" <<
+ R"(\emphasis)";
+ // https://spec.commonmark.org/0.31.2/#example-16
+ QTest::newRow("backslash line break") <<
+ "foo\\\nbar" <<
+ "foo\u2029bar";
+ // https://spec.commonmark.org/0.31.2/#example-17
+ QTest::newRow("backslash in mono span") <<
+ R"(`` \[\` ``)" <<
+ R"(\[\`)";
+ // https://spec.commonmark.org/0.31.2/#example-18
+ QTest::newRow("backslash in indented code") <<
+ R"( \[\])" <<
+ R"(\[\])";
+ // https://spec.commonmark.org/0.31.2/#example-19
+ QTest::newRow("backslash in fenced code") <<
+ "~~~\n\\[\\]\n~~~" <<
+ R"(\[\])";
+ // https://spec.commonmark.org/0.31.2/#example-20
+ QTest::newRow("backslash in autolink") <<
+ R"()" <<
+ R"(https://example.com?find=\*)";
+ // https://spec.commonmark.org/0.31.2/#example-21
+ QTest::newRow("backslash in autolink") <<
+ "1011011 [ | 1011100 backslash \\ |
" <<
"|1011011 [|1011100 backslash \\\\|";
+ // https://spec.commonmark.org/0.31.2/#example-12
+ // escaping punctuation is ok, but QTextMarkdownWriter currently doesn't do that (which is also ok)
+ QTest::newRow("punctuation") <<
+ R"(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
)" <<
+ R"(!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~)";
+ // https://spec.commonmark.org/0.31.2/#example-14
+ QTest::newRow("backslash asterisk no emphasis") << // QTBUG-122083
+ R"(\*no emphasis*)" <<
+ R"(\\\*no emphasis*)";
+ // https://spec.commonmark.org/0.31.2/#example-15
+ QTest::newRow("backslash before emphasis") <<
+ R"(\emphasis)" <<
+ R"(\\*emphasis*)";
+ // https://spec.commonmark.org/0.31.2/#example-20
+ QTest::newRow("backslash-asterisk in autolink") <<
+ R"(https://example.com?find=\*
)" <<
+ R"()";
+ // https://spec.commonmark.org/0.31.2/#example-24
+ QTest::newRow("plus in fenced code lang") <<
+ "foo
" <<
+ "```foo+bar\nfoo\n```";
}
void tst_QTextMarkdownWriter::fromHtml()
@@ -850,12 +873,90 @@ void tst_QTextMarkdownWriter::fromHtml()
QCOMPARE(output, expectedOutput);
}
+// Test data for fromPlainTextAndBack(). Each row pairs a plain-text input with
+// the Markdown that is expected when that text is written out: every literal
+// backslash is doubled, and a backslash is put in front of each character that
+// would otherwise be interpreted as Markdown syntax.
+void tst_QTextMarkdownWriter::fromPlainTextAndBack_data()
+{
+    // both columns are fetched as QString by fromPlainTextAndBack()
+    QTest::addColumn<QString>("input");
+    QTest::addColumn<QString>("expectedMarkdown");
+
+    // tests to verify that fixing QTBUG-122083 is safe
+    QTest::newRow("single backslashes") <<
+        R"(\ again: \ not esc: \* \-\-\ \*abc*)" <<
+        R"(\\ again: \\ not esc: \\* \\-\\-\\ \\\*abc*)";
+    // https://spec.commonmark.org/0.31.2/#example-12
+    QTest::newRow("punctuation") <<
+        R"(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)" <<
+        R"(!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~)";
+    // https://spec.commonmark.org/0.31.2/#example-13
+    QTest::newRow("literal backslashes") <<
+        QString::fromUtf16(uR"(\→\A\a\ \3\φ\«)") <<
+        "\\\\\u2192\\\\A\\\\a\\\\ \\\\3\\\\\u03C6\\\\\u00AB";
+    // https://spec.commonmark.org/0.31.2/#example-14
+    QTest::newRow("escape to avoid em") <<
+        R"(*not emphasized*)" <<
+        R"(\*not emphasized*)";
+    // the < that would start an HTML tag has to be escaped
+    QTest::newRow("escape to avoid html") <<
+        R"(<br/> not a tag)" <<
+        R"(\<br/> not a tag)";
+    QTest::newRow("escape to avoid link") <<
+        R"([not a link](/foo))" <<
+        R"(\[not a link](/foo))";
+    QTest::newRow("escape to avoid mono") <<
+        R"(`not code`)" <<
+        R"(\`not code`)";
+    QTest::newRow("escape to avoid num list") <<
+        R"(1. not a list)" <<
+        R"(1\. not a list)";
+    QTest::newRow("escape to avoid list") <<
+        R"(* not a list)" <<
+        R"(\* not a list)";
+    QTest::newRow("escape to avoid heading") <<
+        R"(# not a heading)" <<
+        R"(\# not a heading)";
+    QTest::newRow("escape to avoid reflink") <<
+        R"([foo]: /url "not a reference")" <<
+        R"(\[foo]: /url "not a reference")";
+    // the & that would start a character entity has to be escaped
+    QTest::newRow("escape to avoid entity") <<
+        R"(&ouml; not a character entity)" <<
+        R"(\&ouml; not a character entity)";
+    // end of tests to verify that fixing QTBUG-122083 is safe
+    // (it's ok to add unrelated plain-to-markdown-to-plaintext cases later)
+}
+
+// Round-trip check for one data row: load the plain-text input into the
+// document, compare the Markdown returned by documentToUnixMarkdown() with
+// expectedMarkdown, then load that Markdown back into the document and verify
+// that the plain text is still identical to the input.
+void tst_QTextMarkdownWriter::fromPlainTextAndBack()
+{
+    QFETCH(QString, input);
+    QFETCH(QString, expectedMarkdown);
+
+    document->setPlainText(input);
+    QString output = documentToUnixMarkdown();
+
+#ifdef DEBUG_WRITE_OUTPUT
+    {
+        QFile out("/tmp/" + QLatin1String(QTest::currentDataTag()) + ".md");
+        // debugging aid only: write nothing if the file cannot be opened
+        if (out.open(QFile::WriteOnly)) {
+            out.write(output.toUtf8());
+            out.close();
+        }
+    }
+#endif
+
+    // leading and trailing whitespace is not significant for the comparison
+    output = output.trimmed();
+    expectedMarkdown = expectedMarkdown.trimmed();
+    // The reason has to be the first argument of QSKIP(): that is the message
+    // which gets reported for the skipped row.
+    if (output != expectedMarkdown && (isMainFontFixed() || isFixedFontProportional()))
+        QSKIP("fixed main font or proportional fixed font (QTBUG-103484)");
+    QCOMPARE(output, expectedMarkdown);
+    // writing Markdown must not have modified the document
+    QCOMPARE(document->toPlainText(), input);
+    // reading the generated Markdown back must reproduce the original text
+    document->setMarkdown(output);
+    QCOMPARE(document->toPlainText(), input);
+    if (document->blockCount() == 1)
+        QCOMPARE(document->firstBlock().text(), input);
+}
+
void tst_QTextMarkdownWriter::escapeSpecialCharacters_data()
{
QTest::addColumn("input");
QTest::addColumn("expectedOutput");
- QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\ baz \\\\";
+ QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\\\\\ baz \\\\";
QTest::newRow("not emphasized") << "*normal* **normal too**" << "\\*normal* \\**normal too**";
QTest::newRow("not code") << "`normal` `normal too`" << "\\`normal` \\`normal too`";
QTest::newRow("code fence") << "```not a fence; ``` no risk here; ```not a fence" // TODO slightly inconsistent