diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp index 3c05fed6a69..1acee65db69 100644 --- a/src/gui/text/qtextmarkdownwriter.cpp +++ b/src/gui/text/qtextmarkdownwriter.cpp @@ -301,25 +301,20 @@ static void maybeEscapeFirstChar(QString &s) } /*! \internal - Escape unescaped backslashes. Then escape any special character that stands + Escape all backslashes. Then escape any special character that stands alone or prefixes a "word", including the \c < that starts an HTML tag. https://spec.commonmark.org/0.31.2/#backslash-escapes */ static void escapeSpecialCharacters(QString &s) { - static const QRegularExpression backslashRe(uR"([^\\]\\)"_s); static const QRegularExpression spaceRe(uR"(\s+)"_s); static const QRegularExpression specialRe(uR"([= 0) { - if (int j = s.indexOf(backslashRe, i); j >= 0) { - ++j; // we found some char before the backslash that needs escaping - if (s.size() == j + 1 || s.at(j + 1) != qtmw_Backslash) - s.insert(j, qtmw_Backslash); - i = j + 3; - } - if (int j = s.indexOf(specialRe, i); j >= 0 && (j == 0 || s.at(j - 1) != u'\\')) { + if (int j = s.indexOf(specialRe, i); j >= 0) { s.insert(j, qtmw_Backslash); i = j + 3; } diff --git a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp index 0c889947a83..c0aa37286af 100644 --- a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp +++ b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp @@ -43,6 +43,8 @@ private slots: void pathological(); void fencedCodeBlocks_data(); void fencedCodeBlocks(); + void toRawText_data(); + void toRawText(); private: bool isMainFontFixed(); @@ -536,6 +538,10 @@ void tst_QTextMarkdownImporter::fencedCodeBlocks_data() << "```pseudocode\nprint('hello world\\n')\n```\n" << 1 << 0 << "pseudocode" << "`" << "```pseudocode\nprint('hello world\\n')\n```\n\n"; + QTest::newRow("backtick fence with punctuated language") + << "```html+js\nblah\n```\n" + << 1 << 0 << "html+js" << "`" + << "```html+js\nblah\n```\n\n"; QTest::newRow("tilde fence with language") << "~~~pseudocode\nprint('hello world\\n')\n~~~\n" << 1 << 0 << "pseudocode" << "~" @@ -595,5 +601,101 @@ void tst_QTextMarkdownImporter::fencedCodeBlocks() QCOMPARE(doc.toMarkdown(), rewrite); } +void tst_QTextMarkdownImporter::toRawText_data() +{ + QTest::addColumn("input"); + QTest::addColumn("expectedRawText"); + + // tests to verify that fixing QTBUG-122083 is safe + // https://spec.commonmark.org/0.31.2/#example-12 + QTest::newRow("punctuation backslash escapes") << + R"(\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~)" << + R"(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)"; + // https://spec.commonmark.org/0.31.2/#example-13 + QTest::newRow("literal backslashes") << + QString::fromUtf16(uR"(\→\A\a\ \3\φ\«)") << + QString::fromUtf16(uR"(\→\A\a\ \3\φ\«)"); + // https://spec.commonmark.org/0.31.2/#example-14 + QTest::newRow("escape to avoid em") << + R"(\*not emphasized*)" << + R"(*not emphasized*)"; + QTest::newRow("escape to avoid html") << + R"(\
not a tag)" << + R"(
not a tag)"; + QTest::newRow("escape to avoid link") << + R"(\[not a link](/foo))" << + R"([not a link](/foo))"; + QTest::newRow("escape to avoid mono") << + R"(\`not code`)" << + R"(`not code`)"; + QTest::newRow("escape to avoid num list") << + R"(1\. not a list)" << + R"(1. not a list)"; + QTest::newRow("escape to avoid list") << + R"(\* not a list)" << + R"(* not a list)"; + QTest::newRow("escape to avoid heading") << + R"(\# not a heading)" << + R"(# not a heading)"; + QTest::newRow("escape to avoid reflink") << + R"(\[foo]: /url "not a reference")" << + R"([foo]: /url "not a reference")"; + QTest::newRow("escape to avoid entity") << + R"(\ö not a character entity)" << + R"(ö not a character entity)"; + // https://spec.commonmark.org/0.31.2/#example-15 + QTest::newRow("escape backslash only") << + R"(\\*emphasis*)" << + R"(\emphasis)"; + // https://spec.commonmark.org/0.31.2/#example-16 + QTest::newRow("backslash line break") << + "foo\\\nbar" << + "foo\u2029bar"; + // https://spec.commonmark.org/0.31.2/#example-17 + QTest::newRow("backslash in mono span") << + R"(`` \[\` ``)" << + R"(\[\`)"; + // https://spec.commonmark.org/0.31.2/#example-18 + QTest::newRow("backslash in indented code") << + R"( \[\])" << + R"(\[\])"; + // https://spec.commonmark.org/0.31.2/#example-19 + QTest::newRow("backslash in fenced code") << + "~~~\n\\[\\]\n~~~" << + R"(\[\])"; + // https://spec.commonmark.org/0.31.2/#example-20 + QTest::newRow("backslash in autolink") << + R"()" << + R"(https://example.com?find=\*)"; + // https://spec.commonmark.org/0.31.2/#example-21 + QTest::newRow("backslash in autolink") << + "1011011 [1011100 backslash \\" << "|1011011 [|1011100 backslash \\\\|"; + // https://spec.commonmark.org/0.31.2/#example-12 + // escaping punctuation is ok, but QTextMarkdownWriter currently doesn't do that (which is also ok) + QTest::newRow("punctuation") << + R"(

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

)" << + R"(!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~)"; + // https://spec.commonmark.org/0.31.2/#example-14 + QTest::newRow("backslash asterisk no emphasis") << // QTBUG-122083 + R"(\*no emphasis*)" << + R"(\\\*no emphasis*)"; + // https://spec.commonmark.org/0.31.2/#example-15 + QTest::newRow("backslash before emphasis") << + R"(\emphasis)" << + R"(\\*emphasis*)"; + // https://spec.commonmark.org/0.31.2/#example-20 + QTest::newRow("backslash-asterisk in autolink") << + R"(

https://example.com?find=\*

)" << + R"()"; + // https://spec.commonmark.org/0.31.2/#example-24 + QTest::newRow("plus in fenced code lang") << + "
foo
" << + "```foo+bar\nfoo\n```"; } void tst_QTextMarkdownWriter::fromHtml() @@ -850,12 +873,90 @@ void tst_QTextMarkdownWriter::fromHtml() QCOMPARE(output, expectedOutput); } +void tst_QTextMarkdownWriter::fromPlainTextAndBack_data() +{ + QTest::addColumn("input"); + QTest::addColumn("expectedMarkdown"); + + // tests to verify that fixing QTBUG-122083 is safe + QTest::newRow("single backslashes") << + R"(\ again: \ not esc: \* \-\-\ \*abc*)" << + R"(\\ again: \\ not esc: \\* \\-\\-\\ \\\*abc*)"; + // https://spec.commonmark.org/0.31.2/#example-12 + QTest::newRow("punctuation") << + R"(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)" << + R"(!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~)"; + // https://spec.commonmark.org/0.31.2/#example-13 + QTest::newRow("literal backslashes") << + QString::fromUtf16(uR"(\→\A\a\ \3\φ\«)") << + "\\\\\u2192\\\\A\\\\a\\\\ \\\\3\\\\\u03C6\\\\\u00AB"; + // https://spec.commonmark.org/0.31.2/#example-14 + QTest::newRow("escape to avoid em") << + R"(*not emphasized*)" << + R"(\*not emphasized*)"; + QTest::newRow("escape to avoid html") << + R"(
not a tag)" << + R"(\
not a tag)"; + QTest::newRow("escape to avoid link") << + R"([not a link](/foo))" << + R"(\[not a link](/foo))"; + QTest::newRow("escape to avoid mono") << + R"(`not code`)" << + R"(\`not code`)"; + QTest::newRow("escape to avoid num list") << + R"(1. not a list)" << + R"(1\. not a list)"; + QTest::newRow("escape to avoid list") << + R"(* not a list)" << + R"(\* not a list)"; + QTest::newRow("escape to avoid heading") << + R"(# not a heading)" << + R"(\# not a heading)"; + QTest::newRow("escape to avoid reflink") << + R"([foo]: /url "not a reference")" << + R"(\[foo]: /url "not a reference")"; + QTest::newRow("escape to avoid entity") << + R"(ö not a character entity)" << + R"(\ö not a character entity)"; + // end of tests to verify that fixing QTBUG-122083 is safe + // (it's ok to add unrelated plain-to-markdown-to-plaintext cases later) +} + +void tst_QTextMarkdownWriter::fromPlainTextAndBack() +{ + QFETCH(QString, input); + QFETCH(QString, expectedMarkdown); + + document->setPlainText(input); + QString output = documentToUnixMarkdown(); + +#ifdef DEBUG_WRITE_OUTPUT + { + QFile out("/tmp/" + QLatin1String(QTest::currentDataTag()) + ".md"); + out.open(QFile::WriteOnly); + out.write(output.toUtf8()); + out.close(); + } +#endif + + output = output.trimmed(); + expectedMarkdown = expectedMarkdown.trimmed(); + if (output != expectedMarkdown && (isMainFontFixed() || isFixedFontProportional())) + QSKIP("", "fixed main font or proportional fixed font (QTBUG-103484)"); + QCOMPARE(output, expectedMarkdown); + QCOMPARE(document->toPlainText(), input); + document->setMarkdown(output); + QCOMPARE(document->toPlainText(), input); + if (document->blockCount() == 1) + QCOMPARE(document->firstBlock().text(), input); +} + void tst_QTextMarkdownWriter::escapeSpecialCharacters_data() { QTest::addColumn("input"); QTest::addColumn("expectedOutput"); - QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\ baz \\\\"; + QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\\\\\ baz \\\\"; QTest::newRow("not emphasized") << "*normal* **normal too**" << "\\*normal* \\**normal too**"; QTest::newRow("not code") << "`normal` `normal too`" << "\\`normal` \\`normal too`"; QTest::newRow("code fence") << "```not a fence; ``` no risk here; ```not a fence" // TODO slightly inconsistent