Update QString::isRightToLeft() to take directional hints into account

Make sure we properly handle both directional embeddings as well as
directional isolates to determine the direction of the string.

According to the latest version of the Unicode bidi algorithm,
parts of the string contained inside a directional isolate are
to be ignored when determining the paragraph direction. Embedding
markers themselves are to be ignored as well, but not the characters
inside an explicit directional embedding or override.

This is also some required pre-work to get our BiDi algorithm
updated to the latest version of the standard.

Move the implementation to QStringView and implement the methods
in QString and QStringRef through that implementation.

Task-number: QTBUG-57743
Change-Id: I7f24e09198e22d6359c6534c9ae40a904e94c46e
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Reviewed-by: Eskil Abrahamsen Blomfeldt <eskil.abrahamsen-blomfeldt@qt.io>
This commit is contained in:
Lars Knoll 2017-12-11 13:17:03 +01:00
parent f28ef0eca9
commit 2b2b9c9962
5 changed files with 63 additions and 3 deletions

View File

@ -8506,7 +8506,7 @@ bool QString::isSimpleText() const
*/
bool QString::isRightToLeft() const
{
// NOTE(review): two return statements appear back to back, so the second one
// is unreachable as written. This looks like diff output whose +/- markers were
// stripped (old line first, new line second) — confirm against the real file.
return QStringRef(this).isRightToLeft();
// Delegates to the QStringView-based QtPrivate::isRightToLeft().
return QtPrivate::isRightToLeft(QStringView(*this));
}
/*! \fn QChar *QString::data()
@ -10778,8 +10778,23 @@ int QStringRef::count(const QStringRef &str, Qt::CaseSensitivity cs) const
*/
bool QStringRef::isRightToLeft() const
{
// NOTE(review): p and end are not used before the return below; presumably
// these two lines are the removed side of a diff whose +/- markers were
// stripped — confirm against the real file.
const ushort *p = reinterpret_cast<const ushort*>(unicode());
const ushort * const end = p + size();
// Wraps the referenced characters in a QStringView and delegates to
// QtPrivate::isRightToLeft().
return QtPrivate::isRightToLeft(QStringView(unicode(), size()));
}
/*!
\since 5.11
\internal
\relates QStringView
Returns \c true if the string is read right to left.
\sa QString::isRightToLeft()
*/
bool QtPrivate::isRightToLeft(QStringView string)
{
const ushort *p = reinterpret_cast<const ushort*>(string.data());
const ushort * const end = p + string.size();
int isolateLevel = 0;
while (p < end) {
uint ucs4 = *p;
if (QChar::isHighSurrogate(ucs4) && p < end - 1) {
@ -10791,10 +10806,23 @@ bool QStringRef::isRightToLeft() const
}
switch (QChar::direction(ucs4))
{
case QChar::DirRLI:
case QChar::DirLRI:
case QChar::DirFSI:
++isolateLevel;
break;
case QChar::DirPDI:
if (isolateLevel)
--isolateLevel;
break;
case QChar::DirL:
if (isolateLevel)
break;
return false;
case QChar::DirR:
case QChar::DirAL:
if (isolateLevel)
break;
return true;
default:
break;

View File

@ -80,6 +80,7 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToLatin1(QStringView str);
Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToUtf8(QStringView str);
Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToLocal8Bit(QStringView str);
Q_REQUIRED_RESULT Q_CORE_EXPORT QVector<uint> convertToUcs4(QStringView str);
Q_REQUIRED_RESULT Q_CORE_EXPORT bool isRightToLeft(QStringView string);
} // namespace QtPrivate

View File

@ -772,4 +772,13 @@ QT_BEGIN_NAMESPACE
\sa QString::isNull(), QStringRef::isNull(), QStringView
*/
/*!
\fn bool QStringView::isRightToLeft() const
\since 5.11
Returns \c true if the string is read right to left.
\sa QString::isRightToLeft()
*/
QT_END_NAMESPACE

View File

@ -266,6 +266,9 @@ public:
Q_REQUIRED_RESULT bool endsWith(QChar c, Qt::CaseSensitivity cs) const Q_DECL_NOTHROW
{ return QtPrivate::endsWith(*this, QStringView(&c, 1), cs); }
// Reports whether this view's text reads right to left; the decision is
// delegated entirely to QtPrivate::isRightToLeft(), passing this view.
Q_REQUIRED_RESULT bool isRightToLeft() const Q_DECL_NOTHROW
{ return QtPrivate::isRightToLeft(*this); }
//
// STL compatibility API:
//

View File

@ -6901,6 +6901,25 @@ void tst_QString::isRightToLeft_data()
static const ushort unicode3[] = { QChar::highSurrogate(0x10800u), QChar::lowSurrogate(0x10800u), QChar::highSurrogate(0x10805u), QChar::lowSurrogate(0x10805u) };
QTest::newRow("surrogates-cypriot") << QString::fromUtf16(unicode3, 4) << true;
QTest::newRow("lre") << (QString("12345") + QChar(0x202a) + QString("9") + QChar(0x202c)) << false;
QTest::newRow("rle") << (QString("12345") + QChar(0x202b) + QString("9") + QChar(0x202c)) << false;
QTest::newRow("r in lre") << (QString("12345") + QChar(0x202a) + QString::fromUtf16(unicode1, 2) + QChar(0x202c) + QString("a")) << true;
QTest::newRow("l in lre") << (QString("12345") + QChar(0x202a) + QString("a") + QChar(0x202c) + QString::fromUtf16(unicode1, 2)) << false;
QTest::newRow("r in rle") << (QString("12345") + QChar(0x202b) + QString::fromUtf16(unicode1, 2) + QChar(0x202c) + QString("a")) << true;
QTest::newRow("l in rle") << (QString("12345") + QChar(0x202b) + QString("a") + QChar(0x202c) + QString::fromUtf16(unicode1, 2)) << false;
QTest::newRow("lro") << (QString("12345") + QChar(0x202d) + QString("9") + QChar(0x202c)) << false;
QTest::newRow("rlo") << (QString("12345") + QChar(0x202e) + QString("9") + QChar(0x202c)) << false;
QTest::newRow("r in lro") << (QString("12345") + QChar(0x202d) + QString::fromUtf16(unicode1, 2) + QChar(0x202c) + QString("a")) << true;
QTest::newRow("l in lro") << (QString("12345") + QChar(0x202d) + QString("a") + QChar(0x202c) + QString::fromUtf16(unicode1, 2)) << false;
QTest::newRow("r in rlo") << (QString("12345") + QChar(0x202e) + QString::fromUtf16(unicode1, 2) + QChar(0x202c) + QString("a")) << true;
QTest::newRow("l in rlo") << (QString("12345") + QChar(0x202e) + QString("a") + QChar(0x202c) + QString::fromUtf16(unicode1, 2)) << false;
QTest::newRow("lri") << (QString("12345") + QChar(0x2066) + QString("a") + QChar(0x2069) + QString::fromUtf16(unicode1, 2)) << true;
QTest::newRow("rli") << (QString("12345") + QChar(0x2067) + QString::fromUtf16(unicode1, 2) + QChar(0x2069) + QString("a")) << false;
QTest::newRow("fsi1") << (QString("12345") + QChar(0x2068) + QString("a") + QChar(0x2069) + QString::fromUtf16(unicode1, 2)) << true;
QTest::newRow("fsi2") << (QString("12345") + QChar(0x2068) + QString::fromUtf16(unicode1, 2) + QChar(0x2069) + QString("a")) << false;
}
void tst_QString::isRightToLeft()