Implement UTF-16 to UTF-8 case-insensitive compare and make public
Change-Id: Ied637aece2a7427b8a2dfffd16116cf3645c6359 Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
parent
9422b5ebc3
commit
45838673df
@ -1168,6 +1168,7 @@ static int compareElementRecursive(const QCborContainerPrivate *c1, const Elemen
|
||||
if (!(e1.flags & Element::StringIsAscii) || !(e2.flags & Element::StringIsAscii)) {
|
||||
// Case 2: one of them is UTF-8 and the other is UTF-16, so lengths
|
||||
// are NOT comparable. We need to convert to UTF-16 first...
|
||||
// (we can't use QUtf8::compareUtf8 because we need to compare lengths)
|
||||
auto string = [](const Element &e, const ByteData *b) {
|
||||
return e.flags & Element::StringIsUtf16 ? b->asQStringRaw() : b->toUtf8String();
|
||||
};
|
||||
|
@ -1,7 +1,7 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2020 The Qt Company Ltd.
|
||||
** Copyright (C) 2018 Intel Corporation.
|
||||
** Copyright (C) 2020 Intel Corporation.
|
||||
** Copyright (C) 2019 Mail.ru Group.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
@ -871,6 +871,35 @@ static int ucstricmp(const QChar *a, const QChar *ae, const char *b, const char
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Case-insensitive comparison between a Unicode string and a UTF-8 string
|
||||
static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
|
||||
{
|
||||
auto src1 = reinterpret_cast<const uchar *>(utf8);
|
||||
auto end1 = reinterpret_cast<const uchar *>(utf8end);
|
||||
QStringIterator src2(utf16, utf16end);
|
||||
|
||||
while (src1 < end1 && src2.hasNext()) {
|
||||
uint uc1;
|
||||
uint *output = &uc1;
|
||||
uchar b = *src1++;
|
||||
int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
|
||||
if (res < 0) {
|
||||
// decoding error
|
||||
uc1 = QChar::ReplacementCharacter;
|
||||
} else {
|
||||
uc1 = QChar::toCaseFolded(uc1);
|
||||
}
|
||||
|
||||
uint uc2 = QChar::toCaseFolded(src2.next());
|
||||
int diff = uc1 - uc2; // can't underflow
|
||||
if (diff)
|
||||
return diff;
|
||||
}
|
||||
|
||||
// the shorter string sorts first
|
||||
return (end1 > src1) - int(src2.hasNext());
|
||||
}
|
||||
|
||||
#if defined(__mips_dsp)
|
||||
// From qstring_mips_dsp_asm.S
|
||||
extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
|
||||
@ -1334,6 +1363,30 @@ int QtPrivate::compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens
|
||||
return qt_compare_strings(lhs, rhs, cs);
|
||||
}
|
||||
|
||||
/*!
|
||||
\relates QStringView
|
||||
\internal
|
||||
\since 6.0
|
||||
\overload
|
||||
|
||||
Returns an integer that compares to 0 as \a lhs compares to \a rhs.
|
||||
|
||||
If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive;
|
||||
otherwise the comparison is case-insensitive.
|
||||
|
||||
Case-sensitive comparison is based exclusively on the numeric values of the
|
||||
decoded Unicode code points and is very fast, but is not what a human would
|
||||
expect. Consider sorting user-visible strings with
|
||||
QString::localeAwareCompare().
|
||||
*/
|
||||
int QtPrivate::compareStringsUtf8(const char *u8str, qsizetype u8len, QStringView rhs, Qt::CaseSensitivity cs) noexcept
|
||||
{
|
||||
if (cs == Qt::CaseSensitive)
|
||||
return QUtf8::compareUtf8(u8str, u8len, rhs.data(), rhs.size());
|
||||
else
|
||||
return ucstricmp8(u8str, u8str + u8len, rhs.begin(), rhs.end());
|
||||
}
|
||||
|
||||
#define REHASH(a) \
|
||||
if (sl_minus_1 < sizeof(std::size_t) * CHAR_BIT) \
|
||||
hashHaystack -= std::size_t(a) << sl_minus_1; \
|
||||
|
@ -62,6 +62,7 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringV
|
||||
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
|
||||
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
|
||||
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
|
||||
// Compares a UTF-8 string, passed as a pointer plus a length in bytes, with the UTF-16 view rhs; case-sensitive unless cs says otherwise.
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStringsUtf8(const char *, qsizetype, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
|
||||
|
||||
|
||||
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
|
||||
|
@ -713,7 +713,7 @@ QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len)
|
||||
return { true, isValidAscii };
|
||||
}
|
||||
|
||||
int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len)
|
||||
int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len) noexcept
|
||||
{
|
||||
uint uc1, uc2;
|
||||
auto src1 = reinterpret_cast<const uchar *>(utf8);
|
||||
|
@ -338,7 +338,7 @@ struct QUtf8
|
||||
bool isValidAscii;
|
||||
};
|
||||
static ValidUtf8Result isValidUtf8(const char *, qsizetype);
|
||||
static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype);
|
||||
static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype) noexcept;
|
||||
static int compareUtf8(const char *, qsizetype, QLatin1String s);
|
||||
};
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2016 The Qt Company Ltd.
|
||||
** Copyright (C) 2016 Intel Corporation.
|
||||
** Copyright (C) 2020 The Qt Company Ltd.
|
||||
** Copyright (C) 2020 Intel Corporation.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the test suite of the Qt Toolkit.
|
||||
@ -6181,6 +6181,23 @@ void tst_QString::compare_data()
|
||||
in2[i] = 'b';
|
||||
QTest::addRow("all-same-except-char-%d", i) << in1 << in2 << -1 << -1;
|
||||
}
|
||||
|
||||
// some non-US-ASCII comparisons
|
||||
QChar smallA = u'a';
|
||||
QChar smallAWithAcute = u'á';
|
||||
QChar capitalAWithAcute = u'Á';
|
||||
QChar nbsp = u'\u00a0';
|
||||
for (int i = 1; i <= 65; ++i) {
|
||||
QString padding(i - 1, ' ');
|
||||
QTest::addRow("ascii-nonascii-%d", i)
|
||||
<< (padding + smallA) << (padding + smallAWithAcute) << -1 << -1;
|
||||
QTest::addRow("nonascii-nonascii-equal-%d", i)
|
||||
<< (padding + smallAWithAcute) << (padding + smallAWithAcute) << 0 << 0;
|
||||
QTest::addRow("nonascii-nonascii-caseequal-%d", i)
|
||||
<< (padding + capitalAWithAcute) << (padding + smallAWithAcute) << -1 << 0;
|
||||
QTest::addRow("nonascii-nonascii-notequal-%d", i)
|
||||
<< (padding + nbsp) << (padding + smallAWithAcute) << -1 << -1;
|
||||
}
|
||||
}
|
||||
|
||||
static bool isLatin(const QString &s)
|
||||
@ -6200,7 +6217,10 @@ void tst_QString::compare()
|
||||
|
||||
QStringRef r1(&s1, 0, s1.length());
|
||||
QStringRef r2(&s2, 0, s2.length());
|
||||
QByteArray s1_8 = s1.toUtf8();
|
||||
QByteArray s2_8 = s2.toUtf8();
|
||||
|
||||
const QStringView v1(s1);
|
||||
const QStringView v2(s2);
|
||||
|
||||
QCOMPARE(sign(QString::compare(s1, s2)), csr);
|
||||
@ -6218,6 +6238,10 @@ void tst_QString::compare()
|
||||
QCOMPARE(sign(r1.compare(r2, Qt::CaseInsensitive)), cir);
|
||||
QCOMPARE(sign(s1.compare(v2, Qt::CaseSensitive)), csr);
|
||||
QCOMPARE(sign(s1.compare(v2, Qt::CaseInsensitive)), cir);
|
||||
QCOMPARE(sign(QtPrivate::compareStringsUtf8(s1_8, s1_8.size(), v2, Qt::CaseSensitive)), csr);
|
||||
QCOMPARE(sign(QtPrivate::compareStringsUtf8(s1_8, s1_8.size(), v2, Qt::CaseInsensitive)), cir);
|
||||
QCOMPARE(sign(QtPrivate::compareStringsUtf8(s2_8, s2_8.size(), v1, Qt::CaseSensitive)), -csr);
|
||||
QCOMPARE(sign(QtPrivate::compareStringsUtf8(s2_8, s2_8.size(), v1, Qt::CaseInsensitive)), -cir);
|
||||
|
||||
QCOMPARE(sign(QString::compare(s1, s2, Qt::CaseSensitive)), csr);
|
||||
QCOMPARE(sign(QString::compare(s1, s2, Qt::CaseInsensitive)), cir);
|
||||
@ -6238,6 +6262,7 @@ void tst_QString::compare()
|
||||
}
|
||||
|
||||
if (isLatin(s2)) {
|
||||
QVERIFY(QtPrivate::isLatin1(s2));
|
||||
QCOMPARE(sign(QString::compare(s1, QLatin1String(s2.toLatin1()))), csr);
|
||||
QCOMPARE(sign(QString::compare(s1, QLatin1String(s2.toLatin1()), Qt::CaseInsensitive)), cir);
|
||||
QCOMPARE(sign(QStringRef::compare(r1, QLatin1String(s2.toLatin1()))), csr);
|
||||
@ -6252,6 +6277,7 @@ void tst_QString::compare()
|
||||
}
|
||||
|
||||
if (isLatin(s1)) {
|
||||
QVERIFY(QtPrivate::isLatin1(s1));
|
||||
QCOMPARE(sign(QString::compare(QLatin1String(s1.toLatin1()), s2)), csr);
|
||||
QCOMPARE(sign(QString::compare(QLatin1String(s1.toLatin1()), s2, Qt::CaseInsensitive)), cir);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user