Fix case insensitive comparisons using QCollator
In ICU, the strength attribute decides whether a comparison is case-sensitive or not. Fix the Mac comparison code; it cannot have worked before. Add some basic automated tests for QCollator. Change-Id: I2646c464fd22ccd3a93c461fa3dba4bd1d4c7b4b Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
parent
dbe6db192a
commit
81ba16cad9
@ -75,10 +75,17 @@ void QCollator::setCaseSensitivity(Qt::CaseSensitivity cs)
|
|||||||
{
|
{
|
||||||
detach();
|
detach();
|
||||||
|
|
||||||
UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_UPPER_FIRST : UCOL_OFF;
|
// The strength attribute in ICU is rather badly documented. Basically UCOL_PRIMARY
|
||||||
|
// ignores differences between base characters and accented characters as well as case.
|
||||||
|
// So A and A-umlaut would compare equal.
|
||||||
|
// UCOL_SECONDARY ignores case differences. UCOL_TERTIARY is the default in most languages
|
||||||
|
// and does case sensitive comparison.
|
||||||
|
// UCOL_QUATERNARY is used as default in a few languages such as Japanese to take care of some
|
||||||
|
// additional differences in those languages.
|
||||||
|
UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_DEFAULT_STRENGTH : UCOL_SECONDARY;
|
||||||
|
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
ucol_setAttribute(d->collator, UCOL_CASE_FIRST, val, &status);
|
ucol_setAttribute(d->collator, UCOL_STRENGTH, val, &status);
|
||||||
if (U_FAILURE(status))
|
if (U_FAILURE(status))
|
||||||
qWarning("ucol_setAttribute: Case First failed: %d", status);
|
qWarning("ucol_setAttribute: Case First failed: %d", status);
|
||||||
}
|
}
|
||||||
|
@ -128,12 +128,15 @@ bool QCollator::ignorePunctuation() const
|
|||||||
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
|
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
|
||||||
{
|
{
|
||||||
SInt32 result;
|
SInt32 result;
|
||||||
return UCCompareText(d->collator.collator,
|
Boolean equivalent;
|
||||||
|
UCCompareText(d->collator.collator,
|
||||||
reinterpret_cast<const UniChar *>(s1), len1,
|
reinterpret_cast<const UniChar *>(s1), len1,
|
||||||
reinterpret_cast<const UniChar *>(s2), len2,
|
reinterpret_cast<const UniChar *>(s2), len2,
|
||||||
NULL,
|
&equivalent,
|
||||||
&result);
|
&result);
|
||||||
return result;
|
if (equivalent)
|
||||||
|
return 0;
|
||||||
|
return result < 0 ? -1 : 1;
|
||||||
}
|
}
|
||||||
int QCollator::compare(const QString &str1, const QString &str2) const
|
int QCollator::compare(const QString &str1, const QString &str2) const
|
||||||
{
|
{
|
||||||
|
@ -52,6 +52,9 @@ class tst_QCollator : public QObject
|
|||||||
|
|
||||||
private Q_SLOTS:
|
private Q_SLOTS:
|
||||||
void moveSemantics();
|
void moveSemantics();
|
||||||
|
|
||||||
|
void compare_data();
|
||||||
|
void compare();
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef Q_COMPILER_RVALUE_REFS
|
#ifdef Q_COMPILER_RVALUE_REFS
|
||||||
@ -87,6 +90,93 @@ void tst_QCollator::moveSemantics()
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void tst_QCollator::compare_data()
|
||||||
|
{
|
||||||
|
QTest::addColumn<QString>("locale");
|
||||||
|
QTest::addColumn<QString>("s1");
|
||||||
|
QTest::addColumn<QString>("s2");
|
||||||
|
QTest::addColumn<int>("result");
|
||||||
|
QTest::addColumn<int>("caseInsensitiveResult");
|
||||||
|
|
||||||
|
/*
|
||||||
|
A few tests below are disabled on the Mac (see the Q_OS_MAC guards). It's unclear why they fail,
|
||||||
|
as it looks like the collator for the locale is created correctly.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
It's hard to test English, because it's treated differently
|
||||||
|
on different platforms. For example, on Linux, it uses the
|
||||||
|
iso14651_t1 template file, which happens to provide good
|
||||||
|
defaults for Swedish. Mac OS X seems to do a pure bytewise
|
||||||
|
comparison of Latin-1 values, although I'm not sure. So I
|
||||||
|
just test digits to make sure that it's not totally broken.
|
||||||
|
*/
|
||||||
|
QTest::newRow("english1") << QString("en_US") << QString("5") << QString("4") << 1 << 1;
|
||||||
|
QTest::newRow("english2") << QString("en_US") << QString("4") << QString("6") << -1 << -1;
|
||||||
|
QTest::newRow("english3") << QString("en_US") << QString("5") << QString("6") << -1 << -1;
|
||||||
|
QTest::newRow("english4") << QString("en_US") << QString("a") << QString("b") << -1 << -1;
|
||||||
|
/*
|
||||||
|
In Swedish, a with ring above (E5) comes before a with
|
||||||
|
diaeresis (E4), which comes before o diaeresis (F6), which
|
||||||
|
all come after z.
|
||||||
|
*/
|
||||||
|
QTest::newRow("swedish1") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xe4") << -1 << -1;
|
||||||
|
QTest::newRow("swedish2") << QString("sv_SE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1;
|
||||||
|
QTest::newRow("swedish3") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xf6") << -1 << -1;
|
||||||
|
#ifndef Q_OS_MAC
|
||||||
|
QTest::newRow("swedish4") << QString("sv_SE") << QString::fromLatin1("z") << QString::fromLatin1("\xe5") << -1 << -1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
In Norwegian, ae (E6) comes before o with stroke (D8), which
|
||||||
|
comes before a with ring above (E5).
|
||||||
|
*/
|
||||||
|
QTest::newRow("norwegian1") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xd8") << -1 << -1;
|
||||||
|
#ifndef Q_OS_MAC
|
||||||
|
QTest::newRow("norwegian2") << QString("no_NO") << QString::fromLatin1("\xd8") << QString::fromLatin1("\xe5") << -1 << -1;
|
||||||
|
#endif
|
||||||
|
QTest::newRow("norwegian3") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xe5") << -1 << -1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
In German, z comes *after* a with diaeresis (E4),
|
||||||
|
which comes before o diaeresis (F6).
|
||||||
|
*/
|
||||||
|
QTest::newRow("german1") << QString("de_DE") << QString::fromLatin1("a") << QString::fromLatin1("\xe4") << -1 << -1;
|
||||||
|
QTest::newRow("german2") << QString("de_DE") << QString::fromLatin1("b") << QString::fromLatin1("\xe4") << 1 << 1;
|
||||||
|
QTest::newRow("german3") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xe4") << 1 << 1;
|
||||||
|
QTest::newRow("german4") << QString("de_DE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1;
|
||||||
|
QTest::newRow("german5") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xf6") << 1 << 1;
|
||||||
|
QTest::newRow("german6") << QString("de_DE") << QString::fromLatin1("\xc0") << QString::fromLatin1("\xe0") << 1 << 0;
|
||||||
|
QTest::newRow("german7") << QString("de_DE") << QString::fromLatin1("\xd6") << QString::fromLatin1("\xf6") << 1 << 0;
|
||||||
|
QTest::newRow("german8") << QString("de_DE") << QString::fromLatin1("oe") << QString::fromLatin1("\xf6") << 1 << 1;
|
||||||
|
QTest::newRow("german9") << QString("de_DE") << QString("A") << QString("a") << 1 << 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
French sorting of e and e with accent
|
||||||
|
*/
|
||||||
|
QTest::newRow("french1") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("e") << 1 << 1;
|
||||||
|
QTest::newRow("french2") << QString("fr_FR") << QString::fromLatin1("\xe9t") << QString::fromLatin1("et") << 1 << 1;
|
||||||
|
QTest::newRow("french3") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("d") << 1 << 1;
|
||||||
|
QTest::newRow("french4") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("f") << -1 << -1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void tst_QCollator::compare()
|
||||||
|
{
|
||||||
|
QFETCH(QString, locale);
|
||||||
|
QFETCH(QString, s1);
|
||||||
|
QFETCH(QString, s2);
|
||||||
|
QFETCH(int, result);
|
||||||
|
QFETCH(int, caseInsensitiveResult);
|
||||||
|
|
||||||
|
QCollator collator(locale);
|
||||||
|
QCOMPARE(collator.compare(s1, s2), result);
|
||||||
|
collator.setCaseSensitivity(Qt::CaseInsensitive);
|
||||||
|
QCOMPARE(collator.compare(s1, s2), caseInsensitiveResult);
|
||||||
|
}
|
||||||
|
|
||||||
QTEST_APPLESS_MAIN(tst_QCollator)
|
QTEST_APPLESS_MAIN(tst_QCollator)
|
||||||
|
|
||||||
#include "tst_qcollator.moc"
|
#include "tst_qcollator.moc"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user