From 81ba16cad933981f55218964a89ed1446022efba Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Mon, 24 Mar 2014 16:12:20 +0100 Subject: [PATCH] Fix case insensitive comparisons using QCollator In ICU the strength parameter decides whether a comparison is case sensitive or not. Fix mac comparison code. It can't have worked before. Added some basic automated testing for QCollator. Change-Id: I2646c464fd22ccd3a93c461fa3dba4bd1d4c7b4b Reviewed-by: Konstantin Ritt --- src/corelib/tools/qcollator_icu.cpp | 11 ++- src/corelib/tools/qcollator_macx.cpp | 9 +- .../corelib/tools/qcollator/tst_qcollator.cpp | 90 +++++++++++++++++++ 3 files changed, 105 insertions(+), 5 deletions(-) diff --git a/src/corelib/tools/qcollator_icu.cpp b/src/corelib/tools/qcollator_icu.cpp index 407a493d25d..23e88b50150 100644 --- a/src/corelib/tools/qcollator_icu.cpp +++ b/src/corelib/tools/qcollator_icu.cpp @@ -75,10 +75,17 @@ void QCollator::setCaseSensitivity(Qt::CaseSensitivity cs) { detach(); - UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_UPPER_FIRST : UCOL_OFF; + // The strength attribute in ICU is rather badly documented. Basically UCOL_PRIMARY + // ignores differences between base characters and accented characters as well as case. + // So A and A-umlaut would compare equal. + // UCOL_SECONDARY ignores case differences. UCOL_TERTIARY is the default in most languages + // and does case sensitive comparison. + // UCOL_QUATERNARY is used as default in a few languages such as Japanese to take care of some + // additional differences in those languages. + UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_DEFAULT_STRENGTH : UCOL_SECONDARY; UErrorCode status = U_ZERO_ERROR; - ucol_setAttribute(d->collator, UCOL_CASE_FIRST, val, &status); + ucol_setAttribute(d->collator, UCOL_STRENGTH, val, &status); if (U_FAILURE(status)) qWarning("ucol_setAttribute: Case First failed: %d", status); } diff --git a/src/corelib/tools/qcollator_macx.cpp b/src/corelib/tools/qcollator_macx.cpp index 8985cd4ebaf..877510489a9 100644 --- a/src/corelib/tools/qcollator_macx.cpp +++ b/src/corelib/tools/qcollator_macx.cpp @@ -128,12 +128,15 @@ bool QCollator::ignorePunctuation() const int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const { SInt32 result; - return UCCompareText(d->collator.collator, + Boolean equivalent; + UCCompareText(d->collator.collator, reinterpret_cast(s1), len1, reinterpret_cast(s2), len2, - NULL, + &equivalent, &result); - return result; + if (equivalent) + return 0; + return result < 0 ? -1 : 1; } int QCollator::compare(const QString &str1, const QString &str2) const { diff --git a/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp b/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp index 3df8422a34f..9ed27a87426 100644 --- a/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp +++ b/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp @@ -52,6 +52,9 @@ class tst_QCollator : public QObject private Q_SLOTS: void moveSemantics(); + + void compare_data(); + void compare(); }; #ifdef Q_COMPILER_RVALUE_REFS @@ -87,6 +90,93 @@ void tst_QCollator::moveSemantics() #endif } + +void tst_QCollator::compare_data() +{ + QTest::addColumn("locale"); + QTest::addColumn("s1"); + QTest::addColumn("s2"); + QTest::addColumn("result"); + QTest::addColumn("caseInsensitiveResult"); + + /* + A few tests below are commented out on the mac. It's unclear why they fail, + as it looks like the collator for the locale is created correctly. + */ + + /* + It's hard to test English, because it's treated differently + on different platforms. For example, on Linux, it uses the + iso14651_t1 template file, which happens to provide good + defaults for Swedish. Mac OS X seems to do a pure bytewise + comparison of Latin-1 values, although I'm not sure. So I + just test digits to make sure that it's not totally broken. + */ + QTest::newRow("english1") << QString("en_US") << QString("5") << QString("4") << 1 << 1; + QTest::newRow("english2") << QString("en_US") << QString("4") << QString("6") << -1 << -1; + QTest::newRow("english3") << QString("en_US") << QString("5") << QString("6") << -1 << -1; + QTest::newRow("english4") << QString("en_US") << QString("a") << QString("b") << -1 << -1; + /* + In Swedish, a with ring above (E5) comes before a with + diaresis (E4), which comes before o diaresis (F6), which + all come after z. + */ + QTest::newRow("swedish1") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xe4") << -1 << -1; + QTest::newRow("swedish2") << QString("sv_SE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1; + QTest::newRow("swedish3") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xf6") << -1 << -1; +#ifndef Q_OS_MAC + QTest::newRow("swedish4") << QString("sv_SE") << QString::fromLatin1("z") << QString::fromLatin1("\xe5") << -1 << -1; +#endif + + /* + In Norwegian, ae (E6) comes before o with stroke (D8), which + comes before a with ring above (E5). + */ + QTest::newRow("norwegian1") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xd8") << -1 << -1; +#ifndef Q_OS_MAC + QTest::newRow("norwegian2") << QString("no_NO") << QString::fromLatin1("\xd8") << QString::fromLatin1("\xe5") << -1 << -1; +#endif + QTest::newRow("norwegian3") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xe5") << -1 << -1; + + /* + In German, z comes *after* a with diaresis (E4), + which comes before o diaresis (F6). + */ + QTest::newRow("german1") << QString("de_DE") << QString::fromLatin1("a") << QString::fromLatin1("\xe4") << -1 << -1; + QTest::newRow("german2") << QString("de_DE") << QString::fromLatin1("b") << QString::fromLatin1("\xe4") << 1 << 1; + QTest::newRow("german3") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xe4") << 1 << 1; + QTest::newRow("german4") << QString("de_DE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1; + QTest::newRow("german5") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xf6") << 1 << 1; + QTest::newRow("german6") << QString("de_DE") << QString::fromLatin1("\xc0") << QString::fromLatin1("\xe0") << 1 << 0; + QTest::newRow("german7") << QString("de_DE") << QString::fromLatin1("\xd6") << QString::fromLatin1("\xf6") << 1 << 0; + QTest::newRow("german8") << QString("de_DE") << QString::fromLatin1("oe") << QString::fromLatin1("\xf6") << 1 << 1; + QTest::newRow("german9") << QString("de_DE") << QString("A") << QString("a") << 1 << 0; + + /* + French sorting of e and e with accent + */ + QTest::newRow("french1") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("e") << 1 << 1; + QTest::newRow("french2") << QString("fr_FR") << QString::fromLatin1("\xe9t") << QString::fromLatin1("et") << 1 << 1; + QTest::newRow("french3") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("d") << 1 << 1; + QTest::newRow("french4") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("f") << -1 << -1; + +} + + +void tst_QCollator::compare() +{ + QFETCH(QString, locale); + QFETCH(QString, s1); + QFETCH(QString, s2); + QFETCH(int, result); + QFETCH(int, caseInsensitiveResult); + + QCollator collator(locale); + QCOMPARE(collator.compare(s1, s2), result); + collator.setCaseSensitivity(Qt::CaseInsensitive); + QCOMPARE(collator.compare(s1, s2), caseInsensitiveResult); +} + QTEST_APPLESS_MAIN(tst_QCollator) #include "tst_qcollator.moc"