Limit QByteArray's 8-bit support to ASCII

Previously it handled Latin-1, which made it incompatible with UTF-8,
which is now our preferred 8-bit encoding. For Qt6 it is limited to
ASCII. Adjusted tests to match. QLatin1String::compare() turned out
to be relying on qstrnicmp()'s Latin-1 handling.

Removed some spurious Q_UNLIKELY()s and tidied up code a little in the
process.

[ChangeLog][QtCore][Important Behavior Changes] Encoding-dependent
features of QByteArray are now limited to ASCII, where previously
they worked for the whole of Latin-1. This affects case-insensitive
comparison, notably including qstricmp() and qstrnicmp(), and
case-transforming functions.

Fixes: QTBUG-84323
Change-Id: I2925d9908f8654599195a2860847b17083911b41
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
This commit is contained in:
Edward Welbourne 2020-05-29 13:12:28 +02:00
parent 135204bdf6
commit 9dd8e655cd
4 changed files with 164 additions and 182 deletions

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2019 The Qt Company Ltd.
** Copyright (C) 2020 The Qt Company Ltd.
** Copyright (C) 2016 Intel Corporation.
** Copyright (C) 2019 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
** Contact: https://www.qt.io/licensing/
@ -69,64 +69,16 @@
QT_BEGIN_NAMESPACE
// Latin 1 case system, used by QByteArray::to{Upper,Lower}() and qstr(n)icmp():
/*
#!/usr/bin/perl -l
use feature "unicode_strings";
for (0..255) {
$up = uc(chr($_));
$up = chr($_) if ord($up) > 0x100 || length $up > 1;
printf "0x%02x,", ord($up);
print "" if ($_ & 0xf) == 0xf;
// ASCII case system, used by QByteArray::to{Upper,Lower}() and qstr(n)icmp():
// Maps an ASCII lowercase letter ('a'..'z') to its uppercase form by clearing
// bit 0x20; every other byte value, including all non-ASCII ones, is returned
// unchanged.
static constexpr inline uchar asciiUpper(uchar c)
{
    return (c < 'a' || c > 'z') ? c : (c & ~0x20);
}
*/
static const uchar latin1_uppercased[256] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff
};
/*
#!/usr/bin/perl -l
use feature "unicode_strings";
for (0..255) {
$up = lc(chr($_));
$up = chr($_) if ord($up) > 0x100 || length $up > 1;
printf "0x%02x,", ord($up);
print "" if ($_ & 0xf) == 0xf;
// Maps an ASCII uppercase letter ('A'..'Z') to its lowercase form by setting
// bit 0x20; every other byte value, including all non-ASCII ones, is returned
// unchanged.
static constexpr inline uchar asciiLower(uchar c)
{
    return (c < 'A' || c > 'Z') ? c : (c | 0x20);
}
*/
static const uchar latin1_lowercased[256] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};
int qFindByteArray(
const char *haystack0, int haystackLen, int from,
@ -293,8 +245,8 @@ int qstrcmp(const char *str1, const char *str2)
A safe \c stricmp() function.
Compares \a str1 and \a str2 ignoring the case of the
characters. The encoding of the strings is assumed to be Latin-1.
Compares \a str1 and \a str2, ignoring differences in the case of any ASCII
characters.
Returns a negative value if \a str1 is less than \a str2, 0 if \a
str1 is equal to \a str2 or a positive value if \a str1 is greater
@ -323,11 +275,10 @@ int qstricmp(const char *str1, const char *str2)
auto innerCompare = [=, &offset](qptrdiff max, bool unlimited) {
max += offset;
do {
uchar c = latin1_lowercased[s1[offset]];
int res = c - latin1_lowercased[s2[offset]];
if (Q_UNLIKELY(res))
uchar c = s1[offset];
if (int res = asciiLower(c) - asciiLower(s2[offset]))
return res;
if (Q_UNLIKELY(!c))
if (!c)
return 0;
++offset;
} while (unlimited || offset < max);
@ -385,9 +336,8 @@ int qstricmp(const char *str1, const char *str2)
A safe \c strnicmp() function.
Compares at most \a len bytes of \a str1 and \a str2 ignoring the
case of the characters. The encoding of the strings is assumed to
be Latin-1.
Compares at most \a len bytes of \a str1 and \a str2, ignoring differences
in the case of any ASCII characters.
Returns a negative value if \a str1 is less than \a str2, 0 if \a str1
is equal to \a str2 or a positive value if \a str1 is greater than \a
@ -406,12 +356,11 @@ int qstrnicmp(const char *str1, const char *str2, uint len)
{
const uchar *s1 = reinterpret_cast<const uchar *>(str1);
const uchar *s2 = reinterpret_cast<const uchar *>(str2);
int res;
uchar c;
if (!s1 || !s2)
return s1 ? 1 : (s2 ? -1 : 0);
for (; len--; s1++, s2++) {
if ((res = (c = latin1_lowercased[*s1]) - latin1_lowercased[*s2]))
for (; len--; ++s1, ++s2) {
const uchar c = *s1;
if (int res = asciiLower(c) - asciiLower(*s2))
return res;
if (!c) // strings are equal
break;
@ -437,28 +386,23 @@ int qstrnicmp(const char *str1, qsizetype len1, const char *str2, qsizetype len2
if (!s2)
return len1 == 0 ? 0 : 1;
int res;
uchar c;
if (len2 == -1) {
// null-terminated str2
qsizetype i;
for (i = 0; i < len1; ++i) {
c = latin1_lowercased[s2[i]];
const uchar c = s2[i];
if (!c)
return 1;
res = latin1_lowercased[s1[i]] - c;
if (res)
if (int res = asciiLower(s1[i]) - asciiLower(c))
return res;
}
c = latin1_lowercased[s2[i]];
return c ? -1 : 0;
return s2[i] ? -1 : 0;
} else {
// not null-terminated
for (qsizetype i = 0; i < qMin(len1, len2); ++i) {
c = latin1_lowercased[s2[i]];
res = latin1_lowercased[s1[i]] - c;
if (res)
const qsizetype len = qMin(len1, len2);
for (qsizetype i = 0; i < len; ++i) {
if (int res = asciiLower(s1[i]) - asciiLower(s2[i]))
return res;
}
if (len1 == len2)
@ -786,14 +730,14 @@ QByteArray qUncompress(const uchar* data, int nbytes)
terminator, and uses \l{implicit sharing} (copy-on-write) to
reduce memory usage and avoid needless copying of data.
In addition to QByteArray, Qt also provides the QString class to
store string data. For most purposes, QString is the class you
want to use. It stores 16-bit Unicode characters, making it easy
to store non-ASCII/non-Latin-1 characters in your application.
Furthermore, QString is used throughout in the Qt API. The two
main cases where QByteArray is appropriate are when you need to
store raw binary data, and when memory conservation is critical
(e.g., with Qt for Embedded Linux).
In addition to QByteArray, Qt also provides the QString class to store
string data. For most purposes, QString is the class you want to use. It
understands its content as Unicode text (encoded using UTF-16) where
QByteArray aims to avoid assumptions about the encoding or semantics of the
bytes it stores (aside from a few legacy cases where it uses ASCII).
Furthermore, QString is used throughout the Qt API. The two main cases
where QByteArray is appropriate are when you need to store raw binary data,
and when memory conservation is critical (e.g., with Qt for Embedded Linux).
One way to initialize a QByteArray is simply to pass a \c{const
char *} to its constructor. For example, the following code
@ -868,13 +812,6 @@ QByteArray qUncompress(const uchar* data, int nbytes)
memory QByteArray actually allocated. Data appended to an empty
array is not copied.
A frequent requirement is to remove whitespace characters from a
byte array ('\\n', '\\t', ' ', etc.). If you want to remove
whitespace from both ends of a QByteArray, use trimmed(). If you
want to remove whitespace from both ends and replace multiple
consecutive whitespaces with a single space character within the
byte array, use simplified().
If you want to find all occurrences of a particular character or
substring in a QByteArray, use indexOf() or lastIndexOf(). The
former searches forward starting from a given index position, the
@ -932,29 +869,40 @@ QByteArray qUncompress(const uchar* data, int nbytes)
Such considerations, the configuration of such behavior or any mitigation
are outside the scope of the QByteArray API.
\section1 Notes on Locale
\section1 C locale and ASCII functions
QByteArray generally handles data as bytes, without presuming any semantics;
where it does presume semantics, it uses the C locale and ASCII encoding.
Standard Unicode encodings are supported by QString; other encodings may be
supported by using QStringEncoder and QStringDecoder to convert to Unicode. For
locale-specific interpretation of text, use QLocale or QString.
\section2 Spacing Characters
A frequent requirement is to remove spacing characters from a byte array
('\\n', '\\t', ' ', etc.). If you want to remove spacing from both ends of a
QByteArray, use trimmed(). If you want to remove spacing from both ends and
replace each run of spacing characters with a single space character within
the byte array, use simplified(). Only ASCII spacing characters are
recognized for these purposes.
\section2 Number-String Conversions
Functions that perform conversions between numeric data types and
strings are performed in the C locale, irrespective of the user's
locale settings. Use QString to perform locale-aware conversions
between numbers and strings.
Functions that convert between numeric data types and strings operate in
the C locale, regardless of the user's locale settings. Use QLocale to
perform locale-aware conversions between numbers and strings.
\section2 8-bit Character Comparisons
\section2 Character Case
In QByteArray, the notion of uppercase and lowercase and of which
character is greater than or less than another character is done
in the Latin-1 locale. This affects functions that support a case
insensitive option or that compare or lowercase or uppercase
their arguments. Case insensitive operations and comparisons will
be accurate if both strings contain only Latin-1 characters.
Functions that this affects include contains(), indexOf(),
lastIndexOf(), operator<(), operator<=(), operator>(),
operator>=(), isLower(), isUpper(), toLower() and toUpper().
In QByteArray, the notion of uppercase and lowercase and of case-independent
comparison is limited to ASCII. Non-ASCII characters are treated as
caseless, since their case depends on encoding. This affects functions that
support a case insensitive option or that change the case of their
arguments. Functions that this affects include contains(), indexOf(),
lastIndexOf(), isLower(), isUpper(), toLower() and toUpper().
This issue does not apply to \l{QString}s since they represent
characters using Unicode.
This issue does not apply to \l{QString}s since they represent characters
using Unicode.
\sa QString, QBitArray
*/
@ -2899,22 +2847,16 @@ bool QByteArray::endsWith(const char *str) const
}
/*
Returns true if \a c is an uppercase Latin1 letter.
\note The multiplication sign 0xD7 and the sz ligature 0xDF are not
treated as uppercase Latin1.
Returns true if \a c is an uppercase ASCII letter.
*/
static inline bool isUpperCaseLatin1(char c)
static constexpr inline bool isUpperCaseAscii(char c)
{
if (c >= 'A' && c <= 'Z')
return true;
return (uchar(c) >= 0xC0 && uchar(c) <= 0xDE && uchar(c) != 0xD7);
return c >= 'A' && c <= 'Z';
}
/*!
Returns \c true if this byte array contains only uppercase letters,
otherwise returns \c false. The byte array is interpreted as a Latin-1
encoded string.
Returns \c true if this byte array contains only ASCII uppercase letters,
otherwise returns \c false.
\since 5.12
\sa isLower(), toUpper()
@ -2927,7 +2869,7 @@ bool QByteArray::isUpper() const
const char *d = data();
for (int i = 0, max = size(); i < max; ++i) {
if (!isUpperCaseLatin1(d[i]))
if (!isUpperCaseAscii(d[i]))
return false;
}
@ -2935,22 +2877,16 @@ bool QByteArray::isUpper() const
}
/*
Returns true if \a c is an lowercase Latin1 letter.
\note The division sign 0xF7 is not treated as lowercase Latin1,
but the small y dieresis 0xFF is.
Returns true if \a c is a lowercase ASCII letter.
*/
static inline bool isLowerCaseLatin1(char c)
static constexpr inline bool isLowerCaseAscii(char c)
{
if (c >= 'a' && c <= 'z')
return true;
return (uchar(c) >= 0xD0 && uchar(c) != 0xF7);
return c >= 'a' && c <= 'z';
}
/*!
Returns \c true if this byte array contains only lowercase letters,
otherwise returns \c false. The byte array is interpreted as a Latin-1
encoded string.
Returns \c true if this byte array contains only lowercase ASCII letters,
otherwise returns \c false.
\since 5.12
\sa isUpper(), toLower()
@ -2963,7 +2899,7 @@ bool QByteArray::isLower() const
const char *d = data();
for (int i = 0, max = size(); i < max; ++i) {
if (!isLowerCaseLatin1(d[i]))
if (!isLowerCaseAscii(d[i]))
return false;
}
@ -3076,8 +3012,8 @@ QByteArray QByteArray::mid(int pos, int len) const
/*!
\fn QByteArray QByteArray::toLower() const
Returns a lowercase copy of the byte array. The bytearray is
interpreted as a Latin-1 encoded string.
Returns a copy of the byte array in which each ASCII uppercase letter is
converted to lowercase.
Example:
\snippet code/src_corelib_text_qbytearray.cpp 30
@ -3090,7 +3026,7 @@ QByteArray QByteArray::mid(int pos, int len) const
// (even with constant propagation, there's no gain in performance).
template <typename T>
Q_NEVER_INLINE
static QByteArray toCase_template(T &input, const uchar * table)
static QByteArray toCase_template(T &input, uchar (*lookup)(uchar))
{
// find the first bad character in input
const char *orig_begin = input.constBegin();
@ -3098,7 +3034,7 @@ static QByteArray toCase_template(T &input, const uchar * table)
const char *e = input.constEnd();
for ( ; firstBad != e ; ++firstBad) {
uchar ch = uchar(*firstBad);
uchar converted = table[ch];
uchar converted = lookup(ch);
if (ch != converted)
break;
}
@ -3111,27 +3047,26 @@ static QByteArray toCase_template(T &input, const uchar * table)
char *b = s.begin(); // will detach if necessary
char *p = b + (firstBad - orig_begin);
e = b + s.size();
for ( ; p != e; ++p) {
*p = char(uchar(table[uchar(*p)]));
}
for ( ; p != e; ++p)
*p = char(lookup(uchar(*p)));
return s;
}
QByteArray QByteArray::toLower_helper(const QByteArray &a)
{
return toCase_template(a, latin1_lowercased);
return toCase_template(a, asciiLower);
}
QByteArray QByteArray::toLower_helper(QByteArray &a)
{
return toCase_template(a, latin1_lowercased);
return toCase_template(a, asciiLower);
}
/*!
\fn QByteArray QByteArray::toUpper() const
Returns an uppercase copy of the byte array. The bytearray is
interpreted as a Latin-1 encoded string.
Returns a copy of the byte array in which each ASCII lowercase letter is
converted to uppercase.
Example:
\snippet code/src_corelib_text_qbytearray.cpp 31
@ -3141,12 +3076,12 @@ QByteArray QByteArray::toLower_helper(QByteArray &a)
QByteArray QByteArray::toUpper_helper(const QByteArray &a)
{
return toCase_template(a, latin1_uppercased);
return toCase_template(a, asciiUpper);
}
QByteArray QByteArray::toUpper_helper(QByteArray &a)
{
return toCase_template(a, latin1_uppercased);
return toCase_template(a, asciiUpper);
}
/*! \fn void QByteArray::clear()
@ -4226,7 +4161,7 @@ QByteArray &QByteArray::setNum(double n, char f, int prec)
QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
uint flags = QLocaleData::ZeroPadExponent;
char lower = latin1_lowercased[uchar(f)];
char lower = asciiLower(uchar(f));
if (f != lower)
flags |= QLocaleData::CapitalEorX;
f = lower;
@ -4248,7 +4183,7 @@ QByteArray &QByteArray::setNum(double n, char f, int prec)
break;
}
*this = QLocaleData::c()->doubleToString(n, prec, form, -1, flags).toLatin1();
*this = QLocaleData::c()->doubleToString(n, prec, form, -1, flags).toUtf8();
return *this;
}

View File

@ -1192,6 +1192,46 @@ static int ucstrcmp(const QChar *a, size_t alen, const char *b, size_t blen)
return cmp ? cmp : lencmp(alen, blen);
}
// Case-insensitive comparison of two Latin-1 byte ranges, each given as a
// (pointer, length) pair.
//
// Every byte is folded to lowercase through a Latin-1 table before comparing.
// The result is:
//  - 0 if both ranges are empty, -1 if only the left one is empty, 1 if only
//    the right one is empty;
//  - otherwise the difference of the folded bytes at the first position, within
//    the common prefix of min(lSize, rSize) bytes, where they differ;
//  - otherwise whatever lencmp(lSize, rSize) returns (presumably an ordering by
//    length; lencmp is defined elsewhere in the file).
static int latin1nicmp(const char *lhsChar, int lSize, const char *rhsChar, int rSize)
{
// Lowercase mapping for all 256 Latin-1 code points: 0x41..0x5a map to
// 0x61..0x7a and 0xc0..0xde map to 0xe0..0xfe; everything else maps to itself.
constexpr uchar latin1Lower[256] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
// 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life: they sit
// inside the 0xc0..0xdf range but are left unchanged by this table, so that
// range cannot be folded with a single uniform offset.
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};
// We're called with QLatin1String's .data() and .size():
Q_ASSERT(lSize >= 0 && rSize >= 0);
// An empty side sorts before any non-empty one; two empty sides are equal.
if (!lSize)
return rSize ? -1 : 0;
if (!rSize)
return 1;
// Only the common prefix can be compared byte by byte.
const int size = std::min(lSize, rSize);
// Go through unsigned bytes so that values >= 0x80 index the table correctly.
const uchar *lhs = reinterpret_cast<const uchar *>(lhsChar);
const uchar *rhs = reinterpret_cast<const uchar *>(rhsChar);
Q_ASSERT(lhs && rhs); // since both lSize and rSize are positive
for (int i = 0; i < size; i++) {
// NOTE(review): this asserts that neither range has a NUL byte within the
// compared prefix; confirm that callers can never pass data with embedded
// NULs (e.g. a QLatin1String constructed with an explicit size).
Q_ASSERT(lhs[i] && rhs[i]);
if (int res = latin1Lower[lhs[i]] - latin1Lower[rhs[i]])
return res;
}
// Common prefix matches ignoring case: the lengths decide the result.
return lencmp(lSize, rSize);
}
static int qt_compare_strings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
{
if (cs == Qt::CaseSensitive)
@ -1218,7 +1258,7 @@ static int qt_compare_strings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens
if (lhs.isEmpty())
return lencmp(0, rhs.size());
if (cs == Qt::CaseInsensitive)
return qstrnicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
return latin1nicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
const auto l = std::min(lhs.size(), rhs.size());
int r = qstrncmp(lhs.data(), rhs.data(), l);
return r ? r : lencmp(lhs.size(), rhs.size());

View File

@ -176,9 +176,9 @@ QByteArray verifyZeroTermination(const QByteArray &ba)
int baSize = ba.size();
char baTerminator = ba.constData()[baSize];
if ('\0' != baTerminator)
return QString::fromLatin1(
"*** Result ('%1') not null-terminated: 0x%2 ***").arg(QString::fromLatin1(ba))
.arg(baTerminator, 2, 16, QChar('0')).toLatin1();
return QString::fromUtf8(
"*** Result ('%1') not null-terminated: 0x%2 ***").arg(QString::fromUtf8(ba))
.arg(baTerminator, 2, 16, QChar('0')).toUtf8();
// Skip mutating checks on shared strings
if (baDataPtr->isShared())
@ -934,30 +934,30 @@ void tst_QByteArray::qstricmp()
QFETCH(QString, str1);
QFETCH(QString, str2);
int expected = strcmp(str1.toUpper().toLatin1(),
str2.toUpper().toLatin1());
int expected = strcmp(str1.toUpper().toUtf8(),
str2.toUpper().toUtf8());
if ( expected != 0 ) {
expected = (expected < 0 ? -1 : 1);
}
int actual = ::qstricmp(str1.toLatin1(), str2.toLatin1());
int actual = ::qstricmp(str1.toUtf8(), str2.toUtf8());
if ( actual != 0 ) {
actual = (actual < 0 ? -1 : 1);
}
QCOMPARE(actual, expected);
actual = ::qstricmp("012345679abcd" + str1.toLatin1(), "012345679AbCd" + str2.toLatin1());
actual = ::qstricmp("012345679abcd" + str1.toUtf8(), "012345679AbCd" + str2.toUtf8());
if ( actual != 0 ) {
actual = (actual < 0 ? -1 : 1);
}
QCOMPARE(actual, expected);
actual = str1.toLatin1().compare(str2.toLatin1(), Qt::CaseInsensitive);
actual = str1.toUtf8().compare(str2.toUtf8(), Qt::CaseInsensitive);
if ( actual != 0 ) {
actual = (actual < 0 ? -1 : 1);
}
QCOMPARE(actual, expected);
actual = str1.toLatin1().compare(str2.toLatin1().constData(), Qt::CaseInsensitive);
actual = str1.toUtf8().compare(str2.toUtf8().constData(), Qt::CaseInsensitive);
if ( actual != 0 ) {
actual = (actual < 0 ? -1 : 1);
}
@ -1468,7 +1468,7 @@ void tst_QByteArray::toULong_data()
QTest::addColumn<bool>("ok");
ulong LongMaxPlusOne = (ulong)LONG_MAX + 1;
QTest::newRow("LONG_MAX+1") << QString::number(LongMaxPlusOne).toLatin1() << 10 << LongMaxPlusOne << true;
QTest::newRow("LONG_MAX+1") << QString::number(LongMaxPlusOne).toUtf8() << 10 << LongMaxPlusOne << true;
QTest::newRow("default") << QByteArray() << 10 << 0UL << false;
QTest::newRow("empty") << QByteArray("") << 10 << 0UL << false;
QTest::newRow("ulong1") << QByteArray("3234567890") << 10 << 3234567890UL << true;
@ -1990,7 +1990,7 @@ void tst_QByteArray::compareCharStar()
const bool isEqual = result == 0;
const bool isLess = result < 0;
const bool isGreater = result > 0;
QByteArray qba = string2.toLatin1();
QByteArray qba = string2.toUtf8();
const char *str2 = qba.constData();
if (string2.isNull())
str2 = 0;
@ -2297,6 +2297,14 @@ void tst_QByteArray::toUpperLower_data()
QTest::addColumn<QByteArray>("upper");
QTest::addColumn<QByteArray>("lower");
{
QByteArray nonAscii(128, Qt::Uninitialized);
char *data = nonAscii.data();
for (unsigned char i = 0; i < 128; ++i)
data[i] = i + 128;
QTest::newRow("non-ASCII") << nonAscii << nonAscii << nonAscii;
}
QTest::newRow("empty") << QByteArray() << QByteArray() << QByteArray();
QTest::newRow("literal") << QByteArrayLiteral("Hello World")
<< QByteArrayLiteral("HELLO WORLD")
@ -2304,9 +2312,6 @@ void tst_QByteArray::toUpperLower_data()
QTest::newRow("ascii") << QByteArray("Hello World, this is a STRING")
<< QByteArray("HELLO WORLD, THIS IS A STRING")
<< QByteArray("hello world, this is a string");
QTest::newRow("latin1") << QByteArray("R\311sum\351")
<< QByteArray("R\311SUM\311")
<< QByteArray("r\351sum\351");
QTest::newRow("nul") << QByteArray("a\0B", 3) << QByteArray("A\0B", 3) << QByteArray("a\0b", 3);
}
@ -2350,9 +2355,9 @@ void tst_QByteArray::isUpper()
QVERIFY(!QByteArray().isUpper());
QVERIFY(!QByteArray("").isUpper());
QVERIFY(QByteArray("TEXT").isUpper());
QVERIFY(QByteArray("\xD0\xDE").isUpper());
QVERIFY(!QByteArray("\xD7").isUpper()); // multiplication sign is not upper
QVERIFY(!QByteArray("\xDF").isUpper()); // sz ligature is not upper
QVERIFY(!QByteArray("\xD0\xDE").isUpper()); // non-ASCII is neither upper nor lower
QVERIFY(!QByteArray("\xD7").isUpper());
QVERIFY(!QByteArray("\xDF").isUpper());
QVERIFY(!QByteArray("text").isUpper());
QVERIFY(!QByteArray("Text").isUpper());
QVERIFY(!QByteArray("tExt").isUpper());
@ -2373,8 +2378,8 @@ void tst_QByteArray::isLower()
QVERIFY(!QByteArray().isLower());
QVERIFY(!QByteArray("").isLower());
QVERIFY(QByteArray("text").isLower());
QVERIFY(QByteArray("\xE0\xFF").isLower());
QVERIFY(!QByteArray("\xF7").isLower()); // division sign is not lower
QVERIFY(!QByteArray("\xE0\xFF").isLower()); // non-ASCII is neither upper nor lower
QVERIFY(!QByteArray("\xF7").isLower());
QVERIFY(!QByteArray("Text").isLower());
QVERIFY(!QByteArray("tExt").isLower());
QVERIFY(!QByteArray("teXt").isLower());
@ -2416,7 +2421,6 @@ void tst_QByteArray::stdString()
QVERIFY(l1str.length() < utf8str.length());
}
const char globalChar = '1';
QTEST_MAIN(tst_QByteArray)

View File

@ -972,7 +972,7 @@ void tst_QStringApiSymmetry::compare_data(bool hasConceptOfNullAndEmpty)
<< 0 << 0;
}
#define ROW(lhs, rhs) \
#define ROW(lhs, rhs, caseless) \
do { \
static const QString pinned[] = { \
QString(QLatin1String(lhs)), \
@ -981,16 +981,19 @@ void tst_QStringApiSymmetry::compare_data(bool hasConceptOfNullAndEmpty)
QTest::newRow(qUtf8Printable(QLatin1String("'" lhs "' <> '" rhs "': "))) \
<< QStringRef(&pinned[0]) << QLatin1String(lhs) \
<< QStringRef(&pinned[1]) << QLatin1String(rhs) \
<< sign(qstrcmp(lhs, rhs)) << sign(qstricmp(lhs, rhs)); \
<< sign(qstrcmp(lhs, rhs)) << caseless; \
} while (false)
ROW("", "0");
ROW("0", "");
ROW("0", "1");
ROW("0", "0");
ROW("10", "0");
ROW("01", "1");
ROW("\xE4", "\xE4"); // ä <> ä
ROW("\xE4", "\xC4"); // ä <> Ä
#define ASCIIROW(lhs, rhs) ROW(lhs, rhs, sign(qstricmp(lhs, rhs)))
ASCIIROW("", "0");
ASCIIROW("0", "");
ASCIIROW("0", "1");
ASCIIROW("0", "0");
ASCIIROW("10", "0");
ASCIIROW("01", "1");
ASCIIROW("e", "e");
ASCIIROW("e", "E");
ROW("\xE4", "\xE4", 0); // ä <> ä
ROW("\xE4", "\xC4", 0); // ä <> Ä
#undef ROW
}