Limit QByteArray's 8-bit support to ASCII
Previously it handled Latin-1, which made it incompatible with UTF-8, which is now our preferred 8-bit encoding. For Qt6 it is limited to ASCII. Adjusted tests to match. QLatin1String::compare() turned out to be relying on qstrnicmp()'s Latin-1 handling. Removed some spurious Q_UNLIKELY()s and tidied up code a little in the process. [ChangeLog][QtCore][Important Behavior Changes] Encoding-dependent features of QByteArray are now limited to ASCII, where previously they worked for the whole of Latin-1. This affects case-insensitive comparison, notably including qstricmp() and qstrnicmp(), and case-transforming functions. Fixes: QTBUG-84323 Change-Id: I2925d9908f8654599195a2860847b17083911b41 Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
This commit is contained in:
parent
135204bdf6
commit
9dd8e655cd
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2019 The Qt Company Ltd.
|
||||
** Copyright (C) 2020 The Qt Company Ltd.
|
||||
** Copyright (C) 2016 Intel Corporation.
|
||||
** Copyright (C) 2019 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
@ -69,64 +69,16 @@
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// Latin 1 case system, used by QByteArray::to{Upper,Lower}() and qstr(n)icmp():
|
||||
/*
|
||||
#!/usr/bin/perl -l
|
||||
use feature "unicode_strings";
|
||||
for (0..255) {
|
||||
$up = uc(chr($_));
|
||||
$up = chr($_) if ord($up) > 0x100 || length $up > 1;
|
||||
printf "0x%02x,", ord($up);
|
||||
print "" if ($_ & 0xf) == 0xf;
|
||||
// ASCII case system, used by QByteArray::to{Upper,Lower}() and qstr(n)icmp():
|
||||
// Maps an ASCII lowercase letter ('a'..'z') to its uppercase counterpart;
// every other byte value, including all non-ASCII bytes, is returned as is.
static constexpr inline uchar asciiUpper(uchar c)
{
    return (c < 'a' || c > 'z') ? c : uchar(c - ('a' - 'A'));
}
|
||||
*/
|
||||
static const uchar latin1_uppercased[256] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
|
||||
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
|
||||
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
|
||||
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
|
||||
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
|
||||
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
|
||||
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
||||
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
|
||||
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
|
||||
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
|
||||
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff
|
||||
};
|
||||
|
||||
/*
|
||||
#!/usr/bin/perl -l
|
||||
use feature "unicode_strings";
|
||||
for (0..255) {
|
||||
$up = lc(chr($_));
|
||||
$up = chr($_) if ord($up) > 0x100 || length $up > 1;
|
||||
printf "0x%02x,", ord($up);
|
||||
print "" if ($_ & 0xf) == 0xf;
|
||||
// Maps an ASCII uppercase letter ('A'..'Z') to its lowercase counterpart;
// every other byte value, including all non-ASCII bytes, is returned as is.
static constexpr inline uchar asciiLower(uchar c)
{
    return (c < 'A' || c > 'Z') ? c : uchar(c + ('a' - 'A'));
}
|
||||
*/
|
||||
static const uchar latin1_lowercased[256] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
|
||||
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
|
||||
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
|
||||
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
|
||||
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
|
||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
|
||||
};
|
||||
|
||||
int qFindByteArray(
|
||||
const char *haystack0, int haystackLen, int from,
|
||||
@ -293,8 +245,8 @@ int qstrcmp(const char *str1, const char *str2)
|
||||
|
||||
A safe \c stricmp() function.
|
||||
|
||||
Compares \a str1 and \a str2 ignoring the case of the
|
||||
characters. The encoding of the strings is assumed to be Latin-1.
|
||||
Compares \a str1 and \a str2, ignoring differences in the case of any ASCII
|
||||
characters.
|
||||
|
||||
Returns a negative value if \a str1 is less than \a str2, 0 if \a
|
||||
str1 is equal to \a str2 or a positive value if \a str1 is greater
|
||||
@ -323,11 +275,10 @@ int qstricmp(const char *str1, const char *str2)
|
||||
auto innerCompare = [=, &offset](qptrdiff max, bool unlimited) {
|
||||
max += offset;
|
||||
do {
|
||||
uchar c = latin1_lowercased[s1[offset]];
|
||||
int res = c - latin1_lowercased[s2[offset]];
|
||||
if (Q_UNLIKELY(res))
|
||||
uchar c = s1[offset];
|
||||
if (int res = asciiLower(c) - asciiLower(s2[offset]))
|
||||
return res;
|
||||
if (Q_UNLIKELY(!c))
|
||||
if (!c)
|
||||
return 0;
|
||||
++offset;
|
||||
} while (unlimited || offset < max);
|
||||
@ -385,9 +336,8 @@ int qstricmp(const char *str1, const char *str2)
|
||||
|
||||
A safe \c strnicmp() function.
|
||||
|
||||
Compares at most \a len bytes of \a str1 and \a str2 ignoring the
|
||||
case of the characters. The encoding of the strings is assumed to
|
||||
be Latin-1.
|
||||
Compares at most \a len bytes of \a str1 and \a str2, ignoring differences
|
||||
in the case of any ASCII characters.
|
||||
|
||||
Returns a negative value if \a str1 is less than \a str2, 0 if \a str1
|
||||
is equal to \a str2 or a positive value if \a str1 is greater than \a
|
||||
@ -406,12 +356,11 @@ int qstrnicmp(const char *str1, const char *str2, uint len)
|
||||
{
|
||||
const uchar *s1 = reinterpret_cast<const uchar *>(str1);
|
||||
const uchar *s2 = reinterpret_cast<const uchar *>(str2);
|
||||
int res;
|
||||
uchar c;
|
||||
if (!s1 || !s2)
|
||||
return s1 ? 1 : (s2 ? -1 : 0);
|
||||
for (; len--; s1++, s2++) {
|
||||
if ((res = (c = latin1_lowercased[*s1]) - latin1_lowercased[*s2]))
|
||||
for (; len--; ++s1, ++s2) {
|
||||
const uchar c = *s1;
|
||||
if (int res = asciiLower(c) - asciiLower(*s2))
|
||||
return res;
|
||||
if (!c) // strings are equal
|
||||
break;
|
||||
@ -437,28 +386,23 @@ int qstrnicmp(const char *str1, qsizetype len1, const char *str2, qsizetype len2
|
||||
if (!s2)
|
||||
return len1 == 0 ? 0 : 1;
|
||||
|
||||
int res;
|
||||
uchar c;
|
||||
if (len2 == -1) {
|
||||
// null-terminated str2
|
||||
qsizetype i;
|
||||
for (i = 0; i < len1; ++i) {
|
||||
c = latin1_lowercased[s2[i]];
|
||||
const uchar c = s2[i];
|
||||
if (!c)
|
||||
return 1;
|
||||
|
||||
res = latin1_lowercased[s1[i]] - c;
|
||||
if (res)
|
||||
if (int res = asciiLower(s1[i]) - asciiLower(c))
|
||||
return res;
|
||||
}
|
||||
c = latin1_lowercased[s2[i]];
|
||||
return c ? -1 : 0;
|
||||
return s2[i] ? -1 : 0;
|
||||
} else {
|
||||
// not null-terminated
|
||||
for (qsizetype i = 0; i < qMin(len1, len2); ++i) {
|
||||
c = latin1_lowercased[s2[i]];
|
||||
res = latin1_lowercased[s1[i]] - c;
|
||||
if (res)
|
||||
const qsizetype len = qMin(len1, len2);
|
||||
for (qsizetype i = 0; i < len; ++i) {
|
||||
if (int res = asciiLower(s1[i]) - asciiLower(s2[i]))
|
||||
return res;
|
||||
}
|
||||
if (len1 == len2)
|
||||
@ -786,14 +730,14 @@ QByteArray qUncompress(const uchar* data, int nbytes)
|
||||
terminator, and uses \l{implicit sharing} (copy-on-write) to
|
||||
reduce memory usage and avoid needless copying of data.
|
||||
|
||||
In addition to QByteArray, Qt also provides the QString class to
|
||||
store string data. For most purposes, QString is the class you
|
||||
want to use. It stores 16-bit Unicode characters, making it easy
|
||||
to store non-ASCII/non-Latin-1 characters in your application.
|
||||
Furthermore, QString is used throughout in the Qt API. The two
|
||||
main cases where QByteArray is appropriate are when you need to
|
||||
store raw binary data, and when memory conservation is critical
|
||||
(e.g., with Qt for Embedded Linux).
|
||||
In addition to QByteArray, Qt also provides the QString class to store
|
||||
string data. For most purposes, QString is the class you want to use. It
|
||||
understands its content as Unicode text (encoded using UTF-16) where
|
||||
QByteArray aims to avoid assumptions about the encoding or semantics of the
|
||||
bytes it stores (aside from a few legacy cases where it uses ASCII).
|
||||
Furthermore, QString is used throughout in the Qt API. The two main cases
|
||||
where QByteArray is appropriate are when you need to store raw binary data,
|
||||
and when memory conservation is critical (e.g., with Qt for Embedded Linux).
|
||||
|
||||
One way to initialize a QByteArray is simply to pass a \c{const
|
||||
char *} to its constructor. For example, the following code
|
||||
@ -868,13 +812,6 @@ QByteArray qUncompress(const uchar* data, int nbytes)
|
||||
memory QByteArray actually allocated. Data appended to an empty
|
||||
array is not copied.
|
||||
|
||||
A frequent requirement is to remove whitespace characters from a
|
||||
byte array ('\\n', '\\t', ' ', etc.). If you want to remove
|
||||
whitespace from both ends of a QByteArray, use trimmed(). If you
|
||||
want to remove whitespace from both ends and replace multiple
|
||||
consecutive whitespaces with a single space character within the
|
||||
byte array, use simplified().
|
||||
|
||||
If you want to find all occurrences of a particular character or
|
||||
substring in a QByteArray, use indexOf() or lastIndexOf(). The
|
||||
former searches forward starting from a given index position, the
|
||||
@ -932,29 +869,40 @@ QByteArray qUncompress(const uchar* data, int nbytes)
|
||||
Such considerations, the configuration of such behavior or any mitigation
|
||||
are outside the scope of the QByteArray API.
|
||||
|
||||
\section1 Notes on Locale
|
||||
\section1 C locale and ASCII functions
|
||||
|
||||
QByteArray generally handles data as bytes, without presuming any semantics;
|
||||
where it does presume semantics, it uses the C locale and ASCII encoding.
|
||||
Standard Unicode encodings are supported by QString; other encodings may be
|
||||
supported using QStringEncoder and QStringDecoder to convert to Unicode. For
|
||||
locale-specific interpretation of text, use QLocale or QString.
|
||||
|
||||
\section2 Spacing Characters
|
||||
|
||||
A frequent requirement is to remove spacing characters from a byte array
|
||||
('\\n', '\\t', ' ', etc.). If you want to remove spacing from both ends of a
|
||||
QByteArray, use trimmed(). If you want to remove spacing from both ends and
|
||||
replace each run of spacing characters with a single space character within
|
||||
the byte array, use simplified(). Only ASCII spacing characters are
|
||||
recognized for these purposes.
|
||||
|
||||
\section2 Number-String Conversions
|
||||
|
||||
Functions that perform conversions between numeric data types and
|
||||
strings are performed in the C locale, irrespective of the user's
|
||||
locale settings. Use QString to perform locale-aware conversions
|
||||
between numbers and strings.
|
||||
Conversions between numeric data types and strings
|
||||
are performed in the C locale, regardless of the user's locale settings. Use
|
||||
QLocale to perform locale-aware conversions between numbers and strings.
|
||||
|
||||
\section2 8-bit Character Comparisons
|
||||
\section2 Character Case
|
||||
|
||||
In QByteArray, the notion of uppercase and lowercase and of which
|
||||
character is greater than or less than another character is done
|
||||
in the Latin-1 locale. This affects functions that support a case
|
||||
insensitive option or that compare or lowercase or uppercase
|
||||
their arguments. Case insensitive operations and comparisons will
|
||||
be accurate if both strings contain only Latin-1 characters.
|
||||
Functions that this affects include contains(), indexOf(),
|
||||
lastIndexOf(), operator<(), operator<=(), operator>(),
|
||||
operator>=(), isLower(), isUpper(), toLower() and toUpper().
|
||||
In QByteArray, the notion of uppercase and lowercase and of case-independent
|
||||
comparison is limited to ASCII. Non-ASCII characters are treated as
|
||||
caseless, since their case depends on encoding. This affects functions that
|
||||
support a case insensitive option or that change the case of their
|
||||
arguments. Functions that this affects include contains(), indexOf(),
|
||||
lastIndexOf(), isLower(), isUpper(), toLower() and toUpper().
|
||||
|
||||
This issue does not apply to \l{QString}s since they represent
|
||||
characters using Unicode.
|
||||
This issue does not apply to \l{QString}s since they represent characters
|
||||
using Unicode.
|
||||
|
||||
\sa QString, QBitArray
|
||||
*/
|
||||
@ -2899,22 +2847,16 @@ bool QByteArray::endsWith(const char *str) const
|
||||
}
|
||||
|
||||
/*
|
||||
Returns true if \a c is an uppercase Latin1 letter.
|
||||
\note The multiplication sign 0xD7 and the sz ligature 0xDF are not
|
||||
treated as uppercase Latin1.
|
||||
Returns true if \a c is an uppercase ASCII letter.
|
||||
*/
|
||||
static inline bool isUpperCaseLatin1(char c)
|
||||
static constexpr inline bool isUpperCaseAscii(char c)
|
||||
{
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
return true;
|
||||
|
||||
return (uchar(c) >= 0xC0 && uchar(c) <= 0xDE && uchar(c) != 0xD7);
|
||||
return c >= 'A' && c <= 'Z';
|
||||
}
|
||||
|
||||
/*!
|
||||
Returns \c true if this byte array contains only uppercase letters,
|
||||
otherwise returns \c false. The byte array is interpreted as a Latin-1
|
||||
encoded string.
|
||||
Returns \c true if this byte array contains only ASCII uppercase letters,
|
||||
otherwise returns \c false.
|
||||
\since 5.12
|
||||
|
||||
\sa isLower(), toUpper()
|
||||
@ -2927,7 +2869,7 @@ bool QByteArray::isUpper() const
|
||||
const char *d = data();
|
||||
|
||||
for (int i = 0, max = size(); i < max; ++i) {
|
||||
if (!isUpperCaseLatin1(d[i]))
|
||||
if (!isUpperCaseAscii(d[i]))
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -2935,22 +2877,16 @@ bool QByteArray::isUpper() const
|
||||
}
|
||||
|
||||
/*
|
||||
Returns true if \a c is an lowercase Latin1 letter.
|
||||
\note The division sign 0xF7 is not treated as lowercase Latin1,
|
||||
but the small y dieresis 0xFF is.
|
||||
Returns true if \a c is a lowercase ASCII letter.
|
||||
*/
|
||||
static inline bool isLowerCaseLatin1(char c)
|
||||
static constexpr inline bool isLowerCaseAscii(char c)
|
||||
{
|
||||
if (c >= 'a' && c <= 'z')
|
||||
return true;
|
||||
|
||||
return (uchar(c) >= 0xD0 && uchar(c) != 0xF7);
|
||||
return c >= 'a' && c <= 'z';
|
||||
}
|
||||
|
||||
/*!
|
||||
Returns \c true if this byte array contains only lowercase letters,
|
||||
otherwise returns \c false. The byte array is interpreted as a Latin-1
|
||||
encoded string.
|
||||
Returns \c true if this byte array contains only lowercase ASCII letters,
|
||||
otherwise returns \c false.
|
||||
\since 5.12
|
||||
|
||||
\sa isUpper(), toLower()
|
||||
@ -2963,7 +2899,7 @@ bool QByteArray::isLower() const
|
||||
const char *d = data();
|
||||
|
||||
for (int i = 0, max = size(); i < max; ++i) {
|
||||
if (!isLowerCaseLatin1(d[i]))
|
||||
if (!isLowerCaseAscii(d[i]))
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -3076,8 +3012,8 @@ QByteArray QByteArray::mid(int pos, int len) const
|
||||
/*!
|
||||
\fn QByteArray QByteArray::toLower() const
|
||||
|
||||
Returns a lowercase copy of the byte array. The bytearray is
|
||||
interpreted as a Latin-1 encoded string.
|
||||
Returns a copy of the byte array in which each ASCII uppercase letter
|
||||
is converted to lowercase.
|
||||
|
||||
Example:
|
||||
\snippet code/src_corelib_text_qbytearray.cpp 30
|
||||
@ -3090,7 +3026,7 @@ QByteArray QByteArray::mid(int pos, int len) const
|
||||
// (even with constant propagation, there's no gain in performance).
|
||||
template <typename T>
|
||||
Q_NEVER_INLINE
|
||||
static QByteArray toCase_template(T &input, const uchar * table)
|
||||
static QByteArray toCase_template(T &input, uchar (*lookup)(uchar))
|
||||
{
|
||||
// find the first bad character in input
|
||||
const char *orig_begin = input.constBegin();
|
||||
@ -3098,7 +3034,7 @@ static QByteArray toCase_template(T &input, const uchar * table)
|
||||
const char *e = input.constEnd();
|
||||
for ( ; firstBad != e ; ++firstBad) {
|
||||
uchar ch = uchar(*firstBad);
|
||||
uchar converted = table[ch];
|
||||
uchar converted = lookup(ch);
|
||||
if (ch != converted)
|
||||
break;
|
||||
}
|
||||
@ -3111,27 +3047,26 @@ static QByteArray toCase_template(T &input, const uchar * table)
|
||||
char *b = s.begin(); // will detach if necessary
|
||||
char *p = b + (firstBad - orig_begin);
|
||||
e = b + s.size();
|
||||
for ( ; p != e; ++p) {
|
||||
*p = char(uchar(table[uchar(*p)]));
|
||||
}
|
||||
for ( ; p != e; ++p)
|
||||
*p = char(lookup(uchar(*p)));
|
||||
return s;
|
||||
}
|
||||
|
||||
QByteArray QByteArray::toLower_helper(const QByteArray &a)
|
||||
{
|
||||
return toCase_template(a, latin1_lowercased);
|
||||
return toCase_template(a, asciiLower);
|
||||
}
|
||||
|
||||
QByteArray QByteArray::toLower_helper(QByteArray &a)
|
||||
{
|
||||
return toCase_template(a, latin1_lowercased);
|
||||
return toCase_template(a, asciiLower);
|
||||
}
|
||||
|
||||
/*!
|
||||
\fn QByteArray QByteArray::toUpper() const
|
||||
|
||||
Returns an uppercase copy of the byte array. The bytearray is
|
||||
interpreted as a Latin-1 encoded string.
|
||||
Returns a copy of the byte array in which each ASCII lowercase letter
|
||||
is converted to uppercase.
|
||||
|
||||
Example:
|
||||
\snippet code/src_corelib_text_qbytearray.cpp 31
|
||||
@ -3141,12 +3076,12 @@ QByteArray QByteArray::toLower_helper(QByteArray &a)
|
||||
|
||||
QByteArray QByteArray::toUpper_helper(const QByteArray &a)
|
||||
{
|
||||
return toCase_template(a, latin1_uppercased);
|
||||
return toCase_template(a, asciiUpper);
|
||||
}
|
||||
|
||||
QByteArray QByteArray::toUpper_helper(QByteArray &a)
|
||||
{
|
||||
return toCase_template(a, latin1_uppercased);
|
||||
return toCase_template(a, asciiUpper);
|
||||
}
|
||||
|
||||
/*! \fn void QByteArray::clear()
|
||||
@ -4226,7 +4161,7 @@ QByteArray &QByteArray::setNum(double n, char f, int prec)
|
||||
QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
|
||||
uint flags = QLocaleData::ZeroPadExponent;
|
||||
|
||||
char lower = latin1_lowercased[uchar(f)];
|
||||
char lower = asciiLower(uchar(f));
|
||||
if (f != lower)
|
||||
flags |= QLocaleData::CapitalEorX;
|
||||
f = lower;
|
||||
@ -4248,7 +4183,7 @@ QByteArray &QByteArray::setNum(double n, char f, int prec)
|
||||
break;
|
||||
}
|
||||
|
||||
*this = QLocaleData::c()->doubleToString(n, prec, form, -1, flags).toLatin1();
|
||||
*this = QLocaleData::c()->doubleToString(n, prec, form, -1, flags).toUtf8();
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1192,6 +1192,46 @@ static int ucstrcmp(const QChar *a, size_t alen, const char *b, size_t blen)
|
||||
return cmp ? cmp : lencmp(alen, blen);
|
||||
}
|
||||
|
||||
static int latin1nicmp(const char *lhsChar, int lSize, const char *rhsChar, int rSize)
|
||||
{
|
||||
constexpr uchar latin1Lower[256] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
|
||||
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
|
||||
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
|
||||
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
|
||||
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
||||
// 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life
|
||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
|
||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
|
||||
};
|
||||
// We're called with QLatin1String's .data() and .size():
|
||||
Q_ASSERT(lSize >= 0 && rSize >= 0);
|
||||
if (!lSize)
|
||||
return rSize ? -1 : 0;
|
||||
if (!rSize)
|
||||
return 1;
|
||||
const int size = std::min(lSize, rSize);
|
||||
|
||||
const uchar *lhs = reinterpret_cast<const uchar *>(lhsChar);
|
||||
const uchar *rhs = reinterpret_cast<const uchar *>(rhsChar);
|
||||
Q_ASSERT(lhs && rhs); // since both lSize and rSize are positive
|
||||
for (int i = 0; i < size; i++) {
|
||||
Q_ASSERT(lhs[i] && rhs[i]);
|
||||
if (int res = latin1Lower[lhs[i]] - latin1Lower[rhs[i]])
|
||||
return res;
|
||||
}
|
||||
return lencmp(lSize, rSize);
|
||||
}
|
||||
|
||||
static int qt_compare_strings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
|
||||
{
|
||||
if (cs == Qt::CaseSensitive)
|
||||
@ -1218,7 +1258,7 @@ static int qt_compare_strings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens
|
||||
if (lhs.isEmpty())
|
||||
return lencmp(0, rhs.size());
|
||||
if (cs == Qt::CaseInsensitive)
|
||||
return qstrnicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
|
||||
return latin1nicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
|
||||
const auto l = std::min(lhs.size(), rhs.size());
|
||||
int r = qstrncmp(lhs.data(), rhs.data(), l);
|
||||
return r ? r : lencmp(lhs.size(), rhs.size());
|
||||
|
@ -176,9 +176,9 @@ QByteArray verifyZeroTermination(const QByteArray &ba)
|
||||
int baSize = ba.size();
|
||||
char baTerminator = ba.constData()[baSize];
|
||||
if ('\0' != baTerminator)
|
||||
return QString::fromLatin1(
|
||||
"*** Result ('%1') not null-terminated: 0x%2 ***").arg(QString::fromLatin1(ba))
|
||||
.arg(baTerminator, 2, 16, QChar('0')).toLatin1();
|
||||
return QString::fromUtf8(
|
||||
"*** Result ('%1') not null-terminated: 0x%2 ***").arg(QString::fromUtf8(ba))
|
||||
.arg(baTerminator, 2, 16, QChar('0')).toUtf8();
|
||||
|
||||
// Skip mutating checks on shared strings
|
||||
if (baDataPtr->isShared())
|
||||
@ -934,30 +934,30 @@ void tst_QByteArray::qstricmp()
|
||||
QFETCH(QString, str1);
|
||||
QFETCH(QString, str2);
|
||||
|
||||
int expected = strcmp(str1.toUpper().toLatin1(),
|
||||
str2.toUpper().toLatin1());
|
||||
int expected = strcmp(str1.toUpper().toUtf8(),
|
||||
str2.toUpper().toUtf8());
|
||||
if ( expected != 0 ) {
|
||||
expected = (expected < 0 ? -1 : 1);
|
||||
}
|
||||
int actual = ::qstricmp(str1.toLatin1(), str2.toLatin1());
|
||||
int actual = ::qstricmp(str1.toUtf8(), str2.toUtf8());
|
||||
if ( actual != 0 ) {
|
||||
actual = (actual < 0 ? -1 : 1);
|
||||
}
|
||||
QCOMPARE(actual, expected);
|
||||
|
||||
actual = ::qstricmp("012345679abcd" + str1.toLatin1(), "012345679AbCd" + str2.toLatin1());
|
||||
actual = ::qstricmp("012345679abcd" + str1.toUtf8(), "012345679AbCd" + str2.toUtf8());
|
||||
if ( actual != 0 ) {
|
||||
actual = (actual < 0 ? -1 : 1);
|
||||
}
|
||||
QCOMPARE(actual, expected);
|
||||
|
||||
actual = str1.toLatin1().compare(str2.toLatin1(), Qt::CaseInsensitive);
|
||||
actual = str1.toUtf8().compare(str2.toUtf8(), Qt::CaseInsensitive);
|
||||
if ( actual != 0 ) {
|
||||
actual = (actual < 0 ? -1 : 1);
|
||||
}
|
||||
QCOMPARE(actual, expected);
|
||||
|
||||
actual = str1.toLatin1().compare(str2.toLatin1().constData(), Qt::CaseInsensitive);
|
||||
actual = str1.toUtf8().compare(str2.toUtf8().constData(), Qt::CaseInsensitive);
|
||||
if ( actual != 0 ) {
|
||||
actual = (actual < 0 ? -1 : 1);
|
||||
}
|
||||
@ -1468,7 +1468,7 @@ void tst_QByteArray::toULong_data()
|
||||
QTest::addColumn<bool>("ok");
|
||||
|
||||
ulong LongMaxPlusOne = (ulong)LONG_MAX + 1;
|
||||
QTest::newRow("LONG_MAX+1") << QString::number(LongMaxPlusOne).toLatin1() << 10 << LongMaxPlusOne << true;
|
||||
QTest::newRow("LONG_MAX+1") << QString::number(LongMaxPlusOne).toUtf8() << 10 << LongMaxPlusOne << true;
|
||||
QTest::newRow("default") << QByteArray() << 10 << 0UL << false;
|
||||
QTest::newRow("empty") << QByteArray("") << 10 << 0UL << false;
|
||||
QTest::newRow("ulong1") << QByteArray("3234567890") << 10 << 3234567890UL << true;
|
||||
@ -1990,7 +1990,7 @@ void tst_QByteArray::compareCharStar()
|
||||
const bool isEqual = result == 0;
|
||||
const bool isLess = result < 0;
|
||||
const bool isGreater = result > 0;
|
||||
QByteArray qba = string2.toLatin1();
|
||||
QByteArray qba = string2.toUtf8();
|
||||
const char *str2 = qba.constData();
|
||||
if (string2.isNull())
|
||||
str2 = 0;
|
||||
@ -2297,6 +2297,14 @@ void tst_QByteArray::toUpperLower_data()
|
||||
QTest::addColumn<QByteArray>("upper");
|
||||
QTest::addColumn<QByteArray>("lower");
|
||||
|
||||
{
|
||||
QByteArray nonAscii(128, Qt::Uninitialized);
|
||||
char *data = nonAscii.data();
|
||||
for (unsigned char i = 0; i < 128; ++i)
|
||||
data[i] = i + 128;
|
||||
QTest::newRow("non-ASCII") << nonAscii << nonAscii << nonAscii;
|
||||
}
|
||||
|
||||
QTest::newRow("empty") << QByteArray() << QByteArray() << QByteArray();
|
||||
QTest::newRow("literal") << QByteArrayLiteral("Hello World")
|
||||
<< QByteArrayLiteral("HELLO WORLD")
|
||||
@ -2304,9 +2312,6 @@ void tst_QByteArray::toUpperLower_data()
|
||||
QTest::newRow("ascii") << QByteArray("Hello World, this is a STRING")
|
||||
<< QByteArray("HELLO WORLD, THIS IS A STRING")
|
||||
<< QByteArray("hello world, this is a string");
|
||||
QTest::newRow("latin1") << QByteArray("R\311sum\351")
|
||||
<< QByteArray("R\311SUM\311")
|
||||
<< QByteArray("r\351sum\351");
|
||||
QTest::newRow("nul") << QByteArray("a\0B", 3) << QByteArray("A\0B", 3) << QByteArray("a\0b", 3);
|
||||
}
|
||||
|
||||
@ -2350,9 +2355,9 @@ void tst_QByteArray::isUpper()
|
||||
QVERIFY(!QByteArray().isUpper());
|
||||
QVERIFY(!QByteArray("").isUpper());
|
||||
QVERIFY(QByteArray("TEXT").isUpper());
|
||||
QVERIFY(QByteArray("\xD0\xDE").isUpper());
|
||||
QVERIFY(!QByteArray("\xD7").isUpper()); // multiplication sign is not upper
|
||||
QVERIFY(!QByteArray("\xDF").isUpper()); // sz ligature is not upper
|
||||
QVERIFY(!QByteArray("\xD0\xDE").isUpper()); // non-ASCII is neither upper nor lower
|
||||
QVERIFY(!QByteArray("\xD7").isUpper());
|
||||
QVERIFY(!QByteArray("\xDF").isUpper());
|
||||
QVERIFY(!QByteArray("text").isUpper());
|
||||
QVERIFY(!QByteArray("Text").isUpper());
|
||||
QVERIFY(!QByteArray("tExt").isUpper());
|
||||
@ -2373,8 +2378,8 @@ void tst_QByteArray::isLower()
|
||||
QVERIFY(!QByteArray().isLower());
|
||||
QVERIFY(!QByteArray("").isLower());
|
||||
QVERIFY(QByteArray("text").isLower());
|
||||
QVERIFY(QByteArray("\xE0\xFF").isLower());
|
||||
QVERIFY(!QByteArray("\xF7").isLower()); // division sign is not lower
|
||||
QVERIFY(!QByteArray("\xE0\xFF").isLower()); // non-ASCII is neither upper nor lower
|
||||
QVERIFY(!QByteArray("\xF7").isLower());
|
||||
QVERIFY(!QByteArray("Text").isLower());
|
||||
QVERIFY(!QByteArray("tExt").isLower());
|
||||
QVERIFY(!QByteArray("teXt").isLower());
|
||||
@ -2416,7 +2421,6 @@ void tst_QByteArray::stdString()
|
||||
QVERIFY(l1str.length() < utf8str.length());
|
||||
}
|
||||
|
||||
|
||||
const char globalChar = '1';
|
||||
|
||||
QTEST_MAIN(tst_QByteArray)
|
||||
|
@ -972,7 +972,7 @@ void tst_QStringApiSymmetry::compare_data(bool hasConceptOfNullAndEmpty)
|
||||
<< 0 << 0;
|
||||
}
|
||||
|
||||
#define ROW(lhs, rhs) \
|
||||
#define ROW(lhs, rhs, caseless) \
|
||||
do { \
|
||||
static const QString pinned[] = { \
|
||||
QString(QLatin1String(lhs)), \
|
||||
@ -981,16 +981,19 @@ void tst_QStringApiSymmetry::compare_data(bool hasConceptOfNullAndEmpty)
|
||||
QTest::newRow(qUtf8Printable(QLatin1String("'" lhs "' <> '" rhs "': "))) \
|
||||
<< QStringRef(&pinned[0]) << QLatin1String(lhs) \
|
||||
<< QStringRef(&pinned[1]) << QLatin1String(rhs) \
|
||||
<< sign(qstrcmp(lhs, rhs)) << sign(qstricmp(lhs, rhs)); \
|
||||
<< sign(qstrcmp(lhs, rhs)) << caseless; \
|
||||
} while (false)
|
||||
ROW("", "0");
|
||||
ROW("0", "");
|
||||
ROW("0", "1");
|
||||
ROW("0", "0");
|
||||
ROW("10", "0");
|
||||
ROW("01", "1");
|
||||
ROW("\xE4", "\xE4"); // ä <> ä
|
||||
ROW("\xE4", "\xC4"); // ä <> Ä
|
||||
#define ASCIIROW(lhs, rhs) ROW(lhs, rhs, sign(qstricmp(lhs, rhs)))
|
||||
ASCIIROW("", "0");
|
||||
ASCIIROW("0", "");
|
||||
ASCIIROW("0", "1");
|
||||
ASCIIROW("0", "0");
|
||||
ASCIIROW("10", "0");
|
||||
ASCIIROW("01", "1");
|
||||
ASCIIROW("e", "e");
|
||||
ASCIIROW("e", "E");
|
||||
ROW("\xE4", "\xE4", 0); // ä <> ä
|
||||
ROW("\xE4", "\xC4", 0); // ä <> Ä
|
||||
#undef ROW
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user