From b143215c5413a55415abd45ed23b5cf3f691beb5 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 6 Dec 2024 08:28:43 -0800 Subject: [PATCH] QString: update docs to prefer "UTF-32" over "UCS-4" They are now the same, but the name UTF-32 is preferred over UCS-4. The original ISO-10646 UCS-4 encoding was allowed to use all 31-bit code units, from 0 to 0x7FFFFFFF[1] including those above 0x10FFFF, which correspond to UTF-8's five- and six-byte sequences. Unicode doesn't allow that and restricts the UTF to the range possible in UTF-16. Renaming the functions is left as an exercise for the reader. [1] https://en.wikipedia.org/wiki/UTF-32#History Pick-to: 6.8 Change-Id: I2f29db62b974cb689585fffd9a6434ae252a7651 Reviewed-by: Edward Welbourne (cherry picked from commit 973d0c4c5160200c188f81da5df064510315f22d) Reviewed-by: Qt Cherry-pick Bot --- src/corelib/text/qstring.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 68596b9fa11..6c522b81bbc 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -2404,7 +2404,7 @@ encoded in \1, and is converted to QString using the \2 function. Reads the first \a size code units of the \c wchar_t array to whose start \a string points, converting them to Unicode and returning the result as - a QString. The encoding used by \c wchar_t is assumed to be UCS-4 if the + a QString. The encoding used by \c wchar_t is assumed to be UTF-32 if the type's size is four bytes or UTF-16 if its size is two bytes. If \a size is -1 (default), the \a string must be '\\0'-terminated. @@ -2443,7 +2443,7 @@ qsizetype QString::toUcs4_helper(const char16_t *uc, qsizetype length, char32_t Fills the \a array with the data contained in this QString object. The array is encoded in UTF-16 on platforms where - wchar_t is 2 bytes wide (e.g. windows) and in UCS-4 on platforms + wchar_t is 2 bytes wide (e.g. 
windows) and in UTF-32 on platforms where wchar_t is 4 bytes wide (most Unix systems). \a array has to be allocated by the caller and contain enough space to @@ -5846,8 +5846,8 @@ static QList<uint> qt_convert_to_ucs4(QStringView string); Returns a UCS-4/UTF-32 representation of the string as a QList<uint>. - UCS-4 is a Unicode codec and therefore it is lossless. All characters from - this string will be encoded in UCS-4. Any invalid sequence of code units in + UTF-32 is a Unicode codec and therefore it is lossless. All characters from + this string will be encoded in UTF-32. Any invalid sequence of code units in this string is replaced by the Unicode's replacement character (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}). @@ -5879,8 +5879,8 @@ static QList<uint> qt_convert_to_ucs4(QStringView string) Returns a UCS-4/UTF-32 representation of \a string as a QList<uint>. - UCS-4 is a Unicode codec and therefore it is lossless. All characters from - this string will be encoded in UCS-4. Any invalid sequence of code units in + UTF-32 is a Unicode codec and therefore it is lossless. All characters from + this string will be encoded in UTF-32. Any invalid sequence of code units in this string is replaced by the Unicode's replacement character (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}). @@ -6103,7 +6103,7 @@ QString QString::fromUtf16(const char16_t *unicode, qsizetype size) \since 5.3 Returns a QString initialized with the first \a size characters - of the Unicode string \a unicode (ISO-10646-UCS-4 encoded). + of the Unicode string \a unicode (encoded as UTF-32). If \a size is -1 (default), \a unicode must be \\0'-terminated. @@ -9413,7 +9413,7 @@ QString &QString::setRawData(const QChar *unicode, qsizetype size) /*!
\fn QString QString::fromStdU32String(const std::u32string &str) \since 5.5 - \include qstring.cpp {from-std-string} {UCS-4} {fromUcs4()} + \include qstring.cpp {from-std-string} {UTF-32} {fromUcs4()} \sa fromUcs4(), fromStdWString(), fromStdU16String() */