diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index ae6e6f67ebc..1fae9ef07af 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -3346,11 +3346,15 @@ QString &QString::append(QChar ch) Replaces the contents of this string with a copy of the elements in the iterator range [\a first, \a last) and returns a reference to this string. - The size of this string will be equal to the number of elements in the - range [\a first, \a last). + The size of this string will be equal to the decoded length of the elements + in the range [\a first, \a last), which need not be the same as the length of + the range itself, because this function transparently recodes the input + character set to UTF-16. This function will only allocate memory if the number of elements in the - range exceeds the capacity of this string or this string is shared. + range, or, for non-UTF-16-encoded input, the maximum possible size of the + resulting string, exceeds the capacity of this string, or if this string is + shared. 
\note This function overload only participates in overload resolution if \c InputIterator meets the requirements of a
@@ -3361,6 +3365,7 @@ QString &QString::append(QChar ch)
     \li QLatin1Char
     \li \c char16_t
     \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
+    \li \c char32_t
     \endlist
 
     \note The behavior is undefined if either argument is an iterator into *this or
@@ -3383,6 +3388,43 @@ QString &QString::assign(QAnyStringView s)
     return *this;
 }
 
+/*!
+    \internal
+
+    Replaces the contents of this string with the \a len UTF-32 code points
+    starting at \a data, recoded to UTF-16, and returns a reference to this
+    string.
+
+    The existing buffer is reused when this string is detached and its
+    capacity covers the worst-case result of 2 * \a len UTF-16 code units;
+    otherwise the contents are replaced by the result of fromUcs4().
+*/
+QString &QString::assign_helper(const char32_t *data, qsizetype len)
+{
+    // Worst case: every char32_t needs a surrogate pair, i.e. two UTF-16
+    // code units.
+    const auto requiredCapacity = len * 2;
+    if (requiredCapacity <= capacity() && isDetached()) {
+        // Decode straight into our own buffer. First slide the data pointer
+        // back over any free space at the front of the buffer.
+        const auto offset = d.freeSpaceAtBegin();
+        if (offset)
+            d.setBegin(d.begin() - offset);
+        auto begin = reinterpret_cast<QChar *>(d.begin());
+        auto ba = QByteArrayView(reinterpret_cast<const char *>(data), len * sizeof(char32_t));
+        QStringConverter::State state;
+        // NOTE(review): DetectEndianness presumably makes the converter honor a
+        // leading BOM in the input - confirm this matches the fromUcs4() branch.
+        const auto end = QUtf32::convertToUnicode(begin, ba, &state, DetectEndianness);
+        d.size = end - begin;
+        d.data()[d.size] = u'\0';
+    } else {
+        // Not detached, or capacity too small: build a fresh string instead.
+        *this = QString::fromUcs4(data, len);
+    }
+    return *this;
+}
+
 /*!
\fn QString &QString::remove(qsizetype position, qsizetype n) diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h index 76833147d42..e050ec356e4 100644 --- a/src/corelib/text/qstring.h +++ b/src/corelib/text/qstring.h @@ -47,6 +47,13 @@ class QString; namespace QtPrivate { template class BoolList; + +template +using IsCompatibleChar32TypeHelper = + std::is_same; +template +using IsCompatibleChar32Type + = IsCompatibleChar32TypeHelper>; } // Qt 4.x compatibility @@ -133,6 +140,7 @@ class Q_CORE_EXPORT QString template using is_compatible_char_helper = std::disjunction< QtPrivate::IsCompatibleCharType, + QtPrivate::IsCompatibleChar32Type, std::is_same // special case >; @@ -418,15 +426,32 @@ public: { using V = typename std::iterator_traits::value_type; constexpr bool IsL1C = std::is_same_v, QLatin1Char>; + constexpr bool IsFwdIt = std::is_convertible_v< + typename std::iterator_traits::iterator_category, + std::forward_iterator_tag + >; if constexpr (is_contiguous_iterator_v) { const auto p = q20::to_address(first); const auto len = qsizetype(last - first); if constexpr (IsL1C) return assign(QLatin1StringView(reinterpret_cast(p), len)); + else if constexpr (sizeof(V) == 4) + return assign_helper(p, len); else return assign(QAnyStringView(p, len)); - } else { // non-contiguous iterator, need to feed data piecemeal + } else if constexpr (sizeof(V) == 4) { // non-contiguous iterator, feed data piecemeal + resize(0); + if constexpr (IsFwdIt) { + const qsizetype requiredCapacity = 2 * std::distance(first, last); + reserve(requiredCapacity); + } + while (first != last) { + append(QChar::fromUcs4(*first)); + ++first; + } + return *this; + } else { d.assign(first, last, [](QChar ch) -> char16_t { return ch.unicode(); }); d.data()[d.size] = u'\0'; return *this; @@ -896,6 +921,8 @@ private: void reallocData(qsizetype alloc, QArrayData::AllocationOption option); void reallocGrowData(qsizetype n); + // ### remove once QAnyStringView supports UTF-32: + QString 
&assign_helper(const char32_t *data, qsizetype len); static int compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp index f3d2594e348..eaf35c969ea 100644 --- a/tests/auto/corelib/text/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp @@ -3442,6 +3442,14 @@ void tst_QString::assign() str.assign(c16str.begin(), c16str.end()); QCOMPARE(str, c16); + const char32_t c32[] = U"٩(⁎❛ᴗ❛⁎)۶ 🤷"; + str.assign(std::begin(c32), std::end(c32) - 1); + QCOMPARE(str, c16); + + std::u32string c32str(c32); + str.assign(c32str.begin(), c32str.end()); + QCOMPARE(str, c16); + QVarLengthArray l1ch = {'F'_L1, 'G'_L1, 'H'_L1, 'I'_L1, 'J'_L1}; str.assign(l1ch.begin(), l1ch.end()); QCOMPARE(str, u"FGHIJ");