Convert a couple of APIs to use views

Try to get rid of APIs that take raw '(const char *, length)' or
'(const QChar *, length)' pairs. Instead, use QByteArrayView or QStringView.

As QStringConverter is a new class, simply change its API to what we'd like
to have. Also adjust the internal API in QStringBuilder and friends.

Change-Id: I897d47f63a7b965f5574a1e51da64147f9e981f6
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
Lars Knoll 2020-07-30 17:36:14 +02:00 committed by Mårten Nordheim
parent 40874625f9
commit fa8d021fa6
18 changed files with 201 additions and 204 deletions

View File

@ -418,7 +418,7 @@ bool TextEdit::load(const QString &f)
QByteArray data = file.readAll(); QByteArray data = file.readAll();
QMimeDatabase db; QMimeDatabase db;
if (db.mimeTypeForFileNameAndData(f, data).name() == QLatin1String("text/html")) { if (db.mimeTypeForFileNameAndData(f, data).name() == QLatin1String("text/html")) {
auto encoding = QStringDecoder::encodingForHtml(data.constData(), data.size()); auto encoding = QStringDecoder::encodingForHtml(data);
QString str = QStringDecoder(encoding ? *encoding : QStringDecoder::Utf8)(data); QString str = QStringDecoder(encoding ? *encoding : QStringDecoder::Utf8)(data);
QUrl baseUrl = (f.front() == QLatin1Char(':') ? QUrl(f) : QUrl::fromLocalFile(f)).adjusted(QUrl::RemoveFilename); QUrl baseUrl = (f.front() == QLatin1Char(':') ? QUrl(f) : QUrl::fromLocalFile(f)).adjusted(QUrl::RemoveFilename);
textEdit->document()->setBaseUrl(baseUrl); textEdit->document()->setBaseUrl(baseUrl);

View File

@ -56,7 +56,7 @@ static bool checkNameDecodable(const char *d_name, qsizetype len)
// This function is called in a loop from advance() below, but the loop is // This function is called in a loop from advance() below, but the loop is
// usually run only once. // usually run only once.
return QUtf8::isValidUtf8(d_name, len).isValidUtf8; return QUtf8::isValidUtf8(QByteArrayView(d_name, len)).isValidUtf8;
} }
QFileSystemIterator::QFileSystemIterator(const QFileSystemEntry &entry, QDir::Filters filters, QFileSystemIterator::QFileSystemIterator(const QFileSystemEntry &entry, QDir::Filters filters,

View File

@ -667,7 +667,7 @@ void QSettingsPrivate::iniEscapedString(const QString &str, QByteArray &result)
escapeNextIfDigit = true; escapeNextIfDigit = true;
} else if (useCodec) { } else if (useCodec) {
// slow // slow
result += toUtf8(&unicode[i], 1); result += toUtf8(unicode[i]);
} else { } else {
result += (char)ch; result += (char)ch;
} }
@ -815,7 +815,7 @@ StNormal:
++j; ++j;
} }
stringResult += fromUtf8(str.constData() + i, j - i); stringResult += fromUtf8(QByteArrayView(str).first(j).sliced(i));
i = j; i = j;
} }
} }

View File

@ -159,7 +159,7 @@ QVariant QMimeDataPrivate::retrieveTypedData(const QString &format, QMetaType ty
case QMetaType::QString: { case QMetaType::QString: {
const QByteArray ba = data.toByteArray(); const QByteArray ba = data.toByteArray();
if (format == QLatin1String("text/html")) { if (format == QLatin1String("text/html")) {
auto encoding = QStringConverter::encodingForHtml(ba.constData(), ba.size()); auto encoding = QStringConverter::encodingForHtml(ba);
if (encoding) { if (encoding) {
QStringDecoder toUtf16(*encoding); QStringDecoder toUtf16(*encoding);
return QString(toUtf16(ba)); return QString(toUtf16(ba));

View File

@ -1061,7 +1061,7 @@ bool QCborStreamReader::next(int maxRecursion)
d->handleError(CborErrorDataTooLarge); d->handleError(CborErrorDataTooLarge);
break; break;
} }
if (isString() && !QUtf8::isValidUtf8(r.data, r.data.size()).isValidUtf8) { if (isString() && !QUtf8::isValidUtf8(r.data).isValidUtf8) {
d->handleError(CborErrorInvalidUtf8TextString); d->handleError(CborErrorInvalidUtf8TextString);
break; break;
} }
@ -1350,7 +1350,7 @@ QCborStreamReader::StringResult<QString> QCborStreamReader::_readString_helper()
err = CborErrorDataTooLarge; err = CborErrorDataTooLarge;
} else { } else {
QStringConverter::State cs(QStringConverter::Flag::Stateless); QStringConverter::State cs(QStringConverter::Flag::Stateless);
result.data = QUtf8::convertToUnicode(r.data, r.data.size(), &cs); result.data = QUtf8::convertToUnicode(r.data, &cs);
if (cs.invalidChars != 0 || cs.remainingChars != 0) if (cs.invalidChars != 0 || cs.remainingChars != 0)
err = CborErrorInvalidUtf8TextString; err = CborErrorInvalidUtf8TextString;
} }

View File

@ -1621,7 +1621,7 @@ void QCborContainerPrivate::decodeStringFromCbor(QCborStreamReader &reader)
while (r.status == QCborStreamReader::Ok) { while (r.status == QCborStreamReader::Ok) {
if (e.type == QCborValue::String && len) { if (e.type == QCborValue::String && len) {
// verify UTF-8 string validity // verify UTF-8 string validity
auto utf8result = QUtf8::isValidUtf8(dataPtr() + data.size() - len, len); auto utf8result = QUtf8::isValidUtf8(QByteArrayView(dataPtr(), data.size()).last(len));
if (!utf8result.isValidUtf8) { if (!utf8result.isValidUtf8) {
r.status = QCborStreamReader::Error; r.status = QCborStreamReader::Error;
setErrorInReader(reader, { QCborError::InvalidUtf8String }); setErrorInReader(reader, { QCborError::InvalidUtf8String });

View File

@ -364,12 +364,12 @@ public:
static int compareUtf8(const QtCbor::ByteData *b, const QLatin1String &s) static int compareUtf8(const QtCbor::ByteData *b, const QLatin1String &s)
{ {
return QUtf8::compareUtf8(b->byte(), b->len, s); return QUtf8::compareUtf8(QByteArrayView(b->byte(), b->len), s);
} }
static int compareUtf8(const QtCbor::ByteData *b, QStringView s) static int compareUtf8(const QtCbor::ByteData *b, QStringView s)
{ {
return QUtf8::compareUtf8(b->byte(), b->len, s.data(), s.size()); return QUtf8::compareUtf8(QByteArrayView(b->byte(), b->len), s);
} }
template<typename String> template<typename String>

View File

@ -434,7 +434,7 @@ bool QTextStreamPrivate::fillReadBuffer(qint64 maxBytes)
if (autoDetectUnicode) { if (autoDetectUnicode) {
autoDetectUnicode = false; autoDetectUnicode = false;
auto e = QStringConverter::encodingForData(buf, bytesRead); auto e = QStringConverter::encodingForData(QByteArrayView(buf, bytesRead));
// QStringConverter::Locale implies unknown, so keep the current encoding // QStringConverter::Locale implies unknown, so keep the current encoding
if (e) { if (e) {
encoding = *e; encoding = *e;
@ -452,7 +452,7 @@ bool QTextStreamPrivate::fillReadBuffer(qint64 maxBytes)
#endif #endif
int oldReadBufferSize = readBuffer.size(); int oldReadBufferSize = readBuffer.size();
readBuffer += toUtf16(buf, bytesRead); readBuffer += toUtf16(QByteArrayView(buf, bytesRead));
// remove all '\r\n' in the string. // remove all '\r\n' in the string.
if (readBuffer.size() > oldReadBufferSize && textModeEnabled) { if (readBuffer.size() > oldReadBufferSize && textModeEnabled) {
@ -2247,7 +2247,7 @@ QTextStream &QTextStream::operator>>(char *c)
} }
QStringEncoder encoder(QStringConverter::Utf8); QStringEncoder encoder(QStringConverter::Utf8);
char *e = encoder.appendToBuffer(c, ptr, length); char *e = encoder.appendToBuffer(c, QStringView(ptr, length));
*e = '\0'; *e = '\0';
d->consumeLastToken(); d->consumeLastToken();
return *this; return *this;

View File

@ -1505,14 +1505,14 @@ uint QXmlStreamReaderPrivate::getChar_helper()
atEnd = true; atEnd = true;
return StreamEOF; return StreamEOF;
} }
auto encoding = QStringDecoder::encodingForData(rawReadBuffer.constData(), rawReadBuffer.size(), char16_t('<')); auto encoding = QStringDecoder::encodingForData(rawReadBuffer, char16_t('<'));
if (!encoding) if (!encoding)
// assume utf-8 // assume utf-8
encoding = QStringDecoder::Utf8; encoding = QStringDecoder::Utf8;
decoder = QStringDecoder(*encoding); decoder = QStringDecoder(*encoding);
} }
readBuffer = decoder(rawReadBuffer.constData(), nbytesread); readBuffer = decoder(QByteArrayView(rawReadBuffer).first(nbytesread));
if (lockEncoding && decoder.hasError()) { if (lockEncoding && decoder.hasError()) {
raiseWellFormedError(QXmlStream::tr("Encountered incorrectly encoded content.")); raiseWellFormedError(QXmlStream::tr("Encountered incorrectly encoded content."));
@ -1794,7 +1794,7 @@ void QXmlStreamReaderPrivate::startDocument()
if (!decoder.isValid()) { if (!decoder.isValid()) {
err = QXmlStream::tr("Encoding %1 is unsupported").arg(value); err = QXmlStream::tr("Encoding %1 is unsupported").arg(value);
} else { } else {
readBuffer = decoder(rawReadBuffer.data(), nbytesread); readBuffer = decoder(QByteArrayView(rawReadBuffer).first(nbytesread));
} }
} }
} }

View File

@ -1285,7 +1285,7 @@ static int qt_compare_strings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens
static int qt_compare_strings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept static int qt_compare_strings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
{ {
if (cs == Qt::CaseSensitive) if (cs == Qt::CaseSensitive)
return QUtf8::compareUtf8(lhs.data(), lhs.size(), rhs.data(), rhs.size()); return QUtf8::compareUtf8(lhs, rhs);
else else
return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
} }
@ -5059,7 +5059,7 @@ static QByteArray qt_convert_to_utf8(QStringView str)
if (str.isNull()) if (str.isNull())
return QByteArray(); return QByteArray();
return QUtf8::convertFromUnicode(str.data(), str.length()); return QUtf8::convertFromUnicode(str);
} }
/*! /*!
@ -5210,7 +5210,7 @@ QString QString::fromLocal8Bit(QByteArrayView ba)
if (ba.isEmpty()) if (ba.isEmpty())
return QString(DataPointer::fromRawData(&_empty, 0)); return QString(DataPointer::fromRawData(&_empty, 0));
QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless); QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
return toUtf16(ba.data(), ba.size()); return toUtf16(ba);
} }
/*! \fn QString QString::fromUtf8(const char *str, qsizetype size) /*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
@ -5263,7 +5263,7 @@ QString QString::fromUtf8(QByteArrayView ba)
return QString(); return QString();
if (ba.isEmpty()) if (ba.isEmpty())
return QString(DataPointer::fromRawData(&_empty, 0)); return QString(DataPointer::fromRawData(&_empty, 0));
return QUtf8::convertToUnicode(ba.data(), ba.size()); return QUtf8::convertToUnicode(ba);
} }
/*! /*!
@ -5293,7 +5293,7 @@ QString QString::fromUtf16(const char16_t *unicode, qsizetype size)
++size; ++size;
} }
QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless); QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless);
return toUtf16(reinterpret_cast<const char *>(unicode), size*2); return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 2));
} }
/*! /*!
@ -5331,7 +5331,7 @@ QString QString::fromUcs4(const char32_t *unicode, qsizetype size)
++size; ++size;
} }
QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless); QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless);
return toUtf16(reinterpret_cast<const char *>(unicode), size*4); return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 4));
} }
@ -5951,7 +5951,7 @@ int QString::compare_helper(const QChar *data1, qsizetype length1, const char *d
// ### make me nothrow in all cases // ### make me nothrow in all cases
QVarLengthArray<ushort> s2(length2); QVarLengthArray<ushort> s2(length2);
const auto beg = reinterpret_cast<QChar *>(s2.data()); const auto beg = reinterpret_cast<QChar *>(s2.data());
const auto end = QUtf8::convertToUnicode(beg, data2, length2); const auto end = QUtf8::convertToUnicode(beg, QByteArrayView(data2, length2));
return qt_compare_strings(QStringView(data1, length1), QStringView(beg, end - beg), cs); return qt_compare_strings(QStringView(data1, length1), QStringView(beg, end - beg), cs);
} }
@ -6423,7 +6423,7 @@ static void append_utf8(QString &qs, const char *cs, int len)
{ {
const int oldSize = qs.size(); const int oldSize = qs.size();
qs.resize(oldSize + len); qs.resize(oldSize + len);
const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, cs, len); const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, QByteArrayView(cs, len));
qs.resize(newEnd - qs.constData()); qs.resize(newEnd - qs.constData());
} }
@ -10228,9 +10228,9 @@ QString QString::toHtmlEscaped() const
/*! /*!
\internal \internal
*/ */
void QAbstractConcatenable::appendLatin1To(const char *a, int len, QChar *out) noexcept void QAbstractConcatenable::appendLatin1To(QLatin1String in, QChar *out) noexcept
{ {
qt_from_latin1(reinterpret_cast<char16_t *>(out), a, size_t(len)); qt_from_latin1(reinterpret_cast<char16_t *>(out), in.data(), size_t(in.size()));
} }
double QStringView::toDouble(bool *ok) const double QStringView::toDouble(bool *ok) const

View File

@ -124,14 +124,9 @@ QT_BEGIN_NAMESPACE
/*! /*!
\internal \internal
*/ */
void QAbstractConcatenable::convertFromAscii(const char *a, int len, QChar *&out) noexcept void QAbstractConcatenable::convertFromUtf8(QByteArrayView in, QChar *&out) noexcept
{ {
if (Q_UNLIKELY(len == -1)) { out = QUtf8::convertToUnicode(out, in);
if (!a)
return;
len = int(strlen(a));
}
out = QUtf8::convertToUnicode(out, a, len);
} }
QT_END_NAMESPACE QT_END_NAMESPACE

View File

@ -58,12 +58,12 @@ QT_BEGIN_NAMESPACE
struct Q_CORE_EXPORT QAbstractConcatenable struct Q_CORE_EXPORT QAbstractConcatenable
{ {
protected: protected:
static void convertFromAscii(const char *a, int len, QChar *&out) noexcept; static void convertFromUtf8(QByteArrayView in, QChar *&out) noexcept;
static inline void convertFromAscii(char a, QChar *&out) noexcept static inline void convertFromAscii(char a, QChar *&out) noexcept
{ {
*out++ = QLatin1Char(a); *out++ = QLatin1Char(a);
} }
static void appendLatin1To(const char *a, int len, QChar *out) noexcept; static void appendLatin1To(QLatin1String in, QChar *out) noexcept;
}; };
template <typename T> struct QConcatenable {}; template <typename T> struct QConcatenable {};
@ -237,7 +237,7 @@ template <> struct QConcatenable<QLatin1String> : private QAbstractConcatenable
static qsizetype size(const QLatin1String a) { return a.size(); } static qsizetype size(const QLatin1String a) { return a.size(); }
static inline void appendTo(const QLatin1String a, QChar *&out) static inline void appendTo(const QLatin1String a, QChar *&out)
{ {
appendLatin1To(a.latin1(), a.size(), out); appendLatin1To(a, out);
out += a.size(); out += a.size();
} }
static inline void appendTo(const QLatin1String a, char *&out) static inline void appendTo(const QLatin1String a, char *&out)
@ -288,7 +288,7 @@ template <int N> struct QConcatenable<const char[N]> : private QAbstractConcaten
#ifndef QT_NO_CAST_FROM_ASCII #ifndef QT_NO_CAST_FROM_ASCII
QT_ASCII_CAST_WARN static inline void appendTo(const char a[N], QChar *&out) QT_ASCII_CAST_WARN static inline void appendTo(const char a[N], QChar *&out)
{ {
QAbstractConcatenable::convertFromAscii(a, N - 1, out); QAbstractConcatenable::convertFromUtf8(QByteArrayView(a, N - 1), out);
} }
#endif #endif
static inline void appendTo(const char a[N], char *&out) static inline void appendTo(const char a[N], char *&out)
@ -311,7 +311,7 @@ template <> struct QConcatenable<const char *> : private QAbstractConcatenable
static qsizetype size(const char *a) { return qstrlen(a); } static qsizetype size(const char *a) { return qstrlen(a); }
#ifndef QT_NO_CAST_FROM_ASCII #ifndef QT_NO_CAST_FROM_ASCII
QT_ASCII_CAST_WARN static inline void appendTo(const char *a, QChar *&out) QT_ASCII_CAST_WARN static inline void appendTo(const char *a, QChar *&out)
{ QAbstractConcatenable::convertFromAscii(a, -1, out); } { QAbstractConcatenable::convertFromUtf8(QByteArrayView(a), out); }
#endif #endif
static inline void appendTo(const char *a, char *&out) static inline void appendTo(const char *a, char *&out)
{ {
@ -374,7 +374,7 @@ template <> struct QConcatenable<QByteArray> : private QAbstractConcatenable
#ifndef QT_NO_CAST_FROM_ASCII #ifndef QT_NO_CAST_FROM_ASCII
QT_ASCII_CAST_WARN static inline void appendTo(const QByteArray &ba, QChar *&out) QT_ASCII_CAST_WARN static inline void appendTo(const QByteArray &ba, QChar *&out)
{ {
QAbstractConcatenable::convertFromAscii(ba.constData(), ba.size(), out); QAbstractConcatenable::convertFromUtf8(ba, out);
} }
#endif #endif
static inline void appendTo(const QByteArray &ba, char *&out) static inline void appendTo(const QByteArray &ba, char *&out)

View File

@ -484,12 +484,14 @@ static void simdCompareAscii(const char8_t *&, const char8_t *, const char16_t *
enum { HeaderDone = 1 }; enum { HeaderDone = 1 };
QByteArray QUtf8::convertFromUnicode(const QChar *uc, qsizetype len) QByteArray QUtf8::convertFromUnicode(QStringView in)
{ {
qsizetype len = in.size();
// create a QByteArray with the worst case scenario size // create a QByteArray with the worst case scenario size
QByteArray result(len * 3, Qt::Uninitialized); QByteArray result(len * 3, Qt::Uninitialized);
uchar *dst = reinterpret_cast<uchar *>(const_cast<char *>(result.constData())); uchar *dst = reinterpret_cast<uchar *>(const_cast<char *>(result.constData()));
const ushort *src = reinterpret_cast<const ushort *>(uc); const ushort *src = reinterpret_cast<const ushort *>(in.data());
const ushort *const end = src + len; const ushort *const end = src + len;
while (src != end) { while (src != end) {
@ -511,10 +513,10 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, qsizetype len)
return result; return result;
} }
QByteArray QUtf8::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverterBase::State *state) QByteArray QUtf8::convertFromUnicode(QStringView in, QStringConverterBase::State *state)
{ {
QByteArray ba(3*len +3, Qt::Uninitialized); QByteArray ba(3*in.size() +3, Qt::Uninitialized);
char *end = convertFromUnicode(ba.data(), QStringView(uc, len), state); char *end = convertFromUnicode(ba.data(), in, state);
ba.truncate(end - ba.data()); ba.truncate(end - ba.data());
return ba; return ba;
} }
@ -590,7 +592,7 @@ char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::Sta
return reinterpret_cast<char *>(cursor); return reinterpret_cast<char *>(cursor);
} }
QString QUtf8::convertToUnicode(const char *chars, qsizetype len) QString QUtf8::convertToUnicode(QByteArrayView in)
{ {
// UTF-8 to UTF-16 always needs the exact same number of words or less: // UTF-8 to UTF-16 always needs the exact same number of words or less:
// UTF-8 UTF-16 // UTF-8 UTF-16
@ -604,9 +606,9 @@ QString QUtf8::convertToUnicode(const char *chars, qsizetype len)
// //
// The table holds for invalid sequences too: we'll insert one replacement char // The table holds for invalid sequences too: we'll insert one replacement char
// per invalid byte. // per invalid byte.
QString result(len, Qt::Uninitialized); QString result(in.size(), Qt::Uninitialized);
QChar *data = const_cast<QChar*>(result.constData()); // we know we're not shared QChar *data = const_cast<QChar*>(result.constData()); // we know we're not shared
const QChar *end = convertToUnicode(data, chars, len); const QChar *end = convertToUnicode(data, in);
result.truncate(end - data); result.truncate(end - data);
return result; return result;
} }
@ -615,10 +617,10 @@ QString QUtf8::convertToUnicode(const char *chars, qsizetype len)
\since 5.7 \since 5.7
\overload \overload
Converts the UTF-8 sequence of \a len octets beginning at \a chars to Converts the UTF-8 sequence of bytes viewed by \a in to a sequence of
a sequence of QChar starting at \a buffer. The buffer is expected to be QChar starting at \a buffer. The buffer is expected to be large enough
large enough to hold the result. An upper bound for the size of the to hold the result. An upper bound for the size of the buffer is
buffer is \a len QChars. \c in.size() QChars.
If, during decoding, an error occurs, a QChar::ReplacementCharacter is If, during decoding, an error occurs, a QChar::ReplacementCharacter is
written. written.
@ -628,18 +630,19 @@ QString QUtf8::convertToUnicode(const char *chars, qsizetype len)
This function never throws. This function never throws.
*/ */
QChar *QUtf8::convertToUnicode(QChar *buffer, const char *chars, qsizetype len) noexcept QChar *QUtf8::convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
{ {
ushort *dst = reinterpret_cast<ushort *>(buffer); ushort *dst = reinterpret_cast<ushort *>(buffer);
const uchar *src = reinterpret_cast<const uchar *>(chars); const uchar *const start = reinterpret_cast<const uchar *>(in.data());
const uchar *end = src + len; const uchar *src = start;
const uchar *end = src + in.size();
// attempt to do a full decoding in SIMD // attempt to do a full decoding in SIMD
const uchar *nextAscii = end; const uchar *nextAscii = end;
if (!simdDecodeAscii(dst, nextAscii, src, end)) { if (!simdDecodeAscii(dst, nextAscii, src, end)) {
// at least one non-ASCII entry // at least one non-ASCII entry
// check if we failed to decode the UTF-8 BOM; if so, skip it // check if we failed to decode the UTF-8 BOM; if so, skip it
if (Q_UNLIKELY(src == reinterpret_cast<const uchar *>(chars)) if (Q_UNLIKELY(src == start)
&& end - src >= 3 && end - src >= 3
&& Q_UNLIKELY(src[0] == utf8bom[0] && src[1] == utf8bom[1] && src[2] == utf8bom[2])) { && Q_UNLIKELY(src[0] == utf8bom[0] && src[1] == utf8bom[1] && src[2] == utf8bom[2])) {
src += 3; src += 3;
@ -664,7 +667,7 @@ QChar *QUtf8::convertToUnicode(QChar *buffer, const char *chars, qsizetype len)
return reinterpret_cast<QChar *>(dst); return reinterpret_cast<QChar *>(dst);
} }
QString QUtf8::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state) QString QUtf8::convertToUnicode(QByteArrayView in, QStringConverter::State *state)
{ {
// See above for buffer requirements for stateless decoding. However, that // See above for buffer requirements for stateless decoding. However, that
// fails if the state is not empty. The following situations can add to the // fails if the state is not empty. The following situations can add to the
@ -676,14 +679,16 @@ QString QUtf8::convertToUnicode(const char *chars, qsizetype len, QStringConvert
// 1 of 2 bytes invalid continuation +1 (need to insert replacement and restart) // 1 of 2 bytes invalid continuation +1 (need to insert replacement and restart)
// 2 of 3 bytes same +1 (same) // 2 of 3 bytes same +1 (same)
// 3 of 4 bytes same +1 (same) // 3 of 4 bytes same +1 (same)
QString result(len + 1, Qt::Uninitialized); QString result(in.size() + 1, Qt::Uninitialized);
QChar *end = convertToUnicode(result.data(), chars, len, state); QChar *end = convertToUnicode(result.data(), in, state);
result.truncate(end - result.constData()); result.truncate(end - result.constData());
return result; return result;
} }
QChar *QUtf8::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state) QChar *QUtf8::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state)
{ {
qsizetype len = in.size();
Q_ASSERT(state); Q_ASSERT(state);
if (!len) if (!len)
return out; return out;
@ -697,7 +702,7 @@ QChar *QUtf8::convertToUnicode(QChar *out, const char *chars, qsizetype len, QSt
uchar ch = 0; uchar ch = 0;
ushort *dst = reinterpret_cast<ushort *>(out); ushort *dst = reinterpret_cast<ushort *>(out);
const uchar *src = reinterpret_cast<const uchar *>(chars); const uchar *src = reinterpret_cast<const uchar *>(in.data());
const uchar *end = src + len; const uchar *end = src + len;
if (!(state->flags & QStringConverter::Flag::Stateless)) { if (!(state->flags & QStringConverter::Flag::Stateless)) {
@ -790,10 +795,10 @@ struct QUtf8NoOutputTraits : public QUtf8BaseTraitsNoAscii
static void appendUcs4(const NoOutput &, uint) {} static void appendUcs4(const NoOutput &, uint) {}
}; };
QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len) QUtf8::ValidUtf8Result QUtf8::isValidUtf8(QByteArrayView in)
{ {
const uchar *src = reinterpret_cast<const uchar *>(chars); const uchar *src = reinterpret_cast<const uchar *>(in.data());
const uchar *end = src + len; const uchar *end = src + in.size();
const uchar *nextAscii = src; const uchar *nextAscii = src;
bool isValidAscii = true; bool isValidAscii = true;
@ -821,12 +826,12 @@ QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len)
return { true, isValidAscii }; return { true, isValidAscii };
} }
int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len) noexcept int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept
{ {
auto src1 = reinterpret_cast<const char8_t *>(utf8); auto src1 = reinterpret_cast<const char8_t *>(utf8.data());
auto end1 = src1 + u8len; auto end1 = src1 + utf8.size();
auto src2 = reinterpret_cast<const char16_t *>(utf16); auto src2 = reinterpret_cast<const char16_t *>(utf16.data());
auto end2 = src2 + u16len; auto end2 = src2 + utf16.size();
do { do {
simdCompareAscii(src1, end1, src2, end2); simdCompareAscii(src1, end1, src2, end2);
@ -858,11 +863,11 @@ int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qs
return (end1 > src1) - int(end2 > src2); return (end1 > src1) - int(end2 > src2);
} }
int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, QLatin1String s) int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1String s)
{ {
uint uc1 = QChar::Null; uint uc1 = QChar::Null;
auto src1 = reinterpret_cast<const uchar *>(utf8); auto src1 = reinterpret_cast<const uchar *>(utf8.data());
auto end1 = src1 + u8len; auto end1 = src1 + utf8.size();
auto src2 = reinterpret_cast<const uchar *>(s.latin1()); auto src2 = reinterpret_cast<const uchar *>(s.latin1());
auto end2 = src2 + s.size(); auto end2 = src2 + s.size();
@ -884,15 +889,15 @@ int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, QLatin1String s)
return (end1 > src1) - (end2 > src2); return (end1 > src1) - (end2 > src2);
} }
QByteArray QUtf16::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian) QByteArray QUtf16::convertFromUnicode(QStringView in, QStringConverter::State *state, DataEndianness endian)
{ {
bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom; bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
qsizetype length = 2*len; qsizetype length = 2 * in.size();
if (writeBom) if (writeBom)
length += 2; length += 2;
QByteArray d(length, Qt::Uninitialized); QByteArray d(length, Qt::Uninitialized);
char *end = convertFromUnicode(d.data(), QStringView(uc, len), state, endian); char *end = convertFromUnicode(d.data(), in, state, endian);
Q_ASSERT(end - d.constData() == d.length()); Q_ASSERT(end - d.constData() == d.length());
Q_UNUSED(end); Q_UNUSED(end);
return d; return d;
@ -924,16 +929,19 @@ char *QUtf16::convertFromUnicode(char *out, QStringView in, QStringConverter::St
return out + 2*in.length(); return out + 2*in.length();
} }
QString QUtf16::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) QString QUtf16::convertToUnicode(QByteArrayView in, QStringConverter::State *state, DataEndianness endian)
{ {
QString result((len + 1) >> 1, Qt::Uninitialized); // worst case QString result((in.size() + 1) >> 1, Qt::Uninitialized); // worst case
QChar *qch = convertToUnicode(result.data(), chars, len, state, endian); QChar *qch = convertToUnicode(result.data(), in, state, endian);
result.truncate(qch - result.constData()); result.truncate(qch - result.constData());
return result; return result;
} }
QChar *QUtf16::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) QChar *QUtf16::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state, DataEndianness endian)
{ {
qsizetype len = in.size();
const char *chars = in.data();
Q_ASSERT(state); Q_ASSERT(state);
if (endian == DetectEndianness) if (endian == DetectEndianness)
@ -1009,14 +1017,14 @@ QChar *QUtf16::convertToUnicode(QChar *out, const char *chars, qsizetype len, QS
return out; return out;
} }
QByteArray QUtf32::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian) QByteArray QUtf32::convertFromUnicode(QStringView in, QStringConverter::State *state, DataEndianness endian)
{ {
bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom; bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
int length = 4*len; int length = 4*in.size();
if (writeBom) if (writeBom)
length += 4; length += 4;
QByteArray ba(length, Qt::Uninitialized); QByteArray ba(length, Qt::Uninitialized);
char *end = convertFromUnicode(ba.data(), QStringView(uc, len), state, endian); char *end = convertFromUnicode(ba.data(), in, state, endian);
Q_ASSERT(end - ba.constData() == length); Q_ASSERT(end - ba.constData() == length);
Q_UNUSED(end); Q_UNUSED(end);
return ba; return ba;
@ -1093,17 +1101,20 @@ decode_surrogate:
return out; return out;
} }
QString QUtf32::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) QString QUtf32::convertToUnicode(QByteArrayView in, QStringConverter::State *state, DataEndianness endian)
{ {
QString result; QString result;
result.resize((len + 7) >> 1); // worst case result.resize((in.size() + 7) >> 1); // worst case
QChar *end = convertToUnicode(result.data(), chars, len, state, endian); QChar *end = convertToUnicode(result.data(), in, state, endian);
result.truncate(end - result.constData()); result.truncate(end - result.constData());
return result; return result;
} }
QChar *QUtf32::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) QChar *QUtf32::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state, DataEndianness endian)
{ {
qsizetype len = in.size();
const char *chars = in.data();
Q_ASSERT(state); Q_ASSERT(state);
if (endian == DetectEndianness) if (endian == DetectEndianness)
endian = (DataEndianness)state->state_data[Endian]; endian = (DataEndianness)state->state_data[Endian];
@ -1188,8 +1199,11 @@ QChar *QUtf32::convertToUnicode(QChar *out, const char *chars, qsizetype len, QS
} }
#if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED) #if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED)
static QString convertToUnicodeCharByChar(const char *chars, qsizetype length, QStringConverter::State *state) static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::State *state)
{ {
qsizetype length = in.size();
const char *chars = in.data();
Q_ASSERT(state); Q_ASSERT(state);
if (state->flags & QStringConverter::Flag::Stateless) // temporary if (state->flags & QStringConverter::Flag::Stateless) // temporary
state = nullptr; state = nullptr;
@ -1238,10 +1252,12 @@ static QString convertToUnicodeCharByChar(const char *chars, qsizetype length, Q
} }
QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStringConverter::State *state) QString QLocal8Bit::convertToUnicode(QByteArrayView in, QStringConverter::State *state)
{ {
qsizetype length = in.size();
Q_ASSERT(length < INT_MAX); // ### FIXME Q_ASSERT(length < INT_MAX); // ### FIXME
const char *mb = chars; const char *mb = in.data();
int mblen = length; int mblen = length;
if (!mb || !mblen) if (!mb || !mblen)
@ -1294,7 +1310,7 @@ QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStrin
mblen--; mblen--;
//check whether, we hit an invalid character in the middle //check whether, we hit an invalid character in the middle
if ((mblen <= 1) || (remainingChars && state_data)) if ((mblen <= 1) || (remainingChars && state_data))
return convertToUnicodeCharByChar(chars, length, state); return convertToUnicodeCharByChar(in, state);
//Remove the last character and try again... //Remove the last character and try again...
state_data = mb[mblen-1]; state_data = mb[mblen-1];
remainingChars = 1; remainingChars = 1;
@ -1324,8 +1340,11 @@ QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStrin
return s; return s;
} }
QByteArray QLocal8Bit::convertFromUnicode(const QChar *ch, qsizetype uclen, QStringConverter::State *state) QByteArray QLocal8Bit::convertFromUnicode(QStringView in, QStringConverter::State *state)
{ {
const QChar *ch = in.data();
qsizetype uclen = in.size();
Q_ASSERT(uclen < INT_MAX); // ### FIXME Q_ASSERT(uclen < INT_MAX); // ### FIXME
Q_ASSERT(state); Q_ASSERT(state);
Q_UNUSED(state); // ### Fixme Q_UNUSED(state); // ### Fixme
@ -1375,9 +1394,9 @@ void QStringConverter::State::clear()
internalState = 0; internalState = 0;
} }
static QChar *fromUtf16(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromUtf16(QChar *out, QByteArrayView in, QStringConverter::State *state)
{ {
return QUtf16::convertToUnicode(out, in, length, state, DetectEndianness); return QUtf16::convertToUnicode(out, in, state, DetectEndianness);
} }
static char *toUtf16(char *out, QStringView in, QStringConverter::State *state) static char *toUtf16(char *out, QStringView in, QStringConverter::State *state)
@ -1385,9 +1404,9 @@ static char *toUtf16(char *out, QStringView in, QStringConverter::State *state)
return QUtf16::convertFromUnicode(out, in, state, DetectEndianness); return QUtf16::convertFromUnicode(out, in, state, DetectEndianness);
} }
static QChar *fromUtf16BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromUtf16BE(QChar *out, QByteArrayView in, QStringConverter::State *state)
{ {
return QUtf16::convertToUnicode(out, in, length, state, BigEndianness); return QUtf16::convertToUnicode(out, in, state, BigEndianness);
} }
static char *toUtf16BE(char *out, QStringView in, QStringConverter::State *state) static char *toUtf16BE(char *out, QStringView in, QStringConverter::State *state)
@ -1395,9 +1414,9 @@ static char *toUtf16BE(char *out, QStringView in, QStringConverter::State *state
return QUtf16::convertFromUnicode(out, in, state, BigEndianness); return QUtf16::convertFromUnicode(out, in, state, BigEndianness);
} }
static QChar *fromUtf16LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromUtf16LE(QChar *out, QByteArrayView in, QStringConverter::State *state)
{ {
return QUtf16::convertToUnicode(out, in, length, state, LittleEndianness); return QUtf16::convertToUnicode(out, in, state, LittleEndianness);
} }
static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state) static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state)
@ -1405,9 +1424,9 @@ static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state
return QUtf16::convertFromUnicode(out, in, state, LittleEndianness); return QUtf16::convertFromUnicode(out, in, state, LittleEndianness);
} }
static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromUtf32(QChar *out, QByteArrayView in, QStringConverter::State *state)
{ {
return QUtf32::convertToUnicode(out, in, length, state, DetectEndianness); return QUtf32::convertToUnicode(out, in, state, DetectEndianness);
} }
static char *toUtf32(char *out, QStringView in, QStringConverter::State *state) static char *toUtf32(char *out, QStringView in, QStringConverter::State *state)
@ -1415,9 +1434,9 @@ static char *toUtf32(char *out, QStringView in, QStringConverter::State *state)
return QUtf32::convertFromUnicode(out, in, state, DetectEndianness); return QUtf32::convertFromUnicode(out, in, state, DetectEndianness);
} }
static QChar *fromUtf32BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromUtf32BE(QChar *out, QByteArrayView in, QStringConverter::State *state)
{ {
return QUtf32::convertToUnicode(out, in, length, state, BigEndianness); return QUtf32::convertToUnicode(out, in, state, BigEndianness);
} }
static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state) static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state)
@ -1425,9 +1444,9 @@ static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state
return QUtf32::convertFromUnicode(out, in, state, BigEndianness); return QUtf32::convertFromUnicode(out, in, state, BigEndianness);
} }
static QChar *fromUtf32LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromUtf32LE(QChar *out, QByteArrayView in, QStringConverter::State *state)
{ {
return QUtf32::convertToUnicode(out, in, length, state, LittleEndianness); return QUtf32::convertToUnicode(out, in, state, LittleEndianness);
} }
static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state) static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state)
@ -1437,13 +1456,13 @@ static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state
void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept; void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept;
static QChar *fromLatin1(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state) static QChar *fromLatin1(QChar *out, QByteArrayView in, QStringConverter::State *state)
{ {
Q_ASSERT(state); Q_ASSERT(state);
Q_UNUSED(state); Q_UNUSED(state);
qt_from_latin1(reinterpret_cast<char16_t *>(out), chars, size_t(len)); qt_from_latin1(reinterpret_cast<char16_t *>(out), in.data(), size_t(in.size()));
return out + len; return out + in.size();
} }
@ -1469,16 +1488,16 @@ static char *toLatin1(char *out, QStringView in, QStringConverter::State *state)
return out; return out;
} }
static QChar *fromLocal8Bit(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromLocal8Bit(QChar *out, QByteArrayView in, QStringConverter::State *state)
{ {
QString s = QLocal8Bit::convertToUnicode(in, length, state); QString s = QLocal8Bit::convertToUnicode(in, state);
memcpy(out, s.constData(), s.length()*sizeof(QChar)); memcpy(out, s.constData(), s.length()*sizeof(QChar));
return out + s.length(); return out + s.length();
} }
static char *toLocal8Bit(char *out, QStringView in, QStringConverter::State *state) static char *toLocal8Bit(char *out, QStringView in, QStringConverter::State *state)
{ {
QByteArray s = QLocal8Bit::convertFromUnicode(in.data(), in.length(), state); QByteArray s = QLocal8Bit::convertFromUnicode(in, state);
memcpy(out, s.constData(), s.length()); memcpy(out, s.constData(), s.length());
return out + s.length(); return out + s.length();
} }
@ -1727,16 +1746,17 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForName(cons
} }
/*! /*!
Returns the encoding for the content of \a buf if it can be determined. Returns the encoding for the content of \a data if it can be determined.
\a expectedFirstCharacter can be passed as an additional hint to help determine \a expectedFirstCharacter can be passed as an additional hint to help determine
the encoding. the encoding.
The returned optional is empty, if the encoding is unclear. The returned optional is empty, if the encoding is unclear.
*/ */
std::optional<QStringConverter::Encoding> QStringConverter::encodingForData(const char *buf, qsizetype arraySize, char16_t expectedFirstCharacter) std::optional<QStringConverter::Encoding> QStringConverter::encodingForData(QByteArrayView data, char16_t expectedFirstCharacter)
{ {
qsizetype arraySize = data.size();
if (arraySize > 3) { if (arraySize > 3) {
uint uc = qFromUnaligned<uint>(buf); uint uc = qFromUnaligned<uint>(data.data());
if (uc == qToBigEndian(uint(QChar::ByteOrderMark))) if (uc == qToBigEndian(uint(QChar::ByteOrderMark)))
return QStringConverter::Utf32BE; return QStringConverter::Utf32BE;
if (uc == qToLittleEndian(uint(QChar::ByteOrderMark))) if (uc == qToLittleEndian(uint(QChar::ByteOrderMark)))
@ -1751,12 +1771,12 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForData(cons
} }
if (arraySize > 2) { if (arraySize > 2) {
if (memcmp(buf, utf8bom, sizeof(utf8bom)) == 0) if (memcmp(data.data(), utf8bom, sizeof(utf8bom)) == 0)
return QStringConverter::Utf8; return QStringConverter::Utf8;
} }
if (arraySize > 1) { if (arraySize > 1) {
ushort uc = qFromUnaligned<ushort>(buf); ushort uc = qFromUnaligned<ushort>(data.data());
if (uc == qToBigEndian(ushort(QChar::ByteOrderMark))) if (uc == qToBigEndian(ushort(QChar::ByteOrderMark)))
return QStringConverter::Utf16BE; return QStringConverter::Utf16BE;
if (uc == qToLittleEndian(ushort(QChar::ByteOrderMark))) if (uc == qToLittleEndian(ushort(QChar::ByteOrderMark)))
@ -1773,19 +1793,20 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForData(cons
} }
/*! /*!
Tries to determine the encoding of the HTML in \a buf by looking at leading byte order marks or Tries to determine the encoding of the HTML in \a data by looking at leading byte
a charset specifier in the HTML meta tag. If the optional is empty, the encoding specified is order marks or a charset specifier in the HTML meta tag. If the optional is empty,
not supported by QStringConverter. If no encoding is detected, the method returns Utf8. the encoding specified is not supported by QStringConverter. If no encoding is
detected, the method returns Utf8.
*/ */
std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(const char *buf, qsizetype arraySize) std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByteArrayView data)
{ {
// determine charset // determine charset
auto encoding = encodingForData(buf, arraySize); auto encoding = encodingForData(data);
if (encoding) if (encoding)
// trust the initial BOM // trust the initial BOM
return encoding; return encoding;
QByteArray header = QByteArray(buf, qMin(arraySize, qsizetype(1024))).toLower(); QByteArray header = data.first(qMin(data.size(), qsizetype(1024))).toByteArray().toLower();
int pos = header.indexOf("meta "); int pos = header.indexOf("meta ");
if (pos != -1) { if (pos != -1) {
pos = header.indexOf("charset=", pos); pos = header.indexOf("charset=", pos);
@ -2020,14 +2041,14 @@ const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e)
*/ */
/*! /*!
\fn QChar *QStringDecoder::appendToBuffer(QChar *out, const char *in, qsizetype length) \fn QChar *QStringDecoder::appendToBuffer(QChar *out, QByteArrayView in)
Decodes \a length bytes from \a in and writes the decoded result into the buffer Decodes the sequence of bytes viewed by \a in and writes the decoded result into
starting at \a out. Returns a pointer to the end of data written. the buffer starting at \a out. Returns a pointer to the end of data written.
\a out needs to be large enough to be able to hold all the decoded data. Use \a out needs to be large enough to be able to hold all the decoded data. Use
\l{requiredSpace} to determine the maximum size requirements to decode an encoded \l{requiredSpace} to determine the maximum size requirements to decode an encoded
data buffer of \a length. data buffer of \c in.size() bytes.
\sa requiredSpace \sa requiredSpace
*/ */

View File

@ -142,8 +142,7 @@ protected:
struct Interface struct Interface
{ {
// ### FIXME: need a QByteArrayView using DecoderFn = QChar * (*)(QChar *out, QByteArrayView in, State *state);
using DecoderFn = QChar * (*)(QChar *out, const char *in, qsizetype length, State *state);
using LengthFn = qsizetype (*)(qsizetype inLength); using LengthFn = qsizetype (*)(qsizetype inLength);
using EncoderFn = char * (*)(char *out, QStringView in, State *state); using EncoderFn = char * (*)(char *out, QStringView in, State *state);
const char *name = nullptr; const char *name = nullptr;
@ -179,8 +178,8 @@ public:
Q_CORE_EXPORT static std::optional<Encoding> encodingForName(const char *name); Q_CORE_EXPORT static std::optional<Encoding> encodingForName(const char *name);
Q_CORE_EXPORT static const char *nameForEncoding(Encoding e); Q_CORE_EXPORT static const char *nameForEncoding(Encoding e);
Q_CORE_EXPORT static std::optional<Encoding> encodingForData(const char *buf, qsizetype arraySize, char16_t expectedFirstCharacter = 0); Q_CORE_EXPORT static std::optional<Encoding> encodingForData(QByteArrayView data, char16_t expectedFirstCharacter = 0);
Q_CORE_EXPORT static std::optional<Encoding> encodingForHtml(const char *buf, qsizetype arraySize); Q_CORE_EXPORT static std::optional<Encoding> encodingForHtml(QByteArrayView data);
protected: protected:
const Interface *iface; const Interface *iface;
@ -209,36 +208,32 @@ public:
#if defined(Q_QDOC) #if defined(Q_QDOC)
QByteArray operator()(const QString &in); QByteArray operator()(const QString &in);
QByteArray operator()(QStringView in); QByteArray operator()(QStringView in);
QByteArray operator()(const QChar *in, qsizetype length);
QByteArray encode(const QString &in); QByteArray encode(const QString &in);
QByteArray encode(QStringView in); QByteArray encode(QStringView in);
QByteArray encode(const QChar *in, qsizetype length);
#else #else
template<typename T> template<typename T>
struct DecodedData struct DecodedData
{ {
QStringEncoder *encoder; QStringEncoder *encoder;
T data; T data;
operator QByteArray() const { return encoder->encodeAsByteArray(QStringView(data)); } operator QByteArray() const { return encoder->encodeAsByteArray(data); }
}; };
Q_WEAK_OVERLOAD
DecodedData<const QString &> operator()(const QString &str) DecodedData<const QString &> operator()(const QString &str)
{ return DecodedData<const QString &>{this, str}; } { return DecodedData<const QString &>{this, str}; }
DecodedData<QStringView> operator()(QStringView in) DecodedData<QStringView> operator()(QStringView in)
{ return DecodedData<QStringView>{this, in}; } { return DecodedData<QStringView>{this, in}; }
DecodedData<QStringView> operator()(const QChar *in, qsizetype length) Q_WEAK_OVERLOAD
{ return (*this)(QStringView(in, length)); }
DecodedData<const QString &> encode(const QString &str) DecodedData<const QString &> encode(const QString &str)
{ return DecodedData<const QString &>{this, str}; } { return DecodedData<const QString &>{this, str}; }
DecodedData<QStringView> encode(QStringView in) DecodedData<QStringView> encode(QStringView in)
{ return DecodedData<QStringView>{this, in}; } { return DecodedData<QStringView>{this, in}; }
DecodedData<QStringView> encode(const QChar *in, qsizetype length)
{ return (*this)(QStringView(in, length)); }
#endif #endif
qsizetype requiredSpace(qsizetype inputLength) const qsizetype requiredSpace(qsizetype inputLength) const
{ return iface->fromUtf16Len(inputLength); } { return iface->fromUtf16Len(inputLength); }
char *appendToBuffer(char *out, const QChar *in, qsizetype length) char *appendToBuffer(char *out, QStringView in)
{ return iface->fromUtf16(out, QStringView(in, length), &state); } { return iface->fromUtf16(out, in, &state); }
private: private:
QByteArray encodeAsByteArray(QStringView in) QByteArray encodeAsByteArray(QStringView in)
{ {
@ -253,13 +248,6 @@ private:
class QStringDecoder : public QStringConverter class QStringDecoder : public QStringConverter
{ {
struct View {
const char *ch;
qsizetype l;
const char *data() const { return ch; }
qsizetype length() const { return l; }
};
protected: protected:
QSTRINGCONVERTER_CONSTEXPR QStringDecoder(const Interface *i) QSTRINGCONVERTER_CONSTEXPR QStringDecoder(const Interface *i)
: QStringConverter(i) : QStringConverter(i)
@ -277,44 +265,38 @@ public:
#if defined(Q_QDOC) #if defined(Q_QDOC)
QString operator()(const QByteArray &ba); QString operator()(const QByteArray &ba);
QString operator()(const char *in, qsizetype size); QString operator()(QByteArrayView ba);
QString operator()(const char *chars);
QString decode(const QByteArray &ba); QString decode(const QByteArray &ba);
QString decode(const char *in, qsizetype size); QString decode(QByteArrayView ba);
QString decode(const char *chars);
#else #else
template<typename T> template<typename T>
struct EncodedData struct EncodedData
{ {
QStringDecoder *decoder; QStringDecoder *decoder;
T data; T data;
operator QString() const { return decoder->decodeAsString(data.data(), data.length()); } operator QString() const { return decoder->decodeAsString(data); }
}; };
Q_WEAK_OVERLOAD
EncodedData<const QByteArray &> operator()(const QByteArray &ba) EncodedData<const QByteArray &> operator()(const QByteArray &ba)
{ return EncodedData<const QByteArray &>{this, ba}; } { return EncodedData<const QByteArray &>{this, ba}; }
EncodedData<View> operator()(const char *in, qsizetype length) EncodedData<QByteArrayView> operator()(QByteArrayView ba)
{ return EncodedData<View>{this, {in, length}}; } { return EncodedData<QByteArrayView>{this, ba}; }
EncodedData<View> operator()(const char *chars) Q_WEAK_OVERLOAD
{ return EncodedData<View>{this, {chars, qsizetype(strlen(chars))}}; }
EncodedData<const QByteArray &> decode(const QByteArray &ba) EncodedData<const QByteArray &> decode(const QByteArray &ba)
{ return EncodedData<const QByteArray &>{this, ba}; } { return EncodedData<const QByteArray &>{this, ba}; }
EncodedData<View> decode(const char *in, qsizetype length) EncodedData<QByteArrayView> decode(QByteArrayView ba)
{ return EncodedData<View>{this, {in, length}}; } { return EncodedData<QByteArrayView>{this, ba}; }
EncodedData<View> decode(const char *chars)
{ return EncodedData<View>{this, {chars, qsizetype(strlen(chars))}}; }
#endif #endif
qsizetype requiredSpace(qsizetype inputLength) const qsizetype requiredSpace(qsizetype inputLength) const
{ return iface->toUtf16Len(inputLength); } { return iface->toUtf16Len(inputLength); }
QChar *appendToBuffer(QChar *out, const char *in, qsizetype length) QChar *appendToBuffer(QChar *out, QByteArrayView ba)
{ return iface->toUtf16(out, in, length, &state); } { return iface->toUtf16(out, ba, &state); }
private: private:
QString decodeAsString(const char *in, qsizetype length) QString decodeAsString(QByteArrayView in)
{ {
QString result(iface->toUtf16Len(length), Qt::Uninitialized); QString result(iface->toUtf16Len(in.size()), Qt::Uninitialized);
QChar *out = result.data(); const QChar *out = iface->toUtf16(result.data(), in, &state);
// ### Fixme: state handling needs to be moved into the conversion methods
out = iface->toUtf16(out, in, length, &state);
result.truncate(out - result.constData()); result.truncate(out - result.constData());
return result; return result;
} }
@ -329,10 +311,10 @@ struct QConcatenable<QStringDecoder::EncodedData<T>>
typedef QChar type; typedef QChar type;
typedef QString ConvertTo; typedef QString ConvertTo;
enum { ExactSize = false }; enum { ExactSize = false };
static qsizetype size(const QStringDecoder::EncodedData<T> &s) { return s.decoder->requiredSpace(s.data.length()); } static qsizetype size(const QStringDecoder::EncodedData<T> &s) { return s.decoder->requiredSpace(s.data.size()); }
static inline void appendTo(const QStringDecoder::EncodedData<T> &s, QChar *&out) static inline void appendTo(const QStringDecoder::EncodedData<T> &s, QChar *&out)
{ {
out = s.decoder->appendToBuffer(out, s.data.data(), s.data.length()); out = s.decoder->appendToBuffer(out, s.data);
} }
}; };
@ -343,10 +325,10 @@ struct QConcatenable<QStringEncoder::DecodedData<T>>
typedef char type; typedef char type;
typedef QByteArray ConvertTo; typedef QByteArray ConvertTo;
enum { ExactSize = false }; enum { ExactSize = false };
static qsizetype size(const QStringEncoder::DecodedData<T> &s) { return s.encoder->requiredSpace(s.data.length()); } static qsizetype size(const QStringEncoder::DecodedData<T> &s) { return s.encoder->requiredSpace(s.data.size()); }
static inline void appendTo(const QStringEncoder::DecodedData<T> &s, char *&out) static inline void appendTo(const QStringEncoder::DecodedData<T> &s, char *&out)
{ {
out = s.encoder->appendToBuffer(out, s.data.data(), s.data.length()); out = s.encoder->appendToBuffer(out, s.data);
} }
}; };

View File

@ -326,48 +326,48 @@ enum DataEndianness
struct QUtf8 struct QUtf8
{ {
Q_CORE_EXPORT static QChar *convertToUnicode(QChar *, const char *, qsizetype) noexcept; Q_CORE_EXPORT static QChar *convertToUnicode(QChar *buffer, QByteArrayView in) noexcept;
static QString convertToUnicode(const char *, qsizetype); static QString convertToUnicode(QByteArrayView in);
Q_CORE_EXPORT static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *); Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state);
static QChar *convertToUnicode(QChar *out, const char *in, qsizetype length, QStringConverter::State *state); static QChar *convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state);
Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype); Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in);
Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *); Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in, QStringConverterBase::State *state);
static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state);
struct ValidUtf8Result { struct ValidUtf8Result {
bool isValidUtf8; bool isValidUtf8;
bool isValidAscii; bool isValidAscii;
}; };
static ValidUtf8Result isValidUtf8(const char *, qsizetype); static ValidUtf8Result isValidUtf8(QByteArrayView in);
static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype) noexcept; static int compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept;
static int compareUtf8(const char *, qsizetype, QLatin1String s); static int compareUtf8(QByteArrayView utf8, QLatin1String s);
}; };
struct QUtf16 struct QUtf16
{ {
Q_CORE_EXPORT static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness = DetectEndianness);
static QChar *convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian); static QChar *convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian);
Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness = DetectEndianness);
static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian);
}; };
struct QUtf32 struct QUtf32
{ {
static QChar *convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian); static QChar *convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian);
Q_CORE_EXPORT static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness = DetectEndianness);
Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness = DetectEndianness);
static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian);
}; };
struct Q_CORE_EXPORT QLocal8Bit struct Q_CORE_EXPORT QLocal8Bit
{ {
#if !defined(Q_OS_WIN) || defined(QT_BOOTSTRAPPED) #if !defined(Q_OS_WIN) || defined(QT_BOOTSTRAPPED)
static QString convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state) static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state)
{ return QUtf8::convertToUnicode(chars, len, state); } { return QUtf8::convertToUnicode(in, state); }
static QByteArray convertFromUnicode(const QChar *chars, qsizetype len, QStringConverter::State *state) static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
{ return QUtf8::convertFromUnicode(chars, len, state); } { return QUtf8::convertFromUnicode(in, state); }
#else #else
static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *); static QString convertToUnicode(QByteArrayView, QStringConverter::State *);
static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *); static QByteArray convertFromUnicode(QStringView, QStringConverter::State *);
#endif #endif
}; };

View File

@ -286,7 +286,7 @@ QString QClipboard::text(QString &subtype, Mode mode) const
} }
const QByteArray rawData = data->data(QLatin1String("text/") + subtype); const QByteArray rawData = data->data(QLatin1String("text/") + subtype);
auto encoding = QStringConverter::encodingForData(rawData.constData(), rawData.size()); auto encoding = QStringConverter::encodingForData(rawData);
if (!encoding) if (!encoding)
encoding = QStringConverter::Utf8; encoding = QStringConverter::Utf8;
return QStringDecoder(*encoding).decode(rawData); return QStringDecoder(*encoding).decode(rawData);

View File

@ -312,7 +312,7 @@ void QTextBrowserPrivate::setSource(const QUrl &url, QTextDocument::ResourceType
} else if (data.userType() == QMetaType::QByteArray) { } else if (data.userType() == QMetaType::QByteArray) {
QByteArray ba = data.toByteArray(); QByteArray ba = data.toByteArray();
if (type == QTextDocument::HtmlResource) { if (type == QTextDocument::HtmlResource) {
auto encoding = QStringConverter::encodingForHtml(ba.constData(), ba.size()); auto encoding = QStringConverter::encodingForHtml(ba);
if (!encoding) if (!encoding)
// fall back to utf8 // fall back to utf8
encoding = QStringDecoder::Utf8; encoding = QStringDecoder::Utf8;

View File

@ -129,21 +129,21 @@ void tst_QStringConverter::convertUtf8()
QVERIFY(decoder.isValid()); QVERIFY(decoder.isValid());
QString uniString; QString uniString;
for (int i = 0; i < ba.size(); ++i) for (int i = 0; i < ba.size(); ++i)
uniString += decoder(ba.constData() + i, 1); uniString += decoder(QByteArrayView(ba).sliced(i, 1));
QCOMPARE(uniString, QString::fromUtf8(ba)); QCOMPARE(uniString, QString::fromUtf8(ba));
uniString.clear(); uniString.clear();
for (int i = 0; i < ba.size(); ++i) for (int i = 0; i < ba.size(); ++i)
uniString += decoder.decode(ba.constData() + i, 1); uniString += decoder.decode(QByteArrayView(ba).sliced(i, 1));
QCOMPARE(uniString, QString::fromUtf8(ba)); QCOMPARE(uniString, QString::fromUtf8(ba));
QStringEncoder encoder(QStringEncoder::Utf8); QStringEncoder encoder(QStringEncoder::Utf8);
QByteArray reencoded; QByteArray reencoded;
for (int i = 0; i < uniString.size(); ++i) for (int i = 0; i < uniString.size(); ++i)
reencoded += encoder(uniString.constData() + i, 1); reencoded += encoder(QStringView(uniString).sliced(i, 1));
QCOMPARE(ba, encoder(uniString)); QCOMPARE(ba, encoder(uniString));
reencoded.clear(); reencoded.clear();
for (int i = 0; i < uniString.size(); ++i) for (int i = 0; i < uniString.size(); ++i)
reencoded += encoder.encode(uniString.constData() + i, 1); reencoded += encoder.encode(QStringView(uniString).sliced(i, 1));
QCOMPARE(ba, encoder(uniString)); QCOMPARE(ba, encoder(uniString));
} }
} }
@ -1254,8 +1254,7 @@ void tst_QStringConverter::utf8Codec()
QFETCH(int, len); QFETCH(int, len);
QStringDecoder decoder(QStringDecoder::Utf8, QStringDecoder::Flag::Stateless); QStringDecoder decoder(QStringDecoder::Utf8, QStringDecoder::Flag::Stateless);
QString str = decoder(utf8.isNull() ? 0 : utf8.constData(), QString str = decoder(QByteArrayView(utf8).first(len < 0 ? qstrlen(utf8.constData()) : len));
len < 0 ? qstrlen(utf8.constData()) : len);
QCOMPARE(str, res); QCOMPARE(str, res);
str = QString::fromUtf8(utf8.isNull() ? 0 : utf8.constData(), len); str = QString::fromUtf8(utf8.isNull() ? 0 : utf8.constData(), len);
@ -1324,7 +1323,7 @@ void tst_QStringConverter::utf8bom()
QStringDecoder decoder(QStringDecoder::Utf8); QStringDecoder decoder(QStringDecoder::Utf8);
QCOMPARE(decoder(data.constData(), data.length()), result); QCOMPARE(decoder(data), result);
} }
void tst_QStringConverter::utf8stateful_data() void tst_QStringConverter::utf8stateful_data()
@ -1409,9 +1408,9 @@ void tst_QStringConverter::utf8stateful()
QString decoded; QString decoded;
for (char c : buffer1) for (char c : buffer1)
decoded += decoder(&c, 1); decoded += decoder(QByteArrayView(&c, 1));
for (char c : buffer2) for (char c : buffer2)
decoded += decoder(&c, 1); decoded += decoder(QByteArrayView(&c, 1));
if (result.isNull()) { if (result.isNull()) {
QVERIFY(decoder.hasError()); QVERIFY(decoder.hasError());
} else { } else {
@ -1607,7 +1606,7 @@ void tst_QStringConverter::utfHeaders()
QString result; QString result;
for (char c : encoded) for (char c : encoded)
result += decode(&c, 1); result += decode(QByteArrayView(&c, 1));
QCOMPARE(result.length(), unicode.length()); QCOMPARE(result.length(), unicode.length());
QCOMPARE(result, unicode); QCOMPARE(result, unicode);
} }
@ -1625,7 +1624,7 @@ void tst_QStringConverter::utfHeaders()
QVERIFY(encode.isValid()); QVERIFY(encode.isValid());
QByteArray reencoded; QByteArray reencoded;
for (QChar c : unicode) for (QChar c : unicode)
reencoded += encode(&c, 1); reencoded += encode(QStringView(&c, 1));
QCOMPARE(reencoded, encoded); QCOMPARE(reencoded, encoded);
} }
} }
@ -1729,7 +1728,7 @@ void tst_QStringConverter::encodingForData()
QFETCH(QByteArray, encoded); QFETCH(QByteArray, encoded);
QFETCH(std::optional<QStringConverter::Encoding>, encoding); QFETCH(std::optional<QStringConverter::Encoding>, encoding);
auto e = QStringConverter::encodingForData(encoded.constData(), encoded.size(), char16_t('<')); auto e = QStringConverter::encodingForData(encoded, char16_t('<'));
QCOMPARE(e, encoding); QCOMPARE(e, encoding);
} }
@ -1816,7 +1815,7 @@ void tst_QStringConverter::encodingForHtml()
QFETCH(QByteArray, html); QFETCH(QByteArray, html);
QFETCH(std::optional<QStringConverter::Encoding>, encoding); QFETCH(std::optional<QStringConverter::Encoding>, encoding);
QCOMPARE(QStringConverter::encodingForHtml(html.constData(), html.size()), encoding); QCOMPARE(QStringConverter::encodingForHtml(html), encoding);
} }
class LoadAndConvert: public QRunnable class LoadAndConvert: public QRunnable