QCborStreamReader: move the UTF-8 decoding into readStringChunk
This allows us to decode long UTF-8 strings in chunks, instead of allocating a big block of the size of the UTF-8 source and then another for the full UTF-16 content.

Task-number: QTBUG-88253
Change-Id: I7b9b97ae9b32412abdc6fffd16452a47b1036ef3
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
(cherry picked from commit bab2cd1125a21885bea97079219031ab45861826)
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
8675f81cd8
commit
d7a0f8356a
@ -48,6 +48,7 @@
|
|||||||
#include <qiodevice.h>
|
#include <qiodevice.h>
|
||||||
#include <qdebug.h>
|
#include <qdebug.h>
|
||||||
#include <qstack.h>
|
#include <qstack.h>
|
||||||
|
#include <qvarlengtharray.h>
|
||||||
|
|
||||||
QT_BEGIN_NAMESPACE
|
QT_BEGIN_NAMESPACE
|
||||||
|
|
||||||
@ -671,18 +672,23 @@ public:
|
|||||||
union {
|
union {
|
||||||
char *ptr;
|
char *ptr;
|
||||||
QByteArray *array;
|
QByteArray *array;
|
||||||
|
QString *string;
|
||||||
};
|
};
|
||||||
enum { ByteArray = -1 };
|
enum { ByteArray = -1, String = -3 };
|
||||||
qsizetype maxlen_or_type;
|
qsizetype maxlen_or_type;
|
||||||
|
|
||||||
ReadStringChunk(char *ptr, qsizetype maxlen) : ptr(ptr), maxlen_or_type(maxlen) {}
|
ReadStringChunk(char *ptr, qsizetype maxlen) : ptr(ptr), maxlen_or_type(maxlen) {}
|
||||||
ReadStringChunk(QByteArray *array) : array(array), maxlen_or_type(ByteArray) {}
|
ReadStringChunk(QByteArray *array) : array(array), maxlen_or_type(ByteArray) {}
|
||||||
|
ReadStringChunk(QString *str) : string(str), maxlen_or_type(String) {}
|
||||||
|
bool isString() const { return maxlen_or_type == String; }
|
||||||
bool isByteArray() const { return maxlen_or_type == ByteArray; }
|
bool isByteArray() const { return maxlen_or_type == ByteArray; }
|
||||||
bool isPlainPointer() const { return maxlen_or_type >= 0; }
|
bool isPlainPointer() const { return maxlen_or_type >= 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
static QCborStreamReader::StringResultCode appendStringChunk(QCborStreamReader &reader, QByteArray *data);
|
static QCborStreamReader::StringResultCode appendStringChunk(QCborStreamReader &reader, QByteArray *data);
|
||||||
QCborStreamReader::StringResult<qsizetype> readStringChunk(ReadStringChunk params);
|
QCborStreamReader::StringResult<qsizetype> readStringChunk(ReadStringChunk params);
|
||||||
|
qsizetype readStringChunk_byte(ReadStringChunk params, qsizetype len);
|
||||||
|
qsizetype readStringChunk_unicode(ReadStringChunk params, qsizetype utf8len);
|
||||||
bool ensureStringIteration();
|
bool ensureStringIteration();
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1354,29 +1360,17 @@ bool QCborStreamReader::leaveContainer()
|
|||||||
*/
|
*/
|
||||||
QCborStreamReader::StringResult<QString> QCborStreamReader::_readString_helper()
|
QCborStreamReader::StringResult<QString> QCborStreamReader::_readString_helper()
|
||||||
{
|
{
|
||||||
auto r = _readByteArray_helper();
|
|
||||||
QCborStreamReader::StringResult<QString> result;
|
QCborStreamReader::StringResult<QString> result;
|
||||||
|
auto r = d->readStringChunk(&result.data);
|
||||||
result.status = r.status;
|
result.status = r.status;
|
||||||
|
if (r.status == Error) {
|
||||||
if (r.status == Ok) {
|
result.data.clear();
|
||||||
// See QUtf8::convertToUnicode() a detailed explanation of why this
|
} else {
|
||||||
// conversion uses the same number of words or less.
|
Q_ASSERT(r.data == result.data.length());
|
||||||
CborError err = CborNoError;
|
if (r.status == EndOfString && lastError() == QCborError::NoError)
|
||||||
if (r.data.size() > MaxStringSize) {
|
preparse();
|
||||||
err = CborErrorDataTooLarge;
|
|
||||||
} else {
|
|
||||||
QStringConverter::State cs(QStringConverter::Flag::Stateless);
|
|
||||||
result.data = QUtf8::convertToUnicode(r.data, &cs);
|
|
||||||
if (cs.invalidChars != 0 || cs.remainingChars != 0)
|
|
||||||
err = CborErrorInvalidUtf8TextString;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err) {
|
|
||||||
d->handleError(err);
|
|
||||||
result.data.clear();
|
|
||||||
result.status = Error;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1547,12 +1541,41 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the chunk into the user's buffer.
|
|
||||||
qint64 actuallyRead;
|
|
||||||
qptrdiff offset = qptrdiff(content);
|
qptrdiff offset = qptrdiff(content);
|
||||||
|
bufferStart += offset;
|
||||||
|
if (device) {
|
||||||
|
// This first skip can't fail because we've already read this many bytes.
|
||||||
|
device->skip(bufferStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (params.isString()) {
|
||||||
|
// readString()
|
||||||
|
result.data = readStringChunk_unicode(params, qsizetype(len));
|
||||||
|
} else {
|
||||||
|
// readByteArray() or readStringChunk()
|
||||||
|
result.data = readStringChunk_byte(params, qsizetype(len));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.data < 0)
|
||||||
|
return result; // error
|
||||||
|
|
||||||
|
if (device)
|
||||||
|
updateBufferAfterString(0, len);
|
||||||
|
else
|
||||||
|
bufferStart += len;
|
||||||
|
|
||||||
|
preread();
|
||||||
|
result.status = QCborStreamReader::Ok;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline qsizetype
|
||||||
|
QCborStreamReaderPrivate::readStringChunk_byte(ReadStringChunk params, qsizetype len)
|
||||||
|
{
|
||||||
|
qint64 actuallyRead;
|
||||||
qsizetype toRead = qsizetype(len);
|
qsizetype toRead = qsizetype(len);
|
||||||
qsizetype left = 0; // bytes from the chunk not copied to the user buffer, to discard
|
qsizetype left = 0; // bytes from the chunk not copied to the user buffer, to discard
|
||||||
char *ptr;
|
char *ptr = nullptr;
|
||||||
|
|
||||||
if (params.isPlainPointer()) {
|
if (params.isPlainPointer()) {
|
||||||
left = toRead - params.maxlen_or_type;
|
left = toRead - params.maxlen_or_type;
|
||||||
@ -1567,7 +1590,7 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
|
|||||||
auto newSize = oldSize;
|
auto newSize = oldSize;
|
||||||
if (add_overflow<decltype(newSize)>(oldSize, toRead, &newSize)) {
|
if (add_overflow<decltype(newSize)>(oldSize, toRead, &newSize)) {
|
||||||
handleError(CborErrorDataTooLarge);
|
handleError(CborErrorDataTooLarge);
|
||||||
return result;
|
return -1;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
params.array->resize(newSize);
|
params.array->resize(newSize);
|
||||||
@ -1576,15 +1599,13 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
|
|||||||
// compatibility with Qt 5; in Qt 6, we could consider everything
|
// compatibility with Qt 5; in Qt 6, we could consider everything
|
||||||
// to be OOM.
|
// to be OOM.
|
||||||
handleError(newSize > MaxByteArraySize ? CborErrorDataTooLarge: CborErrorOutOfMemory);
|
handleError(newSize > MaxByteArraySize ? CborErrorDataTooLarge: CborErrorOutOfMemory);
|
||||||
return result;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ptr = const_cast<char *>(params.array->constData()) + oldSize;
|
ptr = const_cast<char *>(params.array->constData()) + oldSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device) {
|
if (device) {
|
||||||
// This first skip can't fail because we've already read this many bytes.
|
|
||||||
device->skip(bufferStart + qptrdiff(content));
|
|
||||||
actuallyRead = device->read(ptr, toRead);
|
actuallyRead = device->read(ptr, toRead);
|
||||||
|
|
||||||
if (actuallyRead != toRead) {
|
if (actuallyRead != toRead) {
|
||||||
@ -1597,20 +1618,71 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
|
|||||||
|
|
||||||
if (actuallyRead < 0) {
|
if (actuallyRead < 0) {
|
||||||
handleError(CborErrorIO);
|
handleError(CborErrorIO);
|
||||||
return result;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
updateBufferAfterString(offset, len);
|
|
||||||
} else {
|
} else {
|
||||||
actuallyRead = toRead;
|
actuallyRead = toRead;
|
||||||
memcpy(ptr, buffer.constData() + bufferStart + offset, toRead);
|
memcpy(ptr, buffer.constData() + bufferStart, toRead);
|
||||||
bufferStart += QByteArray::size_type(offset + len);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
preread();
|
return actuallyRead;
|
||||||
result.data = actuallyRead;
|
}
|
||||||
result.status = QCborStreamReader::Ok;
|
|
||||||
return result;
|
inline qsizetype
|
||||||
|
QCborStreamReaderPrivate::readStringChunk_unicode(ReadStringChunk params, qsizetype utf8len)
|
||||||
|
{
|
||||||
|
// See QUtf8::convertToUnicode() a detailed explanation of why this
|
||||||
|
// conversion uses the same number of words or less.
|
||||||
|
QChar *begin = nullptr;
|
||||||
|
if (params.isString()) {
|
||||||
|
try {
|
||||||
|
params.string->resize(utf8len);
|
||||||
|
} catch (const std::bad_alloc &) {
|
||||||
|
if (utf8len > MaxStringSize)
|
||||||
|
handleError(CborErrorDataTooLarge);
|
||||||
|
else
|
||||||
|
handleError(CborErrorOutOfMemory);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
begin = const_cast<QChar *>(params.string->constData());
|
||||||
|
}
|
||||||
|
|
||||||
|
QChar *ptr = begin;
|
||||||
|
QStringConverter::State cs(QStringConverter::Flag::Stateless);
|
||||||
|
if (device == nullptr) {
|
||||||
|
// Easy case: we can decode straight from the buffer we already have
|
||||||
|
ptr = QUtf8::convertToUnicode(ptr, { buffer.constData() + bufferStart, utf8len }, &cs);
|
||||||
|
} else {
|
||||||
|
// read in chunks, to avoid creating large, intermediate buffers
|
||||||
|
constexpr qsizetype StringChunkSize = 16384;
|
||||||
|
qsizetype chunkSize = qMin(StringChunkSize, utf8len);
|
||||||
|
QVarLengthArray<char> chunk(chunkSize);
|
||||||
|
|
||||||
|
cs = { QStringConverter::Flag::ConvertInitialBom };
|
||||||
|
while (utf8len > 0 && cs.invalidChars == 0) {
|
||||||
|
qsizetype toRead = qMin(chunkSize, utf8len);
|
||||||
|
qint64 actuallyRead = device->read(chunk.data(), toRead);
|
||||||
|
if (actuallyRead == toRead)
|
||||||
|
ptr = QUtf8::convertToUnicode(ptr, { chunk.data(), toRead }, &cs);
|
||||||
|
|
||||||
|
if (actuallyRead != toRead) {
|
||||||
|
handleError(CborErrorIO);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
utf8len -= toRead;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cs.invalidChars != 0 || cs.remainingChars != 0) {
|
||||||
|
handleError(CborErrorInvalidUtf8TextString);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
qsizetype size = ptr - begin;
|
||||||
|
if (params.isString())
|
||||||
|
params.string->truncate(size);
|
||||||
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
QT_END_NAMESPACE
|
QT_END_NAMESPACE
|
||||||
|
Loading…
x
Reference in New Issue
Block a user