QCborStreamReader: move the UTF-8 decoding into readStringChunk
This allows us to decode long UTF-8 strings in chunks, instead of allocating a big block of the size of the UTF-8 source and then another for the full UTF-16 content.

Task-number: QTBUG-88253
Change-Id: I7b9b97ae9b32412abdc6fffd16452a47b1036ef3
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
(cherry picked from commit bab2cd1125a21885bea97079219031ab45861826)
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
8675f81cd8
commit
d7a0f8356a
@ -48,6 +48,7 @@
|
||||
#include <qiodevice.h>
|
||||
#include <qdebug.h>
|
||||
#include <qstack.h>
|
||||
#include <qvarlengtharray.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
@ -671,18 +672,23 @@ public:
|
||||
union {
|
||||
char *ptr;
|
||||
QByteArray *array;
|
||||
QString *string;
|
||||
};
|
||||
enum { ByteArray = -1 };
|
||||
enum { ByteArray = -1, String = -3 };
|
||||
qsizetype maxlen_or_type;
|
||||
|
||||
ReadStringChunk(char *ptr, qsizetype maxlen) : ptr(ptr), maxlen_or_type(maxlen) {}
|
||||
ReadStringChunk(QByteArray *array) : array(array), maxlen_or_type(ByteArray) {}
|
||||
ReadStringChunk(QString *str) : string(str), maxlen_or_type(String) {}
|
||||
bool isString() const { return maxlen_or_type == String; }
|
||||
bool isByteArray() const { return maxlen_or_type == ByteArray; }
|
||||
bool isPlainPointer() const { return maxlen_or_type >= 0; }
|
||||
};
|
||||
|
||||
static QCborStreamReader::StringResultCode appendStringChunk(QCborStreamReader &reader, QByteArray *data);
|
||||
QCborStreamReader::StringResult<qsizetype> readStringChunk(ReadStringChunk params);
|
||||
qsizetype readStringChunk_byte(ReadStringChunk params, qsizetype len);
|
||||
qsizetype readStringChunk_unicode(ReadStringChunk params, qsizetype utf8len);
|
||||
bool ensureStringIteration();
|
||||
};
|
||||
|
||||
@ -1354,29 +1360,17 @@ bool QCborStreamReader::leaveContainer()
|
||||
*/
|
||||
QCborStreamReader::StringResult<QString> QCborStreamReader::_readString_helper()
|
||||
{
|
||||
auto r = _readByteArray_helper();
|
||||
QCborStreamReader::StringResult<QString> result;
|
||||
auto r = d->readStringChunk(&result.data);
|
||||
result.status = r.status;
|
||||
|
||||
if (r.status == Ok) {
|
||||
// See QUtf8::convertToUnicode() for a detailed explanation of why this
|
||||
// conversion uses the same number of words or less.
|
||||
CborError err = CborNoError;
|
||||
if (r.data.size() > MaxStringSize) {
|
||||
err = CborErrorDataTooLarge;
|
||||
} else {
|
||||
QStringConverter::State cs(QStringConverter::Flag::Stateless);
|
||||
result.data = QUtf8::convertToUnicode(r.data, &cs);
|
||||
if (cs.invalidChars != 0 || cs.remainingChars != 0)
|
||||
err = CborErrorInvalidUtf8TextString;
|
||||
}
|
||||
|
||||
if (err) {
|
||||
d->handleError(err);
|
||||
result.data.clear();
|
||||
result.status = Error;
|
||||
}
|
||||
if (r.status == Error) {
|
||||
result.data.clear();
|
||||
} else {
|
||||
Q_ASSERT(r.data == result.data.length());
|
||||
if (r.status == EndOfString && lastError() == QCborError::NoError)
|
||||
preparse();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -1547,12 +1541,41 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
|
||||
return result;
|
||||
}
|
||||
|
||||
// Read the chunk into the user's buffer.
|
||||
qint64 actuallyRead;
|
||||
qptrdiff offset = qptrdiff(content);
|
||||
bufferStart += offset;
|
||||
if (device) {
|
||||
// This first skip can't fail because we've already read this many bytes.
|
||||
device->skip(bufferStart);
|
||||
}
|
||||
|
||||
if (params.isString()) {
|
||||
// readString()
|
||||
result.data = readStringChunk_unicode(params, qsizetype(len));
|
||||
} else {
|
||||
// readByteArray() or readStringChunk()
|
||||
result.data = readStringChunk_byte(params, qsizetype(len));
|
||||
}
|
||||
|
||||
if (result.data < 0)
|
||||
return result; // error
|
||||
|
||||
if (device)
|
||||
updateBufferAfterString(0, len);
|
||||
else
|
||||
bufferStart += len;
|
||||
|
||||
preread();
|
||||
result.status = QCborStreamReader::Ok;
|
||||
return result;
|
||||
}
|
||||
|
||||
inline qsizetype
|
||||
QCborStreamReaderPrivate::readStringChunk_byte(ReadStringChunk params, qsizetype len)
|
||||
{
|
||||
qint64 actuallyRead;
|
||||
qsizetype toRead = qsizetype(len);
|
||||
qsizetype left = 0; // bytes from the chunk not copied to the user buffer, to discard
|
||||
char *ptr;
|
||||
char *ptr = nullptr;
|
||||
|
||||
if (params.isPlainPointer()) {
|
||||
left = toRead - params.maxlen_or_type;
|
||||
@ -1567,7 +1590,7 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
|
||||
auto newSize = oldSize;
|
||||
if (add_overflow<decltype(newSize)>(oldSize, toRead, &newSize)) {
|
||||
handleError(CborErrorDataTooLarge);
|
||||
return result;
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
params.array->resize(newSize);
|
||||
@ -1576,15 +1599,13 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
|
||||
// compatibility with Qt 5; in Qt 6, we could consider everything
|
||||
// to be OOM.
|
||||
handleError(newSize > MaxByteArraySize ? CborErrorDataTooLarge: CborErrorOutOfMemory);
|
||||
return result;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ptr = const_cast<char *>(params.array->constData()) + oldSize;
|
||||
}
|
||||
|
||||
if (device) {
|
||||
// This first skip can't fail because we've already read this many bytes.
|
||||
device->skip(bufferStart + qptrdiff(content));
|
||||
actuallyRead = device->read(ptr, toRead);
|
||||
|
||||
if (actuallyRead != toRead) {
|
||||
@ -1597,20 +1618,71 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
|
||||
|
||||
if (actuallyRead < 0) {
|
||||
handleError(CborErrorIO);
|
||||
return result;
|
||||
return -1;
|
||||
}
|
||||
|
||||
updateBufferAfterString(offset, len);
|
||||
} else {
|
||||
actuallyRead = toRead;
|
||||
memcpy(ptr, buffer.constData() + bufferStart + offset, toRead);
|
||||
bufferStart += QByteArray::size_type(offset + len);
|
||||
memcpy(ptr, buffer.constData() + bufferStart, toRead);
|
||||
}
|
||||
|
||||
preread();
|
||||
result.data = actuallyRead;
|
||||
result.status = QCborStreamReader::Ok;
|
||||
return result;
|
||||
return actuallyRead;
|
||||
}
|
||||
|
||||
// Decodes one UTF-8 text chunk of utf8len bytes into *params.string.
//
// The source bytes come either from the in-memory buffer (when device is
// null) or from the device, read in bounded pieces. Returns the number of
// QChar units written, or -1 after reporting the failure via handleError()
// (allocation failure, I/O failure, or invalid/truncated UTF-8).
inline qsizetype
|
||||
QCborStreamReaderPrivate::readStringChunk_unicode(ReadStringChunk params, qsizetype utf8len)
|
||||
{
|
||||
// See QUtf8::convertToUnicode() for a detailed explanation of why this
|
||||
// conversion uses the same number of words or less.
|
||||
QChar *begin = nullptr;
|
||||
if (params.isString()) {
|
||||
try {
|
||||
// Size the destination for the worst case of one QChar per UTF-8
// byte; it is truncated to the real length at the end.
// NOTE(review): resize() sets the size to utf8len rather than growing
// the string, and decoding starts at constData(), so any previous
// content of *params.string is overwritten -- confirm callers always
// pass an empty string.
params.string->resize(utf8len);
|
||||
} catch (const std::bad_alloc &) {
|
||||
// Distinguish "larger than the string size limit" from a plain
// out-of-memory condition when reporting the allocation failure.
if (utf8len > MaxStringSize)
|
||||
handleError(CborErrorDataTooLarge);
|
||||
else
|
||||
handleError(CborErrorOutOfMemory);
|
||||
return -1;
|
||||
}
|
||||
|
||||
begin = const_cast<QChar *>(params.string->constData());
|
||||
}
|
||||
|
||||
// ptr is the write cursor; begin stays at the start so the decoded length
// can be computed as ptr - begin.
QChar *ptr = begin;
|
||||
QStringConverter::State cs(QStringConverter::Flag::Stateless);
|
||||
if (device == nullptr) {
|
||||
// Easy case: we can decode straight from the buffer we already have
|
||||
ptr = QUtf8::convertToUnicode(ptr, { buffer.constData() + bufferStart, utf8len }, &cs);
|
||||
} else {
|
||||
// read in chunks, to avoid creating large, intermediate buffers
|
||||
constexpr qsizetype StringChunkSize = 16384;
|
||||
qsizetype chunkSize = qMin(StringChunkSize, utf8len);
|
||||
QVarLengthArray<char> chunk(chunkSize);
|
||||
|
||||
// Replace the Stateless state set above: the same cs is passed to every
// convertToUnicode() call in the loop (presumably so a multi-byte
// sequence split at a chunk boundary is completed by the next read --
// confirm against QUtf8::convertToUnicode()).
cs = { QStringConverter::Flag::ConvertInitialBom };
|
||||
// Stop early at the first invalid sequence; the check after the loop
// turns it into CborErrorInvalidUtf8TextString.
while (utf8len > 0 && cs.invalidChars == 0) {
|
||||
qsizetype toRead = qMin(chunkSize, utf8len);
|
||||
qint64 actuallyRead = device->read(chunk.data(), toRead);
|
||||
if (actuallyRead == toRead)
|
||||
ptr = QUtf8::convertToUnicode(ptr, { chunk.data(), toRead }, &cs);
|
||||
|
||||
// Both a read error and a short read are reported as an I/O error.
if (actuallyRead != toRead) {
|
||||
handleError(CborErrorIO);
|
||||
return -1;
|
||||
}
|
||||
utf8len -= toRead;
|
||||
}
|
||||
}
|
||||
|
||||
// Invalid sequences, or bytes left pending in the converter state after
// the whole chunk was consumed, make the text string invalid.
if (cs.invalidChars != 0 || cs.remainingChars != 0) {
|
||||
handleError(CborErrorInvalidUtf8TextString);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Shrink the worst-case allocation down to the number of QChars produced.
qsizetype size = ptr - begin;
|
||||
if (params.isString())
|
||||
params.string->truncate(size);
|
||||
return size;
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
Loading…
x
Reference in New Issue
Block a user