QStringConverter: Introduce finalize()
When doing a streaming conversion, it is not enough to check whether there are errors. Once all input has been consumed, one must also check whether there has been any data that was consumed, but not converted yet. Provide finalize() to do the check, set an error if there was incomplete data, and to optionally write replacement characters for it to an output buffer. [ChangeLog][QtCore][QStringDecoder] Added finalize(), a function to force the converter to consider the sequence of inputs as complete, flushing potential partial character sequences. [ChangeLog][QtCore][QStringEncoder] Added finalize(), a function to force the converter to consider the sequence of inputs as complete, flushing potential partial character sequences or restoring state for stateful text encodings. Change-Id: I5fe26ae8e5d1477a86b365cc49c430b057876893 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
479fb822bf
commit
f1c0bd2e06
@ -19,20 +19,28 @@ QByteArray encodedString = fromUtf16(string);
|
|||||||
auto toUtf16 = QStringDecoder(QStringDecoder::Utf8);
|
auto toUtf16 = QStringDecoder(QStringDecoder::Utf8);
|
||||||
|
|
||||||
QString string;
|
QString string;
|
||||||
while (new_data_available()) {
|
while (new_data_available() && !toUtf16.hasError()) {
|
||||||
QByteArray chunk = get_new_data();
|
QByteArray chunk = get_new_data();
|
||||||
string += toUtf16(chunk);
|
string += toUtf16(chunk);
|
||||||
}
|
}
|
||||||
|
auto result = toUtf16.finalize();
|
||||||
|
if (result.error != QStringDecoder::FinalizeResult::NoError) {
|
||||||
|
// Handle error
|
||||||
|
}
|
||||||
//! [2]
|
//! [2]
|
||||||
|
|
||||||
//! [3]
|
//! [3]
|
||||||
auto fromUtf16 = QStringEncoder(QStringEncoder::Utf8);
|
auto fromUtf16 = QStringEncoder(QStringEncoder::Utf8);
|
||||||
|
|
||||||
QByteArray encoded;
|
QByteArray encoded;
|
||||||
while (new_data_available()) {
|
while (new_data_available() && !fromUtf16.hasError()) {
|
||||||
QString chunk = get_new_data();
|
QString chunk = get_new_data();
|
||||||
encoded += fromUtf16(chunk);
|
encoded += fromUtf16(chunk);
|
||||||
}
|
}
|
||||||
|
auto result = fromUtf16.finalize();
|
||||||
|
if (result.error != QStringEncoder::FinalizeResult::NoError) {
|
||||||
|
// Handle error
|
||||||
|
}
|
||||||
//! [3]
|
//! [3]
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -39,9 +39,8 @@
|
|||||||
#include <QtCore/private/wcharhelpers_win_p.h>
|
#include <QtCore/private/wcharhelpers_win_p.h>
|
||||||
|
|
||||||
#include <QtCore/q20iterator.h>
|
#include <QtCore/q20iterator.h>
|
||||||
#include <QtCore/q26numeric.h>
|
|
||||||
#endif // !QT_BOOTSTRAPPED
|
#endif // !QT_BOOTSTRAPPED
|
||||||
#endif
|
#endif // Q_OS_WIN
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#if __has_include(<bit>) && __cplusplus > 201703L
|
#if __has_include(<bit>) && __cplusplus > 201703L
|
||||||
@ -49,6 +48,9 @@
|
|||||||
#endif
|
#endif
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <QtCore/q20utility.h>
|
#include <QtCore/q20utility.h>
|
||||||
|
#ifndef QT_BOOTSTRAPPED
|
||||||
|
#include <QtCore/q26numeric.h>
|
||||||
|
#endif // !QT_BOOTSTRAPPED
|
||||||
|
|
||||||
QT_BEGIN_NAMESPACE
|
QT_BEGIN_NAMESPACE
|
||||||
|
|
||||||
@ -2517,6 +2519,27 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForName(QAny
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifndef QT_BOOTSTRAPPED
|
#ifndef QT_BOOTSTRAPPED
|
||||||
|
namespace QtPrivate {
|
||||||
|
// Note: Check isValid() on the QStringConverter before calling this with its
|
||||||
|
// state!
|
||||||
|
/*
    Returns the number of input units that the converter behind \a state has
    consumed but not yet turned into output, or 0 when nothing is pending.

    For ICU-backed states the count is obtained from ICU. The state does not
    record whether it belongs to an encoder or a decoder, and both
    QStringEncoder::finalize() and QStringDecoder::finalize() call this
    helper, so both conversion directions are queried and the larger count is
    reported. For all other states the count is state->remainingChars,
    saturated to int.
*/
static int partiallyParsedDataCount(QStringConverter::State *state)
{
#if QT_CONFIG(icu)
    if (state->flags & QStringConverter::Flag::UsesIcu) {
        UConverter *converter = static_cast<UConverter *>(state->d[0]);
        if (!converter)
            return 0;
        // Use a separate status per query: ICU functions bail out early when
        // they are handed a status that already signals a failure.
        UErrorCode fromUnicodeStatus = U_ZERO_ERROR;
        UErrorCode toUnicodeStatus = U_ZERO_ERROR;
        const int32_t pendingFromUnicode =
                ucnv_fromUCountPending(converter, &fromUnicodeStatus);
        const int32_t pendingToUnicode =
                ucnv_toUCountPending(converter, &toUnicodeStatus);
        // If there is an error the respective count is -1, so clamping to 0
        // is all the error handling that is needed.
        return std::max({ pendingFromUnicode, pendingToUnicode, int32_t(0) });
    }
#endif
    return q26::saturate_cast<int>(state->remainingChars);
}
|
||||||
|
} // namespace QtPrivate
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Returns the encoding for the content of \a data if it can be determined.
|
Returns the encoding for the content of \a data if it can be determined.
|
||||||
\a expectedFirstCharacter can be passed as an additional hint to help determine
|
\a expectedFirstCharacter can be passed as an additional hint to help determine
|
||||||
@ -2684,6 +2707,205 @@ QStringList QStringConverter::availableCodecs()
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\class QStringConverter::FinalizeResultBase
|
||||||
|
\internal
|
||||||
|
*/
|
||||||
|
/*!
|
||||||
|
\class QStringConverter::FinalizeResultChar
|
||||||
|
\inmodule QtCore
|
||||||
|
\since 6.11
|
||||||
|
\reentrant
|
||||||
|
\brief Holds the result of calling finalize() on QStringDecoder or
|
||||||
|
QStringEncoder.
|
||||||
|
|
||||||
|
This class is used to relay the result of the finalize() call or the reason
|
||||||
|
why the call did not succeed.
|
||||||
|
*/
|
||||||
|
/*!
|
||||||
|
\enum QStringConverter::FinalizeResultBase::Error
|
||||||
|
\value NoError No error.
|
||||||
|
\value InvalidCharacters The converter successfully finalized, but encountered
|
||||||
|
invalid characters either during finalization or some time earlier.
|
||||||
|
\value NotEnoughSpace finalize() did \e{not} succeed, you must grow the
|
||||||
|
buffer and call finalize() again.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\variable QStringConverter::FinalizeResultChar::error
|
||||||
|
Relays errors discovered during finalization.
|
||||||
|
*/
|
||||||
|
/*!
|
||||||
|
\variable QStringConverter::FinalizeResultChar::next
|
||||||
|
Points to the character position \e{following} the last-written character.
|
||||||
|
*/
|
||||||
|
/*!
|
||||||
|
\variable QStringConverter::FinalizeResultChar::invalidChars
|
||||||
|
The number of invalid characters that were previously counted in the state
|
||||||
|
as well as any that were encountered during the call to finalize().
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\typedef QStringDecoder::FinalizeResult
|
||||||
|
|
||||||
|
This is an alias for QStringConverter::FinalizeResultChar<char16_t>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\typedef QStringDecoder::FinalizeResultQChar
|
||||||
|
|
||||||
|
This is an alias for QStringConverter::FinalizeResultChar<QChar>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\fn QStringDecoder::FinalizeResultQChar QStringDecoder::finalize(QChar *out, qsizetype maxlen)
|
||||||
|
\fn QStringDecoder::FinalizeResult QStringDecoder::finalize(char16_t *out, qsizetype maxlen)
|
||||||
|
\fn QStringDecoder::FinalizeResult QStringDecoder::finalize()
|
||||||
|
|
||||||
|
Signals to the decoder that no further data will arrive.
|
||||||
|
|
||||||
|
May also provide data from residual content that was pending decoding.
|
||||||
|
When there is no residual data to account for, the return's \c error
|
||||||
|
field will be set to \l {QStringConverter::FinalizeResultBase::Error::}
|
||||||
|
{NoError}.
|
||||||
|
|
||||||
|
If \a out is supplied and non-null, it must have space in which up to
|
||||||
|
\a maxlen characters may be written. Up to this many characters of
|
||||||
|
residual output are written to this space, with the end indicated by
|
||||||
|
the return-value's \c next field. Typically this residual data shall
|
||||||
|
consist of one replacement character per remaining unconverted input
|
||||||
|
character.
|
||||||
|
|
||||||
|
If all residual content has been delivered via \a out, if \a out is
|
||||||
|
\nullptr, or if there is no residual data, the decoder is reset on
|
||||||
|
return from finalize(). Otherwise, the remaining data can be retrieved
|
||||||
|
or discarded by a further call to finalize().
|
||||||
|
|
||||||
|
\since 6.11
|
||||||
|
\sa hasError(), appendToBuffer()
|
||||||
|
*/
|
||||||
|
auto QStringDecoder::finalize(char16_t *out, qsizetype maxlen) -> FinalizeResult
{
    using Error = FinalizeResult::Error;

    // Input that was consumed but never completed a character. An invalid
    // decoder has nothing pending.
    const int pending = isValid() ? QtPrivate::partiallyParsedDataCount(&state) : 0;

    // Snapshot the tally before the state gets reset below; every pending
    // unit counts as one more invalid character.
    const qint16 invalidChars = q26::saturate_cast<qint16>(state.invalidChars + pending);
    const Error outcome = invalidChars ? Error::InvalidCharacters : Error::NoError;

    const bool mustWrite = pending != 0 && out;
    if (mustWrite) {
        // Too small a buffer: report it and keep the state untouched so the
        // caller can retry with more room.
        if (maxlen < pending)
            return { {}, out, invalidChars, Error::NotEnoughSpace };

        const char16_t filler = (state.flags & QStringConverter::Flag::ConvertInvalidToNull)
                ? QChar::Null
                : QChar::ReplacementCharacter;
        out = std::fill_n(out, pending, filler);
    }

    resetState();
    return { {}, out, invalidChars, outcome };
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\typedef QStringEncoder::FinalizeResult
|
||||||
|
|
||||||
|
This is an alias for QStringConverter::FinalizeResultChar<char>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\fn QStringEncoder::FinalizeResult QStringEncoder::finalize(char *out, qsizetype maxlen)
|
||||||
|
\fn QStringEncoder::FinalizeResult QStringEncoder::finalize()
|
||||||
|
|
||||||
|
Signals to the encoder that no further data will arrive.
|
||||||
|
|
||||||
|
May also provide data from residual content that was pending encoding.
|
||||||
|
When there is no residual data to account for, the return's \c error
|
||||||
|
field will be set to \l {QStringConverter::FinalizeResultBase::Error::}
|
||||||
|
{NoError}.
|
||||||
|
|
||||||
|
If \a out is supplied and non-null, it must have space in which up to
|
||||||
|
\a maxlen characters may be written. Up to this many characters of
|
||||||
|
residual output are written to this space, with the end indicated by
|
||||||
|
the return-value's \c next field. Typically this residual data shall
|
||||||
|
consist of one replacement character per remaining unconverted input
|
||||||
|
character. When using a stateful encoding, such as ISO-2022-JP, this may
|
||||||
|
also write bytes to restore, or end, the current state in the character
|
||||||
|
stream.
|
||||||
|
|
||||||
|
If all residual content has been delivered via \a out, if \a out is
|
||||||
|
\nullptr, or if there is no residual data, the encoder is reset on
|
||||||
|
return from finalize(). Otherwise, the remaining data can be retrieved
|
||||||
|
or discarded by a further call to finalize().
|
||||||
|
|
||||||
|
\since 6.11
|
||||||
|
\sa hasError(), appendToBuffer()
|
||||||
|
*/
|
||||||
|
auto QStringEncoder::finalize(char *out, qsizetype maxlen) -> QStringEncoder::FinalizeResult
{
    using Error = FinalizeResult::Error;

    const bool valid = isValid();
    const qsizetype pending = valid ? QtPrivate::partiallyParsedDataCount(&state) : 0;

    // An ICU converter may be a stateful codec that has to restore or
    // terminate some state even when no input is pending; without ICU there
    // is nothing to do when pending == 0.
    const bool usesIcu = !!(state.flags & QStringConverter::Flag::UsesIcu) && !!state.d[0];

    // Snapshot the tally before any resetState() call below.
    const qint16 invalidChars = q26::saturate_cast<qint16>(state.invalidChars + pending);
    const Error outcome = invalidChars ? Error::InvalidCharacters : Error::NoError;

    const bool nothingToFlush = pending == 0 && !usesIcu;
    if (!valid || nothingToFlush || !out) {
        resetState();
        return { {}, out, invalidChars, outcome };
    }

#if defined(QT_USE_ICU_CODECS)
    if (usesIcu) {
        Q_ASSERT(out);
        auto *icuConverter = static_cast<UConverter *>(state.d[0]);
        Q_ASSERT(icuConverter); // usesIcu already checked that the pointer is non-null
        UErrorCode status = U_ZERO_ERROR;

        // If the QStringConverter was moved, the state that was registered as
        // the callback context is stale now; point ICU at the current one.
        UConverterFromUCallback callback;
        const void *callbackContext;
        ucnv_getFromUCallBack(icuConverter, &callback, &callbackContext);
        if (callbackContext != &state)
            ucnv_setFromUCallBack(icuConverter, callback, &state, nullptr, nullptr, &status);

        // Convert an empty input with the flush flag set.
        const UBool flush = true;
        const UChar *noInput = u"";
        const char *outLimit = out + maxlen;
        ucnv_fromUnicode(icuConverter, &out, outLimit, &noInput, noInput, nullptr, flush,
                         &status);
        if (status == U_BUFFER_OVERFLOW_ERROR)
            return { {}, out, invalidChars, Error::NotEnoughSpace };

        resetState();
        return { {}, out, invalidChars, outcome };
    }
#endif

    if (state.flags & QStringConverter::Flag::ConvertInvalidToNull) {
        // Output one Null character for each remaining unconverted input character.
        if (maxlen < pending)
            return { {}, out, invalidChars, Error::NotEnoughSpace };
        out = std::fill_n(out, pending, '\0');
        resetState();
    } else {
        /*
            We don't know (in general) what the replacement character looks
            like in the target encoding. So we just encode 0xfffd, which is
            the Unicode replacement character.
            Use 4 as a best guess for the upper bound of how many characters
            the leftover UTF-16 characters in the state could produce.
        */
        constexpr char16_t fffd = QChar(QChar::ReplacementCharacter).unicode();
        constexpr std::array<char16_t, 4> filler{ fffd, fffd, fffd, fffd };
        const qsizetype toEncode = std::min(pending, qsizetype(filler.size()));
        if (maxlen < requiredSpace(toEncode))
            return { {}, out, invalidChars, Error::NotEnoughSpace };
        // We are flushing the buffer, so the incomplete data must not remain
        // in the internal state while the replacement is encoded.
        resetState();
        out = appendToBuffer(out, QStringView(filler.data(), toEncode));
    }

    return { {}, out, invalidChars, outcome };
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Tries to determine the encoding of the HTML in \a data by looking at leading byte
|
Tries to determine the encoding of the HTML in \a data by looking at leading byte
|
||||||
order marks or a charset specifier in the HTML meta tag and returns a QStringDecoder
|
order marks or a charset specifier in the HTML meta tag and returns a QStringDecoder
|
||||||
|
@ -63,6 +63,13 @@ public:
|
|||||||
}
|
}
|
||||||
return iface->fromUtf16(out, in, &state);
|
return iface->fromUtf16(out, in, &state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using FinalizeResult = FinalizeResultChar<char>;
|
||||||
|
Q_REQUIRED_RESULT
|
||||||
|
Q_CORE_EXPORT FinalizeResult finalize(char *out, qsizetype maxlen);
|
||||||
|
// Finalizes the encoder without an output buffer; forwards to
// finalize(char *, qsizetype) with a null buffer and zero length.
Q_REQUIRED_RESULT
FinalizeResult finalize()
{
    char *const noBuffer = nullptr;
    return finalize(noBuffer, qsizetype(0));
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
QByteArray encodeAsByteArray(QStringView in)
|
QByteArray encodeAsByteArray(QStringView in)
|
||||||
{
|
{
|
||||||
@ -128,6 +135,22 @@ public:
|
|||||||
char16_t *appendToBuffer(char16_t *out, QByteArrayView ba)
|
char16_t *appendToBuffer(char16_t *out, QByteArrayView ba)
|
||||||
{ return reinterpret_cast<char16_t *>(appendToBuffer(reinterpret_cast<QChar *>(out), ba)); }
|
{ return reinterpret_cast<char16_t *>(appendToBuffer(reinterpret_cast<QChar *>(out), ba)); }
|
||||||
|
|
||||||
|
|
||||||
|
using FinalizeResult = FinalizeResultChar<char16_t>;
|
||||||
|
using FinalizeResultQChar = FinalizeResultChar<QChar>;
|
||||||
|
FinalizeResultQChar finalize(QChar *out, qsizetype maxlen)
|
||||||
|
{
|
||||||
|
auto r = finalize(reinterpret_cast<char16_t *>(out), maxlen);
|
||||||
|
return { {}, reinterpret_cast<QChar *>(r.next), r.invalidChars, r.error };
|
||||||
|
}
|
||||||
|
Q_REQUIRED_RESULT
|
||||||
|
Q_CORE_EXPORT FinalizeResult finalize(char16_t *out, qsizetype maxlen);
|
||||||
|
// Finalizes the decoder without an output buffer. The null pointer is given
// an explicit char16_t type so the call does not become ambiguous with the
// QChar overload.
Q_REQUIRED_RESULT
FinalizeResult finalize()
{
    char16_t *const noBuffer = nullptr;
    return finalize(noBuffer, qsizetype(0));
}
|
||||||
|
|
||||||
Q_CORE_EXPORT static QStringDecoder decoderForHtml(QByteArrayView data);
|
Q_CORE_EXPORT static QStringDecoder decoderForHtml(QByteArrayView data);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -169,6 +169,25 @@ public:
|
|||||||
|
|
||||||
Q_CORE_EXPORT static QStringList availableCodecs();
|
Q_CORE_EXPORT static QStringList availableCodecs();
|
||||||
|
|
||||||
|
|
||||||
|
struct FinalizeResultBase
|
||||||
|
{
|
||||||
|
enum Error : quint8 {
|
||||||
|
NoError,
|
||||||
|
InvalidCharacters,
|
||||||
|
NotEnoughSpace,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
template <typename Char>
|
||||||
|
struct FinalizeResultChar : FinalizeResultBase
|
||||||
|
{
|
||||||
|
using Error = FinalizeResultBase::Error;
|
||||||
|
|
||||||
|
Char *next;
|
||||||
|
qint16 invalidChars;
|
||||||
|
Error error;
|
||||||
|
};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const Interface *iface;
|
const Interface *iface;
|
||||||
State state;
|
State state;
|
||||||
|
@ -196,6 +196,9 @@ private slots:
|
|||||||
|
|
||||||
void availableCodesAreAvailable();
|
void availableCodesAreAvailable();
|
||||||
|
|
||||||
|
void finalize();
|
||||||
|
void finalizeStateful();
|
||||||
|
|
||||||
#ifdef Q_OS_WIN
|
#ifdef Q_OS_WIN
|
||||||
// On all other systems local 8-bit encoding is UTF-8
|
// On all other systems local 8-bit encoding is UTF-8
|
||||||
void fromLocal8Bit_data();
|
void fromLocal8Bit_data();
|
||||||
@ -2491,6 +2494,104 @@ void tst_QStringConverter::availableCodesAreAvailable()
|
|||||||
QVERIFY(QStringEncoder(codecName.toLatin1()).isValid());
|
QVERIFY(QStringEncoder(codecName.toLatin1()).isValid());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tst_QStringConverter::finalize()
|
||||||
|
{
|
||||||
|
// encoder
|
||||||
|
{
|
||||||
|
auto fromUtf16 = QStringEncoder(QStringEncoder::Utf8);
|
||||||
|
QString incompleteInput(QChar(0xd800));
|
||||||
|
QByteArray buffer("cdcdcdcd");
|
||||||
|
fromUtf16.appendToBuffer(buffer.data(), incompleteInput);
|
||||||
|
QVERIFY(!fromUtf16.hasError());
|
||||||
|
QCOMPARE(buffer, "cdcdcdcd");
|
||||||
|
QStringEncoder::FinalizeResult r = fromUtf16.finalize(buffer.data(), buffer.size());
|
||||||
|
QCOMPARE_GT(r.next, buffer.constData());
|
||||||
|
QCOMPARE(r.error, QStringEncoder::FinalizeResult::Error::InvalidCharacters);
|
||||||
|
QCOMPARE_GT(r.invalidChars, 0);
|
||||||
|
QVERIFY(!fromUtf16.hasError());
|
||||||
|
QVERIFY(buffer.startsWith(QString(QChar(QChar::ReplacementCharacter)).toUtf8()));
|
||||||
|
// Try calling finalize again, no new bytes should be output
|
||||||
|
std::array<char, 3> extraBytes;
|
||||||
|
r = fromUtf16.finalize(extraBytes.data(), extraBytes.size());
|
||||||
|
// Ugly-cast to void to circumvent smart testlib
|
||||||
|
QCOMPARE((void *)r.next, (void *)extraBytes.data());
|
||||||
|
QCOMPARE(r.invalidChars, 0);
|
||||||
|
QCOMPARE(r.error, QStringEncoder::FinalizeResult::Error::NoError);
|
||||||
|
}
|
||||||
|
// decoder
|
||||||
|
{
|
||||||
|
auto toUtf16 = QStringDecoder(QStringConverter::Utf8);
|
||||||
|
QByteArray incompleteInput("\xf0", 1);
|
||||||
|
QString buffer = u"cdcdcdcd"_s;
|
||||||
|
toUtf16.appendToBuffer(buffer.data(), incompleteInput);
|
||||||
|
QVERIFY(!toUtf16.hasError());
|
||||||
|
QCOMPARE(buffer, u"cdcdcdcd"_s);
|
||||||
|
auto result = toUtf16.finalize(buffer.data(), buffer.size());
|
||||||
|
QCOMPARE_GT(result.next, buffer.constData());
|
||||||
|
QCOMPARE(result.error, QStringDecoder::FinalizeResult::Error::InvalidCharacters);
|
||||||
|
QVERIFY(buffer.startsWith(QChar(QChar::ReplacementCharacter)));
|
||||||
|
// Try calling finalize again, no new bytes should be output
|
||||||
|
std::array<QChar, 3> extraBytes;
|
||||||
|
result = toUtf16.finalize(extraBytes.data(), extraBytes.size());
|
||||||
|
// Ugly-cast to void to circumvent smart testlib
|
||||||
|
QCOMPARE((void *)result.next, (void *)extraBytes.data());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_QStringConverter::finalizeStateful()
|
||||||
|
{
|
||||||
|
#if !QT_CONFIG(icu) && !QT_CONFIG(winsdkicu)
|
||||||
|
// Technically there is _access_ to stateful encoding on Windows, but only
|
||||||
|
// through the System encoder.
|
||||||
|
QSKIP("ICU is not enabled in this build => stateful encoding is not tested.");
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
// Test that calling finalize() restores ASCII mode in this stateful encoding:
|
||||||
|
static const char expected[] = {
|
||||||
|
0x1b, 0x24, 0x42, 0x25, 0x26, 0x25, 0x23, 0x25, 0x2d, 0x25, 0x5a, 0x25,
|
||||||
|
0x47, 0x25, 0x23, 0x25, 0x22, 0x1b, 0x28, 0x42
|
||||||
|
};
|
||||||
|
QString input = u"ウィキペディア"_s; // "Wikipedia"
|
||||||
|
QByteArray buffer(20, '\0');
|
||||||
|
auto stateful = QStringEncoder("ISO-2022-JP");
|
||||||
|
if (!stateful.isValid())
|
||||||
|
QSKIP("ICU without support for ISO-2022-JP, cannot continue test.");
|
||||||
|
char *out = stateful.appendToBuffer(buffer.data(), input);
|
||||||
|
QCOMPARE(std::distance(buffer.data(), out), 17);
|
||||||
|
// First without enough space. We assume ICU may or may not output the
|
||||||
|
// start of the 1b 28 42 sequence, so we handle either.
|
||||||
|
char * const end = buffer.end();
|
||||||
|
QStringEncoder::FinalizeResult result = stateful.finalize(out, 1);
|
||||||
|
QCOMPARE(result.error, QStringEncoder::FinalizeResult::Error::NotEnoughSpace);
|
||||||
|
// Then with enough space
|
||||||
|
result = stateful.finalize(result.next, std::distance(result.next, end));
|
||||||
|
QCOMPARE((void *)result.next, (void *)buffer.constEnd());
|
||||||
|
QCOMPARE(buffer.toHex(' '), QByteArrayView(expected).toByteArray().toHex(' '));
|
||||||
|
QCOMPARE(result.invalidChars, 0);
|
||||||
|
// Try calling finalize again, no new bytes should be output
|
||||||
|
std::array<char, 3> extraBytes;
|
||||||
|
result = stateful.finalize(extraBytes.data(), extraBytes.size());
|
||||||
|
QCOMPARE((void *)result.next, (void *)extraBytes.data());
|
||||||
|
QCOMPARE(result.error, QStringEncoder::FinalizeResult::Error::NoError);
|
||||||
|
QCOMPARE(result.invalidChars, 0);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
// Repeat, but calling finalize() without an output
|
||||||
|
QString input = u"ウィキペディア"_s; // "Wikipedia"
|
||||||
|
QByteArray buffer(20, '\0');
|
||||||
|
auto stateful = QStringEncoder("ISO-2022-JP");
|
||||||
|
QVERIFY(stateful.isValid());
|
||||||
|
char *out = stateful.appendToBuffer(buffer.data(), input);
|
||||||
|
QCOMPARE(std::distance(buffer.data(), out), 17);
|
||||||
|
// This passes some pointers to ICU, we just shouldn't crash
|
||||||
|
QStringEncoder::FinalizeResult r = stateful.finalize();
|
||||||
|
QCOMPARE(r.error, QStringEncoder::FinalizeResult::Error::NoError);
|
||||||
|
QCOMPARE(r.invalidChars, 0);
|
||||||
|
QCOMPARE(r.next, nullptr);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
class LoadAndConvert: public QRunnable
|
class LoadAndConvert: public QRunnable
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user