string_decoder: fix number of replacement chars
Fixes: https://github.com/nodejs/node/issues/22626
PR-URL: https://github.com/nodejs/node/pull/22709
Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
This commit is contained in:
parent
ab6ddc0634
commit
06f6ac179c
@ -71,16 +71,17 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
|
|||||||
kIncompleteCharactersEnd);
|
kIncompleteCharactersEnd);
|
||||||
if (Encoding() == UTF8) {
|
if (Encoding() == UTF8) {
|
||||||
// For UTF-8, we need special treatment to align with the V8 decoder:
|
// For UTF-8, we need special treatment to align with the V8 decoder:
|
||||||
// If an incomplete character is found at a chunk boundary, we turn
|
// If an incomplete character is found at a chunk boundary, we use
|
||||||
// that character into a single invalid one.
|
// its remainder and pass it to V8 as-is.
|
||||||
for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
|
for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
|
||||||
if ((data[i] & 0xC0) != 0x80) {
|
if ((data[i] & 0xC0) != 0x80) {
|
||||||
// This byte is not a continuation byte even though it should have
|
// This byte is not a continuation byte even though it should have
|
||||||
// been one.
|
// been one. We stop decoding of the incomplete character at this
|
||||||
// Act as if there was a 1-byte incomplete character, which does
|
// point (but still use the rest of the incomplete bytes from this
|
||||||
// not make sense but works here because we know it's invalid.
|
// chunk) and assume that the new, unexpected byte starts a new one.
|
||||||
state_[kMissingBytes] = 0;
|
state_[kMissingBytes] = 0;
|
||||||
state_[kBufferedBytes] = 1;
|
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
|
||||||
|
state_[kBufferedBytes] += i;
|
||||||
data += i;
|
data += i;
|
||||||
nread -= i;
|
nread -= i;
|
||||||
break;
|
break;
|
||||||
|
@ -162,6 +162,17 @@ assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10));
|
|||||||
assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
|
assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
|
||||||
assert.strictEqual(decoder.end(), '');
|
assert.strictEqual(decoder.end(), '');
|
||||||
|
|
||||||
|
// Regression tests for https://github.com/nodejs/node/issues/22626
|
||||||
|
// (not enough replacement chars when having seen more than one byte of an
|
||||||
|
// incomplete multibyte character).
|
||||||
|
decoder = new StringDecoder('utf8');
|
||||||
|
assert.strictEqual(decoder.write(Buffer.from('f69b', 'hex')), '');
|
||||||
|
assert.strictEqual(decoder.write(Buffer.from('d1', 'hex')), '\ufffd\ufffd');
|
||||||
|
assert.strictEqual(decoder.end(), '\ufffd');
|
||||||
|
assert.strictEqual(decoder.write(Buffer.from('f4', 'hex')), '');
|
||||||
|
assert.strictEqual(decoder.write(Buffer.from('bde5', 'hex')), '\ufffd\ufffd');
|
||||||
|
assert.strictEqual(decoder.end(), '\ufffd');
|
||||||
|
|
||||||
common.expectsError(
|
common.expectsError(
|
||||||
() => new StringDecoder(1),
|
() => new StringDecoder(1),
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user