string_decoder: fix number of replacement chars
Fixes: https://github.com/nodejs/node/issues/22626
PR-URL: https://github.com/nodejs/node/pull/22709
Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
This commit is contained in:
parent
ab6ddc0634
commit
06f6ac179c
@ -71,16 +71,17 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
|
||||
kIncompleteCharactersEnd);
|
||||
if (Encoding() == UTF8) {
|
||||
// For UTF-8, we need special treatment to align with the V8 decoder:
|
||||
// If an incomplete character is found at a chunk boundary, we turn
|
||||
// that character into a single invalid one.
|
||||
// If an incomplete character is found at a chunk boundary, we use
|
||||
// its remainder and pass it to V8 as-is.
|
||||
for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
|
||||
if ((data[i] & 0xC0) != 0x80) {
|
||||
// This byte is not a continuation byte even though it should have
|
||||
// been one.
|
||||
// Act as if there was a 1-byte incomplete character, which does
|
||||
// not make sense but works here because we know it's invalid.
|
||||
// been one. We stop decoding of the incomplete character at this
|
||||
// point (but still use the rest of the incomplete bytes from this
|
||||
// chunk) and assume that the new, unexpected byte starts a new one.
|
||||
state_[kMissingBytes] = 0;
|
||||
state_[kBufferedBytes] = 1;
|
||||
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
|
||||
state_[kBufferedBytes] += i;
|
||||
data += i;
|
||||
nread -= i;
|
||||
break;
|
||||
|
@ -162,6 +162,17 @@ assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10));
|
||||
assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
|
||||
assert.strictEqual(decoder.end(), '');
|
||||
|
||||
// Regression tests for https://github.com/nodejs/node/issues/22626
|
||||
// (not enough replacement chars when having seen more than one byte of an
|
||||
// incomplete multibyte character).
|
||||
decoder = new StringDecoder('utf8');
|
||||
assert.strictEqual(decoder.write(Buffer.from('f69b', 'hex')), '');
|
||||
assert.strictEqual(decoder.write(Buffer.from('d1', 'hex')), '\ufffd\ufffd');
|
||||
assert.strictEqual(decoder.end(), '\ufffd');
|
||||
assert.strictEqual(decoder.write(Buffer.from('f4', 'hex')), '');
|
||||
assert.strictEqual(decoder.write(Buffer.from('bde5', 'hex')), '\ufffd\ufffd');
|
||||
assert.strictEqual(decoder.end(), '\ufffd');
|
||||
|
||||
common.expectsError(
|
||||
() => new StringDecoder(1),
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user