string_decoder: fix number of replacement chars

Fixes: https://github.com/nodejs/node/issues/22626

PR-URL: https://github.com/nodejs/node/pull/22709
Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
This commit is contained in:
Anna Henningsen 2018-09-05 14:24:45 +02:00
parent ab6ddc0634
commit 06f6ac179c
No known key found for this signature in database
GPG Key ID: 9C63F3A6CD2AD8F9
2 changed files with 18 additions and 6 deletions

View File

@ -71,16 +71,17 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
kIncompleteCharactersEnd);
if (Encoding() == UTF8) {
// For UTF-8, we need special treatment to align with the V8 decoder:
// If an incomplete character is found at a chunk boundary, we turn
// that character into a single invalid one.
// If an incomplete character is found at a chunk boundary, we use
// its remainder and pass it to V8 as-is.
for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
if ((data[i] & 0xC0) != 0x80) {
// This byte is not a continuation byte even though it should have
// been one.
// Act as if there was a 1-byte incomplete character, which does
// not make sense but works here because we know it's invalid.
// been one. We stop decoding of the incomplete character at this
// point (but still use the rest of the incomplete bytes from this
// chunk) and assume that the new, unexpected byte starts a new one.
state_[kMissingBytes] = 0;
state_[kBufferedBytes] = 1;
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
state_[kBufferedBytes] += i;
data += i;
nread -= i;
break;

View File

@ -162,6 +162,17 @@ assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10));
assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
assert.strictEqual(decoder.end(), '');
// Regression tests for https://github.com/nodejs/node/issues/22626
// (not enough replacement chars when having seen more than one byte of an
// incomplete multibyte character).
// Case 1: the incomplete character is interrupted by the first byte of the
// next chunk.
decoder = new StringDecoder('utf8');
// 0xf6 0x9b is held back as the start of a multibyte character, so nothing is
// emitted yet.
assert.strictEqual(decoder.write(Buffer.from('f69b', 'hex')), '');
// 0xd1 is not a continuation byte ((0xd1 & 0xc0) !== 0x80). The two bytes
// buffered so far are expected to come out as two U+FFFD (one per byte).
assert.strictEqual(decoder.write(Buffer.from('d1', 'hex')), '\ufffd\ufffd');
// 0xd1 is itself still pending at this point; end() is expected to flush it
// as a single U+FFFD.
assert.strictEqual(decoder.end(), '\ufffd');
// Case 2 (same decoder instance, reused after end()): the interruption
// happens in the middle of the next chunk.
assert.strictEqual(decoder.write(Buffer.from('f4', 'hex')), '');
// 0xbd is a continuation byte but 0xe5 is not. The expected output is two
// U+FFFD, presumably for 0xf4 and 0xbd, with 0xe5 left pending.
assert.strictEqual(decoder.write(Buffer.from('bde5', 'hex')), '\ufffd\ufffd');
// The pending byte is flushed as one U+FFFD.
assert.strictEqual(decoder.end(), '\ufffd');
common.expectsError(
() => new StringDecoder(1),
{