nodejs/test/parallel/test-string-decoder-end.js
Brian White 24ef1e6775 string_decoder: align UTF-8 handling with V8
V8 5.5 changed how invalid characters are handled and it now appears
to follow the WHATWG Encoding standard, where all of an invalid
character's bytes are replaced by a single replacement character
(\ufffd) instead of replacing each invalid byte with separate
replacement characters.

Example: the byte sequence 0xF0,0xB8,0x41 is decoded as '\ufffdA' in
V8 5.5, but is decoded as '\ufffd\ufffdA' in previous versions of V8.

PR-URL: https://github.com/nodejs/node/pull/9618
Reviewed-By: Ali Ijaz Sheikh <ofrobots@google.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
2017-01-26 22:46:18 +01:00

48 lines
1.3 KiB
JavaScript

'use strict';
// Verify that the string decoder produces the same output whether it is fed
// one byte at a time or the whole buffer at once, and that both results match
// the .toString(enc) result of the entire buffer.
require('../common');
const assert = require('assert');
const SD = require('string_decoder').StringDecoder;
// Encodings exercised by this test; each one is run against every buffer.
const encodings = ['base64', 'hex', 'utf8', 'utf16le', 'ucs2'];

// Start with two text samples: one made of non-ASCII characters, one ASCII.
const bufs = ['☃💩', 'asdf'].map((text) => Buffer.from(text));

// Also add one buffer for every length from 1 to 16, each holding the
// consecutive byte values 0x78, 0x79, ... (the longer ones go past 0x7f).
for (let length = 1; length <= 16; length += 1) {
  const bytes = Array.from({ length }, (_, offset) => 0x78 + offset);
  bufs.push(Buffer.from(bytes));
}

// Run every encoding against every buffer.
encodings.forEach(testEncoding);
/**
 * Run testBuf() for the given encoding against every buffer in `bufs`.
 *
 * @param {string} encoding - Encoding name forwarded to testBuf().
 */
function testEncoding(encoding) {
  for (const buf of bufs) {
    testBuf(encoding, buf);
  }
}
/**
 * Decode `buf` with a StringDecoder for `encoding` in two ways -- one byte
 * per write() call, and the whole buffer in a single write() -- and assert
 * that each result equals buf.toString(encoding).
 *
 * @param {string} encoding - Encoding name given to the decoder and toString.
 * @param {Buffer} buf - Bytes to decode.
 */
function testBuf(encoding, buf) {
  // Byte by byte: append what each write() returns, then what end() flushes.
  const byteDecoder = new SD(encoding);
  let oneByteAtATime = '';
  for (let offset = 0; offset < buf.length; offset += 1) {
    const singleByte = buf.slice(offset, offset + 1);
    oneByteAtATime += byteDecoder.write(singleByte);
  }
  oneByteAtATime += byteDecoder.end();

  // Single shot: one write() of the full buffer followed by end().
  const wholeDecoder = new SD(encoding);
  let allAtOnce = '';
  allAtOnce += wholeDecoder.write(buf);
  allAtOnce += wholeDecoder.end();

  // Reference value: decoding the entire buffer directly.
  const expected = buf.toString(encoding);

  assert.strictEqual(oneByteAtATime, expected,
                     'one byte at a time should match toString');
  assert.strictEqual(allAtOnce, expected,
                     'all bytes at once should match toString');
}