util: graduate TextEncoder/TextDecoder, tests
Add tests ported from Web Platform Tests. Graduate TextEncoder / TextDecoder from experimental.

PR-URL: https://github.com/nodejs/node/pull/15743
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Refael Ackermann <refack@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: Timothy Gu <timothygu99@gmail.com>
This commit is contained in:
parent
b8bc652869
commit
7f9eb4c29c
@ -551,8 +551,6 @@ see [Custom promisified functions][].
|
|||||||
added: v8.3.0
|
added: v8.3.0
|
||||||
-->
|
-->
|
||||||
|
|
||||||
> Stability: 1 - Experimental
|
|
||||||
|
|
||||||
An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API.
|
An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API.
|
||||||
|
|
||||||
```js
|
```js
|
||||||
@ -690,8 +688,6 @@ mark.
|
|||||||
added: v8.3.0
|
added: v8.3.0
|
||||||
-->
|
-->
|
||||||
|
|
||||||
> Stability: 1 - Experimental
|
|
||||||
|
|
||||||
An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All
|
An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All
|
||||||
instances of `TextEncoder` only support UTF-8 encoding.
|
instances of `TextEncoder` only support UTF-8 encoding.
|
||||||
|
|
||||||
|
@ -10,11 +10,6 @@ const kEncoding = Symbol('encoding');
|
|||||||
const kDecoder = Symbol('decoder');
|
const kDecoder = Symbol('decoder');
|
||||||
const kEncoder = Symbol('encoder');
|
const kEncoder = Symbol('encoder');
|
||||||
|
|
||||||
let warned = false;
|
|
||||||
const experimental =
|
|
||||||
'The WHATWG Encoding Standard implementation is an experimental API. It ' +
|
|
||||||
'should not yet be used in production applications.';
|
|
||||||
|
|
||||||
const {
|
const {
|
||||||
getConstructorOf,
|
getConstructorOf,
|
||||||
customInspectSymbol: inspect
|
customInspectSymbol: inspect
|
||||||
@ -289,11 +284,6 @@ function getEncodingFromLabel(label) {
|
|||||||
|
|
||||||
class TextEncoder {
|
class TextEncoder {
|
||||||
constructor() {
|
constructor() {
|
||||||
if (!warned) {
|
|
||||||
warned = true;
|
|
||||||
process.emitWarning(experimental, 'ExperimentalWarning');
|
|
||||||
}
|
|
||||||
|
|
||||||
this[kEncoder] = true;
|
this[kEncoder] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -353,11 +343,6 @@ function makeTextDecoderICU() {
|
|||||||
|
|
||||||
class TextDecoder {
|
class TextDecoder {
|
||||||
constructor(encoding = 'utf-8', options = {}) {
|
constructor(encoding = 'utf-8', options = {}) {
|
||||||
if (!warned) {
|
|
||||||
warned = true;
|
|
||||||
process.emitWarning(experimental, 'ExperimentalWarning');
|
|
||||||
}
|
|
||||||
|
|
||||||
encoding = `${encoding}`;
|
encoding = `${encoding}`;
|
||||||
if (typeof options !== 'object')
|
if (typeof options !== 'object')
|
||||||
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
|
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
|
||||||
@ -430,11 +415,6 @@ function makeTextDecoderJS() {
|
|||||||
|
|
||||||
class TextDecoder {
|
class TextDecoder {
|
||||||
constructor(encoding = 'utf-8', options = {}) {
|
constructor(encoding = 'utf-8', options = {}) {
|
||||||
if (!warned) {
|
|
||||||
warned = true;
|
|
||||||
process.emitWarning(experimental, 'ExperimentalWarning');
|
|
||||||
}
|
|
||||||
|
|
||||||
encoding = `${encoding}`;
|
encoding = `${encoding}`;
|
||||||
if (typeof options !== 'object')
|
if (typeof options !== 'object')
|
||||||
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
|
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
|
||||||
|
76
test/parallel/test-whatwg-encoding-fatal-streaming.js
Normal file
76
test/parallel/test-whatwg-encoding-fatal-streaming.js
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
// From: https://github.com/w3c/web-platform-tests/blob/d74324b53c/encoding/textdecoder-fatal-streaming.html
|
||||||
|
|
||||||
|
const common = require('../common');
|
||||||
|
|
||||||
|
if (!common.hasIntl)
|
||||||
|
common.skip('missing Intl');
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const {
|
||||||
|
TextDecoder
|
||||||
|
} = require('util');
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
[
|
||||||
|
{ encoding: 'utf-8', sequence: [0xC0] },
|
||||||
|
{ encoding: 'utf-16le', sequence: [0x00] },
|
||||||
|
{ encoding: 'utf-16be', sequence: [0x00] }
|
||||||
|
].forEach((testCase) => {
|
||||||
|
const data = new Uint8Array([testCase.sequence]);
|
||||||
|
common.expectsError(
|
||||||
|
() => {
|
||||||
|
const decoder = new TextDecoder(testCase.encoding, { fatal: true });
|
||||||
|
decoder.decode(data);
|
||||||
|
}, {
|
||||||
|
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
|
||||||
|
type: TypeError,
|
||||||
|
message:
|
||||||
|
`The encoded data was not valid for encoding ${testCase.encoding}`
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.strictEqual(
|
||||||
|
new TextDecoder(testCase.encoding).decode(data),
|
||||||
|
'\uFFFD'
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const decoder = new TextDecoder('utf-16le', { fatal: true });
|
||||||
|
const odd = new Uint8Array([0x00]);
|
||||||
|
const even = new Uint8Array([0x00, 0x00]);
|
||||||
|
|
||||||
|
assert.strictEqual(decoder.decode(odd, { stream: true }), '');
|
||||||
|
assert.strictEqual(decoder.decode(odd), '\u0000');
|
||||||
|
|
||||||
|
common.expectsError(
|
||||||
|
() => {
|
||||||
|
decoder.decode(even, { stream: true });
|
||||||
|
decoder.decode(odd);
|
||||||
|
}, {
|
||||||
|
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
|
||||||
|
type: TypeError,
|
||||||
|
message:
|
||||||
|
'The encoded data was not valid for encoding utf-16le'
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
common.expectsError(
|
||||||
|
() => {
|
||||||
|
decoder.decode(odd, { stream: true });
|
||||||
|
decoder.decode(even);
|
||||||
|
}, {
|
||||||
|
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
|
||||||
|
type: TypeError,
|
||||||
|
message:
|
||||||
|
'The encoded data was not valid for encoding utf-16le'
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.strictEqual(decoder.decode(even, { stream: true }), '\u0000');
|
||||||
|
assert.strictEqual(decoder.decode(even), '\u0000');
|
||||||
|
}
|
@ -2,6 +2,7 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
require('../common');
|
require('../common');
|
||||||
|
|
||||||
const assert = require('assert');
|
const assert = require('assert');
|
||||||
const { getEncodingFromLabel } = require('internal/encoding');
|
const { getEncodingFromLabel } = require('internal/encoding');
|
||||||
|
|
||||||
|
56
test/parallel/test-whatwg-encoding-surrogates-utf8.js
Normal file
56
test/parallel/test-whatwg-encoding-surrogates-utf8.js
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/api-surrogates-utf8.html
|
||||||
|
|
||||||
|
require('../common');
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const {
|
||||||
|
TextDecoder,
|
||||||
|
TextEncoder
|
||||||
|
} = require('util');
|
||||||
|
|
||||||
|
const badStrings = [
|
||||||
|
{
|
||||||
|
input: 'abc123',
|
||||||
|
expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
|
||||||
|
decoded: 'abc123',
|
||||||
|
name: 'Sanity check'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: '\uD800',
|
||||||
|
expected: [0xef, 0xbf, 0xbd],
|
||||||
|
decoded: '\uFFFD',
|
||||||
|
name: 'Surrogate half (low)'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: '\uDC00',
|
||||||
|
expected: [0xef, 0xbf, 0xbd],
|
||||||
|
decoded: '\uFFFD',
|
||||||
|
name: 'Surrogate half (high)'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: 'abc\uD800123',
|
||||||
|
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
|
||||||
|
decoded: 'abc\uFFFD123',
|
||||||
|
name: 'Surrogate half (low), in a string'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: 'abc\uDC00123',
|
||||||
|
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
|
||||||
|
decoded: 'abc\uFFFD123',
|
||||||
|
name: 'Surrogate half (high), in a string'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: '\uDC00\uD800',
|
||||||
|
expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
|
||||||
|
decoded: '\uFFFD\uFFFD',
|
||||||
|
name: 'Wrong order'
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
badStrings.forEach((t) => {
|
||||||
|
const encoded = new TextEncoder().encode(t.input);
|
||||||
|
assert.deepStrictEqual([].slice.call(encoded), t.expected);
|
||||||
|
assert.strictEqual(new TextDecoder('utf-8').decode(encoded), t.decoded);
|
||||||
|
});
|
93
test/parallel/test-whatwg-encoding-textdecoder-fatal.js
Normal file
93
test/parallel/test-whatwg-encoding-textdecoder-fatal.js
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
// From: https://github.com/w3c/web-platform-tests/blob/39a67e2fff/encoding/textdecoder-fatal.html
|
||||||
|
|
||||||
|
const common = require('../common');
|
||||||
|
|
||||||
|
if (!common.hasIntl)
|
||||||
|
common.skip('missing Intl');
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const {
|
||||||
|
TextDecoder
|
||||||
|
} = require('util');
|
||||||
|
|
||||||
|
const bad = [
|
||||||
|
{ encoding: 'utf-8', input: [0xFF], name: 'invalid code' },
|
||||||
|
{ encoding: 'utf-8', input: [0xC0], name: 'ends early' },
|
||||||
|
{ encoding: 'utf-8', input: [0xE0], name: 'ends early 2' },
|
||||||
|
{ encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' },
|
||||||
|
{ encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' },
|
||||||
|
{ encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' },
|
||||||
|
{ encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' },
|
||||||
|
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' },
|
||||||
|
{ encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' },
|
||||||
|
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
|
||||||
|
name: '> 0x10FFFF' },
|
||||||
|
{ encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80],
|
||||||
|
name: 'obsolete lead byte' },
|
||||||
|
// Overlong encodings
|
||||||
|
{ encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x80],
|
||||||
|
name: 'overlong U+0000 - 3 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80],
|
||||||
|
name: 'overlong U+0000 - 4 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80],
|
||||||
|
name: 'overlong U+0000 - 5 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
|
||||||
|
name: 'overlong U+0000 - 6 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xE0, 0x81, 0xBF],
|
||||||
|
name: 'overlong U+007F - 3 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF],
|
||||||
|
name: 'overlong U+007F - 4 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF],
|
||||||
|
name: 'overlong U+007F - 5 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF],
|
||||||
|
name: 'overlong U+007F - 6 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF],
|
||||||
|
name: 'overlong U+07FF - 3 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF],
|
||||||
|
name: 'overlong U+07FF - 4 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF],
|
||||||
|
name: 'overlong U+07FF - 5 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF],
|
||||||
|
name: 'overlong U+07FF - 6 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF],
|
||||||
|
name: 'overlong U+FFFF - 4 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF],
|
||||||
|
name: 'overlong U+FFFF - 5 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF],
|
||||||
|
name: 'overlong U+FFFF - 6 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF],
|
||||||
|
name: 'overlong U+10FFFF - 5 bytes' },
|
||||||
|
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF],
|
||||||
|
name: 'overlong U+10FFFF - 6 bytes' },
|
||||||
|
// UTF-16 surrogates encoded as code points in UTF-8
|
||||||
|
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' },
|
||||||
|
{ encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' },
|
||||||
|
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80],
|
||||||
|
name: 'surrogate pair' },
|
||||||
|
{ encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' },
|
||||||
|
// Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html
|
||||||
|
// FIXME: Add legacy encoding cases
|
||||||
|
];
|
||||||
|
|
||||||
|
bad.forEach((t) => {
|
||||||
|
common.expectsError(
|
||||||
|
() => {
|
||||||
|
new TextDecoder(t.encoding, { fatal: true })
|
||||||
|
.decode(new Uint8Array(t.input));
|
||||||
|
}, {
|
||||||
|
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
|
||||||
|
type: TypeError
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
{
|
||||||
|
assert('fatal' in new TextDecoder());
|
||||||
|
assert.strictEqual(typeof new TextDecoder().fatal, 'boolean');
|
||||||
|
assert(!new TextDecoder().fatal);
|
||||||
|
assert(new TextDecoder('utf-8', { fatal: true }).fatal);
|
||||||
|
}
|
50
test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js
Normal file
50
test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
// From: https://github.com/w3c/web-platform-tests/blob/7f567fa29c/encoding/textdecoder-ignorebom.html
|
||||||
|
|
||||||
|
const common = require('../common');
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const {
|
||||||
|
TextDecoder
|
||||||
|
} = require('util');
|
||||||
|
|
||||||
|
const cases = [
|
||||||
|
{
|
||||||
|
encoding: 'utf-8',
|
||||||
|
bytes: [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63],
|
||||||
|
skipNoIntl: false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
encoding: 'utf-16le',
|
||||||
|
bytes: [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00],
|
||||||
|
skipNoIntl: false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
encoding: 'utf-16be',
|
||||||
|
bytes: [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63],
|
||||||
|
skipNoIntl: true
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
cases.forEach((testCase) => {
|
||||||
|
if (testCase.skipNoIntl && !common.hasIntl) {
|
||||||
|
console.log(`skipping ${testCase.encoding} because missing Intl`);
|
||||||
|
return; // skipping
|
||||||
|
}
|
||||||
|
const BOM = '\uFEFF';
|
||||||
|
let decoder = new TextDecoder(testCase.encoding, { ignoreBOM: true });
|
||||||
|
const bytes = new Uint8Array(testCase.bytes);
|
||||||
|
assert.strictEqual(decoder.decode(bytes), `${BOM}abc`);
|
||||||
|
decoder = new TextDecoder(testCase.encoding, { ignoreBOM: false });
|
||||||
|
assert.strictEqual(decoder.decode(bytes), 'abc');
|
||||||
|
decoder = new TextDecoder(testCase.encoding);
|
||||||
|
assert.strictEqual(decoder.decode(bytes), 'abc');
|
||||||
|
});
|
||||||
|
|
||||||
|
{
|
||||||
|
assert('ignoreBOM' in new TextDecoder());
|
||||||
|
assert.strictEqual(typeof new TextDecoder().ignoreBOM, 'boolean');
|
||||||
|
assert(!new TextDecoder().ignoreBOM);
|
||||||
|
assert(new TextDecoder('utf-8', { ignoreBOM: true }).ignoreBOM);
|
||||||
|
}
|
49
test/parallel/test-whatwg-encoding-textdecoder-streaming.js
Normal file
49
test/parallel/test-whatwg-encoding-textdecoder-streaming.js
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/textdecoder-streaming.html
|
||||||
|
|
||||||
|
const common = require('../common');
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const {
|
||||||
|
TextDecoder
|
||||||
|
} = require('util');
|
||||||
|
|
||||||
|
const string =
|
||||||
|
'\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF';
|
||||||
|
const octets = {
|
||||||
|
'utf-8': [
|
||||||
|
0x00, 0x31, 0x32, 0x33, 0x41, 0x42, 0x43, 0x61, 0x62, 0x63, 0xc2, 0x80,
|
||||||
|
0xc3, 0xbf, 0xc4, 0x80, 0xe1, 0x80, 0x80, 0xef, 0xbf, 0xbd, 0xf0, 0x90,
|
||||||
|
0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf],
|
||||||
|
'utf-16le': [
|
||||||
|
0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, 0x00,
|
||||||
|
0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xFF, 0x00,
|
||||||
|
0x00, 0x01, 0x00, 0x10, 0xFD, 0xFF, 0x00, 0xD8, 0x00, 0xDC, 0xFF, 0xDB,
|
||||||
|
0xFF, 0xDF],
|
||||||
|
'utf-16be': [
|
||||||
|
0x00, 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42,
|
||||||
|
0x00, 0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xFF,
|
||||||
|
0x01, 0x00, 0x10, 0x00, 0xFF, 0xFD, 0xD8, 0x00, 0xDC, 0x00, 0xDB, 0xFF,
|
||||||
|
0xDF, 0xFF]
|
||||||
|
};
|
||||||
|
|
||||||
|
Object.keys(octets).forEach((encoding) => {
|
||||||
|
if (encoding === 'utf-16be' && !common.hasIntl) {
|
||||||
|
console.log('skipping utf-16be because missing Intl');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (let len = 1; len <= 5; ++len) {
|
||||||
|
const encoded = octets[encoding];
|
||||||
|
const decoder = new TextDecoder(encoding);
|
||||||
|
let out = '';
|
||||||
|
for (let i = 0; i < encoded.length; i += len) {
|
||||||
|
const sub = [];
|
||||||
|
for (let j = i; j < encoded.length && j < i + len; ++j)
|
||||||
|
sub.push(encoded[j]);
|
||||||
|
out += decoder.decode(new Uint8Array(sub), { stream: true });
|
||||||
|
}
|
||||||
|
out += decoder.decode();
|
||||||
|
assert.strictEqual(out, string);
|
||||||
|
}
|
||||||
|
});
|
@ -0,0 +1,63 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
// From: https://github.com/w3c/web-platform-tests/blob/39a67e2fff/encoding/textdecoder-utf16-surrogates.html
|
||||||
|
|
||||||
|
const common = require('../common');
|
||||||
|
|
||||||
|
if (!common.hasIntl)
|
||||||
|
common.skip('missing Intl');
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const {
|
||||||
|
TextDecoder
|
||||||
|
} = require('util');
|
||||||
|
|
||||||
|
const bad = [
|
||||||
|
{
|
||||||
|
encoding: 'utf-16le',
|
||||||
|
input: [0x00, 0xd8],
|
||||||
|
expected: '\uFFFD',
|
||||||
|
name: 'lone surrogate lead'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
encoding: 'utf-16le',
|
||||||
|
input: [0x00, 0xdc],
|
||||||
|
expected: '\uFFFD',
|
||||||
|
name: 'lone surrogate trail'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
encoding: 'utf-16le',
|
||||||
|
input: [0x00, 0xd8, 0x00, 0x00],
|
||||||
|
expected: '\uFFFD\u0000',
|
||||||
|
name: 'unmatched surrogate lead'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
encoding: 'utf-16le',
|
||||||
|
input: [0x00, 0xdc, 0x00, 0x00],
|
||||||
|
expected: '\uFFFD\u0000',
|
||||||
|
name: 'unmatched surrogate trail'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
encoding: 'utf-16le',
|
||||||
|
input: [0x00, 0xdc, 0x00, 0xd8],
|
||||||
|
expected: '\uFFFD\uFFFD',
|
||||||
|
name: 'swapped surrogate pair'
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
bad.forEach((t) => {
|
||||||
|
|
||||||
|
assert.strictEqual(
|
||||||
|
new TextDecoder(t.encoding).decode(new Uint8Array(t.input)),
|
||||||
|
t.expected);
|
||||||
|
|
||||||
|
common.expectsError(
|
||||||
|
() => {
|
||||||
|
new TextDecoder(t.encoding, { fatal: true })
|
||||||
|
.decode(new Uint8Array(t.input));
|
||||||
|
}, {
|
||||||
|
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
|
||||||
|
type: TypeError
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
@ -2,6 +2,7 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const common = require('../common');
|
const common = require('../common');
|
||||||
|
|
||||||
const assert = require('assert');
|
const assert = require('assert');
|
||||||
const { TextDecoder, TextEncoder } = require('util');
|
const { TextDecoder, TextEncoder } = require('util');
|
||||||
const { customInspectSymbol: inspect } = require('internal/util');
|
const { customInspectSymbol: inspect } = require('internal/util');
|
||||||
@ -16,6 +17,7 @@ assert(TextDecoder);
|
|||||||
{
|
{
|
||||||
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
|
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
|
||||||
const dec = new TextDecoder(i);
|
const dec = new TextDecoder(i);
|
||||||
|
assert.strictEqual(dec.encoding, 'utf-8');
|
||||||
const res = dec.decode(buf);
|
const res = dec.decode(buf);
|
||||||
assert.strictEqual(res, 'test€');
|
assert.strictEqual(res, 'test€');
|
||||||
});
|
});
|
||||||
@ -102,3 +104,105 @@ if (common.hasIntl) {
|
|||||||
}));
|
}));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/api-basics.html
|
||||||
|
function testDecodeSample(encoding, string, bytes) {
|
||||||
|
assert.strictEqual(
|
||||||
|
new TextDecoder(encoding).decode(new Uint8Array(bytes)),
|
||||||
|
string);
|
||||||
|
assert.strictEqual(
|
||||||
|
new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer),
|
||||||
|
string);
|
||||||
|
}
|
||||||
|
|
||||||
|
// z (ASCII U+007A), cent (Latin-1 U+00A2), CJK water (BMP U+6C34),
|
||||||
|
// G-Clef (non-BMP U+1D11E), PUA (BMP U+F8FF), PUA (non-BMP U+10FFFD)
|
||||||
|
// byte-swapped BOM (non-character U+FFFE)
|
||||||
|
const sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE';
|
||||||
|
|
||||||
|
{
|
||||||
|
const encoding = 'utf-8';
|
||||||
|
const string = sample;
|
||||||
|
const bytes = [
|
||||||
|
0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4,
|
||||||
|
0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3,
|
||||||
|
0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF,
|
||||||
|
0xBF, 0xBE
|
||||||
|
];
|
||||||
|
const encoded = new TextEncoder().encode(string);
|
||||||
|
assert.deepStrictEqual([].slice.call(encoded), bytes);
|
||||||
|
assert.strictEqual(
|
||||||
|
new TextDecoder(encoding).decode(new Uint8Array(bytes)),
|
||||||
|
string);
|
||||||
|
assert.strictEqual(
|
||||||
|
new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer),
|
||||||
|
string);
|
||||||
|
}
|
||||||
|
|
||||||
|
testDecodeSample(
|
||||||
|
'utf-16le',
|
||||||
|
sample,
|
||||||
|
[
|
||||||
|
0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C,
|
||||||
|
0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8,
|
||||||
|
0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (common.hasIntl) {
|
||||||
|
testDecodeSample(
|
||||||
|
'utf-16be',
|
||||||
|
sample,
|
||||||
|
[
|
||||||
|
0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34,
|
||||||
|
0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF,
|
||||||
|
0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
testDecodeSample(
|
||||||
|
'utf-16',
|
||||||
|
sample,
|
||||||
|
[
|
||||||
|
0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C,
|
||||||
|
0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8,
|
||||||
|
0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/api-invalid-label.html
|
||||||
|
[
|
||||||
|
'utf-8',
|
||||||
|
'unicode-1-1-utf-8',
|
||||||
|
'utf8',
|
||||||
|
'utf-16be',
|
||||||
|
'utf-16le',
|
||||||
|
'utf-16'
|
||||||
|
].forEach((i) => {
|
||||||
|
['\u0000', '\u000b', '\u00a0', '\u2028', '\u2029'].forEach((ws) => {
|
||||||
|
common.expectsError(
|
||||||
|
() => new TextDecoder(`${ws}${i}`),
|
||||||
|
{
|
||||||
|
code: 'ERR_ENCODING_NOT_SUPPORTED',
|
||||||
|
type: RangeError
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
common.expectsError(
|
||||||
|
() => new TextDecoder(`${i}${ws}`),
|
||||||
|
{
|
||||||
|
code: 'ERR_ENCODING_NOT_SUPPORTED',
|
||||||
|
type: RangeError
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
common.expectsError(
|
||||||
|
() => new TextDecoder(`${ws}${i}${ws}`),
|
||||||
|
{
|
||||||
|
code: 'ERR_ENCODING_NOT_SUPPORTED',
|
||||||
|
type: RangeError
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
@ -0,0 +1,52 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/textencoder-utf16-surrogates.html
|
||||||
|
|
||||||
|
require('../common');
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const {
|
||||||
|
TextDecoder,
|
||||||
|
TextEncoder
|
||||||
|
} = require('util');
|
||||||
|
|
||||||
|
const bad = [
|
||||||
|
{
|
||||||
|
input: '\uD800',
|
||||||
|
expected: '\uFFFD',
|
||||||
|
name: 'lone surrogate lead'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: '\uDC00',
|
||||||
|
expected: '\uFFFD',
|
||||||
|
name: 'lone surrogate trail'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: '\uD800\u0000',
|
||||||
|
expected: '\uFFFD\u0000',
|
||||||
|
name: 'unmatched surrogate lead'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: '\uDC00\u0000',
|
||||||
|
expected: '\uFFFD\u0000',
|
||||||
|
name: 'unmatched surrogate trail'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: '\uDC00\uD800',
|
||||||
|
expected: '\uFFFD\uFFFD',
|
||||||
|
name: 'swapped surrogate pair'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: '\uD834\uDD1E',
|
||||||
|
expected: '\uD834\uDD1E',
|
||||||
|
name: 'properly encoded MUSICAL SYMBOL G CLEF (U+1D11E)'
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
bad.forEach((t) => {
|
||||||
|
const encoded = new TextEncoder().encode(t.input);
|
||||||
|
const decoded = new TextDecoder().decode(encoded);
|
||||||
|
assert.strictEqual(decoded, t.expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
assert.strictEqual(new TextEncoder().encode().length, 0);
|
@ -2,6 +2,7 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const common = require('../common');
|
const common = require('../common');
|
||||||
|
|
||||||
const assert = require('assert');
|
const assert = require('assert');
|
||||||
const { TextDecoder, TextEncoder } = require('util');
|
const { TextDecoder, TextEncoder } = require('util');
|
||||||
const { customInspectSymbol: inspect } = require('internal/util');
|
const { customInspectSymbol: inspect } = require('internal/util');
|
||||||
@ -13,11 +14,25 @@ const encoded = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x65,
|
|||||||
assert(TextEncoder);
|
assert(TextEncoder);
|
||||||
|
|
||||||
// Test TextEncoder
|
// Test TextEncoder
|
||||||
const enc = new TextEncoder();
|
{
|
||||||
assert(enc);
|
const enc = new TextEncoder();
|
||||||
const buf = enc.encode('\ufefftest€');
|
assert.strictEqual(enc.encoding, 'utf-8');
|
||||||
|
assert(enc);
|
||||||
|
const buf = enc.encode('\ufefftest€');
|
||||||
|
assert.strictEqual(Buffer.compare(buf, encoded), 0);
|
||||||
|
}
|
||||||
|
|
||||||
assert.strictEqual(Buffer.compare(buf, encoded), 0);
|
{
|
||||||
|
const enc = new TextEncoder();
|
||||||
|
const buf = enc.encode();
|
||||||
|
assert.strictEqual(buf.length, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const enc = new TextEncoder();
|
||||||
|
const buf = enc.encode(undefined);
|
||||||
|
assert.strictEqual(buf.length, 0);
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const fn = TextEncoder.prototype[inspect];
|
const fn = TextEncoder.prototype[inspect];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user