buffer: consolidate encoding parsing

PR-URL: https://github.com/nodejs/node/pull/29217
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
This commit is contained in:
Brian White 2019-08-19 23:25:28 -04:00
parent 167960f3ca
commit bb1af6c82f
No known key found for this signature in database
GPG Key ID: 606D7358F94DA209
2 changed files with 189 additions and 191 deletions

View File

@ -60,6 +60,8 @@ const {
const {
inspect: utilInspect
} = require('internal/util/inspect');
const { encodings } = internalBinding('string_decoder');
const {
codes: {
@ -109,6 +111,10 @@ let poolSize, poolOffset, allocPool;
// do not own the ArrayBuffer allocator. Zero fill is always on in that case.
const zeroFill = bindingZeroFill || [0];
// Lookup table from encoding name to its position in the `encodings` list
// obtained from the `string_decoder` binding. The object has no prototype,
// so only names present in that list resolve to a value.
const encodingsMap = Object.create(null);
for (let id = 0; id < encodings.length; id += 1) {
  const name = encodings[id];
  encodingsMap[name] = id;
}
function createUnsafeBuffer(size) {
zeroFill[0] = 0;
try {
@ -376,28 +382,16 @@ function allocate(size) {
return createUnsafeBuffer(size);
}
function fromString(string, encoding) {
let length;
if (typeof encoding !== 'string' || encoding.length === 0) {
if (string.length === 0)
return new FastBuffer();
encoding = 'utf8';
length = byteLengthUtf8(string);
} else {
length = byteLength(string, encoding, true);
if (length === -1)
throw new ERR_UNKNOWN_ENCODING(encoding);
if (string.length === 0)
return new FastBuffer();
}
function fromStringFast(string, ops) {
const length = ops.byteLength(string);
if (length >= (Buffer.poolSize >>> 1))
return createFromString(string, encoding);
return createFromString(string, ops.encodingVal);
if (length > (poolSize - poolOffset))
createPool();
let b = new FastBuffer(allocPool, poolOffset, length);
const actual = b.write(string, encoding);
const actual = ops.write(b, string, 0, length);
if (actual !== length) {
// byteLength() may overestimate. That's a rare case, though.
b = new FastBuffer(allocPool, poolOffset, actual);
@ -407,6 +401,23 @@ function fromString(string, encoding) {
return b;
}
/**
 * Creates a buffer holding `string` encoded with `encoding`.
 *
 * A non-string or empty `encoding` selects UTF-8. An encoding name that
 * `getEncodingOps()` does not recognise is rejected even when `string` is
 * empty, so the lookup always runs before the empty-string shortcut.
 *
 * @param {string} string Text to encode.
 * @param {string} [encoding] Encoding name; defaults to UTF-8.
 * @returns {FastBuffer} Empty buffer for an empty string, otherwise the
 *   result of `fromStringFast()`.
 * @throws {ERR_UNKNOWN_ENCODING} If `encoding` is a non-empty string that is
 *   not a known encoding name.
 */
function fromString(string, encoding) {
  let ops;
  if (typeof encoding !== 'string' || encoding.length === 0) {
    ops = encodingOps.utf8;
  } else {
    ops = getEncodingOps(encoding);
    if (ops === undefined)
      throw new ERR_UNKNOWN_ENCODING(encoding);
  }

  // Shared by both branches; must stay after the encoding validation above.
  if (string.length === 0)
    return new FastBuffer();

  return fromStringFast(string, ops);
}
function fromArrayLike(obj) {
const length = obj.length;
const b = allocate(length);
@ -553,6 +564,126 @@ function base64ByteLength(str, bytes) {
return (bytes * 3) >>> 2;
}
// Per-encoding dispatch table. Every entry exposes the same set of fields:
//   encoding    - name the entry stands for
//   encodingVal - numeric id taken from `encodingsMap`; `ucs2` and `utf16le`
//                 both use the `utf16le` id
//   byteLength  - number of bytes `string` is expected to need
//   write       - forwards to the matching `buf.<enc>Write()` method
//   slice       - forwards to the matching `buf.<enc>Slice()` method
//   indexOf     - searches `buf` for the string `val`; utf8, ucs2/utf16le and
//                 latin1 search the string directly, while ascii, base64 and
//                 hex first encode `val` into a buffer and search for that
const encodingOps = {
  utf8: {
    encoding: 'utf8',
    encodingVal: encodingsMap.utf8,
    byteLength: byteLengthUtf8,
    write(buf, string, offset, len) {
      return buf.utf8Write(string, offset, len);
    },
    slice(buf, start, end) {
      return buf.utf8Slice(start, end);
    },
    indexOf(buf, val, byteOffset, dir) {
      return indexOfString(buf, val, byteOffset, encodingsMap.utf8, dir);
    }
  },
  ucs2: {
    encoding: 'ucs2',
    encodingVal: encodingsMap.utf16le,
    byteLength(string) {
      return string.length * 2;
    },
    write(buf, string, offset, len) {
      return buf.ucs2Write(string, offset, len);
    },
    slice(buf, start, end) {
      return buf.ucs2Slice(start, end);
    },
    indexOf(buf, val, byteOffset, dir) {
      return indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir);
    }
  },
  utf16le: {
    encoding: 'utf16le',
    encodingVal: encodingsMap.utf16le,
    byteLength(string) {
      return string.length * 2;
    },
    write(buf, string, offset, len) {
      return buf.ucs2Write(string, offset, len);
    },
    slice(buf, start, end) {
      return buf.ucs2Slice(start, end);
    },
    indexOf(buf, val, byteOffset, dir) {
      return indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir);
    }
  },
  latin1: {
    encoding: 'latin1',
    encodingVal: encodingsMap.latin1,
    byteLength(string) {
      return string.length;
    },
    write(buf, string, offset, len) {
      return buf.latin1Write(string, offset, len);
    },
    slice(buf, start, end) {
      return buf.latin1Slice(start, end);
    },
    indexOf(buf, val, byteOffset, dir) {
      return indexOfString(buf, val, byteOffset, encodingsMap.latin1, dir);
    }
  },
  ascii: {
    encoding: 'ascii',
    encodingVal: encodingsMap.ascii,
    byteLength(string) {
      return string.length;
    },
    write(buf, string, offset, len) {
      return buf.asciiWrite(string, offset, len);
    },
    slice(buf, start, end) {
      return buf.asciiSlice(start, end);
    },
    indexOf(buf, val, byteOffset, dir) {
      const needle = fromStringFast(val, encodingOps.ascii);
      return indexOfBuffer(buf, needle, byteOffset, encodingsMap.ascii, dir);
    }
  },
  base64: {
    encoding: 'base64',
    encodingVal: encodingsMap.base64,
    byteLength(string) {
      return base64ByteLength(string, string.length);
    },
    write(buf, string, offset, len) {
      return buf.base64Write(string, offset, len);
    },
    slice(buf, start, end) {
      return buf.base64Slice(start, end);
    },
    indexOf(buf, val, byteOffset, dir) {
      const needle = fromStringFast(val, encodingOps.base64);
      return indexOfBuffer(buf, needle, byteOffset, encodingsMap.base64, dir);
    }
  },
  hex: {
    encoding: 'hex',
    encodingVal: encodingsMap.hex,
    byteLength(string) {
      return string.length >>> 1;
    },
    write(buf, string, offset, len) {
      return buf.hexWrite(string, offset, len);
    },
    slice(buf, start, end) {
      return buf.hexSlice(start, end);
    },
    indexOf(buf, val, byteOffset, dir) {
      const needle = fromStringFast(val, encodingOps.hex);
      return indexOfBuffer(buf, needle, byteOffset, encodingsMap.hex, dir);
    }
  }
};
/**
 * Resolves an encoding name to its entry in `encodingOps`.
 *
 * The value is coerced to a string by concatenation, bucketed by length, and
 * compared exactly first; `toLowerCase()` is only called when the exact
 * comparison misses. Accepted names (case-insensitive): utf8, utf-8, ucs2,
 * ucs-2, utf16le, utf-16le, latin1, binary, base64, ascii, hex.
 *
 * @param {*} encoding Encoding name, or a value coercible to one.
 * @returns {object|undefined} The matching `encodingOps` entry, or
 *   `undefined` when the name is not recognised.
 */
function getEncodingOps(encoding) {
  // Concatenation (rather than String()) keeps the original coercion rules.
  const name = encoding + '';
  const size = name.length;

  if (size === 4) {
    if (name === 'utf8') return encodingOps.utf8;
    if (name === 'ucs2') return encodingOps.ucs2;
    const lowered = name.toLowerCase();
    if (lowered === 'utf8') return encodingOps.utf8;
    if (lowered === 'ucs2') return encodingOps.ucs2;
    return undefined;
  }

  if (size === 5) {
    if (name === 'utf-8') return encodingOps.utf8;
    if (name === 'ascii') return encodingOps.ascii;
    if (name === 'ucs-2') return encodingOps.ucs2;
    const lowered = name.toLowerCase();
    if (lowered === 'utf-8') return encodingOps.utf8;
    if (lowered === 'ascii') return encodingOps.ascii;
    if (lowered === 'ucs-2') return encodingOps.ucs2;
    return undefined;
  }

  if (size === 7) {
    const matches = name === 'utf16le' || name.toLowerCase() === 'utf16le';
    return matches ? encodingOps.utf16le : undefined;
  }

  if (size === 8) {
    const matches = name === 'utf-16le' || name.toLowerCase() === 'utf-16le';
    return matches ? encodingOps.utf16le : undefined;
  }

  if (size === 6) {
    if (name === 'latin1' || name === 'binary') return encodingOps.latin1;
    if (name === 'base64') return encodingOps.base64;
    const lowered = name.toLowerCase();
    if (lowered === 'latin1' || lowered === 'binary')
      return encodingOps.latin1;
    if (lowered === 'base64') return encodingOps.base64;
    return undefined;
  }

  if (size === 3) {
    const matches = name === 'hex' || name.toLowerCase() === 'hex';
    return matches ? encodingOps.hex : undefined;
  }

  return undefined;
}
function byteLength(string, encoding) {
if (typeof string !== 'string') {
if (isArrayBufferView(string) || isAnyArrayBuffer(string)) {
@ -572,45 +703,10 @@ function byteLength(string, encoding) {
if (!encoding)
return (mustMatch ? -1 : byteLengthUtf8(string));
encoding += '';
switch (encoding.length) {
case 4:
if (encoding === 'utf8') return byteLengthUtf8(string);
if (encoding === 'ucs2') return len * 2;
encoding = encoding.toLowerCase();
if (encoding === 'utf8') return byteLengthUtf8(string);
if (encoding === 'ucs2') return len * 2;
break;
case 5:
if (encoding === 'utf-8') return byteLengthUtf8(string);
if (encoding === 'ascii') return len;
if (encoding === 'ucs-2') return len * 2;
encoding = encoding.toLowerCase();
if (encoding === 'utf-8') return byteLengthUtf8(string);
if (encoding === 'ascii') return len;
if (encoding === 'ucs-2') return len * 2;
break;
case 7:
if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le')
return len * 2;
break;
case 8:
if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le')
return len * 2;
break;
case 6:
if (encoding === 'latin1' || encoding === 'binary') return len;
if (encoding === 'base64') return base64ByteLength(string, len);
encoding = encoding.toLowerCase();
if (encoding === 'latin1' || encoding === 'binary') return len;
if (encoding === 'base64') return base64ByteLength(string, len);
break;
case 3:
if (encoding === 'hex' || encoding.toLowerCase() === 'hex')
return len >>> 1;
break;
}
return (mustMatch ? -1 : byteLengthUtf8(string));
const ops = getEncodingOps(encoding);
if (ops === undefined)
return (mustMatch ? -1 : byteLengthUtf8(string));
return ops.byteLength(string);
}
Buffer.byteLength = byteLength;
@ -633,51 +729,6 @@ Object.defineProperty(Buffer.prototype, 'offset', {
}
});
/**
 * Decodes the bytes of `buf` between `start` and `end` into a string.
 *
 * Encoding-name parsing is delegated to `getEncodingOps()` so that the set of
 * accepted names is defined in one place. An unrecognised encoding is
 * reported exactly as the caller supplied it.
 *
 * @param {object} buf Buffer to read from.
 * @param {*} encoding Encoding name; `undefined` selects UTF-8.
 * @param {number} start Offset passed through to the slice method.
 * @param {number} end Offset passed through to the slice method.
 * @returns {string} The decoded text.
 * @throws {ERR_UNKNOWN_ENCODING} If `encoding` is not a known encoding name.
 */
function stringSlice(buf, encoding, start, end) {
  if (encoding === undefined) return buf.utf8Slice(start, end);

  const ops = getEncodingOps(encoding);
  if (ops === undefined)
    throw new ERR_UNKNOWN_ENCODING(encoding);
  return ops.slice(buf, start, end);
}
Buffer.prototype.copy =
function copy(target, targetStart, sourceStart, sourceEnd) {
return _copy(this, target, targetStart, sourceStart, sourceEnd);
@ -708,7 +759,15 @@ Buffer.prototype.toString = function toString(encoding, start, end) {
if (end <= start)
return '';
return stringSlice(this, encoding, start, end);
if (encoding === undefined)
return this.utf8Slice(start, end);
const ops = getEncodingOps(encoding);
if (ops === undefined)
throw new ERR_UNKNOWN_ENCODING(encoding);
return ops.slice(this, start, end);
};
Buffer.prototype.equals = function equals(otherBuffer) {
@ -826,15 +885,25 @@ function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) {
}
dir = !!dir; // Cast to bool.
if (typeof val === 'string') {
if (encoding === undefined) {
return indexOfString(buffer, val, byteOffset, encoding, dir);
}
return slowIndexOf(buffer, val, byteOffset, encoding, dir);
} else if (isUint8Array(val)) {
return indexOfBuffer(buffer, val, byteOffset, encoding, dir);
} else if (typeof val === 'number') {
if (typeof val === 'number')
return indexOfNumber(buffer, val >>> 0, byteOffset, dir);
let ops;
if (encoding === undefined)
ops = encodingOps.utf8;
else
ops = getEncodingOps(encoding);
if (typeof val === 'string') {
if (ops === undefined)
throw new ERR_UNKNOWN_ENCODING(encoding);
return ops.indexOf(buffer, val, byteOffset, dir);
}
if (isUint8Array(val)) {
const encodingVal =
(ops === undefined ? encodingsMap.utf8 : ops.encodingVal);
return indexOfBuffer(buffer, val, byteOffset, encodingVal, dir);
}
throw new ERR_INVALID_ARG_TYPE(
@ -842,37 +911,6 @@ function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) {
);
}
/**
 * Searches `buffer` for the string `val` using an explicitly named encoding.
 *
 * The lookup goes through `getEncodingOps()`, whose `indexOf` handlers pass
 * the numeric encoding id from `encodingsMap` to the native search helpers
 * rather than the encoding name. An unrecognised encoding is reported exactly
 * as the caller supplied it.
 *
 * @param {object} buffer Buffer to search in.
 * @param {string} val String to search for.
 * @param {number} byteOffset Offset forwarded to the search helper.
 * @param {*} encoding Encoding name of `val`.
 * @param {boolean} dir Direction flag forwarded to the search helper.
 * @returns {number} Whatever the encoding's `indexOf` handler returns.
 * @throws {ERR_UNKNOWN_ENCODING} If `encoding` is not a known encoding name.
 */
function slowIndexOf(buffer, val, byteOffset, encoding, dir) {
  const ops = getEncodingOps(encoding);
  if (ops === undefined)
    throw new ERR_UNKNOWN_ENCODING(encoding);
  return ops.indexOf(buffer, val, byteOffset, dir);
}
// Thin wrapper over bidirectionalIndexOf() that always passes `true` as the
// direction argument.
Buffer.prototype.indexOf = function indexOf(val, byteOffset, encoding) {
  const dir = true;
  return bidirectionalIndexOf(this, val, byteOffset, encoding, dir);
};
@ -985,49 +1023,10 @@ Buffer.prototype.write = function write(string, offset, length, encoding) {
if (!encoding)
return this.utf8Write(string, offset, length);
encoding += '';
switch (encoding.length) {
case 4:
if (encoding === 'utf8') return this.utf8Write(string, offset, length);
if (encoding === 'ucs2') return this.ucs2Write(string, offset, length);
encoding = encoding.toLowerCase();
if (encoding === 'utf8') return this.utf8Write(string, offset, length);
if (encoding === 'ucs2') return this.ucs2Write(string, offset, length);
break;
case 5:
if (encoding === 'utf-8') return this.utf8Write(string, offset, length);
if (encoding === 'ascii') return this.asciiWrite(string, offset, length);
if (encoding === 'ucs-2') return this.ucs2Write(string, offset, length);
encoding = encoding.toLowerCase();
if (encoding === 'utf-8') return this.utf8Write(string, offset, length);
if (encoding === 'ascii') return this.asciiWrite(string, offset, length);
if (encoding === 'ucs-2') return this.ucs2Write(string, offset, length);
break;
case 7:
if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le')
return this.ucs2Write(string, offset, length);
break;
case 8:
if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le')
return this.ucs2Write(string, offset, length);
break;
case 6:
if (encoding === 'latin1' || encoding === 'binary')
return this.latin1Write(string, offset, length);
if (encoding === 'base64')
return this.base64Write(string, offset, length);
encoding = encoding.toLowerCase();
if (encoding === 'latin1' || encoding === 'binary')
return this.latin1Write(string, offset, length);
if (encoding === 'base64')
return this.base64Write(string, offset, length);
break;
case 3:
if (encoding === 'hex' || encoding.toLowerCase() === 'hex')
return this.hexWrite(string, offset, length);
break;
}
throw new ERR_UNKNOWN_ENCODING(encoding);
const ops = getEncodingOps(encoding);
if (ops === undefined)
throw new ERR_UNKNOWN_ENCODING(encoding);
return ops.write(this, string, offset, length);
};
Buffer.prototype.toJSON = function toJSON() {

View File

@ -63,6 +63,7 @@ using v8::Context;
using v8::EscapableHandleScope;
using v8::FunctionCallbackInfo;
using v8::Global;
using v8::Int32;
using v8::Integer;
using v8::Isolate;
using v8::Just;
@ -446,11 +447,9 @@ namespace {
void CreateFromString(const FunctionCallbackInfo<Value>& args) {
CHECK(args[0]->IsString());
CHECK(args[1]->IsString());
CHECK(args[1]->IsInt32());
enum encoding enc = ParseEncoding(args.GetIsolate(),
args[1].As<String>(),
UTF8);
enum encoding enc = static_cast<enum encoding>(args[1].As<Int32>()->Value());
Local<Object> buf;
if (New(args.GetIsolate(), args[0].As<String>(), enc).ToLocal(&buf))
args.GetReturnValue().Set(buf);
@ -786,9 +785,10 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
CHECK(args[1]->IsString());
CHECK(args[2]->IsNumber());
CHECK(args[3]->IsInt32());
CHECK(args[4]->IsBoolean());
enum encoding enc = ParseEncoding(isolate, args[3], UTF8);
enum encoding enc = static_cast<enum encoding>(args[3].As<Int32>()->Value());
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
ArrayBufferViewContents<char> buffer(args[0]);
@ -900,11 +900,10 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
CHECK(args[1]->IsObject());
CHECK(args[2]->IsNumber());
CHECK(args[3]->IsInt32());
CHECK(args[4]->IsBoolean());
enum encoding enc = ParseEncoding(args.GetIsolate(),
args[3],
UTF8);
enum encoding enc = static_cast<enum encoding>(args[3].As<Int32>()->Value());
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);