buffer: consolidate encoding parsing

PR-URL: https://github.com/nodejs/node/pull/29217
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
2019-08-19 23:25:28 -04:00 · 2019-08-19 23:25:28 -04:00 · bb1af6c82f
commit bb1af6c82f
parent 167960f3ca
2 changed files with 189 additions and 191 deletions
--- a/lib/buffer.js
+++ b/lib/buffer.js
@@ -60,6 +60,8 @@ const {
 const {
  inspect: utilInspect
 } = require('internal/util/inspect');
+const { encodings } = internalBinding('string_decoder');
+

 const {
  codes: {
@@ -109,6 +111,10 @@ let poolSize, poolOffset, allocPool;
 // do not own the ArrayBuffer allocator.  Zero fill is always on in that case.
 const zeroFill = bindingZeroFill || [0];

+const encodingsMap = Object.create(null);
+for (let i = 0; i < encodings.length; ++i)
+  encodingsMap[encodings[i]] = i;
+
 function createUnsafeBuffer(size) {
  zeroFill[0] = 0;
  try {
@@ -376,28 +382,16 @@ function allocate(size) {
  return createUnsafeBuffer(size);
 }

-function fromString(string, encoding) {
-  let length;
-  if (typeof encoding !== 'string' || encoding.length === 0) {
-    if (string.length === 0)
-      return new FastBuffer();
-    encoding = 'utf8';
-    length = byteLengthUtf8(string);
-  } else {
-    length = byteLength(string, encoding, true);
-    if (length === -1)
-      throw new ERR_UNKNOWN_ENCODING(encoding);
-    if (string.length === 0)
-      return new FastBuffer();
-  }
+function fromStringFast(string, ops) {
+  const length = ops.byteLength(string);

  if (length >= (Buffer.poolSize >>> 1))
-    return createFromString(string, encoding);
+    return createFromString(string, ops.encodingVal);

  if (length > (poolSize - poolOffset))
    createPool();
  let b = new FastBuffer(allocPool, poolOffset, length);
-  const actual = b.write(string, encoding);
+  const actual = ops.write(b, string, 0, length);
  if (actual !== length) {
    // byteLength() may overestimate. That's a rare case, though.
    b = new FastBuffer(allocPool, poolOffset, actual);
@@ -407,6 +401,23 @@ function fromString(string, encoding) {
  return b;
 }

+function fromString(string, encoding) {
+  let ops;
+  if (typeof encoding !== 'string' || encoding.length === 0) {
+    if (string.length === 0)
+      return new FastBuffer();
+    ops = encodingOps.utf8;
+    encoding = undefined;
+  } else {
+    ops = getEncodingOps(encoding);
+    if (ops === undefined)
+      throw new ERR_UNKNOWN_ENCODING(encoding);
+    if (string.length === 0)
+      return new FastBuffer();
+  }
+  return fromStringFast(string, ops);
+}
+
 function fromArrayLike(obj) {
  const length = obj.length;
  const b = allocate(length);
@@ -553,6 +564,126 @@ function base64ByteLength(str, bytes) {
  return (bytes * 3) >>> 2;
 }

+const encodingOps = {
+  utf8: {
+    encoding: 'utf8',
+    encodingVal: encodingsMap.utf8,
+    byteLength: byteLengthUtf8,
+    write: (buf, string, offset, len) => buf.utf8Write(string, offset, len),
+    slice: (buf, start, end) => buf.utf8Slice(start, end),
+    indexOf: (buf, val, byteOffset, dir) =>
+      indexOfString(buf, val, byteOffset, encodingsMap.utf8, dir)
+  },
+  ucs2: {
+    encoding: 'ucs2',
+    encodingVal: encodingsMap.utf16le,
+    byteLength: (string) => string.length * 2,
+    write: (buf, string, offset, len) => buf.ucs2Write(string, offset, len),
+    slice: (buf, start, end) => buf.ucs2Slice(start, end),
+    indexOf: (buf, val, byteOffset, dir) =>
+      indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir)
+  },
+  utf16le: {
+    encoding: 'utf16le',
+    encodingVal: encodingsMap.utf16le,
+    byteLength: (string) => string.length * 2,
+    write: (buf, string, offset, len) => buf.ucs2Write(string, offset, len),
+    slice: (buf, start, end) => buf.ucs2Slice(start, end),
+    indexOf: (buf, val, byteOffset, dir) =>
+      indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir)
+  },
+  latin1: {
+    encoding: 'latin1',
+    encodingVal: encodingsMap.latin1,
+    byteLength: (string) => string.length,
+    write: (buf, string, offset, len) => buf.latin1Write(string, offset, len),
+    slice: (buf, start, end) => buf.latin1Slice(start, end),
+    indexOf: (buf, val, byteOffset, dir) =>
+      indexOfString(buf, val, byteOffset, encodingsMap.latin1, dir)
+  },
+  ascii: {
+    encoding: 'ascii',
+    encodingVal: encodingsMap.ascii,
+    byteLength: (string) => string.length,
+    write: (buf, string, offset, len) => buf.asciiWrite(string, offset, len),
+    slice: (buf, start, end) => buf.asciiSlice(start, end),
+    indexOf: (buf, val, byteOffset, dir) =>
+      indexOfBuffer(buf,
+                    fromStringFast(val, encodingOps.ascii),
+                    byteOffset,
+                    encodingsMap.ascii,
+                    dir)
+  },
+  base64: {
+    encoding: 'base64',
+    encodingVal: encodingsMap.base64,
+    byteLength: (string) => base64ByteLength(string, string.length),
+    write: (buf, string, offset, len) => buf.base64Write(string, offset, len),
+    slice: (buf, start, end) => buf.base64Slice(start, end),
+    indexOf: (buf, val, byteOffset, dir) =>
+      indexOfBuffer(buf,
+                    fromStringFast(val, encodingOps.base64),
+                    byteOffset,
+                    encodingsMap.base64,
+                    dir)
+  },
+  hex: {
+    encoding: 'hex',
+    encodingVal: encodingsMap.hex,
+    byteLength: (string) => string.length >>> 1,
+    write: (buf, string, offset, len) => buf.hexWrite(string, offset, len),
+    slice: (buf, start, end) => buf.hexSlice(start, end),
+    indexOf: (buf, val, byteOffset, dir) =>
+      indexOfBuffer(buf,
+                    fromStringFast(val, encodingOps.hex),
+                    byteOffset,
+                    encodingsMap.hex,
+                    dir)
+  }
+};
+function getEncodingOps(encoding) {
+  encoding += '';
+  switch (encoding.length) {
+    case 4:
+      if (encoding === 'utf8') return encodingOps.utf8;
+      if (encoding === 'ucs2') return encodingOps.ucs2;
+      encoding = encoding.toLowerCase();
+      if (encoding === 'utf8') return encodingOps.utf8;
+      if (encoding === 'ucs2') return encodingOps.ucs2;
+      break;
+    case 5:
+      if (encoding === 'utf-8') return encodingOps.utf8;
+      if (encoding === 'ascii') return encodingOps.ascii;
+      if (encoding === 'ucs-2') return encodingOps.ucs2;
+      encoding = encoding.toLowerCase();
+      if (encoding === 'utf-8') return encodingOps.utf8;
+      if (encoding === 'ascii') return encodingOps.ascii;
+      if (encoding === 'ucs-2') return encodingOps.ucs2;
+      break;
+    case 7:
+      if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le')
+        return encodingOps.utf16le;
+      break;
+    case 8:
+      if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le')
+        return encodingOps.utf16le;
+      break;
+    case 6:
+      if (encoding === 'latin1' || encoding === 'binary')
+        return encodingOps.latin1;
+      if (encoding === 'base64') return encodingOps.base64;
+      encoding = encoding.toLowerCase();
+      if (encoding === 'latin1' || encoding === 'binary')
+        return encodingOps.latin1;
+      if (encoding === 'base64') return encodingOps.base64;
+      break;
+    case 3:
+      if (encoding === 'hex' || encoding.toLowerCase() === 'hex')
+        return encodingOps.hex;
+      break;
+  }
+}
+
 function byteLength(string, encoding) {
  if (typeof string !== 'string') {
    if (isArrayBufferView(string) || isAnyArrayBuffer(string)) {
@@ -572,45 +703,10 @@ function byteLength(string, encoding) {
  if (!encoding)
    return (mustMatch ? -1 : byteLengthUtf8(string));

-  encoding += '';
-  switch (encoding.length) {
-    case 4:
-      if (encoding === 'utf8') return byteLengthUtf8(string);
-      if (encoding === 'ucs2') return len * 2;
-      encoding = encoding.toLowerCase();
-      if (encoding === 'utf8') return byteLengthUtf8(string);
-      if (encoding === 'ucs2') return len * 2;
-      break;
-    case 5:
-      if (encoding === 'utf-8') return byteLengthUtf8(string);
-      if (encoding === 'ascii') return len;
-      if (encoding === 'ucs-2') return len * 2;
-      encoding = encoding.toLowerCase();
-      if (encoding === 'utf-8') return byteLengthUtf8(string);
-      if (encoding === 'ascii') return len;
-      if (encoding === 'ucs-2') return len * 2;
-      break;
-    case 7:
-      if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le')
-        return len * 2;
-      break;
-    case 8:
-      if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le')
-        return len * 2;
-      break;
-    case 6:
-      if (encoding === 'latin1' || encoding === 'binary') return len;
-      if (encoding === 'base64') return base64ByteLength(string, len);
-      encoding = encoding.toLowerCase();
-      if (encoding === 'latin1' || encoding === 'binary') return len;
-      if (encoding === 'base64') return base64ByteLength(string, len);
-      break;
-    case 3:
-      if (encoding === 'hex' || encoding.toLowerCase() === 'hex')
-        return len >>> 1;
-      break;
-  }
-  return (mustMatch ? -1 : byteLengthUtf8(string));
+  const ops = getEncodingOps(encoding);
+  if (ops === undefined)
+    return (mustMatch ? -1 : byteLengthUtf8(string));
+  return ops.byteLength(string);
 }

 Buffer.byteLength = byteLength;
@@ -633,51 +729,6 @@ Object.defineProperty(Buffer.prototype, 'offset', {
  }
 });

-function stringSlice(buf, encoding, start, end) {
-  if (encoding === undefined) return buf.utf8Slice(start, end);
-  encoding += '';
-  switch (encoding.length) {
-    case 4:
-      if (encoding === 'utf8') return buf.utf8Slice(start, end);
-      if (encoding === 'ucs2') return buf.ucs2Slice(start, end);
-      encoding = encoding.toLowerCase();
-      if (encoding === 'utf8') return buf.utf8Slice(start, end);
-      if (encoding === 'ucs2') return buf.ucs2Slice(start, end);
-      break;
-    case 5:
-      if (encoding === 'utf-8') return buf.utf8Slice(start, end);
-      if (encoding === 'ascii') return buf.asciiSlice(start, end);
-      if (encoding === 'ucs-2') return buf.ucs2Slice(start, end);
-      encoding = encoding.toLowerCase();
-      if (encoding === 'utf-8') return buf.utf8Slice(start, end);
-      if (encoding === 'ascii') return buf.asciiSlice(start, end);
-      if (encoding === 'ucs-2') return buf.ucs2Slice(start, end);
-      break;
-    case 6:
-      if (encoding === 'latin1' || encoding === 'binary')
-        return buf.latin1Slice(start, end);
-      if (encoding === 'base64') return buf.base64Slice(start, end);
-      encoding = encoding.toLowerCase();
-      if (encoding === 'latin1' || encoding === 'binary')
-        return buf.latin1Slice(start, end);
-      if (encoding === 'base64') return buf.base64Slice(start, end);
-      break;
-    case 3:
-      if (encoding === 'hex' || encoding.toLowerCase() === 'hex')
-        return buf.hexSlice(start, end);
-      break;
-    case 7:
-      if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le')
-        return buf.ucs2Slice(start, end);
-      break;
-    case 8:
-      if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le')
-        return buf.ucs2Slice(start, end);
-      break;
-  }
-  throw new ERR_UNKNOWN_ENCODING(encoding);
-}
-
 Buffer.prototype.copy =
  function copy(target, targetStart, sourceStart, sourceEnd) {
    return _copy(this, target, targetStart, sourceStart, sourceEnd);
@@ -708,7 +759,15 @@ Buffer.prototype.toString = function toString(encoding, start, end) {

  if (end <= start)
    return '';
-  return stringSlice(this, encoding, start, end);
+
+  if (encoding === undefined)
+    return this.utf8Slice(start, end);
+
+  const ops = getEncodingOps(encoding);
+  if (ops === undefined)
+    throw new ERR_UNKNOWN_ENCODING(encoding);
+
+  return ops.slice(this, start, end);
 };

 Buffer.prototype.equals = function equals(otherBuffer) {
@@ -826,15 +885,25 @@ function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) {
  }
  dir = !!dir;  // Cast to bool.

-  if (typeof val === 'string') {
-    if (encoding === undefined) {
-      return indexOfString(buffer, val, byteOffset, encoding, dir);
-    }
-    return slowIndexOf(buffer, val, byteOffset, encoding, dir);
-  } else if (isUint8Array(val)) {
-    return indexOfBuffer(buffer, val, byteOffset, encoding, dir);
-  } else if (typeof val === 'number') {
+  if (typeof val === 'number')
    return indexOfNumber(buffer, val >>> 0, byteOffset, dir);
+
+  let ops;
+  if (encoding === undefined)
+    ops = encodingOps.utf8;
+  else
+    ops = getEncodingOps(encoding);
+
+  if (typeof val === 'string') {
+    if (ops === undefined)
+      throw new ERR_UNKNOWN_ENCODING(encoding);
+    return ops.indexOf(buffer, val, byteOffset, dir);
+  }
+
+  if (isUint8Array(val)) {
+    const encodingVal =
+      (ops === undefined ? encodingsMap.utf8 : ops.encodingVal);
+    return indexOfBuffer(buffer, val, byteOffset, encodingVal, dir);
  }

  throw new ERR_INVALID_ARG_TYPE(
@@ -842,37 +911,6 @@ function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) {
  );
 }

-function slowIndexOf(buffer, val, byteOffset, encoding, dir) {
-  let loweredCase = false;
-  for (;;) {
-    switch (encoding) {
-      case 'utf8':
-      case 'utf-8':
-      case 'ucs2':
-      case 'ucs-2':
-      case 'utf16le':
-      case 'utf-16le':
-      case 'latin1':
-      case 'binary':
-        return indexOfString(buffer, val, byteOffset, encoding, dir);
-
-      case 'base64':
-      case 'ascii':
-      case 'hex':
-        return indexOfBuffer(
-          buffer, Buffer.from(val, encoding), byteOffset, encoding, dir);
-
-      default:
-        if (loweredCase) {
-          throw new ERR_UNKNOWN_ENCODING(encoding);
-        }
-
-        encoding = ('' + encoding).toLowerCase();
-        loweredCase = true;
-    }
-  }
-}
-
 Buffer.prototype.indexOf = function indexOf(val, byteOffset, encoding) {
  return bidirectionalIndexOf(this, val, byteOffset, encoding, true);
 };
@@ -985,49 +1023,10 @@ Buffer.prototype.write = function write(string, offset, length, encoding) {
  if (!encoding)
    return this.utf8Write(string, offset, length);

-  encoding += '';
-  switch (encoding.length) {
-    case 4:
-      if (encoding === 'utf8') return this.utf8Write(string, offset, length);
-      if (encoding === 'ucs2') return this.ucs2Write(string, offset, length);
-      encoding = encoding.toLowerCase();
-      if (encoding === 'utf8') return this.utf8Write(string, offset, length);
-      if (encoding === 'ucs2') return this.ucs2Write(string, offset, length);
-      break;
-    case 5:
-      if (encoding === 'utf-8') return this.utf8Write(string, offset, length);
-      if (encoding === 'ascii') return this.asciiWrite(string, offset, length);
-      if (encoding === 'ucs-2') return this.ucs2Write(string, offset, length);
-      encoding = encoding.toLowerCase();
-      if (encoding === 'utf-8') return this.utf8Write(string, offset, length);
-      if (encoding === 'ascii') return this.asciiWrite(string, offset, length);
-      if (encoding === 'ucs-2') return this.ucs2Write(string, offset, length);
-      break;
-    case 7:
-      if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le')
-        return this.ucs2Write(string, offset, length);
-      break;
-    case 8:
-      if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le')
-        return this.ucs2Write(string, offset, length);
-      break;
-    case 6:
-      if (encoding === 'latin1' || encoding === 'binary')
-        return this.latin1Write(string, offset, length);
-      if (encoding === 'base64')
-        return this.base64Write(string, offset, length);
-      encoding = encoding.toLowerCase();
-      if (encoding === 'latin1' || encoding === 'binary')
-        return this.latin1Write(string, offset, length);
-      if (encoding === 'base64')
-        return this.base64Write(string, offset, length);
-      break;
-    case 3:
-      if (encoding === 'hex' || encoding.toLowerCase() === 'hex')
-        return this.hexWrite(string, offset, length);
-      break;
-  }
-  throw new ERR_UNKNOWN_ENCODING(encoding);
+  const ops = getEncodingOps(encoding);
+  if (ops === undefined)
+    throw new ERR_UNKNOWN_ENCODING(encoding);
+  return ops.write(this, string, offset, length);
 };

 Buffer.prototype.toJSON = function toJSON() {
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -63,6 +63,7 @@ using v8::Context;
 using v8::EscapableHandleScope;
 using v8::FunctionCallbackInfo;
 using v8::Global;
+using v8::Int32;
 using v8::Integer;
 using v8::Isolate;
 using v8::Just;
@@ -446,11 +447,9 @@ namespace {

 void CreateFromString(const FunctionCallbackInfo<Value>& args) {
  CHECK(args[0]->IsString());
-  CHECK(args[1]->IsString());
+  CHECK(args[1]->IsInt32());

-  enum encoding enc = ParseEncoding(args.GetIsolate(),
-                                    args[1].As<String>(),
-                                    UTF8);
+  enum encoding enc = static_cast<enum encoding>(args[1].As<Int32>()->Value());
  Local<Object> buf;
  if (New(args.GetIsolate(), args[0].As<String>(), enc).ToLocal(&buf))
    args.GetReturnValue().Set(buf);
@@ -786,9 +785,10 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {

  CHECK(args[1]->IsString());
  CHECK(args[2]->IsNumber());
+  CHECK(args[3]->IsInt32());
  CHECK(args[4]->IsBoolean());

-  enum encoding enc = ParseEncoding(isolate, args[3], UTF8);
+  enum encoding enc = static_cast<enum encoding>(args[3].As<Int32>()->Value());

  THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
  ArrayBufferViewContents<char> buffer(args[0]);
@@ -900,11 +900,10 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
 void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
  CHECK(args[1]->IsObject());
  CHECK(args[2]->IsNumber());
+  CHECK(args[3]->IsInt32());
  CHECK(args[4]->IsBoolean());

-  enum encoding enc = ParseEncoding(args.GetIsolate(),
-                                    args[3],
-                                    UTF8);
+  enum encoding enc = static_cast<enum encoding>(args[3].As<Int32>()->Value());

  THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
  THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);