buffer: improve Buffer#fill performance

1) This improves the performance for Buffer#fill by using shortcuts. 2) It also ports throwing errors to JS. That way they contain the proper error code. 3) Using negative `end` values will from now on result in an error instead of just doing nothing. 4) Passing in `null` as encoding is from now on accepted as 'utf8'. PR-URL: https://github.com/nodejs/node/pull/18790 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
2018-02-14 23:42:57 +01:00 · 2018-02-14 23:42:57 +01:00 · 177b7314cf
commit 177b7314cf
parent d3af120f7a
6 changed files with 85 additions and 136 deletions
--- a/doc/api/buffer.md
+++ b/doc/api/buffer.md
@ -1202,6 +1202,9 @@ console.log(buf1.equals(buf3));
 <!-- YAML
 added: v0.5.0
 changes:
+  - version: REPLACEME
+    pr-url: https://github.com/nodejs/node/pull/18790
+    description: Negative `end` values throw an `ERR_INDEX_OUT_OF_RANGE` error.
  - version: REPLACEME
    pr-url: https://github.com/nodejs/node/pull/18129
    description: Attempting to fill a non-zero length buffer with a zero length
--- a/lib/buffer.js
+++ b/lib/buffer.js
@ -254,16 +254,8 @@ function assertSize(size) {
 **/
 Buffer.alloc = function alloc(size, fill, encoding) {
  assertSize(size);
-  if (size > 0 && fill !== undefined) {
-    // Since we are filling anyway, don't zero fill initially.
-    // Only pay attention to encoding if it's a string. This
-    // prevents accidentally sending in a number that would
-    // be interpreted as a start offset.
-    if (typeof encoding !== 'string')
-      encoding = undefined;
-    const ret = createUnsafeBuffer(size);
-    if (fill_(ret, fill, encoding) > 0)
-      return ret;
+  if (fill !== undefined && size > 0) {
+    return _fill(createUnsafeBuffer(size), fill, encoding);
  }
  return new FastBuffer(size);
 };
@ -834,14 +826,12 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) {
 //    buffer.fill(buffer[, offset[, end]])
 //    buffer.fill(string[, offset[, end]][, encoding])
 Buffer.prototype.fill = function fill(val, start, end, encoding) {
-  fill_(this, val, start, end, encoding);
-  return this;
+  return _fill(this, val, start, end, encoding);
 };

-function fill_(buf, val, start, end, encoding) {
-  // Handle string cases:
+function _fill(buf, val, start, end, encoding) {
  if (typeof val === 'string') {
-    if (typeof start === 'string') {
+    if (start === undefined || typeof start === 'string') {
      encoding = start;
      start = 0;
      end = buf.length;
@ -850,46 +840,60 @@ function fill_(buf, val, start, end, encoding) {
      end = buf.length;
    }

-    if (encoding !== undefined && typeof encoding !== 'string') {
-      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'encoding',
-                                 'string', encoding);
-    }
-    var normalizedEncoding = normalizeEncoding(encoding);
+    const normalizedEncoding = normalizeEncoding(encoding);
    if (normalizedEncoding === undefined) {
+      if (typeof encoding !== 'string') {
+        throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'encoding', 'string',
+                                   encoding);
+      }
      throw new errors.TypeError('ERR_UNKNOWN_ENCODING', encoding);
    }

    if (val.length === 0) {
-      // Previously, if val === '', the Buffer would not fill,
-      // which is rather surprising.
+      // If val === '' default to zero.
      val = 0;
    } else if (val.length === 1) {
-      var code = val.charCodeAt(0);
-      if ((normalizedEncoding === 'utf8' && code < 128) ||
-          normalizedEncoding === 'latin1') {
      // Fast path: If `val` fits into a single byte, use that numeric value.
+      if (normalizedEncoding === 'utf8') {
+        const code = val.charCodeAt(0);
+        if (code < 128) {
          val = code;
        }
+      } else if (normalizedEncoding === 'latin1') {
+        val = val.charCodeAt(0);
      }
-  } else if (typeof val === 'number') {
-    val = val & 255;
+    }
+  } else {
+    encoding = undefined;
  }

+  if (start === undefined) {
+    start = 0;
+    end = buf.length;
+  } else {
    // Invalid ranges are not set to a default, so can range check early.
-  if (start < 0 || end > buf.length)
+    if (end === undefined) {
+      if (start < 0)
        throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
-
-  if (end <= start)
-    return 0;
-
+      end = buf.length;
+    } else {
+      if (start < 0 || end > buf.length || end < 0)
+        throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
+      end = end >>> 0;
+    }
    start = start >>> 0;
-  end = end === undefined ? buf.length : end >>> 0;
-  const fillLength = bindingFill(buf, val, start, end, encoding);
+    if (start >= end)
+      return buf;
+  }

-  if (fillLength === -1)
+  const res = bindingFill(buf, val, start, end, encoding);
+  if (res < 0) {
+    if (res === -1)
      throw new errors.TypeError('ERR_INVALID_ARG_VALUE', 'value', val);
+    throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
+  }

-  return fillLength;
+  return buf;
 }


--- a/lib/internal/util.js
+++ b/lib/internal/util.js
@ -102,7 +102,7 @@ function assertCrypto() {
 // function in order to avoid the performance hit.
 // Return undefined if there is no match.
 function normalizeEncoding(enc) {
-  if (!enc) return 'utf8';
+  if (enc == null || enc === '') return 'utf8';
  let retried;
  while (true) {
    switch (enc) {
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@ -571,10 +571,10 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
  Local<String> str_obj;
  size_t str_length;
  enum encoding enc;
-  THROW_AND_RETURN_IF_OOB(start <= end);
-  THROW_AND_RETURN_IF_OOB(fill_length + start <= ts_obj_length);

-  args.GetReturnValue().Set(static_cast<double>(fill_length));
+  // OOB Check. Throw the error in JS.
+  if (start > end || fill_length + start > ts_obj_length)
+    return args.GetReturnValue().Set(-2);

  // First check if Buffer has been passed.
  if (Buffer::HasInstance(args[1])) {
@ -593,22 +593,16 @@ void Fill(const FunctionCallbackInfo<Value>& args) {

  str_obj = args[1]->ToString(env->context()).ToLocalChecked();
  enc = ParseEncoding(env->isolate(), args[4], UTF8);
-  str_length =
-      enc == UTF8 ? str_obj->Utf8Length() :
-      enc == UCS2 ? str_obj->Length() * sizeof(uint16_t) : str_obj->Length();
-
-  if (str_length == 0) {
-    args.GetReturnValue().Set(0);
-    return;
-  }

  // Can't use StringBytes::Write() in all cases. For example if attempting
  // to write a two byte character into a one byte Buffer.
  if (enc == UTF8) {
+    str_length = str_obj->Utf8Length();
    node::Utf8Value str(env->isolate(), args[1]);
    memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));

  } else if (enc == UCS2) {
+    str_length = str_obj->Length() * sizeof(uint16_t);
    node::TwoByteValue str(env->isolate(), args[1]);
    if (IsBigEndian())
      SwapBytes16(reinterpret_cast<char*>(&str[0]), str_length);
@ -616,6 +610,7 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
    memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));

  } else {
+    str_length = str_obj->Length();
    // Write initial String to Buffer, then use that memory to copy remainder
    // of string. Correct the string length for cases like HEX where less than
    // the total string length is written.
--- a/test/parallel/test-buffer-fill.js
+++ b/test/parallel/test-buffer-fill.js
@ -19,16 +19,12 @@ testBufs('\u0222aa', 2);
 testBufs('\u0222aa', 8);
 testBufs('a\u0234b\u0235c\u0236', 4);
 testBufs('a\u0234b\u0235c\u0236', 12);
-testBufs('abc', 4, -1);
 testBufs('abc', 4, 1);
 testBufs('abc', 5, 1);
-testBufs('\u0222aa', 2, -1);
 testBufs('\u0222aa', 8, 1);
-testBufs('a\u0234b\u0235c\u0236', 4, -1);
 testBufs('a\u0234b\u0235c\u0236', 4, 1);
 testBufs('a\u0234b\u0235c\u0236', 12, 1);

-
 // UTF8
 testBufs('abc', 'utf8');
 testBufs('\u0222aa', 'utf8');
@ -40,17 +36,13 @@ testBufs('\u0222aa', 2, 'utf8');
 testBufs('\u0222aa', 8, 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 4, 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 12, 'utf8');
-testBufs('abc', 4, -1, 'utf8');
 testBufs('abc', 4, 1, 'utf8');
 testBufs('abc', 5, 1, 'utf8');
-testBufs('\u0222aa', 2, -1, 'utf8');
 testBufs('\u0222aa', 8, 1, 'utf8');
-testBufs('a\u0234b\u0235c\u0236', 4, -1, 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 4, 1, 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 12, 1, 'utf8');
 assert.strictEqual(Buffer.allocUnsafe(1).fill(0).fill('\u0222')[0], 0xc8);

-
 // BINARY
 testBufs('abc', 'binary');
 testBufs('\u0222aa', 'binary');
@ -62,16 +54,12 @@ testBufs('\u0222aa', 2, 'binary');
 testBufs('\u0222aa', 8, 'binary');
 testBufs('a\u0234b\u0235c\u0236', 4, 'binary');
 testBufs('a\u0234b\u0235c\u0236', 12, 'binary');
-testBufs('abc', 4, -1, 'binary');
 testBufs('abc', 4, 1, 'binary');
 testBufs('abc', 5, 1, 'binary');
-testBufs('\u0222aa', 2, -1, 'binary');
 testBufs('\u0222aa', 8, 1, 'binary');
-testBufs('a\u0234b\u0235c\u0236', 4, -1, 'binary');
 testBufs('a\u0234b\u0235c\u0236', 4, 1, 'binary');
 testBufs('a\u0234b\u0235c\u0236', 12, 1, 'binary');

-
 // LATIN1
 testBufs('abc', 'latin1');
 testBufs('\u0222aa', 'latin1');
@ -83,16 +71,12 @@ testBufs('\u0222aa', 2, 'latin1');
 testBufs('\u0222aa', 8, 'latin1');
 testBufs('a\u0234b\u0235c\u0236', 4, 'latin1');
 testBufs('a\u0234b\u0235c\u0236', 12, 'latin1');
-testBufs('abc', 4, -1, 'latin1');
 testBufs('abc', 4, 1, 'latin1');
 testBufs('abc', 5, 1, 'latin1');
-testBufs('\u0222aa', 2, -1, 'latin1');
 testBufs('\u0222aa', 8, 1, 'latin1');
-testBufs('a\u0234b\u0235c\u0236', 4, -1, 'latin1');
 testBufs('a\u0234b\u0235c\u0236', 4, 1, 'latin1');
 testBufs('a\u0234b\u0235c\u0236', 12, 1, 'latin1');

-
 // UCS2
 testBufs('abc', 'ucs2');
 testBufs('\u0222aa', 'ucs2');
@ -103,17 +87,13 @@ testBufs('\u0222aa', 2, 'ucs2');
 testBufs('\u0222aa', 8, 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 4, 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 12, 'ucs2');
-testBufs('abc', 4, -1, 'ucs2');
 testBufs('abc', 4, 1, 'ucs2');
 testBufs('abc', 5, 1, 'ucs2');
-testBufs('\u0222aa', 2, -1, 'ucs2');
 testBufs('\u0222aa', 8, 1, 'ucs2');
-testBufs('a\u0234b\u0235c\u0236', 4, -1, 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 4, 1, 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 12, 1, 'ucs2');
 assert.strictEqual(Buffer.allocUnsafe(1).fill('\u0222', 'ucs2')[0], 0x22);

-
 // HEX
 testBufs('616263', 'hex');
 testBufs('c8a26161', 'hex');
@ -125,12 +105,9 @@ testBufs('c8a26161', 2, 'hex');
 testBufs('c8a26161', 8, 'hex');
 testBufs('61c8b462c8b563c8b6', 4, 'hex');
 testBufs('61c8b462c8b563c8b6', 12, 'hex');
-testBufs('616263', 4, -1, 'hex');
 testBufs('616263', 4, 1, 'hex');
 testBufs('616263', 5, 1, 'hex');
-testBufs('c8a26161', 2, -1, 'hex');
 testBufs('c8a26161', 8, 1, 'hex');
-testBufs('61c8b462c8b563c8b6', 4, -1, 'hex');
 testBufs('61c8b462c8b563c8b6', 4, 1, 'hex');
 testBufs('61c8b462c8b563c8b6', 12, 1, 'hex');

@ -162,16 +139,12 @@ testBufs('yKJhYQ==', 2, 'ucs2');
 testBufs('yKJhYQ==', 8, 'ucs2');
 testBufs('Yci0Ysi1Y8i2', 4, 'ucs2');
 testBufs('Yci0Ysi1Y8i2', 12, 'ucs2');
-testBufs('YWJj', 4, -1, 'ucs2');
 testBufs('YWJj', 4, 1, 'ucs2');
 testBufs('YWJj', 5, 1, 'ucs2');
-testBufs('yKJhYQ==', 2, -1, 'ucs2');
 testBufs('yKJhYQ==', 8, 1, 'ucs2');
-testBufs('Yci0Ysi1Y8i2', 4, -1, 'ucs2');
 testBufs('Yci0Ysi1Y8i2', 4, 1, 'ucs2');
 testBufs('Yci0Ysi1Y8i2', 12, 1, 'ucs2');

-
 // Buffer
 function deepStrictEqualValues(buf, arr) {
  for (const [index, value] of buf.entries()) {
@ -179,27 +152,24 @@ function deepStrictEqualValues(buf, arr) {
  }
 }

-
 const buf2Fill = Buffer.allocUnsafe(1).fill(2);
 deepStrictEqualValues(genBuffer(4, [buf2Fill]), [2, 2, 2, 2]);
 deepStrictEqualValues(genBuffer(4, [buf2Fill, 1]), [0, 2, 2, 2]);
 deepStrictEqualValues(genBuffer(4, [buf2Fill, 1, 3]), [0, 2, 2, 0]);
 deepStrictEqualValues(genBuffer(4, [buf2Fill, 1, 1]), [0, 0, 0, 0]);
-deepStrictEqualValues(genBuffer(4, [buf2Fill, 1, -1]), [0, 0, 0, 0]);
 const hexBufFill = Buffer.allocUnsafe(2).fill(0).fill('0102', 'hex');
 deepStrictEqualValues(genBuffer(4, [hexBufFill]), [1, 2, 1, 2]);
 deepStrictEqualValues(genBuffer(4, [hexBufFill, 1]), [0, 1, 2, 1]);
 deepStrictEqualValues(genBuffer(4, [hexBufFill, 1, 3]), [0, 1, 2, 0]);
 deepStrictEqualValues(genBuffer(4, [hexBufFill, 1, 1]), [0, 0, 0, 0]);
-deepStrictEqualValues(genBuffer(4, [hexBufFill, 1, -1]), [0, 0, 0, 0]);
-

 // Check exceptions
 [
  [0, -1],
  [0, 0, buf1.length + 1],
  ['', -1],
-  ['', 0, buf1.length + 1]
+  ['', 0, buf1.length + 1],
+  ['', 1, -1],
 ].forEach((args) => {
  common.expectsError(
    () => buf1.fill(...args),
@ -218,7 +188,7 @@ common.expectsError(

 [
  ['a', 0, 0, NaN],
-  ['a', 0, 0, null]
+  ['a', 0, 0, false]
 ].forEach((args) => {
  common.expectsError(
    () => buf1.fill(...args),
@ -356,42 +326,10 @@ Buffer.alloc(8, '');
  assert.strictEqual(buf.toString(), 'էէէէէ');
 }

-// Testing public API. Make sure "start" is properly checked, even if it's
-// magically mangled using Symbol.toPrimitive.
-{
-  let elseWasLast = false;
-  common.expectsError(() => {
-    let ctr = 0;
-    const start = {
-      [Symbol.toPrimitive]() {
-        // We use this condition to get around the check in lib/buffer.js
-        if (ctr <= 0) {
-          elseWasLast = false;
-          ctr = ctr + 1;
-          return 0;
-        } else {
-          elseWasLast = true;
-          // Once buffer.js calls the C++ implementation of fill, return -1
-          return -1;
-        }
-      }
-    };
-    Buffer.alloc(1).fill(Buffer.alloc(1), start, 1);
-  }, {
-    code: undefined, type: RangeError, message: 'Index out of range'
-  });
-  // Make sure -1 is making it to Buffer::Fill().
-  assert.ok(elseWasLast,
-            'internal API changed, -1 no longer in correct location');
-}
-
 // Testing process.binding. Make sure "start" is properly checked for -1 wrap
 // around.
-common.expectsError(() => {
-  process.binding('buffer').fill(Buffer.alloc(1), 1, -1, 0, 1);
-}, {
-  code: undefined, type: RangeError, message: 'Index out of range'
-});
+assert.strictEqual(
+  process.binding('buffer').fill(Buffer.alloc(1), 1, -1, 0, 1), -2);

 // Make sure "end" is properly checked, even if it's magically mangled using
 // Symbol.toPrimitive.
@ -402,20 +340,21 @@ common.expectsError(() => {
    const end = {
      [Symbol.toPrimitive]() {
        // We use this condition to get around the check in lib/buffer.js
-        if (ctr <= 1) {
+        if (ctr === 0) {
          elseWasLast = false;
-          ctr = ctr + 1;
+          ctr++;
          return 1;
-        } else {
+        }
        elseWasLast = true;
        // Once buffer.js calls the C++ implementation of fill, return -1
        return -1;
      }
-      }
    };
    Buffer.alloc(1).fill(Buffer.alloc(1), 0, end);
  }, {
-    code: undefined, type: RangeError, message: 'Index out of range'
+    code: 'ERR_INDEX_OUT_OF_RANGE',
+    type: RangeError,
+    message: 'Index out of range'
  });
  // Make sure -1 is making it to Buffer::Fill().
  assert.ok(elseWasLast,
@ -424,9 +363,8 @@ common.expectsError(() => {

 // Testing process.binding. Make sure "end" is properly checked for -1 wrap
 // around.
-common.expectsError(() => {
-  process.binding('buffer').fill(Buffer.alloc(1), 1, 1, -2, 1);
-}, { code: undefined, type: RangeError, message: 'Index out of range' });
+assert.strictEqual(
+  process.binding('buffer').fill(Buffer.alloc(1), 1, 1, -2, 1), -2);

 // Test that bypassing 'length' won't cause an abort.
 common.expectsError(() => {
@ -436,7 +374,11 @@ common.expectsError(() => {
    enumerable: true
  });
  buf.fill('');
-}, { code: undefined, type: RangeError, message: 'Index out of range' });
+}, {
+  code: 'ERR_INDEX_OUT_OF_RANGE',
+  type: RangeError,
+  message: 'Index out of range'
+});

 assert.deepStrictEqual(
  Buffer.allocUnsafeSlow(16).fill('ab', 'utf16le'),
--- a/test/parallel/test-internal-util-normalizeencoding.js
+++ b/test/parallel/test-internal-util-normalizeencoding.js
@ -6,6 +6,8 @@ const assert = require('assert');
 const util = require('internal/util');

 const tests = [
+  [undefined, 'utf8'],
+  [null, 'utf8'],
  ['', 'utf8'],
  ['utf8', 'utf8'],
  ['utf-8', 'utf8'],
@ -23,17 +25,20 @@ const tests = [
  ['binary', 'latin1'],
  ['BINARY', 'latin1'],
  ['latin1', 'latin1'],
+  ['LaTiN1', 'latin1'],
  ['base64', 'base64'],
  ['BASE64', 'base64'],
  ['hex', 'hex'],
  ['HEX', 'hex'],
  ['foo', undefined],
  [1, undefined],
-  [false, 'utf8'],
-  [undefined, 'utf8'],
+  [false, undefined],
+  [NaN, undefined],
+  [0, undefined],
  [[], undefined],
 ];

-tests.forEach((i) => {
-  assert.strictEqual(util.normalizeEncoding(i[0]), i[1]);
+tests.forEach((e, i) => {
+  const res = util.normalizeEncoding(e[0]);
+  assert.strictEqual(res, e[1], `#${i} failed: expected ${e[1]}, got ${res}`);
 });