buffer: optimize Buffer.byteLength
Buffer.byteLength is important for speed because it is called whenever a new Buffer is created from a string. This commit optimizes Buffer.byteLength execution by: - moving base64 length calculation into JS-land, which is now much faster - remove redundant code and streamline the UTF8 length calculation It also adds a benchmark and better tests. PR-URL: https://github.com/nodejs/io.js/pull/1713 Reviewed-By: Trevor Norris <trev.norris@gmail.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
This commit is contained in:
parent
2a71f02988
commit
9da168b71f
55
benchmark/buffers/buffer-bytelength.js
Normal file
55
benchmark/buffers/buffer-bytelength.js
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
var common = require('../common');
|
||||||
|
|
||||||
|
var bench = common.createBenchmark(main, {
|
||||||
|
encoding: ['utf8', 'base64'],
|
||||||
|
len: [1, 2, 4, 16, 64, 256], // x16
|
||||||
|
n: [5e6]
|
||||||
|
});
|
||||||
|
|
||||||
|
// 16 chars each
|
||||||
|
var chars = [
|
||||||
|
'hello brendan!!!', // 1 byte
|
||||||
|
'ΰαβγδεζηθικλμνξο', // 2 bytes
|
||||||
|
'挰挱挲挳挴挵挶挷挸挹挺挻挼挽挾挿', // 3 bytes
|
||||||
|
'𠜎𠜱𠝹𠱓𠱸𠲖𠳏𠳕𠴕𠵼𠵿𠸎𠸏𠹷𠺝𠺢' // 4 bytes
|
||||||
|
];
|
||||||
|
|
||||||
|
function main(conf) {
|
||||||
|
var n = conf.n | 0;
|
||||||
|
var len = conf.len | 0;
|
||||||
|
var encoding = conf.encoding;
|
||||||
|
|
||||||
|
var strings = [];
|
||||||
|
for (var string of chars) {
|
||||||
|
// Strings must be built differently, depending on encoding
|
||||||
|
var data = buildString(string, len);
|
||||||
|
if (encoding === 'utf8') {
|
||||||
|
strings.push(data);
|
||||||
|
} else if (encoding === 'base64') {
|
||||||
|
// Base64 strings will be much longer than their UTF8 counterparts
|
||||||
|
strings.push(new Buffer(data, 'utf8').toString('base64'));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the result to ensure it is *properly* optimized
|
||||||
|
var results = strings.map(function(val) {
|
||||||
|
return Buffer.byteLength(val, encoding);
|
||||||
|
});
|
||||||
|
|
||||||
|
bench.start();
|
||||||
|
for (var i = 0; i < n; i++) {
|
||||||
|
var index = n % strings.length;
|
||||||
|
// Go!
|
||||||
|
var r = Buffer.byteLength(strings[index], encoding);
|
||||||
|
|
||||||
|
if (r !== results[index])
|
||||||
|
throw Error('incorrect return value');
|
||||||
|
}
|
||||||
|
bench.end(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildString(str, times) {
|
||||||
|
if (times == 1) return str;
|
||||||
|
|
||||||
|
return str + buildString(str, times - 1);
|
||||||
|
}
|
@ -272,30 +272,62 @@ Buffer.concat = function(list, length) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
function byteLength(string, encoding) {
|
function base64ByteLength(str, bytes) {
|
||||||
if (typeof(string) !== 'string')
|
// Handle padding
|
||||||
string = String(string);
|
if (str.charCodeAt(bytes - 1) === 0x3D)
|
||||||
|
bytes--;
|
||||||
|
if (bytes > 1 && str.charCodeAt(bytes - 1) === 0x3D)
|
||||||
|
bytes--;
|
||||||
|
|
||||||
if (string.length === 0)
|
// Base64 ratio: 3/4
|
||||||
|
return (bytes * 3) >>> 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function byteLength(string, encoding) {
|
||||||
|
if (typeof string !== 'string')
|
||||||
|
string = '' + string;
|
||||||
|
|
||||||
|
var len = string.length;
|
||||||
|
if (len === 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
// Use a for loop to avoid recursion
|
||||||
|
var loweredCase = false;
|
||||||
|
for (;;) {
|
||||||
switch (encoding) {
|
switch (encoding) {
|
||||||
case 'ascii':
|
case 'ascii':
|
||||||
case 'binary':
|
case 'binary':
|
||||||
|
// Deprecated
|
||||||
case 'raw':
|
case 'raw':
|
||||||
return string.length;
|
case 'raws':
|
||||||
|
return len;
|
||||||
|
|
||||||
|
case 'utf8':
|
||||||
|
case 'utf-8':
|
||||||
|
return binding.byteLengthUtf8(string);
|
||||||
|
|
||||||
case 'ucs2':
|
case 'ucs2':
|
||||||
case 'ucs-2':
|
case 'ucs-2':
|
||||||
case 'utf16le':
|
case 'utf16le':
|
||||||
case 'utf-16le':
|
case 'utf-16le':
|
||||||
return string.length * 2;
|
return len * 2;
|
||||||
|
|
||||||
case 'hex':
|
case 'hex':
|
||||||
return string.length >>> 1;
|
return len >>> 1;
|
||||||
}
|
|
||||||
|
|
||||||
return binding.byteLength(string, encoding);
|
case 'base64':
|
||||||
|
return base64ByteLength(string, len);
|
||||||
|
|
||||||
|
default:
|
||||||
|
// The C++ binding defaulted to UTF8, we should too.
|
||||||
|
if (loweredCase)
|
||||||
|
return binding.byteLengthUtf8(string);
|
||||||
|
|
||||||
|
encoding = ('' + encoding).toLowerCase();
|
||||||
|
loweredCase = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Buffer.byteLength = byteLength;
|
Buffer.byteLength = byteLength;
|
||||||
|
@ -541,17 +541,11 @@ void WriteDoubleBE(const FunctionCallbackInfo<Value>& args) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ByteLength(const FunctionCallbackInfo<Value> &args) {
|
void ByteLengthUtf8(const FunctionCallbackInfo<Value> &args) {
|
||||||
Environment* env = Environment::GetCurrent(args);
|
CHECK(args[0]->IsString());
|
||||||
|
|
||||||
if (!args[0]->IsString())
|
// Fast case: avoid StringBytes on UTF8 string. Jump to v8.
|
||||||
return env->ThrowTypeError("Argument must be a string");
|
args.GetReturnValue().Set(args[0].As<String>()->Utf8Length());
|
||||||
|
|
||||||
Local<String> s = args[0]->ToString(env->isolate());
|
|
||||||
enum encoding e = ParseEncoding(env->isolate(), args[1], UTF8);
|
|
||||||
|
|
||||||
uint32_t size = StringBytes::Size(env->isolate(), s, e);
|
|
||||||
args.GetReturnValue().Set(size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -745,7 +739,7 @@ void Initialize(Handle<Object> target,
|
|||||||
|
|
||||||
env->SetMethod(target, "setupBufferJS", SetupBufferJS);
|
env->SetMethod(target, "setupBufferJS", SetupBufferJS);
|
||||||
|
|
||||||
env->SetMethod(target, "byteLength", ByteLength);
|
env->SetMethod(target, "byteLengthUtf8", ByteLengthUtf8);
|
||||||
env->SetMethod(target, "compare", Compare);
|
env->SetMethod(target, "compare", Compare);
|
||||||
env->SetMethod(target, "fill", Fill);
|
env->SetMethod(target, "fill", Fill);
|
||||||
env->SetMethod(target, "indexOfBuffer", IndexOfBuffer);
|
env->SetMethod(target, "indexOfBuffer", IndexOfBuffer);
|
||||||
|
46
test/parallel/test-buffer-bytelength.js
Normal file
46
test/parallel/test-buffer-bytelength.js
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
var common = require('../common');
|
||||||
|
var assert = require('assert');
|
||||||
|
var Buffer = require('buffer').Buffer;
|
||||||
|
|
||||||
|
// coerce values to string
|
||||||
|
assert.equal(Buffer.byteLength(32, 'raw'), 2);
|
||||||
|
assert.equal(Buffer.byteLength(NaN, 'utf8'), 3);
|
||||||
|
assert.equal(Buffer.byteLength({}, 'raws'), 15);
|
||||||
|
assert.equal(Buffer.byteLength(), 9);
|
||||||
|
|
||||||
|
// special case: zero length string
|
||||||
|
assert.equal(Buffer.byteLength('', 'ascii'), 0);
|
||||||
|
assert.equal(Buffer.byteLength('', 'HeX'), 0);
|
||||||
|
|
||||||
|
// utf8
|
||||||
|
assert.equal(Buffer.byteLength('∑éllö wørl∂!', 'utf-8'), 19);
|
||||||
|
assert.equal(Buffer.byteLength('κλμνξο', 'utf8'), 12);
|
||||||
|
assert.equal(Buffer.byteLength('挵挶挷挸挹', 'utf-8'), 15);
|
||||||
|
assert.equal(Buffer.byteLength('𠝹𠱓𠱸', 'UTF8'), 12);
|
||||||
|
// without an encoding, utf8 should be assumed
|
||||||
|
assert.equal(Buffer.byteLength('hey there'), 9);
|
||||||
|
assert.equal(Buffer.byteLength('𠱸挶νξ#xx :)'), 17);
|
||||||
|
assert.equal(Buffer.byteLength('hello world', ''), 11);
|
||||||
|
// it should also be assumed with unrecognized encoding
|
||||||
|
assert.equal(Buffer.byteLength('hello world', 'abc'), 11);
|
||||||
|
assert.equal(Buffer.byteLength('ßœ∑≈', 'unkn0wn enc0ding'), 10);
|
||||||
|
|
||||||
|
// base64
|
||||||
|
assert.equal(Buffer.byteLength('aGVsbG8gd29ybGQ=', 'base64'), 11);
|
||||||
|
assert.equal(Buffer.byteLength('bm9kZS5qcyByb2NrcyE=', 'base64'), 14);
|
||||||
|
assert.equal(Buffer.byteLength('aGkk', 'base64'), 3);
|
||||||
|
assert.equal(Buffer.byteLength('bHNrZGZsa3NqZmtsc2xrZmFqc2RsZmtqcw==',
|
||||||
|
'base64'), 25);
|
||||||
|
// special padding
|
||||||
|
assert.equal(Buffer.byteLength('aaa=', 'base64'), 2);
|
||||||
|
assert.equal(Buffer.byteLength('aaaa==', 'base64'), 3);
|
||||||
|
|
||||||
|
assert.equal(Buffer.byteLength('Il était tué'), 14);
|
||||||
|
assert.equal(Buffer.byteLength('Il était tué', 'utf8'), 14);
|
||||||
|
assert.equal(Buffer.byteLength('Il était tué', 'ascii'), 12);
|
||||||
|
assert.equal(Buffer.byteLength('Il était tué', 'binary'), 12);
|
||||||
|
['ucs2', 'ucs-2', 'utf16le', 'utf-16le'].forEach(function(encoding) {
|
||||||
|
assert.equal(24, Buffer.byteLength('Il était tué', encoding));
|
||||||
|
});
|
@ -561,15 +561,6 @@ assert.equal(sb, s);
|
|||||||
b = new Buffer('abcde');
|
b = new Buffer('abcde');
|
||||||
assert.equal('bcde', b.slice(1).toString());
|
assert.equal('bcde', b.slice(1).toString());
|
||||||
|
|
||||||
// byte length
|
|
||||||
assert.equal(14, Buffer.byteLength('Il était tué'));
|
|
||||||
assert.equal(14, Buffer.byteLength('Il était tué', 'utf8'));
|
|
||||||
['ucs2', 'ucs-2', 'utf16le', 'utf-16le'].forEach(function(encoding) {
|
|
||||||
assert.equal(24, Buffer.byteLength('Il était tué', encoding));
|
|
||||||
});
|
|
||||||
assert.equal(12, Buffer.byteLength('Il était tué', 'ascii'));
|
|
||||||
assert.equal(12, Buffer.byteLength('Il était tué', 'binary'));
|
|
||||||
|
|
||||||
// slice(0,0).length === 0
|
// slice(0,0).length === 0
|
||||||
assert.equal(0, Buffer('hello').slice(0, 0).length);
|
assert.equal(0, Buffer('hello').slice(0, 0).length);
|
||||||
|
|
||||||
@ -1074,10 +1065,6 @@ assert.equal(buf.readInt8(0), -1);
|
|||||||
assert.ok(typeof Buffer(5).slice(0, 5).parent === 'object');
|
assert.ok(typeof Buffer(5).slice(0, 5).parent === 'object');
|
||||||
})();
|
})();
|
||||||
|
|
||||||
// Make sure byteLength properly checks for base64 padding
|
|
||||||
assert.equal(Buffer.byteLength('aaa=', 'base64'), 2);
|
|
||||||
assert.equal(Buffer.byteLength('aaaa==', 'base64'), 3);
|
|
||||||
|
|
||||||
// Regression test for #5482: should throw but not assert in C++ land.
|
// Regression test for #5482: should throw but not assert in C++ land.
|
||||||
assert.throws(function() {
|
assert.throws(function() {
|
||||||
Buffer('', 'buffer');
|
Buffer('', 'buffer');
|
||||||
|
Loading…
x
Reference in New Issue
Block a user