lib: improve normalize encoding performance
This focuses on the common case by making sure they are prioritized. It also changes some typeof checks to test for undefined since that is faster and it adds a benchmark. PR-URL: https://github.com/nodejs/node/pull/18790 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
This commit is contained in:
parent
876836b135
commit
341770fedf
43
benchmark/buffers/buffer-normalize-encoding.js
Normal file
43
benchmark/buffers/buffer-normalize-encoding.js
Normal file
@ -0,0 +1,43 @@
|
||||
'use strict';
|
||||
|
||||
const common = require('../common.js');
|
||||
|
||||
const bench = common.createBenchmark(main, {
|
||||
encoding: [
|
||||
'ascii',
|
||||
'ASCII',
|
||||
'base64',
|
||||
'BASE64',
|
||||
'binary',
|
||||
'BINARY',
|
||||
'hex',
|
||||
'HEX',
|
||||
'latin1',
|
||||
'LATIN1',
|
||||
'ucs-2',
|
||||
'UCS-2',
|
||||
'ucs2',
|
||||
'UCS2',
|
||||
'utf-16le',
|
||||
'UTF-16LE',
|
||||
'utf-8',
|
||||
'UTF-8',
|
||||
'utf16le',
|
||||
'UTF16LE',
|
||||
'utf8',
|
||||
'UTF8'
|
||||
],
|
||||
n: [1e6]
|
||||
}, {
|
||||
flags: ['--expose-internals']
|
||||
});
|
||||
|
||||
function main({ encoding, n }) {
|
||||
const { normalizeEncoding } = require('internal/util');
|
||||
|
||||
bench.start();
|
||||
for (var i = 0; i < n; i++) {
|
||||
normalizeEncoding(encoding);
|
||||
}
|
||||
bench.end(n);
|
||||
}
|
@ -242,7 +242,7 @@ function assertSize(size) {
|
||||
err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);
|
||||
}
|
||||
|
||||
if (err) {
|
||||
if (err !== null) {
|
||||
Error.captureStackTrace(err, assertSize);
|
||||
throw err;
|
||||
}
|
||||
@ -428,7 +428,7 @@ Buffer.compare = function compare(a, b) {
|
||||
|
||||
Buffer.isEncoding = function isEncoding(encoding) {
|
||||
return typeof encoding === 'string' &&
|
||||
typeof normalizeEncoding(encoding) === 'string';
|
||||
normalizeEncoding(encoding) !== undefined;
|
||||
};
|
||||
Buffer[kIsEncodingSymbol] = Buffer.isEncoding;
|
||||
|
||||
|
@ -96,36 +96,59 @@ function assertCrypto() {
|
||||
throw new errors.Error('ERR_NO_CRYPTO');
|
||||
}
|
||||
|
||||
// The loop should only run at most twice, retrying with lowercased enc
|
||||
// if there is no match in the first pass.
|
||||
// We use a loop instead of branching to retry with a helper
|
||||
// function in order to avoid the performance hit.
|
||||
// Return undefined if there is no match.
|
||||
// Move the "slow cases" to a separate function to make sure this function gets
|
||||
// inlined properly. That prioritizes the common case.
|
||||
function normalizeEncoding(enc) {
|
||||
if (enc == null || enc === '') return 'utf8';
|
||||
let retried;
|
||||
while (true) {
|
||||
switch (enc) {
|
||||
case 'utf8':
|
||||
case 'utf-8':
|
||||
return 'utf8';
|
||||
case 'ucs2':
|
||||
case 'ucs-2':
|
||||
case 'utf16le':
|
||||
case 'utf-16le':
|
||||
if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8';
|
||||
return slowCases(enc);
|
||||
}
|
||||
|
||||
function slowCases(enc) {
|
||||
switch (enc.length) {
|
||||
case 4:
|
||||
if (enc === 'UTF8') return 'utf8';
|
||||
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
|
||||
enc = `${enc}`.toLowerCase();
|
||||
if (enc === 'utf8') return 'utf8';
|
||||
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
|
||||
break;
|
||||
case 3:
|
||||
if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex')
|
||||
return 'hex';
|
||||
break;
|
||||
case 5:
|
||||
if (enc === 'ascii') return 'ascii';
|
||||
if (enc === 'ucs-2') return 'utf16le';
|
||||
if (enc === 'UTF-8') return 'utf8';
|
||||
if (enc === 'ASCII') return 'ascii';
|
||||
if (enc === 'UCS-2') return 'utf16le';
|
||||
enc = `${enc}`.toLowerCase();
|
||||
if (enc === 'utf-8') return 'utf8';
|
||||
if (enc === 'ascii') return 'ascii';
|
||||
if (enc === 'usc-2') return 'utf16le';
|
||||
break;
|
||||
case 6:
|
||||
if (enc === 'base64') return 'base64';
|
||||
if (enc === 'latin1' || enc === 'binary') return 'latin1';
|
||||
if (enc === 'BASE64') return 'base64';
|
||||
if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1';
|
||||
enc = `${enc}`.toLowerCase();
|
||||
if (enc === 'base64') return 'base64';
|
||||
if (enc === 'latin1' || enc === 'binary') return 'latin1';
|
||||
break;
|
||||
case 7:
|
||||
if (enc === 'utf16le' || enc === 'UTF16LE' ||
|
||||
`${enc}`.toLowerCase() === 'utf16le')
|
||||
return 'utf16le';
|
||||
case 'latin1':
|
||||
case 'binary':
|
||||
return 'latin1';
|
||||
case 'base64':
|
||||
case 'ascii':
|
||||
case 'hex':
|
||||
return enc;
|
||||
default:
|
||||
if (retried) return; // undefined
|
||||
enc = ('' + enc).toLowerCase();
|
||||
retried = true;
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
if (enc === 'utf-16le' || enc === 'UTF-16LE' ||
|
||||
`${enc}`.toLowerCase() === 'utf-16le')
|
||||
return 'utf16le';
|
||||
break;
|
||||
default:
|
||||
if (enc === '') return 'utf8';
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder');
|
||||
// modules monkey-patch it to support additional encodings
|
||||
function normalizeEncoding(enc) {
|
||||
const nenc = internalUtil.normalizeEncoding(enc);
|
||||
if (typeof nenc !== 'string' &&
|
||||
(Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
|
||||
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
|
||||
return nenc || enc;
|
||||
if (nenc === undefined) {
|
||||
if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
|
||||
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
|
||||
return enc;
|
||||
}
|
||||
return nenc;
|
||||
}
|
||||
|
||||
const encodingsMap = {};
|
||||
|
Loading…
x
Reference in New Issue
Block a user