lib: add ASCII fast path to getStringWidth()
Many of the strings passed to `getStringWidth()` are pure ASCII. For those,
the width is trivial to compute in JavaScript, so the call into C++ can be
skipped.

Benchmark results:

                                                   confidence improvement accuracy (*)    (**)   (***)
 misc/getstringwidth.js n=100000 type='ascii'            ***    328.99 %      ±21.73% ±29.25% ±38.77%
 misc/getstringwidth.js n=100000 type='emojiseq'                  2.94 %       ±7.66% ±10.19% ±13.26%
 misc/getstringwidth.js n=100000 type='fullwidth'                 4.70 %       ±5.64%  ±7.50%  ±9.76%

PR-URL: https://github.com/nodejs/node/pull/29301
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Trivikram Kamat <trivikr.dev@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
Reviewed-By: Minwoo Jung <minwoo@nodesource.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
This commit is contained in:
parent
020c2eaf4b
commit
ab841d5fba
26
benchmark/misc/getstringwidth.js
Normal file
26
benchmark/misc/getstringwidth.js
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
const common = require('../common.js');
|
||||||
|
|
||||||
|
// Benchmark matrix: one run per sample-string category, 100000 calls each.
// `--expose-internals` is passed because main() loads an internal module.
const benchmarkOptions = { flags: ['--expose-internals'] };
const benchmarkConfigs = {
  type: ['ascii', 'mixed', 'emojiseq', 'fullwidth'],
  n: [1e5]
};
const bench = common.createBenchmark(main, benchmarkConfigs, benchmarkOptions);
|
||||||
|
|
||||||
|
/**
 * Times `n` consecutive calls to the internal `getStringWidth()` helper on a
 * fixed sample string selected by `type`.
 *
 * @param {object} conf Benchmark configuration.
 * @param {number} conf.n Number of `getStringWidth()` calls to time.
 * @param {string} conf.type Sample to measure: 'ascii', 'mixed', 'emojiseq'
 *   or 'fullwidth'.
 */
function main({ n, type }) {
  const { getStringWidth } = require('internal/readline/utils');

  const samples = {
    // Pure ASCII input.
    ascii: 'foobar'.repeat(100),
    // ASCII prefix, one non-ASCII character, ASCII suffix.
    mixed: 'foo'.repeat(100) + '😀' + 'bar'.repeat(100),
    // Four family emoji, each built from four emoji joined by U+200D (zero
    // width joiner). The joiners are spelled as escapes because they are
    // invisible in a literal and easily dropped by editors and other tools,
    // which would silently turn the sample into 16 unrelated emoji.
    emojiseq: ('👨\u200D👨\u200D👧\u200D👦' +
               '👨\u200D👩\u200D👦\u200D👦' +
               '👨\u200D👩\u200D👧\u200D👧' +
               '👩\u200D👩\u200D👧\u200D👦').repeat(10),
    // Non-ASCII from the very first character.
    fullwidth: '你好'.repeat(150)
  };
  const str = samples[type];

  bench.start();
  for (let j = 0; j < n; j += 1)
    getStringWidth(str);
  bench.end(n);
}
|
@ -34,13 +34,32 @@ if (internalBinding('config').hasIntl) {
|
|||||||
const icu = internalBinding('icu');
|
const icu = internalBinding('icu');
|
||||||
/**
 * Computes a display width using the ICU binding.
 *
 * @param {string|number} str An integer is passed to ICU unchanged (treated
 *   as a single code point); any other value is converted to a string and has
 *   VT control sequences stripped before measuring.
 * @param {object} [options]
 * @param {boolean} [options.ambiguousAsFullWidth] Forwarded to ICU.
 * @param {boolean} [options.expandEmojiSequence] Forwarded to ICU for string
 *   input only; integer input always passes `false`.
 * @returns {number} The computed width.
 */
getStringWidth = function getStringWidth(str, options) {
  const opts = options || {};

  // Integer input: provide information about the character with code point
  // 'str'.
  if (Number.isInteger(str)) {
    return icu.getStringWidth(str, Boolean(opts.ambiguousAsFullWidth), false);
  }

  const text = stripVTControlCharacters(String(str));
  const { length } = text;
  let columns = 0;
  let index = 0;

  // Walk the leading ASCII portion in JavaScript so that fully-ASCII strings
  // never call into C++. Code units below 32 add nothing, 32..126 add one.
  while (index < length) {
    const unit = text.charCodeAt(index);
    if (unit >= 127) {
      // First code unit outside the fast-path range: hand everything from
      // here on to ICU in a single call and stop scanning.
      return columns + icu.getStringWidth(
        text.slice(index),
        Boolean(opts.ambiguousAsFullWidth),
        Boolean(opts.expandEmojiSequence)
      );
    }
    if (unit >= 32) {
      columns += 1;
    }
    index += 1;
  }

  return columns;
};
|
||||||
isFullWidthCodePoint =
|
isFullWidthCodePoint =
|
||||||
function isFullWidthCodePoint(code, options) {
|
function isFullWidthCodePoint(code, options) {
|
||||||
|
@ -69,3 +69,25 @@ assert.strictEqual(
|
|||||||
|
|
||||||
// Control chars and combining chars are zero
|
// Control chars and combining chars are zero
|
||||||
assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);
|
assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);
|
||||||
|
|
||||||
|
// The ASCII fast path must produce the same widths as the code-point lookup
// (the 'slow' path) for every character code from 0 to 255, both with and
// without `ambiguousAsFullWidth`.
for (const ambiguousAsFullWidth of [false, true]) {
  const widthOf = (input) =>
    readline.getStringWidth(input, { ambiguousAsFullWidth });

  for (let codePoint = 0; codePoint <= 0xFF; codePoint++) {
    const char = String.fromCharCode(codePoint);
    const isControl = codePoint < 32 || (codePoint >= 127 && codePoint < 160);

    // Numeric (code point) input and one-character string input must agree.
    assert.strictEqual(widthOf(codePoint), widthOf(char));
    // Appending a non-ASCII emoji must add exactly its two columns.
    assert.strictEqual(widthOf(`${char}🎉`), widthOf(char) + 2);

    if (isControl) {
      // Control character
      assert.strictEqual(widthOf(codePoint), 0);
    } else if (codePoint < 127) {
      // Regular ASCII character
      assert.strictEqual(widthOf(codePoint), 1);
    }
  }
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user