lib: add ASCII fast path to getStringWidth()
A lot of strings that are going to be passed to `getStringWidth()` are
ASCII strings, for which the calculation is rather easy and calling into
C++ can be skipped.

                                                  confidence improvement accuracy (*)    (**)   (***)
 misc/getstringwidth.js n=100000 type='ascii'            ***    328.99 %      ±21.73% ±29.25% ±38.77%
 misc/getstringwidth.js n=100000 type='emojiseq'                  2.94 %       ±7.66% ±10.19% ±13.26%
 misc/getstringwidth.js n=100000 type='fullwidth'                 4.70 %       ±5.64%  ±7.50%  ±9.76%

PR-URL: https://github.com/nodejs/node/pull/29301
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Trivikram Kamat <trivikr.dev@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
Reviewed-By: Minwoo Jung <minwoo@nodesource.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
This commit is contained in:
parent
020c2eaf4b
commit
ab841d5fba
26
benchmark/misc/getstringwidth.js
Normal file
26
benchmark/misc/getstringwidth.js
Normal file
@ -0,0 +1,26 @@
|
||||
'use strict';
|
||||
|
||||
const common = require('../common.js');
|
||||
|
||||
// Benchmark matrix: each sample `type` is measured with the same call
// count `n`. `main` loads an internal module, hence --expose-internals.
const bench = common.createBenchmark(
  main,
  {
    type: ['ascii', 'mixed', 'emojiseq', 'fullwidth'],
    n: [10e4]
  },
  { flags: ['--expose-internals'] }
);
|
||||
|
||||
function main({ n, type }) {
|
||||
const { getStringWidth } = require('internal/readline/utils');
|
||||
|
||||
const str = ({
|
||||
ascii: 'foobar'.repeat(100),
|
||||
mixed: 'foo'.repeat(100) + '😀' + 'bar'.repeat(100),
|
||||
emojiseq: '👨👨👧👦👨👩👦👦👨👩👧👧👩👩👧👦'.repeat(10),
|
||||
fullwidth: '你好'.repeat(150)
|
||||
})[type];
|
||||
|
||||
bench.start();
|
||||
for (let j = 0; j < n; j += 1)
|
||||
getStringWidth(str);
|
||||
bench.end(n);
|
||||
}
|
@ -34,13 +34,32 @@ if (internalBinding('config').hasIntl) {
|
||||
const icu = internalBinding('icu');
|
||||
/**
 * Returns the display width of `str` as a number of columns.
 *
 * Printable ASCII is counted directly in JavaScript; the native ICU binding
 * is only consulted from the first character that is not handled here, so
 * pure-ASCII input never leaves JavaScript.
 *
 * @param {string|number} str Text to measure. An integer is treated as a
 *   single code point and handed straight to ICU; any other value is
 *   converted with `String()` and stripped of VT control sequences first.
 * @param {object} [options]
 * @param {boolean} [options.ambiguousAsFullWidth] Coerced to a boolean and
 *   forwarded to ICU.
 * @param {boolean} [options.expandEmojiSequence] Coerced to a boolean and
 *   forwarded to ICU; always `false` for code point input.
 * @returns {number} Accumulated width.
 */
getStringWidth = function getStringWidth(str, options) {
  options = options || {};

  if (Number.isInteger(str)) {
    // Provide information about the character with code point 'str'.
    return icu.getStringWidth(
      str,
      Boolean(options.ambiguousAsFullWidth),
      false
    );
  }

  str = stripVTControlCharacters(String(str));
  let width = 0;

  for (let i = 0; i < str.length; i++) {
    // Try to avoid calling into C++ by first handling the ASCII portion of
    // the string. If it is fully ASCII, we skip the C++ part.
    const code = str.charCodeAt(i);
    if (code < 127) {
      // Code units below 32 add nothing; 32..126 add one column each.
      width += code >= 32;
      continue;
    }

    // First code unit not handled above (127 or higher): let ICU measure
    // the whole remainder in a single call, then stop scanning.
    width += icu.getStringWidth(
      str.slice(i),
      Boolean(options.ambiguousAsFullWidth),
      Boolean(options.expandEmojiSequence)
    );
    break;
  }

  return width;
};
|
||||
isFullWidthCodePoint =
|
||||
function isFullWidthCodePoint(code, options) {
|
||||
|
@ -69,3 +69,25 @@ assert.strictEqual(
|
||||
|
||||
// Control chars and combining chars have zero width, so this
// four-character string is expected to measure a single column.
assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);

// The ASCII fast path must agree with the 'slow' path: for every code unit
// below 256, measuring the code point, the one-character string, and that
// character followed by a width-2 emoji has to give consistent answers.
for (const ambiguousAsFullWidth of [ false, true ]) {
  // Fresh options object on every call, as in a direct invocation.
  const widthOf = (value) =>
    readline.getStringWidth(value, { ambiguousAsFullWidth });

  for (let codeUnit = 0; codeUnit <= 0xFF; codeUnit++) {
    const char = String.fromCharCode(codeUnit);

    assert.strictEqual(widthOf(codeUnit), widthOf(char));
    assert.strictEqual(widthOf(char + '🎉'), widthOf(char) + 2);

    const isControl = codeUnit < 32 || (codeUnit >= 127 && codeUnit < 160);
    if (isControl) {
      // Control character
      assert.strictEqual(widthOf(codeUnit), 0);
    } else if (codeUnit < 127) {
      // Regular ASCII character
      assert.strictEqual(widthOf(codeUnit), 1);
    }
  }
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user