test_runner: parse non-ascii character correctly
PR-URL: https://github.com/nodejs/node/pull/45736
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Moshe Atlow <moshe@atlow.co.il>
This commit is contained in:
parent
cafc0b2b33
commit
3c6547fcc8
@ -5,13 +5,15 @@ const {
|
||||
ArrayPrototypePush,
|
||||
MathMax,
|
||||
SafeSet,
|
||||
StringPrototypeIncludes,
|
||||
StringPrototypeCodePointAt,
|
||||
StringPrototypeTrim,
|
||||
} = primordials;
|
||||
const {
|
||||
codes: { ERR_TAP_LEXER_ERROR },
|
||||
} = require('internal/errors');
|
||||
|
||||
const { isZeroWidthCodePoint } = require('internal/util/inspect');
|
||||
|
||||
const kEOL = '';
|
||||
const kEOF = '';
|
||||
|
||||
@ -474,18 +476,28 @@ class TapLexer {
|
||||
}
|
||||
|
||||
#isLiteralSymbol(char) {
|
||||
return (
|
||||
(char >= 'a' && char <= 'z') ||
|
||||
(char >= 'A' && char <= 'Z') ||
|
||||
this.#isSpecialCharacterSymbol(char)
|
||||
);
|
||||
}
|
||||
if (typeof char !== 'string') return false;
|
||||
const charCode = StringPrototypeCodePointAt(char);
|
||||
|
||||
#isSpecialCharacterSymbol(char) {
|
||||
// We deliberately do not include "# \ + -"" in this list
|
||||
// these are used for comments/reasons explanations, pragma and escape characters
|
||||
// whitespace is not included because it is handled separately
|
||||
return StringPrototypeIncludes('!"$%&\'()*,./:;<=>?@[]^_`{|}~', char);
|
||||
if (isZeroWidthCodePoint(charCode)) return false;
|
||||
if (this.#isWhitespaceSymbol(char)) return false;
|
||||
const MAX_ASCII_CHAR_CODE = 0b111_1111; // ASCII is 7-bit long
|
||||
// Allow all non-latin characters.
|
||||
if (charCode > MAX_ASCII_CHAR_CODE) return true;
|
||||
const ZERO = 48; // 0
|
||||
const NINE = 58; // 9
|
||||
// Disallow numeric values
|
||||
if (charCode >= ZERO && char <= NINE) return false;
|
||||
|
||||
// Disallow characters with special meaning in TAP
|
||||
const HASH = 35; // #
|
||||
const BACKSLASH = 92; // \
|
||||
const PLUS = 43; // +
|
||||
const DASH = 45; // -
|
||||
|
||||
// Disallow characters with special meaning in TAP
|
||||
return charCode !== HASH && charCode !== BACKSLASH &&
|
||||
charCode !== PLUS && charCode !== DASH;
|
||||
}
|
||||
|
||||
#isWhitespaceSymbol(char) {
|
||||
|
@ -2295,6 +2295,18 @@ function formatWithOptionsInternal(inspectOptions, args) {
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
 * Tests a code point against the fixed set of ranges listed below
 * (control codes, combining marks, invisible modifiers and variation
 * selectors).
 * @param {number} code The code point to test.
 * @returns {boolean} `true` when `code` lies in one of the ranges.
 */
function isZeroWidthCodePoint(code) {
  // C0 control codes: only an upper bound is checked.
  if (code <= 0x1F) return true;

  // Inclusive range test against the code point being examined.
  const within = (low, high) => code >= low && code <= high;

  return within(0x7F, 0x9F) ||      // C1 control codes
         within(0x300, 0x36F) ||    // Combining Diacritical Marks
         within(0x200B, 0x200F) ||  // Modifying Invisible Characters
         within(0x20D0, 0x20FF) ||  // Combining Diacritical Marks for Symbols
         within(0xFE00, 0xFE0F) ||  // Variation Selectors
         within(0xFE20, 0xFE2F) ||  // Combining Half Marks
         within(0xE0100, 0xE01EF);  // Variation Selectors
}
|
||||
|
||||
if (internalBinding('config').hasIntl) {
|
||||
const icu = internalBinding('icu');
|
||||
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
|
||||
@ -2384,17 +2396,6 @@ if (internalBinding('config').hasIntl) {
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Tests a code point against a fixed list of ranges: control codes,
 * combining marks, invisible modifiers and variation selectors.
 * @param {number} code The code point to test.
 * @returns {boolean} `true` when `code` lies in one of the ranges.
 */
const isZeroWidthCodePoint = (code) => {
  // C0 control codes
  if (code <= 0x1F) return true;
  // C1 control codes
  if (code >= 0x7F && code <= 0x9F) return true;
  // Combining Diacritical Marks
  if (code >= 0x300 && code <= 0x36F) return true;
  // Modifying Invisible Characters
  if (code >= 0x200B && code <= 0x200F) return true;
  // Combining Diacritical Marks for Symbols
  if (code >= 0x20D0 && code <= 0x20FF) return true;
  // Variation Selectors
  if (code >= 0xFE00 && code <= 0xFE0F) return true;
  // Combining Half Marks
  if (code >= 0xFE20 && code <= 0xFE2F) return true;
  // Variation Selectors
  return code >= 0xE0100 && code <= 0xE01EF;
};
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2414,4 +2415,5 @@ module.exports = {
|
||||
formatWithOptions,
|
||||
getStringWidth,
|
||||
stripVTControlCharacters,
|
||||
isZeroWidthCodePoint,
|
||||
};
|
||||
|
@ -444,3 +444,39 @@ ok 1
|
||||
assert.strictEqual(tokens[index].value, token.value);
|
||||
});
|
||||
}
|
||||
|
||||
// Test isLiteralSymbol method
|
||||
{
  // A test line whose description ends with a run of non-ASCII characters:
  // the whole run is expected to come back as one LITERAL token.
  const actual = TAPLexer('ok 1 - description أتث讲演講👍🔥');

  const expected = [
    { kind: TokenKind.TAP_TEST_OK, value: 'ok' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.NUMERIC, value: '1' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.DASH, value: '-' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'description' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
    { kind: TokenKind.EOL, value: '' },
  ];

  // Compare position by position, kind first and then value.
  for (let i = 0; i < expected.length; i++) {
    const want = expected[i];
    assert.strictEqual(actual[i].kind, want.kind);
    assert.strictEqual(actual[i].value, want.value);
  }
}
|
||||
|
||||
{
  // A comment line that ends with a run of non-ASCII characters: the run
  // is expected to come back as one LITERAL token after the comment marker.
  const actual = TAPLexer('# comment أتث讲演講👍🔥');

  const expected = [
    { kind: TokenKind.COMMENT, value: '#' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'comment' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
    { kind: TokenKind.EOL, value: '' },
  ];

  // Compare position by position, kind first and then value.
  for (let i = 0; i < expected.length; i++) {
    const want = expected[i];
    assert.strictEqual(actual[i].kind, want.kind);
    assert.strictEqual(actual[i].value, want.value);
  }
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user