test_runner: parse non-ascii character correctly

PR-URL: https://github.com/nodejs/node/pull/45736
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Moshe Atlow <moshe@atlow.co.il>
This commit is contained in:
Mert Can Altın 2023-02-18 21:10:16 +03:00 committed by GitHub
parent cafc0b2b33
commit 3c6547fcc8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 73 additions and 23 deletions

View File

@ -5,13 +5,15 @@ const {
ArrayPrototypePush,
MathMax,
SafeSet,
StringPrototypeIncludes,
StringPrototypeCodePointAt,
StringPrototypeTrim,
} = primordials;
const {
codes: { ERR_TAP_LEXER_ERROR },
} = require('internal/errors');
const { isZeroWidthCodePoint } = require('internal/util/inspect');
const kEOL = '';
const kEOF = '';
@ -474,18 +476,28 @@ class TapLexer {
}
#isLiteralSymbol(char) {
return (
(char >= 'a' && char <= 'z') ||
(char >= 'A' && char <= 'Z') ||
this.#isSpecialCharacterSymbol(char)
);
}
if (typeof char !== 'string') return false;
const charCode = StringPrototypeCodePointAt(char);
#isSpecialCharacterSymbol(char) {
// We deliberately do not include "# \ + -"" in this list
// these are used for comments/reasons explanations, pragma and escape characters
// whitespace is not included because it is handled separately
return StringPrototypeIncludes('!"$%&\'()*,./:;<=>?@[]^_`{|}~', char);
if (isZeroWidthCodePoint(charCode)) return false;
if (this.#isWhitespaceSymbol(char)) return false;
const MAX_ASCII_CHAR_CODE = 0b111_1111; // ASCII is 7-bit long
// Allow all non-latin characters.
if (charCode > MAX_ASCII_CHAR_CODE) return true;
const ZERO = 48; // 0
const NINE = 58; // 9
// Disallow numeric values
if (charCode >= ZERO && char <= NINE) return false;
// Disallow characters with special meaning in TAP
const HASH = 35; // #
const BACKSLASH = 92; // \
const PLUS = 43; // +
const DASH = 45; // -
// Disallow characters with special meaning in TAP
return charCode !== HASH && charCode !== BACKSLASH &&
charCode !== PLUS && charCode !== DASH;
}
#isWhitespaceSymbol(char) {

View File

@ -2295,6 +2295,18 @@ function formatWithOptionsInternal(inspectOptions, args) {
return str;
}
/**
 * Tells whether a code point falls into one of the ranges this module
 * treats as zero-width.
 * @param {number} code Code point to test.
 * @returns {boolean} `true` when `code` lies in one of the listed ranges.
 */
function isZeroWidthCodePoint(code) {
  // C0 control codes
  if (code <= 0x1F) return true;
  // C1 control codes
  if (code >= 0x7F && code <= 0x9F) return true;
  // Combining Diacritical Marks
  if (code >= 0x300 && code <= 0x36F) return true;
  // Modifying Invisible Characters
  if (code >= 0x200B && code <= 0x200F) return true;
  // Combining Diacritical Marks for Symbols
  if (code >= 0x20D0 && code <= 0x20FF) return true;
  // Variation Selectors
  if (code >= 0xFE00 && code <= 0xFE0F) return true;
  // Combining Half Marks
  if (code >= 0xFE20 && code <= 0xFE2F) return true;
  // Variation Selectors
  return code >= 0xE0100 && code <= 0xE01EF;
}
if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
@ -2384,17 +2396,6 @@ if (internalBinding('config').hasIntl) {
);
};
/**
 * Reports whether `code` lies in one of the code point ranges listed below,
 * which this module treats as zero-width.
 * @param {number} code Code point to test.
 * @returns {boolean} `true` when `code` falls in any listed range.
 */
const isZeroWidthCodePoint = (code) => {
  // C0 control codes have no lower bound check.
  if (code <= 0x1F) return true;

  // Inclusive range test against the code point under inspection.
  const within = (first, last) => code >= first && code <= last;

  return within(0x7F, 0x9F) || // C1 control codes
    within(0x300, 0x36F) || // Combining Diacritical Marks
    within(0x200B, 0x200F) || // Modifying Invisible Characters
    within(0x20D0, 0x20FF) || // Combining Diacritical Marks for Symbols
    within(0xFE00, 0xFE0F) || // Variation Selectors
    within(0xFE20, 0xFE2F) || // Combining Half Marks
    within(0xE0100, 0xE01EF); // Variation Selectors
};
}
/**
@ -2414,4 +2415,5 @@ module.exports = {
formatWithOptions,
getStringWidth,
stripVTControlCharacters,
isZeroWidthCodePoint,
};

View File

@ -444,3 +444,39 @@ ok 1
assert.strictEqual(tokens[index].value, token.value);
});
}
// Test isLiteralSymbol method: non-ASCII text in a test description must be
// lexed as a single LITERAL token.
{
  const tokens = TAPLexer('ok 1 - description أتث讲演講👍🔥');
  const expectedTokens = [
    { kind: TokenKind.TAP_TEST_OK, value: 'ok' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.NUMERIC, value: '1' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.DASH, value: '-' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'description' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
    { kind: TokenKind.EOL, value: '' },
  ];

  // Compare position by position, checking the kind before the value.
  for (let index = 0; index < expectedTokens.length; index++) {
    const expected = expectedTokens[index];
    assert.strictEqual(tokens[index].kind, expected.kind);
    assert.strictEqual(tokens[index].value, expected.value);
  }
}
// Non-ASCII text inside a comment line must also be lexed as one LITERAL.
{
  const tokens = TAPLexer('# comment أتث讲演講👍🔥');
  const expectedTokens = [
    { kind: TokenKind.COMMENT, value: '#' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'comment' },
    { kind: TokenKind.WHITESPACE, value: ' ' },
    { kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
    { kind: TokenKind.EOL, value: '' },
  ];

  // Compare position by position, checking the kind before the value.
  for (let index = 0; index < expectedTokens.length; index++) {
    const expected = expectedTokens[index];
    assert.strictEqual(tokens[index].kind, expected.kind);
    assert.strictEqual(tokens[index].value, expected.value);
  }
}