BUG/MINOR: tools: do not create an empty arg from trailing spaces

Trailing spaces on the lines of the config file create an empty arg
which makes it complicated to detect really empty args. Let's first
address this. Note that it is not user-visible but prevents from
fixing user-visible issues. No backport is needed.

The initial issue was introduced with this fix that already tried to
address it:

    8a6767d266 ("BUG/MINOR: config: don't count trailing spaces as empty arg (v2)")

The current patch properly addresses leading and trailing spaces by
only counting arguments if non-lws chars were found on the line. LWS
do not cause a transition to a new arg anymore but they complete the
current one. The whole new code relies on a state machine to detect
when to create an arg (!in_arg->in_arg), and when to close the current
arg. A special care was taken for word expansion in the form of
"${ARGS[*]}" which still continue to emit individual arguments past
the first LWS. This example works fine:

    ARGS="100 check inter 1000"
    server name 192.168.1."${ARGS[*]}"

It properly results in 6 args:

    "server", "name", "192.168.1.100", "check", "inter", "1000"

This fix should not have any visible user impact and is a bit tricky,
so it's best not to backport it, at least for a while.

Co-authored-by: Valentine Krasnobaeva <vkrasnobaeva@haproxy.com>
This commit is contained in:
Willy Tarreau 2025-05-02 15:46:18 +02:00
parent af5bbce664
commit 7e4a2f39ef

View File

@ -6178,10 +6178,13 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
size_t outmax = *outlen;
int argsmax = *nbargs - 1;
size_t outpos = 0;
size_t arg_start = 0; // copy of outpos before EMIT_CHAR().
int squote = 0;
int dquote = 0;
int arg = 0;
uint32_t err = 0;
int in_arg = 0;
int prev_in_arg = 0;
*nbargs = 0;
*outlen = 0;
@ -6191,12 +6194,15 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
args[arg] = out;
while (1) {
prev_in_arg = in_arg;
arg_start = outpos;
if (*in >= '-' && *in != '\\') {
in_arg = 1;
/* speedup: directly send all regular chars starting
* with '-', '.', '/', alnum etc...
*/
EMIT_CHAR(*in++);
continue;
}
else if (*in == '\0' || *in == '\n' || *in == '\r') {
/* end of line */
@ -6207,6 +6213,7 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
break;
}
else if (*in == '"' && !squote && (opts & PARSE_OPT_DQUOTE)) { /* double quote outside single quotes */
in_arg = 1;
if (dquote) {
dquote = 0;
quote = NULL;
@ -6216,9 +6223,9 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
quote = in;
}
in++;
continue;
}
else if (*in == '\'' && !dquote && (opts & PARSE_OPT_SQUOTE)) { /* single quote outside double quotes */
in_arg = 1;
if (squote) {
squote = 0;
quote = NULL;
@ -6228,7 +6235,6 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
quote = in;
}
in++;
continue;
}
else if (*in == '\\' && !squote && (opts & PARSE_OPT_BKSLASH)) {
/* first, we'll replace \\, \<space>, \#, \r, \n, \t, \xXX with their
@ -6237,6 +6243,7 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
*/
char tosend = *in;
in_arg = 1;
switch (in[1]) {
case ' ':
case '\\':
@ -6313,14 +6320,9 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
}
else if (isspace((unsigned char)*in) && !squote && !dquote) {
/* a non-escaped space is an argument separator */
in_arg = 0;
while (isspace((unsigned char)*in))
in++;
EMIT_CHAR(0);
arg++;
if (arg < argsmax)
args[arg] = out + outpos;
else
err |= PARSE_ERR_TOOMANY;
}
else if (*in == '$' && (opts & PARSE_OPT_ENV) && (dquote || !(opts & PARSE_OPT_DQUOTE))) {
/* environment variables are evaluated anywhere, or only
@ -6414,19 +6416,27 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
while (*value) {
/* expand as individual parameters on a space character */
if (word_expand && isspace((unsigned char)*value)) {
EMIT_CHAR(0);
++arg;
if (arg < argsmax)
args[arg] = out + outpos;
else
err |= PARSE_ERR_TOOMANY;
in_arg = 0;
/* skip consecutive spaces */
while (isspace((unsigned char)*++value))
;
} else {
in_arg = 1;
EMIT_CHAR(*value++);
}
if (!prev_in_arg && in_arg) {
if (arg < argsmax)
args[arg] = out + arg_start;
else
err |= PARSE_ERR_TOOMANY;
}
if (prev_in_arg && !in_arg) {
EMIT_CHAR(0);
arg++;
}
prev_in_arg = in_arg;
arg_start = outpos;
}
}
else {
@ -6434,6 +6444,7 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
* Let's skip the trailing double-quote character
* and spaces.
*/
in_arg = 1;
if (likely(*var_name != '.') && *in == '"') {
in++;
while (isspace((unsigned char)*in))
@ -6448,20 +6459,28 @@ uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbarg
}
else {
/* any other regular char */
in_arg = 1;
EMIT_CHAR(*in++);
}
if (!prev_in_arg && in_arg) {
if (arg < argsmax)
args[arg] = out + arg_start;
else
err |= PARSE_ERR_TOOMANY;
}
if (prev_in_arg && !in_arg) {
EMIT_CHAR(0);
arg++;
}
}
/* end of output string */
EMIT_CHAR(0);
/* Don't add an empty arg after trailing spaces. Note that args[arg]
* may contain some distances relative to NULL if <out> was NULL, or
* pointers beyond the end of <out> in case <outlen> is too short, thus
* we must not dereference it.
*/
if (arg < argsmax && args[arg] != out + outpos - 1)
if (in_arg) {
EMIT_CHAR(0);
arg++;
}
if (quote) {
/* unmatched quote */