diff --git a/doc/configuration.txt b/doc/configuration.txt index 3ce29ca5d..ab09e8367 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -13791,12 +13791,6 @@ sub(<value>) This prefix is followed by a name. The separator is a '.'. The name may only contain characters 'a-z', 'A-Z', '0-9', '.' and '_'. -svarint - Converts a binary input sample of a protocol buffers signed "varint" ("sint32" - and "sint64") to an integer. - More information may be found here about the protocol buffers message field types: - https://developers.google.com/protocol-buffers/docs/encoding - table_bytes_in_rate(<table>) Uses the string representation of the input sample to perform a look up in the specified table. If the key is not found in the table, integer value zero @@ -13967,9 +13961,17 @@ url_dec Takes an url-encoded string provided as input and returns the decoded version as output. The input and the output are of type string. -ungrpc(<field_number>) : binary +ungrpc(<field_number>,[<field_type>]) This extracts the protocol buffers message field in raw mode of an input binary - sample with <field_number> as field number (dotted notation). + sample with <field_number> as field number (dotted notation) if <field_type> + is not present, or as an integer sample if this field is present. + The list of the authorized types is the following one: "int32", "int64", "uint32", + "uint64", "sint32", "sint64", "bool", "enum" for the "varint" wire type 0, + "fixed64", "sfixed64", "double" for the 64bit wire type 1, "fixed32", "sfixed32", + "float" for the wire type 5. Note that "string" is considered as a length-delimited + type. So it does not require any argument to be extracted. 
+ More information may be found here about the protocol buffers message field types: + https://developers.google.com/protocol-buffers/docs/encoding Example: // with such a protocol buffer .proto file content adapted from @@ -13995,10 +13997,15 @@ ungrpc(<field_number>) : binary protocol buffers messages), the four protocol buffers fields could be extracted with these "ungrpc" directives: - req.body,ungrpc(48.59.1) # "latitude" of "lo" first PPoint - req.body,ungrpc(48.59.2) # "longitude" of "lo" first PPoint - req.body,ungrpc(49.59.1) # "latidude" of "hi" second PPoint - req.body,ungrpc(49.59.2) # "longitude" of "hi" second PPoint + req.body,ungrpc(48.59.1,int32) # "latitude" of "lo" first PPoint + req.body,ungrpc(48.59.2,int32) # "longitude" of "lo" first PPoint + req.body,ungrpc(49.59.1,int32) # "latitude" of "hi" second PPoint + req.body,ungrpc(49.59.2,int32) # "longitude" of "hi" second PPoint + + We could also extract the intermediary 48.59 field as a binary sample as follows: + + req.body,ungrpc(48.59) + unset-var(<var name>) Unsets a variable if the input content is defined. The name of the variable @@ -14026,12 +14033,6 @@ utime(<format>[,<offset>]) # e.g. 20140710162350 127.0.0.1:57325 log-format %[date,utime(%Y%m%d%H%M%S)]\ %ci:%cp -varint - Converts a binary input sample of a protocol buffers "varint", excepted - the signed ones "sint32" and "sint64", to an integer. - More information may be found here about the protocol buffers message field types: - https://developers.google.com/protocol-buffers/docs/encoding - word(<index>,<delimiters>[,<count>]) Extracts the nth word counting from the beginning (positive index) or from the end (negative index) considering given delimiters from an input string. diff --git a/include/proto/protocol_buffers.h b/include/proto/protocol_buffers.h index 58378a966..97f9bf55a 100644 --- a/include/proto/protocol_buffers.h +++ b/include/proto/protocol_buffers.h @@ -32,16 +32,16 @@ /* .skip and .smp_store prototypes. 
*/ int protobuf_skip_varint(unsigned char **pos, size_t *len, size_t vlen); -int protobuf_smp_store_varint(struct sample *smp, +int protobuf_smp_store_varint(struct sample *smp, int type, unsigned char *pos, size_t len, size_t vlen); int protobuf_skip_64bit(unsigned char **pos, size_t *len, size_t vlen); -int protobuf_smp_store_64bit(struct sample *smp, +int protobuf_smp_store_64bit(struct sample *smp, int type, unsigned char *pos, size_t len, size_t vlen); int protobuf_skip_vlen(unsigned char **pos, size_t *len, size_t vlen); -int protobuf_smp_store_vlen(struct sample *smp, +int protobuf_smp_store_vlen(struct sample *smp, int type, unsigned char *pos, size_t len, size_t vlen); int protobuf_skip_32bit(unsigned char **pos, size_t *len, size_t vlen); -int protobuf_smp_store_32bit(struct sample *smp, +int protobuf_smp_store_32bit(struct sample *smp, int type, unsigned char *pos, size_t len, size_t vlen); struct protobuf_parser_def protobuf_parser_defs [] = { @@ -69,6 +69,66 @@ struct protobuf_parser_def protobuf_parser_defs [] = { }, }; +/* + * Note that the field values with protocol buffers 32bit and 64bit fixed size as type + * are sent in little-endian byte order to the network. + */ + +/* Convert a little-endian ordered 32bit integer to the byte order of the host. */ +static inline uint32_t pbuf_le32toh(uint32_t v) +{ + uint8_t *p = (uint8_t *)&v; + return (p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); +} + +/* Convert a little-endian ordered 64bit integer to the byte order of the host. */ +static inline uint64_t pbuf_le64toh(uint64_t v) +{ + return (uint64_t)(pbuf_le32toh(v >> 32)) << 32 | pbuf_le32toh(v); +} + +/* + * Return a protobuf type enum from string if succedeed, -1 if not. + */ +int protobuf_type(const char *s) +{ + /* varint types. 
*/ + if (!strcmp(s, "int32")) + return PBUF_T_VARINT_INT32; + else if (!strcmp(s, "uint32")) + return PBUF_T_VARINT_UINT32; + else if (!strcmp(s, "sint32")) + return PBUF_T_VARINT_SINT32; + else if (!strcmp(s, "int64")) + return PBUF_T_VARINT_INT64; + else if (!strcmp(s, "uint64")) + return PBUF_T_VARINT_UINT64; + else if (!strcmp(s, "sint64")) + return PBUF_T_VARINT_SINT64; + else if (!strcmp(s, "bool")) + return PBUF_T_VARINT_BOOL; + else if (!strcmp(s, "enum")) + return PBUF_T_VARINT_ENUM; + + /* 32bit fixed size types. */ + else if (!strcmp(s, "fixed32")) + return PBUF_T_32BIT_FIXED32; + else if (!strcmp(s, "sfixed32")) + return PBUF_T_32BIT_SFIXED32; + else if (!strcmp(s, "float")) + return PBUF_T_32BIT_FLOAT; + + /* 64bit fixed size types. */ + else if (!strcmp(s, "fixed64")) + return PBUF_T_64BIT_FIXED64; + else if (!strcmp(s, "sfixed64")) + return PBUF_T_64BIT_SFIXED64; + else if (!strcmp(s, "double")) + return PBUF_T_64BIT_DOUBLE; + else + return -1; +} + /* * Decode a protocol buffers varint located in a buffer at address with * as length. The decoded value is stored at . @@ -210,23 +270,59 @@ protobuf_varint_getlen(unsigned char *pos, size_t len) } /* - * Store a raw varint field value in a sample from buffer - * with available bytes. + * Store a varint field value in a sample from buffer + * with available bytes after having decoded it if needed + * depending on the expected protocol buffer type of the field. * Return 1 if succeeded, 0 if not. 
*/ -int protobuf_smp_store_varint(struct sample *smp, +int protobuf_smp_store_varint(struct sample *smp, int type, unsigned char *pos, size_t len, size_t vlen) { - int varint_len; + switch (type) { + case PBUF_T_BINARY: + { + int varint_len; - varint_len = protobuf_varint_getlen(pos, len); - if (varint_len == -1) + varint_len = protobuf_varint_getlen(pos, len); + if (varint_len == -1) + return 0; + + smp->data.type = SMP_T_BIN; + smp->data.u.str.area = (char *)pos; + smp->data.u.str.data = varint_len; + smp->flags = SMP_F_VOL_TEST; + break; + } + + case PBUF_T_VARINT_INT32 ... PBUF_T_VARINT_ENUM: + { + uint64_t varint; + + if (!protobuf_varint(&varint, pos, len)) + return 0; + + smp->data.u.sint = varint; + smp->data.type = SMP_T_SINT; + break; + } + + case PBUF_T_VARINT_SINT32 ... PBUF_T_VARINT_SINT64: + { + uint64_t varint; + + if (!protobuf_varint(&varint, pos, len)) + return 0; + + /* zigzag decoding. */ + smp->data.u.sint = (varint >> 1) ^ -(varint & 1); + smp->data.type = SMP_T_SINT; + break; + } + + default: return 0; - smp->data.type = SMP_T_BIN; - smp->data.u.str.area = (char *)pos; - smp->data.u.str.data = varint_len; - smp->flags = SMP_F_VOL_TEST; + } return 1; } @@ -247,19 +343,40 @@ int protobuf_skip_64bit(unsigned char **pos, size_t *len, size_t vlen) /* * Store a fixed size 64bit field value in a sample from buffer - * with available bytes. + * with available bytes after having decoded it depending on + * the expected protocol buffer type of the field. * Return 1 if succeeded, 0 if not. 
*/ -int protobuf_smp_store_64bit(struct sample *smp, +int protobuf_smp_store_64bit(struct sample *smp, int type, unsigned char *pos, size_t len, size_t vlen) { if (len < sizeof(uint64_t)) return 0; - smp->data.type = SMP_T_BIN; - smp->data.u.str.area = (char *)pos; - smp->data.u.str.data = sizeof(uint64_t); - smp->flags = SMP_F_VOL_TEST; + switch (type) { + case PBUF_T_BINARY: + smp->data.type = SMP_T_BIN; + smp->data.u.str.area = (char *)pos; + smp->data.u.str.data = sizeof(uint64_t); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_64BIT_FIXED64: + case PBUF_T_64BIT_SFIXED64: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = pbuf_le64toh(*(uint64_t *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_64BIT_DOUBLE: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = pbuf_le64toh(*(double *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + default: + return 0; + } return 1; } @@ -284,12 +401,15 @@ int protobuf_skip_vlen(unsigned char **pos, size_t *len, size_t vlen) * buffer with available bytes. * Return 1 if succeeded, 0 if not. */ -int protobuf_smp_store_vlen(struct sample *smp, +int protobuf_smp_store_vlen(struct sample *smp, int type, unsigned char *pos, size_t len, size_t vlen) { if (len < vlen) return 0; + if (type != PBUF_T_BINARY) + return 0; + smp->data.type = SMP_T_BIN; smp->data.u.str.area = (char *)pos; smp->data.u.str.data = vlen; @@ -314,19 +434,45 @@ int protobuf_skip_32bit(unsigned char **pos, size_t *len, size_t vlen) /* * Store a fixed size 32bit field value in a sample from buffer - * with available bytes. + * with available bytes after having decoded it depending on + * the expected protocol buffer type of the field. * Return 1 if succeeded, 0 if not. 
*/ -int protobuf_smp_store_32bit(struct sample *smp, +int protobuf_smp_store_32bit(struct sample *smp, int type, unsigned char *pos, size_t len, size_t vlen) { if (len < sizeof(uint32_t)) return 0; - smp->data.type = SMP_T_BIN; - smp->data.u.str.area = (char *)pos; - smp->data.u.str.data = sizeof(uint32_t); - smp->flags = SMP_F_VOL_TEST; + switch (type) { + case PBUF_T_BINARY: + smp->data.type = SMP_T_BIN; + smp->data.u.str.area = (char *)pos; + smp->data.u.str.data = sizeof(uint32_t); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_32BIT_FIXED32: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = pbuf_le32toh(*(uint32_t *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_32BIT_SFIXED32: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = (int32_t)pbuf_le32toh(*(uint32_t *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_32BIT_FLOAT: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = pbuf_le32toh(*(float *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + default: + return 0; + } return 1; } diff --git a/include/types/protocol_buffers.h b/include/types/protocol_buffers.h index 9b067f277..8509b01c4 100644 --- a/include/types/protocol_buffers.h +++ b/include/types/protocol_buffers.h @@ -31,6 +31,38 @@ enum protobuf_wire_type { PBUF_TYPE_32BIT, }; +enum protobuf_type { + /* These enums are used to initialize calloc()'ed struct fields. + * Start them from 1 to avoid collisions with the default 0 value + * of such struct fields. + */ + PBUF_T_BINARY = 1, + + /* Do not reorder the following ones: + * PBUF_T_VARINT_*, PBUF_T_32BIT_* and PBUF_T_64BIT_* + */ + PBUF_T_VARINT_INT32, + PBUF_T_VARINT_UINT32, + PBUF_T_VARINT_INT64, + PBUF_T_VARINT_UINT64, + PBUF_T_VARINT_BOOL, + PBUF_T_VARINT_ENUM, + + /* These two following varints are first encoded with zigzag. */ + PBUF_T_VARINT_SINT32, + PBUF_T_VARINT_SINT64, + + /* Fixed size types from here. 
*/ + PBUF_T_32BIT_FIXED32, + PBUF_T_32BIT_SFIXED32, + PBUF_T_32BIT_FLOAT, + + PBUF_T_64BIT_FIXED64, + PBUF_T_64BIT_SFIXED64, + PBUF_T_64BIT_DOUBLE, +}; + + struct pbuf_fid { unsigned int *ids; size_t sz; @@ -38,7 +70,8 @@ struct pbuf_fid { struct protobuf_parser_def { int (*skip)(unsigned char **pos, size_t *left, size_t vlen); - int (*smp_store)(struct sample *, unsigned char *pos, size_t left, size_t vlen); + int (*smp_store)(struct sample *, int type, + unsigned char *pos, size_t left, size_t vlen); }; #endif /* _TYPES_PROTOCOL_BUFFERS_H */ diff --git a/src/arg.c b/src/arg.c index 858f8ec54..b0fe94544 100644 --- a/src/arg.c +++ b/src/arg.c @@ -241,6 +241,9 @@ int make_arg_list(const char *in, int len, uint64_t mask, struct arg **argp, break; case ARGT_PBUF_FNUM: + if (in == beg) + goto empty_err; + if (!parse_dotted_uints(word, &arg->data.fid.ids, &arg->data.fid.sz)) goto parse_err; diff --git a/src/sample.c b/src/sample.c index 7cd1425bb..5b0ccc953 100644 --- a/src/sample.c +++ b/src/sample.c @@ -1779,33 +1779,6 @@ static int sample_conv_crc32c(const struct arg *arg_p, struct sample *smp, void return 1; } -/* Decode an unsigned protocol buffers varint */ -static int sample_conv_varint(const struct arg *arg_p, struct sample *smp, void *private) -{ - uint64_t varint; - - if (!protobuf_varint(&varint, (unsigned char *)smp->data.u.str.area, smp->data.u.str.data)) - return 0; - - smp->data.u.sint = varint; - smp->data.type = SMP_T_SINT; - return 1; -} - -/* Decode a signed protocol buffers varint encoded as (zigzag + varint). */ -static int sample_conv_svarint(const struct arg *arg_p, struct sample *smp, void *private) -{ - uint64_t varint; - - if (!protobuf_varint(&varint, (unsigned char *)smp->data.u.str.area, smp->data.u.str.data)) - return 0; - - /* zigzag decoding. */ - smp->data.u.sint = (varint >> 1) ^ -(varint & 1); - smp->data.type = SMP_T_SINT; - return 1; -} - /* This function escape special json characters. 
The returned string can be * safely set between two '"' and used as json string. The json string is * defined like this: @@ -2792,12 +2765,14 @@ static int sample_conv_ungrpc(const struct arg *arg_p, struct sample *smp, void size_t grpc_left; unsigned int *fid; size_t fid_sz; + int type; if (!smp->strm) return 0; fid = arg_p[0].data.fid.ids; fid_sz = arg_p[0].data.fid.sz; + type = arg_p[1].data.sint; pos = (unsigned char *)smp->data.u.str.area; /* Remaining bytes in the body to be parsed. */ @@ -2856,7 +2831,7 @@ static int sample_conv_ungrpc(const struct arg *arg_p, struct sample *smp, void return 0; } else if (field == fid_sz - 1) { - return pbuf_parser->smp_store(smp, pos, left, 0); + return pbuf_parser->smp_store(smp, type, pos, left, 0); } break; @@ -2883,7 +2858,7 @@ static int sample_conv_ungrpc(const struct arg *arg_p, struct sample *smp, void if (!pbuf_parser->skip(&pos, &left, elen)) return 0; } else if (field == fid_sz - 1) { - return pbuf_parser->smp_store(smp, pos, left, elen); + return pbuf_parser->smp_store(smp, type, pos, left, elen); } break; @@ -2909,6 +2884,29 @@ static int sample_conv_ungrpc(const struct arg *arg_p, struct sample *smp, void return 0; } +static int sample_conv_ungrpc_check(struct arg *args, struct sample_conv *conv, + const char *file, int line, char **err) +{ + if (!args[1].type) { + args[1].type = ARGT_SINT; + args[1].data.sint = PBUF_T_BINARY; + } + else { + int pbuf_type; + + pbuf_type = protobuf_type(args[1].data.str.area); + if (pbuf_type == -1) { + memprintf(err, "Wrong protocol buffer type '%s'", args[1].data.str.area); + return 0; + } + + args[1].type = ARGT_SINT; + args[1].data.sint = pbuf_type; + } + + return 1; +} + /* This function checks the "strcmp" converter's arguments and extracts the * variable name and its scope. */ @@ -3296,9 +3294,7 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "strcmp", sample_conv_strcmp, ARG1(1,STR), smp_check_strcmp, SMP_T_STR, SMP_T_SINT }, /* gRPC converters. 
*/ - { "ungrpc", sample_conv_ungrpc, ARG1(1,PBUF_FNUM), NULL, SMP_T_BIN, SMP_T_BIN }, - { "varint", sample_conv_varint, 0, NULL, SMP_T_BIN, SMP_T_SINT }, - { "svarint", sample_conv_svarint, 0, NULL, SMP_T_BIN, SMP_T_SINT }, + { "ungrpc", sample_conv_ungrpc, ARG2(1,PBUF_FNUM,STR), sample_conv_ungrpc_check, SMP_T_BIN, SMP_T_BIN }, { "and", sample_conv_binary_and, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT }, { "or", sample_conv_binary_or, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },