diff --git a/doc/internals/api/buffer-list-api.txt b/doc/internals/api/buffer-list-api.txt new file mode 100644 index 000000000..9f0e2e4cf --- /dev/null +++ b/doc/internals/api/buffer-list-api.txt @@ -0,0 +1,128 @@ +2024-09-30 - Buffer List API + + +1. Use case + +The buffer list API allows one to share a certain amount of buffers between +multiple entities, which will each see their own as lists of buffers, while +keeping a shared free list. The immediate use case is for muxes, which may +want to allocate up to a certain number of buffers per connection, shared +among all streams. In this case, each stream will first request a new list +for its own use, then may request extra entries from the free list. At any +moment it will be possible to enumerate all allocated lists and to know which +buffer follows which one. + + +2. Representation + +The buffer list is an array of struct bl_elem. It can hold up to N-1 buffers +for N elements. The first one serves as the bookkeeping head and creates the +free list. + +Each bl_elem contains a struct buffer, a pointer to the next cell, and a few +flags. The struct buffer is a real struct buffer for all cells, except the +first one where it holds useful data to describe the state of the array: + + struct bl_elem { + struct buffer { + size_t size; // head: size of the array in number of elements + char *area; // head: not used (0) + size_t data; // head: number of elements allocated + size_t head; // head: number of users + } buf; + uint32_t next; + uint32_t flags; + }; + +There are a few important properties here: + + - for the free list, the first element isn't part of the list, otherwise + there wouldn't be any head storage anymore. + + - the head's buf.data doesn't include the first cell of the array, thus its + maximum value is buf.size - 1. 
+ + - allocations are always made by appending to the end of the existing list + + - releases are always made by releasing the beginning of the existing list + + - next == 0 for an allocatable cell implies that all the cells from this + element to the last one of the array are free. This allows one to simply + initialize a whole new array with memset(array, 0, sizeof(array)) + + - next == ~0 for an allocated cell indicates we've reached the last element + of the current list. + + - for the head of the list, next points to the first available cell, or 0 if + the free list is depleted. + + +3. Example + +The array starts like this, created with a calloc() and having size initialized +to the total number of cells. The number represented is the 'next' value. "~" +here stands for ~0 (i.e. end marker). + + [1|0|0|0|0|0|0|0|0|0] => array entirely free + +strm1: bl_get(0) -> 1 = assign 1 to strm1's first cell + + [2|~|0|0|0|0|0|0|0|0] => strm1 allocated at [1] + 1 + +strm1: bl_get(1) -> 2 = allocate one cell after cell 1 + + [3|2|~|0|0|0|0|0|0|0] + 1 + +strm1: bl_get(2) -> 3 = allocate one cell after cell 2 + + [4|2|3|~|0|0|0|0|0|0] + 1 + +strm2: bl_get(0) -> 4 = assign 4 to strm2's first cell + + [5|2|3|~|~|0|0|0|0|0] + 1 2 + +strm1: bl_put(1) -> 2 = release cell 1, jump to next one (2) + + [1|5|3|~|~|0|0|0|0|0] + 1 2 + + +4. Manipulating buffer lists + +The API is very simple, it allows to reserve a buffer for a new stream or for +an existing one, to release a stream's first buffer or release the entire +stream, and to initialize / release the whole array. + +====================+==================+======================================= +Function | Arguments/Return | Description +--------------------+------------------+--------------------------------------- +bl_users() | const bl_elem *b | returns the current number of users on + | ret: uint32_t | the array (i.e. buf.head). 
+--------------------+------------------+--------------------------------------- +bl_size() | const bl_elem *b | returns the total number of + | ret: uint32_t | allocatable cells (i.e. buf.size-1) +--------------------+------------------+--------------------------------------- +bl_used() | const bl_elem *b | returns the number of cells currently + | ret: uint32_t | in use (i.e. buf.data) +--------------------+------------------+--------------------------------------- +bl_avail() | const bl_elem *b | returns the number of cells still + | ret: uint32_t | available. +--------------------+------------------+--------------------------------------- +bl_init() | bl_elem *b | initializes b for n elements. All are + | uint32_t n | in the free list. +--------------------+------------------+--------------------------------------- +bl_put() | bl_elem *b | releases cell n to the free list, + | uint32_t n | possibly deleting the user. Returns + | ret: uint32_t | next cell idx or 0 if none (last one). +--------------------+------------------+--------------------------------------- +bl_deinit() | bl_elem *b | only when DEBUG_STRICT==2, scans the + | | array to check for leaks. +--------------------+------------------+--------------------------------------- +bl_get() | bl_elem *b | allocates a new cell to append after n, + | uint32_t n | or for a new stream if n is 0. Returns + | ret: uint32_t | the cell, or 0 if no more space. +====================+==================+======================================= diff --git a/include/haproxy/buf-t.h b/include/haproxy/buf-t.h index 3c0f8b551..5c59b0aaf 100644 --- a/include/haproxy/buf-t.h +++ b/include/haproxy/buf-t.h @@ -52,6 +52,26 @@ struct buffer { #define BUF_WANTED ((struct buffer){ .area = (char *)1 }) #define BUF_RING ((struct buffer){ .area = (char *)2 }) +/* An element of a buffer list (bl_*). They're all stored in an array. The + * holder contains a pointer to that array and a count. 
The first element + * (index zero) builds the free list and may never be used. All owners simply + * have a head and a tail index pointing to their own list. In order to ease + * initialization, for each allocatable cell, next==0 indicates that all + * following cells till the end of the array are free. The end of a list is + * marked by next==~0. For the head, next is always valid or is zero when no + * more entries are available. The struct element doesn't have holes. It's 24 + * bytes in 32 bits and 40 bytes in 64 bits, so offsets are trivially obtained + * from indexes. The pointer may be split into two 16 bits fields if + * needed in order to make room for something else later, since we don't + * expect to make 64k-buffer arrays. The first element's buf stores size, + * allocated space and number of users. + */ +struct bl_elem { + struct buffer buf; /* cell's buffer; head cell: size/data/head = array size/used cells/users */ + uint32_t next; /* next cell's index; 0: all following cells are free, ~0: end of list */ + uint32_t flags; /* cell flags; none is defined in this patch */ +}; + #endif /* _HAPROXY_BUF_T_H */ /* diff --git a/include/haproxy/buf.h b/include/haproxy/buf.h index 9e5a9e861..8238ae40f 100644 --- a/include/haproxy/buf.h +++ b/include/haproxy/buf.h @@ -55,6 +55,9 @@ int b_put_varint(struct buffer *b, uint64_t v); int b_get_varint(struct buffer *b, uint64_t *vptr); int b_peek_varint(struct buffer *b, size_t ofs, uint64_t *vptr); +void bl_deinit(struct bl_elem *head); +uint32_t bl_get(struct bl_elem *head, uint32_t idx); + /***************************************************************************/ /* Functions used to compute offsets and pointers. Most of them exist in */ /* both wrapping-safe and unchecked ("__" prefix) variants. Some returning */ @@ -656,6 +659,79 @@ static inline struct buffer *br_del_head(struct buffer *r) return br_head(r); } +/* + * Buffer list management. 
+ */ + +/* Returns the number of users of at least one entry */ +static inline uint32_t bl_users(const struct bl_elem *head) +{ + return head->buf.head; +} + +/* Returns the number of allocatable cells */ +static inline uint32_t bl_size(const struct bl_elem *head) +{ + return head->buf.size - 1; +} + +/* Returns the number of cells currently in use */ +static inline uint32_t bl_used(const struct bl_elem *head) +{ + return head->buf.data; +} + +/* Returns the number of cells still available */ +static inline uint32_t bl_avail(const struct bl_elem *head) +{ + return bl_size(head) - bl_used(head); +} + +/* Initializes the caller-provided array 'head' of 'nbelem' elements (one less + * will be allocatable, the first one being the head). The array is zeroed and + * 'nbelem' is expected to be at least 2. Nothing is returned. + */ +static inline void bl_init(struct bl_elem *head, uint32_t nbelem) +{ + BUG_ON_HOT(nbelem < 2); + memset(head, 0, nbelem * sizeof(*head)); + head->buf.size = nbelem; + head->next = 1; +} + +/* Puts the cell at index 'idx' back into the free list of 'head'. It must have been + * freed from its buffer before calling this, and must correspond to the head + * of the caller. It returns the new head for the caller (the next cell + * immediately after the current one), or zero if the list is empty, in which + * case the caller is considered as no longer belonging to the list. + */ +static inline uint32_t bl_put(struct bl_elem *head, uint32_t idx) +{ + uint32_t n; + + BUG_ON_HOT(!idx || idx >= head->buf.size); + n = head[idx].next; + + /* if the element was the last one (head[idx].next == ~0) then the + * chain is entirely gone and the caller is no longer in the list. + */ + if (n == ~0) { + BUG_ON_HOT(!head->buf.head); + head->buf.head--; // #users + n = 0; // no next + } + + /* If the free list was empty (next==0), this element becomes both the + * first and the last one, otherwise it inserts itself before the + * previous first free element. + */ + head[idx].next = head->next ? 
head->next : ~0U; + head->next = idx; + BUG_ON_HOT(!head->buf.data); + head->buf.data--; // one less allocated + return n; +} + #endif /* _HAPROXY_BUF_H */ /* diff --git a/src/buf.c b/src/buf.c index 4f455dba5..b81652ac0 100644 --- a/src/buf.c +++ b/src/buf.c @@ -730,3 +730,82 @@ int b_peek_varint(struct buffer *b, size_t ofs, uint64_t *vptr) size = b->data - ofs - data; return size; } + +/* + * Buffer List management. + */ + +/* Deinits an array of buffer list. It's the caller's responsibility to check + * that all buffers were already released. This should be done before any + * free() of the array. + */ +void bl_deinit(struct bl_elem *head) +{ + BUG_ON_HOT( + /* count all cells reachable from the head (itself included): + * once everything is released this must equal the array size. + */ + ({ + uint32_t elem = 0, seen = 0; + if (head->next && !head->buf.data && !head->buf.head) { + do { + seen++; + elem = head[elem].next ? head[elem].next : elem + 1; + } while (elem != ~0U && elem != head->buf.size); + } + seen != head->buf.size; + }), "bl_deinit() of a non-completely released list"); +} + +/* Gets the index of a spare entry in the buffer list, to be used after element + * of index 'idx'. It is detached, appended to the end of the existing list and + * marked as the last one. If 'idx' is zero, the caller requests the creation + * of a new list entry. If no more buffer slots are available, the function + * returns zero. + */ +uint32_t bl_get(struct bl_elem *head, uint32_t idx) +{ + uint32_t e, n; + + BUG_ON_HOT(idx >= head->buf.size); + + /* Get the first free element. In the head it's always a valid index or + * 0 to indicate the end of list. We can then always dereference it, + * and if 0 (empty, which is rare), it'll loop back to itself. This + * allows us to save a test in the fast path. 
+ */ + e = head->next; // element to be allocated + n = head[e].next; // next one to replace the free list's top + if (!n) { + /* Happens only with a freshly initialized array, or when the + * free list is depleted (e==0). + */ + if (!e) + goto done; + + /* n is in the free area till the end, let's report the next + * free entry, otherwise leave it at zero to mark the end of + * the free list. + */ + if (e + 1 != head->buf.size) + n = e + 1; + } + + head->next = n == ~0U ? 0 : n; + head->buf.data++; + + if (idx) { + /* append to a tail: idx must point to a tail */ + BUG_ON_HOT(head[idx].next != ~0); + head[idx].next = e; + } + else { + /* allocate a new user and offer it this slot */ + head->buf.head++; // #users + } + + head[e].next = ~0; // mark the end of list + done: + /* and finally return the element's index */ + return e; +}