MINOR: cpu-topo: add a 'first-usable-node' cpu policy

This is a reimplementation of the current default policy. It binds to
the first node having usable CPUs if found, and drops CPUs from the
second and next nodes.
This commit is contained in:
Willy Tarreau 2025-03-06 08:40:21 +01:00
parent 156430ceb6
commit 7fc6cdd0b1
2 changed files with 114 additions and 0 deletions

View File

@ -1522,6 +1522,7 @@ The following keywords are supported in the "global" section :
- chroot
- cluster-secret
- cpu-map
- cpu-policy
- cpu-set
- crt-base
- daemon
@ -1947,6 +1948,41 @@ cpu-map [auto:]<thread-group>[/<thread-set>] <cpu-set>[,...] [...]
cpu-map 4/1-40 40-79,120-159
cpu-policy <policy>
Selects the CPU allocation policy to be used.
On multi-CPU systems, there can be plenty of reasons for not using all
available CPU cores, and/or for grouping them into different thread groups,
for performance, latency, cost, or system-wide resource management. The
"cpu-set" directive already allows to evict a number of them, but once done,
it is necessary to decide how to assign the remaining ones to threads and
thread groups.
This mapping is normally performed using the "cpu-map" directive, though it
can be particularly difficult to maintain on heterogeneous systems.
The "cpu-policy" directive selects which of a small number of allocation
policies to use instead when "cpu-map" is not used. The following policies
are currently supported:
- none no particular post-selection is performed. All enabled
CPUs will be usable, and if the number of threads is
not set, it will be set to the number of available CPUs
but no more than 32 for 32-bit systems or 64 for 64-bit
systems, per thread-group. The number of thread-groups,
if not set, will be set to 1.
- first-usable-node if the CPUs were not previously restricted at boot (for
example using the "taskset" utility), and if the
"nbthread" directive was not set, then the first NUMA
node with enabled CPUs will be used, and this number of
CPUs will be used as the number of threads. A single
thread group will be enabled with all of them, within
the limit of 32 or 64 depending on the system. This is
the default policy.
See also: "cpu-map", "cpu-set", "nbthread"
cpu-set <directive>...
Allows to symbolically describe what sets of CPUs to run on. The directive
supports the following keyword:

View File

@ -10,6 +10,7 @@
#include <haproxy/cpuset.h>
#include <haproxy/cpu_topo.h>
#include <haproxy/global.h>
#include <haproxy/log.h>
#include <haproxy/tools.h>
/* for cpu_set.flags below */
@ -50,8 +51,11 @@ struct cpu_set_cfg {
/* index into the ha_cpu_policy[] table below of the policy selected by the
 * "cpu-policy" config directive; 0 designates the first (default) entry.
 */
static int cpu_policy = 0;

/* list of CPU policies for "cpu-policy". The default one is the first one. */
static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin, int gmax, char **err);

static struct ha_cpu_policy ha_cpu_policy[] = {
	{ .name = "none", .desc = "use all available CPUs", .fct = NULL },
	{ .name = "first-usable-node", .desc = "use only first usable node if nbthreads not set", .fct = cpu_policy_first_usable_node },
	{ 0 } /* end */
};
@ -881,6 +885,80 @@ void cpu_refine_cpusets(void)
}
}
/* the "first-usable-node" cpu-policy: historical one
 * - does nothing if numa_cpu_mapping is not set
 * - does nothing if nbthread is set
 * - does nothing if the set of CPUs had been set manually using taskset
 * - does nothing if the first node couldn't be determined
 * Otherwise ignores all CPUs not on the first node.
 *
 * <policy> is the entry's index in ha_cpu_policy[]; <tmin>/<tmax> are the
 * permitted thread-count bounds and <gmin>/<gmax> the group-count bounds
 * (policy, tmin, gmin and gmax are unused here); <err> may receive an error
 * message (unused here). Always returns 0 (success).
 */
static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin, int gmax, char **err)
{
	struct hap_cpuset node_cpu_set;
	int first_node_id = -1;
	int second_node_id = -1;
	int cpu;
	int cpu_count;
	int grp, thr;

	/* only applies when automatic NUMA CPU mapping is enabled */
	if (!global.numa_cpu_mapping)
		return 0;

	/* an explicit "nbthread" takes precedence over this policy */
	if (global.nbthread)
		return 0;

	/* an externally imposed CPU mask (e.g. taskset) takes precedence */
	if (cpu_mask_forced)
		return 0;

	/* determine first and second nodes with usable CPUs */
	for (cpu = 0; cpu <= cpu_topo_lastcpu; cpu++) {
		if (ha_cpu_topo[cpu].st & HA_CPU_F_EXCL_MASK)
			continue;

		if (ha_cpu_topo[cpu].no_id >= 0 &&
		    ha_cpu_topo[cpu].no_id != first_node_id) {
			if (first_node_id < 0)
				first_node_id = ha_cpu_topo[cpu].no_id;
			else {
				second_node_id = ha_cpu_topo[cpu].no_id;
				break;
			}
		}
	}

	/* no information found on a second node: single-node (or unknown)
	 * topology, nothing to restrict.
	 */
	if (second_node_id < 0)
		return 0;

	/* ignore all CPUs of other nodes, count the remaining valid ones,
	 * and make a CPU set of them.
	 */
	ha_cpuset_zero(&node_cpu_set);
	for (cpu = cpu_count = 0; cpu <= cpu_topo_lastcpu; cpu++) {
		if (ha_cpu_topo[cpu].no_id != first_node_id)
			ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED;
		else if (!(ha_cpu_topo[cpu].st & HA_CPU_F_EXCL_MASK)) {
			ha_cpuset_set(&node_cpu_set, ha_cpu_topo[cpu].idx);
			cpu_count++;
		}
	}

	/* assign all threads of all thread groups to this node */
	for (grp = 0; grp < MAX_TGROUPS; grp++)
		for (thr = 0; thr < MAX_THREADS_PER_GROUP; thr++)
			ha_cpuset_assign(&cpu_map[grp].thread[thr], &node_cpu_set);

	/* cap the thread count to this node's CPU count when the bounds allow */
	if (tmin <= cpu_count && cpu_count < tmax)
		tmax = cpu_count;

	/* cpu_count is a signed int: use %d, not %u, to match the argument
	 * type (the mismatch is flagged by -Wformat).
	 */
	ha_diag_warning("Multi-socket cpu detected, automatically binding on active CPUs of '%d' (%d active cpu(s))\n", first_node_id, cpu_count);
	if (!global.nbthread)
		global.nbthread = tmax;

	return 0;
}
/* apply the chosen CPU policy if no cpu-map was forced. Returns < 0 on failure
* with a message in *err that must be freed by the caller if non-null.
*/