MEDIUM: cpu-topo: let the "group-by-cluster" split groups

When a cluster is too large to fit into a single group, let's split it
into the minimum number of equal-sized groups, each of which is still
allowed to use all the CPUs of the cluster. This allows haproxy to start
all the threads with a minimum number of groups (e.g. 2x40 threads for
an 80-core cluster).
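As an aside for readers of the diff below, here is the splitting arithmetic in isolation, as a minimal standalone sketch. The value of MAX_THREADS_PER_GROUP and the split_cluster() helper are illustrative assumptions for this example, not haproxy code:

#include <stdio.h>

/* Illustrative cap only; the real MAX_THREADS_PER_GROUP is build-dependent. */
#define MAX_THREADS_PER_GROUP 64

/* Hypothetical helper mirroring the patch's arithmetic: split a cluster of
 * cpu_count CPUs into the minimum number of groups of nearly equal size.
 */
static void split_cluster(int cpu_count)
{
    /* smallest group count so that no group exceeds the per-group cap */
    int nb_grp = (cpu_count + MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP;
    /* closest equal size per group (ceiling division) */
    int thr_per_grp = (cpu_count + nb_grp - 1) / nb_grp;

    printf("%3d CPUs -> %d group(s):", cpu_count, nb_grp);
    while (nb_grp-- && cpu_count > 0) {
        int n = thr_per_grp < cpu_count ? thr_per_grp : cpu_count;
        printf(" %d", n);
        cpu_count -= n;
    }
    putchar('\n');
}

int main(void)
{
    split_cluster(40);  /* 1 group of 40 */
    split_cluster(80);  /* 2 groups of 40 (the commit message's example) */
    split_cluster(130); /* 3 groups: 44 + 44 + 42 */
    return 0;
}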
Author: Willy Tarreau
Date:   2025-03-13 15:41:00 +01:00
parent 8aeb096740
commit 96cd420dc3


--- a/src/cpu_topo.c
+++ b/src/cpu_topo.c
@@ -973,6 +973,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
     int cpu, cpu_start;
     int cpu_count;
     int cid, lcid;
+    int thr_per_grp, nb_grp;
     int thr;
 
     if (global.nbthread)
@@ -984,7 +985,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
     /* iterate over each new cluster */
     lcid = -1;
     cpu_start = 0;
-    while (global.nbtgroups < MAX_TGROUPS) {
+    while (global.nbtgroups < MAX_TGROUPS && global.nbthread < MAX_THREADS) {
         ha_cpuset_zero(&node_cpu_set);
         cid = -1; cpu_count = 0;
 
@@ -1010,35 +1011,49 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
          * number of CPUs in this cluster, and cpu_start is the next
          * cpu to restart from to scan for new clusters.
          */
-        if (cid < 0)
+        if (cid < 0 || !cpu_count)
             break;
 
-        /* check that we're still within limits */
-        if (cpu_count > MAX_THREADS_PER_GROUP)
-            cpu_count = MAX_THREADS_PER_GROUP;
+        /* check that we're still within limits. If there are too many
+         * CPUs but enough groups left, we'll try to make more smaller
+         * groups, of the closest size each.
+         */
+        nb_grp = (cpu_count + MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP;
+        if (nb_grp > MAX_TGROUPS - global.nbtgroups)
+            nb_grp = MAX_TGROUPS - global.nbtgroups;
+        thr_per_grp = (cpu_count + nb_grp - 1) / nb_grp;
+        if (thr_per_grp > MAX_THREADS_PER_GROUP)
+            thr_per_grp = MAX_THREADS_PER_GROUP;
 
-        if (cpu_count + global.nbthread > MAX_THREADS)
-            cpu_count = MAX_THREADS - global.nbthread;
+        while (nb_grp && cpu_count > 0) {
+            /* create at most thr_per_grp threads */
+            if (thr_per_grp > cpu_count)
+                thr_per_grp = cpu_count;
 
-        if (cpu_count <= 0)
-            break;
+            if (thr_per_grp + global.nbthread > MAX_THREADS)
+                thr_per_grp = MAX_THREADS - global.nbthread;
 
-        /* let's create the new thread group */
-        ha_tgroup_info[global.nbtgroups].base = global.nbthread;
-        ha_tgroup_info[global.nbtgroups].count = cpu_count;
+            /* let's create the new thread group */
+            ha_tgroup_info[global.nbtgroups].base = global.nbthread;
+            ha_tgroup_info[global.nbtgroups].count = thr_per_grp;
 
-        /* assign to this group the required number of threads */
-        for (thr = 0; thr < cpu_count; thr++) {
-            ha_thread_info[thr + global.nbthread].tgid = global.nbtgroups + 1;
-            ha_thread_info[thr + global.nbthread].tg = &ha_tgroup_info[global.nbtgroups];
-            ha_thread_info[thr + global.nbthread].tg_ctx = &ha_tgroup_ctx[global.nbtgroups];
-            /* map these threads to all the CPUs */
-            ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
+            /* assign to this group the required number of threads */
+            for (thr = 0; thr < thr_per_grp; thr++) {
+                ha_thread_info[thr + global.nbthread].tgid = global.nbtgroups + 1;
+                ha_thread_info[thr + global.nbthread].tg = &ha_tgroup_info[global.nbtgroups];
+                ha_thread_info[thr + global.nbthread].tg_ctx = &ha_tgroup_ctx[global.nbtgroups];
+                /* map these threads to all the CPUs */
+                ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
+            }
+
+            cpu_count -= thr_per_grp;
+            global.nbthread += thr_per_grp;
+            global.nbtgroups++;
+            if (global.nbtgroups >= MAX_TGROUPS || global.nbthread >= MAX_THREADS)
+                break;
         }
 
         lcid = cid; // last cluster_id
-        global.nbthread += cpu_count;
-        global.nbtgroups++;
     }
 
     if (global.nbthread)
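To illustrate how the new inner loop interacts with the global limits, here is a standalone simulation of the patched allocation. The limit values, the bare nbthread/nbtgroups counters and the add_cluster() function are assumptions made for the example, not haproxy's actual constants or API:

#include <stdio.h>

/* Placeholder limits for the simulation; real values are build-dependent. */
#define MAX_THREADS_PER_GROUP 64
#define MAX_TGROUPS            4
#define MAX_THREADS           96

static int nbthread, nbtgroups; /* stand-ins for global.nbthread/nbtgroups */

/* Simulate the patched per-cluster logic: carve cpu_count CPUs into at most
 * nb_grp groups of roughly thr_per_grp threads, stopping at global limits.
 */
static void add_cluster(int cpu_count)
{
    int nb_grp, thr_per_grp;

    if (nbtgroups >= MAX_TGROUPS || nbthread >= MAX_THREADS)
        return; /* mirrors the outer while () condition */

    nb_grp = (cpu_count + MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP;
    if (nb_grp > MAX_TGROUPS - nbtgroups)
        nb_grp = MAX_TGROUPS - nbtgroups;
    thr_per_grp = (cpu_count + nb_grp - 1) / nb_grp;
    if (thr_per_grp > MAX_THREADS_PER_GROUP)
        thr_per_grp = MAX_THREADS_PER_GROUP;

    while (nb_grp-- && cpu_count > 0) {
        if (thr_per_grp > cpu_count)
            thr_per_grp = cpu_count;
        if (thr_per_grp + nbthread > MAX_THREADS)
            thr_per_grp = MAX_THREADS - nbthread;

        printf("tgroup %d: %d threads\n", nbtgroups + 1, thr_per_grp);
        cpu_count -= thr_per_grp;
        nbthread += thr_per_grp;
        nbtgroups++;
        if (nbtgroups >= MAX_TGROUPS || nbthread >= MAX_THREADS)
            break;
    }
}

int main(void)
{
    add_cluster(80); /* 2 groups of 40 each */
    add_cluster(80); /* only 16 threads of budget left: one group of 16,
                      * then the thread limit stops further allocation */
    return 0;
}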