MINOR: cpu-topo: add new cpu-policies "group-by-2-clusters" and above

This adds "group-by-{2,3,4}-clusters", which, as the name implies, creates one thread group per X clusters. This can be useful when CPUs are split into clusters that are too small, as well as when the total number of assigned cores is not evenly distributed between the clusters, in order to spread the load across groups that differ less in size.
2025-03-31 15:00:57 +02:00 · 2025-03-31 15:00:57 +02:00 · 9f00702dc6
commit 9f00702dc6
parent 1e9a2529aa
2 changed files with 41 additions and 3 deletions
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@ -1995,6 +1995,33 @@ cpu-policy <policy>
                        the limit of 32 or 64 depending on the system. This is
                        the default policy.

+   - group-by-2-clusters same as "group-by-cluster" but create a group every
+                        two clusters. This can make sense on CPUs having many
+                        clusters of few cores each, to avoid creating many
+                        groups, or to smooth the distribution a little bit when
+                        not all cores are in use. Please note that it can have
+                        very bad performance effects when the communication
+                        between clusters is slow. This is generally recommended
+                        against.
+
+   - group-by-3-clusters same as "group-by-cluster" but create a group every
+                        three clusters. This can make sense on CPUs having many
+                        clusters of few cores each, to avoid creating many
+                        groups, or to smooth the distribution a little bit when
+                        not all cores are in use. Please note that it can have
+                        very bad performance effects when the communication
+                        between clusters is slow. This is generally recommended
+                        against.
+
+   - group-by-4-clusters same as "group-by-cluster" but create a group every
+                        four clusters. This can make sense on CPUs having many
+                        clusters of few cores each, to avoid creating many
+                        groups, or to smooth the distribution a little bit when
+                        not all cores are in use. Please note that it can have
+                        very bad performance effects when the communication
+                        between clusters is slow. This is generally recommended
+                        against.
+
   - group-by-cluster   if neither "nbthread" nor "nbtgroups" were set, then
                        one thread group is created for each CPU cluster with
                        available CPUs, each with as many threads as CPUs. All
--- a/src/cpu_topo.c
+++ b/src/cpu_topo.c
@ -61,6 +61,9 @@ static struct ha_cpu_policy ha_cpu_policy[] = {
 	{ .name = "none",               .desc = "use all available CPUs",                           .fct = NULL   },
 	{ .name = "first-usable-node",  .desc = "use only first usable node if nbthreads not set",  .fct = cpu_policy_first_usable_node, .arg = 0 },
 	{ .name = "group-by-cluster",   .desc = "make one thread group per core cluster",           .fct = cpu_policy_group_by_cluster , .arg = 1 },
+	{ .name = "group-by-2-clusters",.desc = "make one thread group per 2 core clusters",        .fct = cpu_policy_group_by_cluster , .arg = 2 },
+	{ .name = "group-by-3-clusters",.desc = "make one thread group per 3 core clusters",        .fct = cpu_policy_group_by_cluster , .arg = 3 },
+	{ .name = "group-by-4-clusters",.desc = "make one thread group per 4 core clusters",        .fct = cpu_policy_group_by_cluster , .arg = 4 },
 	{ .name = "performance",        .desc = "make one thread group per perf. core cluster",     .fct = cpu_policy_performance      , .arg = 0 },
 	{ .name = "efficiency",         .desc = "make one thread group per eff. core cluster",      .fct = cpu_policy_efficiency       , .arg = 0 },
 	{ .name = "resource",           .desc = "make one thread group from the smallest cluster",  .fct = cpu_policy_resource         , .arg = 0 },
@ -1063,6 +1066,8 @@ static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin
 *  - otherwise tries to create one thread-group per cluster, with as many
 *    threads as CPUs in the cluster, and bind all the threads of this group
 *    to all the CPUs of the cluster.
+ * Also implements the variants "group-by-2-clusters", "group-by-3-clusters"
+ * and "group-by-4-clusters".
 */
 static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin, int gmax, char **err)
 {
@ -1072,6 +1077,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
 	int cid, lcid;
 	int thr_per_grp, nb_grp;
 	int thr;
+	int div;

 	if (global.nbthread)
 		return 0;
@ -1082,6 +1088,11 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
 	/* iterate over each new cluster */
 	lcid = -1;
 	cpu_start = 0;
+
+	/* used as a divisor of clusters */
+	div = ha_cpu_policy[policy].arg;
+	div = div ? div : 1;
+
 	while (global.nbtgroups < MAX_TGROUPS && global.nbthread < MAX_THREADS) {
 		ha_cpuset_zero(&node_cpu_set);
 		cid = -1; cpu_count = 0;
@ -1090,14 +1101,14 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
 			/* skip disabled and already visited CPUs */
 			if (ha_cpu_topo[cpu].st & HA_CPU_F_EXCL_MASK)
 				continue;
-			if (ha_cpu_topo[cpu].cl_gid <= lcid)
+			if ((ha_cpu_topo[cpu].cl_gid / div) <= lcid)
 				continue;

 			if (cid < 0) {
-				cid = ha_cpu_topo[cpu].cl_gid;
+				cid = ha_cpu_topo[cpu].cl_gid / div;
 				cpu_start = cpu + 1;
 			}
-			else if (cid != ha_cpu_topo[cpu].cl_gid)
+			else if (cid != ha_cpu_topo[cpu].cl_gid / div)
 				continue;

 			/* make a mask of all of this cluster's CPUs */