MINOR: cpu-topo: add a function to sort by cluster+capacity

The purpose here is to detect heterogeneous clusters which are not
properly reported, based on the exposed information about the cores'
capacity. The algorithm consists in sorting CPUs by capacity within a
cluster, and considering as equal all those which differ by 5% or less
in capacity from the previous one. This allows large clusters spanning
more than 5% in total between their extremities to stay together, while
keeping apart those where the boundary is more pronounced. This is
quite common in embedded environments with big.LITTLE systems, as
well as on some laptops.
This commit is contained in:
Willy Tarreau 2025-02-27 19:44:37 +01:00
parent 0290b807dd
commit 4a6eaf6c5e
2 changed files with 96 additions and 0 deletions

View File

@ -66,5 +66,6 @@ void cpu_reorder_by_cluster(struct ha_cpu_topo *topo, int entries);
/* Comparison helpers taking two opaque pointers and returning an int.
 * _cmp_cpu_cluster() and _cmp_cpu_cluster_capa() are passed to qsort() in
 * cpu_topo.c; the other two presumably serve the same purpose (to be
 * confirmed against their callers).
 */
int _cmp_cpu_index(const void *a, const void *b);
int _cmp_cpu_locality(const void *a, const void *b);
int _cmp_cpu_cluster(const void *a, const void *b);
/* arranges hwcpus by vicinity then by capacity */
int _cmp_cpu_cluster_capa(const void *a, const void *b);
#endif /* _HAPROXY_CPU_TOPO_H */

View File

@ -322,6 +322,95 @@ int _cmp_cpu_locality(const void *a, const void *b)
return 0;
}
/* Three-way ordering of two numeric IDs, only used by _cmp_cpu_cluster_capa()
 * below: -1 when <x> is the smaller one and is not negative, 1 when <y> is the
 * smaller one and is not negative, 0 otherwise (equal IDs, or a negative ID on
 * the smaller side). Both arguments are evaluated more than once, so they must
 * be free of side effects.
 */
#define CPU_ID_ORDER(x, y) \
	(((x) >= 0 && (x) < (y)) ? -1 : (((x) > (y) && (y) >= 0) ? 1 : 0))

/* qsort() callback ordering two hwcpus by vicinity first, then by capacity.
 * It returns -1 when <a> sorts before <b>, 1 when <a> sorts after <b>, and 0
 * when no criterion tells them apart. The aim is to reveal differing CPU
 * capacities among clusters.
 */
int _cmp_cpu_cluster_capa(const void *a, const void *b)
{
	const struct ha_cpu_topo *lhs = a;
	const struct ha_cpu_topo *rhs = b;
	int lhs_excl = !!(lhs->st & HA_CPU_F_EXCL_MASK);
	int rhs_excl = !!(rhs->st & HA_CPU_F_EXCL_MASK);
	int ret;

	/* CPUs without any exclusion flag (online ones) come before the
	 * excluded (offline) ones.
	 */
	if (lhs_excl != rhs_excl)
		return lhs_excl ? 1 : -1;

	/* Locality, from the outermost level down to the cluster: package,
	 * node, L4, L3, then cluster. The first level that decides wins.
	 */
	ret = CPU_ID_ORDER(lhs->pk_id, rhs->pk_id);
	if (!ret)
		ret = CPU_ID_ORDER(lhs->no_id, rhs->no_id);
	if (!ret)
		ret = CPU_ID_ORDER(lhs->ca_id[4], rhs->ca_id[4]);
	if (!ret)
		ret = CPU_ID_ORDER(lhs->ca_id[3], rhs->ca_id[3]);
	if (!ret)
		ret = CPU_ID_ORDER(lhs->cl_gid, rhs->cl_gid);
	if (ret)
		return ret;

	/* Both CPUs are in the same cluster here. The one with the higher
	 * capacity goes first, but only when the two differ by more than a
	 * 19/20 ratio (roughly 5%): values which come from measurement must
	 * not cause everything to be reorganized.
	 */
	if (lhs->capa > 0 && (int)rhs->capa * 20 < (int)lhs->capa * 19)
		return -1;
	if (rhs->capa > 0 && (int)rhs->capa * 19 > (int)lhs->capa * 20)
		return 1;

	/* Finer-grained locality: L2, thread set, L1, L0, and finally the CPU
	 * index so that SMT ordering is preserved.
	 */
	ret = CPU_ID_ORDER(lhs->ca_id[2], rhs->ca_id[2]);
	if (!ret)
		ret = CPU_ID_ORDER(lhs->ts_id, rhs->ts_id);
	if (!ret)
		ret = CPU_ID_ORDER(lhs->ca_id[1], rhs->ca_id[1]);
	if (!ret)
		ret = CPU_ID_ORDER(lhs->ca_id[0], rhs->ca_id[0]);
	if (!ret)
		ret = CPU_ID_ORDER(lhs->idx, rhs->idx);

	/* 0 at this point means both entries are exactly the same */
	return ret;
}

#undef CPU_ID_ORDER
/* function used by qsort to compare two hwcpus and arrange them by cluster to
* make sure no cluster crosses L3 boundaries. -1 says a<b, 1 says a>b. It's
* only used during topology detection.
@ -398,6 +487,12 @@ void cpu_reorder_by_cluster(struct ha_cpu_topo *topo, int entries)
qsort(topo, entries, sizeof(*topo), _cmp_cpu_cluster);
}
/* Sorts in place the <entries> first elements of the CPU topology array <topo>
 * using _cmp_cpu_cluster_capa(), i.e. by locality then by capacity, so that
 * clusters can be detected.
 */
void cpu_reorder_by_cluster_capa(struct ha_cpu_topo *topo, int entries)
{
	qsort(topo, (size_t)entries, sizeof(struct ha_cpu_topo), &_cmp_cpu_cluster_capa);
}
/* returns an optimal maxcpus for the current system. It will take into
* account what is reported by the OS, if any, otherwise will fall back
* to the cpuset size, which serves as an upper limit in any case.