From 4a6eaf6c5e080fb187714749e897340d7b08cfc0 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 27 Feb 2025 19:44:37 +0100 Subject: [PATCH] MINOR: cpu-topo: add a function to sort by cluster+capacity The purpose here is to detect heterogeneous clusters which are not properly reported, based on the exposed information about the cores capacity. The algorithm here consists in sorting CPUs by capacity within a cluster, and considering as equal all those which have 5% or less difference in capacity with the previous one. This allows large clusters of more than 5% total between extremities, while keeping apart those where the limit is more pronounced. This is quite common in embedded environments with big.LITTLE systems, as well as on some laptops. --- include/haproxy/cpu_topo.h | 1 + src/cpu_topo.c | 95 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/include/haproxy/cpu_topo.h b/include/haproxy/cpu_topo.h index dadd900fd..ae86b02f2 100644 --- a/include/haproxy/cpu_topo.h +++ b/include/haproxy/cpu_topo.h @@ -66,5 +66,6 @@ void cpu_reorder_by_cluster(struct ha_cpu_topo *topo, int entries); int _cmp_cpu_index(const void *a, const void *b); int _cmp_cpu_locality(const void *a, const void *b); int _cmp_cpu_cluster(const void *a, const void *b); +int _cmp_cpu_cluster_capa(const void *a, const void *b); #endif /* _HAPROXY_CPU_TOPO_H */ diff --git a/src/cpu_topo.c b/src/cpu_topo.c index 1510a61f4..5c3479bbb 100644 --- a/src/cpu_topo.c +++ b/src/cpu_topo.c @@ -322,6 +322,95 @@ int _cmp_cpu_locality(const void *a, const void *b) return 0; } +/* function used by qsort to compare two hwcpus and arrange them by vicinity + * then capacity. -1 says a<b, 1 says a>b. The goal is to detect different + * CPU capacities among clusters. 
+ */ +int _cmp_cpu_cluster_capa(const void *a, const void *b) +{ + const struct ha_cpu_topo *l = (const struct ha_cpu_topo *)a; + const struct ha_cpu_topo *r = (const struct ha_cpu_topo *)b; + + /* first, online vs offline */ + if (!(l->st & HA_CPU_F_EXCL_MASK) && (r->st & HA_CPU_F_EXCL_MASK)) + return -1; + + if (!(r->st & HA_CPU_F_EXCL_MASK) && (l->st & HA_CPU_F_EXCL_MASK)) + return 1; + + /* next, package ID */ + if (l->pk_id >= 0 && l->pk_id < r->pk_id) + return -1; + if (l->pk_id > r->pk_id && r->pk_id >= 0) + return 1; + + /* next, node ID */ + if (l->no_id >= 0 && l->no_id < r->no_id) + return -1; + if (l->no_id > r->no_id && r->no_id >= 0) + return 1; + + /* next, L4 */ + if (l->ca_id[4] >= 0 && l->ca_id[4] < r->ca_id[4]) + return -1; + if (l->ca_id[4] > r->ca_id[4] && r->ca_id[4] >= 0) + return 1; + + /* next, L3 */ + if (l->ca_id[3] >= 0 && l->ca_id[3] < r->ca_id[3]) + return -1; + if (l->ca_id[3] > r->ca_id[3] && r->ca_id[3] >= 0) + return 1; + + /* next, cluster */ + if (l->cl_gid >= 0 && l->cl_gid < r->cl_gid) + return -1; + if (l->cl_gid > r->cl_gid && r->cl_gid >= 0) + return 1; + + /* Same cluster. For CPU capacity, we tolerate a +/- 5% margin however + * so that if some values come from measurement we don't end up + * reorganizing everything. 
+ */ + if (l->capa > 0 && (int)l->capa * 19 > (int)r->capa * 20) + return -1; + if (r->capa > 0 && (int)l->capa * 20 < (int)r->capa * 19) + return 1; + + /* next, L2 */ + if (l->ca_id[2] >= 0 && l->ca_id[2] < r->ca_id[2]) + return -1; + if (l->ca_id[2] > r->ca_id[2] && r->ca_id[2] >= 0) + return 1; + + /* next, thread set */ + if (l->ts_id >= 0 && l->ts_id < r->ts_id) + return -1; + if (l->ts_id > r->ts_id && r->ts_id >= 0) + return 1; + + /* next, L1 */ + if (l->ca_id[1] >= 0 && l->ca_id[1] < r->ca_id[1]) + return -1; + if (l->ca_id[1] > r->ca_id[1] && r->ca_id[1] >= 0) + return 1; + + /* next, L0 */ + if (l->ca_id[0] >= 0 && l->ca_id[0] < r->ca_id[0]) + return -1; + if (l->ca_id[0] > r->ca_id[0] && r->ca_id[0] >= 0) + return 1; + + /* next, IDX, so that SMT ordering is preserved */ + if (l->idx >= 0 && l->idx < r->idx) + return -1; + if (l->idx > r->idx && r->idx >= 0) + return 1; + + /* exactly the same */ + return 0; +} + /* function used by qsort to compare two hwcpus and arrange them by cluster to * make sure no cluster crosses L3 boundaries. -1 says a<b, 1 says a>b. It's * only used during topology detection. @@ -398,6 +487,12 @@ void cpu_reorder_by_cluster(struct ha_cpu_topo *topo, int entries) qsort(topo, entries, sizeof(*topo), _cmp_cpu_cluster); } +/* re-order a CPU topology array by locality and capacity to detect clusters. */ +void cpu_reorder_by_cluster_capa(struct ha_cpu_topo *topo, int entries) +{ + qsort(topo, entries, sizeof(*topo), _cmp_cpu_cluster_capa); +} + /* returns an optimal maxcpus for the current system. It will take into * account what is reported by the OS, if any, otherwise will fall back * to the cpuset size, which serves as an upper limit in any case.