MINOR: cpu-topo: assign clusters to cores without one and renumber them

Due to the previous commit we can end up with cores not assigned
any cluster ID. For this, at the end we sort the CPUs by topology
and assign cluster IDs to remaining CPUs based on pkg/node/llc.
For example a 14900 now shows 5 clusters, one for the 8 p-cores,
and 4 of 4 e-cores each.

The local cluster numbers are per (node,pkg) ID so that any rule could
easily be applied on them, but we also keep the global numbers that
will help with thread group assignment.

We still need to force the assignment of distinct cluster IDs to cores
running on a different L3. For example the EPYC 74F3 is reported
as having 8 different L3s (which is true) and only one cluster.

Here we introduce a new function "cpu_compose_clusters()" that is called
from the main init code just after cpu_detect_topology() so that it's
not OS-dependent. It deals with this renumbering of all clusters in
topology order, taking care of considering any distinct LLC as being
on a distinct cluster.
This commit is contained in:
Willy Tarreau 2025-01-10 18:20:53 +01:00
parent 385360fe81
commit af648c7b58
3 changed files with 54 additions and 0 deletions

View File

@ -28,6 +28,9 @@ int cpu_detect_usable(void);
/* detect the CPU topology based on info in /sys */
int cpu_detect_topology(void);
/* Compose clusters: renumbers the cluster IDs of all CPUs in topology order
 * and assigns one to the CPUs which do not have any yet. CPUs attached to
 * distinct last-level caches always end up in distinct clusters. It is meant
 * to be called from the main init code just after cpu_detect_topology().
 */
void cpu_compose_clusters(void);
/* Detects CPUs that are bound to the current process. Returns the number of
* CPUs detected or 0 if the detection failed.
*/

View File

@ -415,6 +415,54 @@ static int cpu_topo_get_maxcpus(void)
return abs_max;
}
/* This function is responsible for composing clusters based on existing info
 * on the CPU topology. It walks the entries of ha_cpu_topo[] from 0 to
 * cpu_topo_lastcpu once they have been sorted by locality, and rewrites both
 * cluster IDs of each of them:
 *   - cl_gid: the global cluster number, which only grows along the walk;
 *   - cl_lid: the local cluster number, which restarts from zero each time
 *     the package or the node changes.
 * CPUs which had no cluster ID get one at the same time, and two consecutive
 * CPUs which do not share the same last reported cache level (L4, otherwise
 * L3, otherwise L2) are always placed into distinct clusters. Before
 * returning, the array is passed to cpu_reorder_by_index() so that it is not
 * left in locality order. The function takes no argument and returns nothing.
 */
void cpu_compose_clusters(void)
{
	int cpu;
	int curr_gid, prev_gid;
	int curr_lid;

	/* Now we'll sort CPUs by topology and assign cluster IDs to those that
	 * don't yet have one, based on the die/pkg/llc.
	 */
	cpu_reorder_by_locality(ha_cpu_topo, cpu_topo_maxcpus);

	prev_gid = -2; // make sure it cannot match even unassigned ones
	curr_gid = curr_lid = -1;

	for (cpu = 0; cpu <= cpu_topo_lastcpu; cpu++) {
		/* renumber clusters and assign unassigned ones at the same
		 * time. For this, we'll compare pkg/die/llc with the last
		 * CPU's and verify if we need to create a new cluster ID.
		 * Note that some platforms don't report cache. The local value
		 * is local to the pkg+node combination so that we reset it
		 * when changing, contrary to the global one which grows.
		 */
		if (!cpu ||
		    (ha_cpu_topo[cpu].pk_id != ha_cpu_topo[cpu-1].pk_id) ||
		    (ha_cpu_topo[cpu].no_id != ha_cpu_topo[cpu-1].no_id)) {
			/* first CPU, or new package/node: new global cluster,
			 * and local numbering restarts.
			 */
			curr_gid++;
			curr_lid = 0;
		}
		else if (ha_cpu_topo[cpu].cl_gid != prev_gid ||
			 ha_cpu_topo[cpu].ca_id[4] != ha_cpu_topo[cpu-1].ca_id[4] ||
			 (ha_cpu_topo[cpu].ca_id[4] < 0 && // no l4 ? check L3
			  ((ha_cpu_topo[cpu].ca_id[3] != ha_cpu_topo[cpu-1].ca_id[3]) ||
			   (ha_cpu_topo[cpu].ca_id[3] < 0 && // no l3 ? check L2
			    (ha_cpu_topo[cpu].ca_id[2] != ha_cpu_topo[cpu-1].ca_id[2]))))) {
			/* same package and node, but the original cluster ID
			 * or the last cache level differs: new cluster.
			 */
			curr_gid++;
			curr_lid++;
		}

		/* the original cluster ID must be saved before being
		 * overwritten, because the next CPU is compared against it.
		 */
		prev_gid = ha_cpu_topo[cpu].cl_gid;
		ha_cpu_topo[cpu].cl_gid = curr_gid;
		ha_cpu_topo[cpu].cl_lid = curr_lid;
	}

	cpu_reorder_by_index(ha_cpu_topo, cpu_topo_maxcpus);
}
/* CPU topology detection below, OS-specific */
#if defined(__linux__)

View File

@ -2065,6 +2065,9 @@ static void step_init_2(int argc, char** argv)
/* Now detect how CPUs are arranged */
cpu_detect_topology();
/* compose clusters */
cpu_compose_clusters();
#endif
/* detect the optimal thread-groups and nbthreads if not set */