MINOR: cpu-topo: assign clusters to cores without one and renumber them
Due to the previous commit we can end up with cores not assigned any cluster ID. For this, at the end we sort the CPUs by topology and assign cluster IDs to remaining CPUs based on pkg/node/llc. For example a 14900 now shows 5 clusters, one for the 8 p-cores, and 4 of 4 e-cores each. The local cluster numbers are per (node,pkg) ID so that any rule could easily be applied on them, but we also keep the global numbers that will help with thread group assignment. We still need to force the assignment of distinct cluster IDs to cores running on a different L3. For example the EPYC 74F3 is reported as having 8 different L3s (which is true) and only one cluster. Here we introduce a new function "cpu_compose_clusters()" that is called from the main init code just after cpu_detect_topology() so that it's not OS-dependent. It deals with this renumbering of all clusters in topology order, taking care of considering any distinct LLC as being on a distinct cluster.
This commit is contained in:
parent
385360fe81
commit
af648c7b58
@ -28,6 +28,9 @@ int cpu_detect_usable(void);
|
||||
/* detect the CPU topology based on info in /sys */
|
||||
int cpu_detect_topology(void);
|
||||
|
||||
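/* compose clusters: renumber the cluster IDs of all CPUs in topology order and assign one to the CPUs that do not have any yet */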
|
||||
void cpu_compose_clusters(void);
|
||||
|
||||
/* Detects CPUs that are bound to the current process. Returns the number of
|
||||
* CPUs detected or 0 if the detection failed.
|
||||
*/
|
||||
|
@ -415,6 +415,54 @@ static int cpu_topo_get_maxcpus(void)
|
||||
return abs_max;
|
||||
}
|
||||
|
||||
/* This function is responsible for composing clusters based on existing info
|
||||
* on the CPU topology.
|
||||
*/
|
||||
void cpu_compose_clusters(void)
|
||||
{
|
||||
int cpu;
|
||||
int curr_gid, prev_gid;
|
||||
int curr_lid, prev_lid;
|
||||
|
||||
/* Now we'll sort CPUs by topology and assign cluster IDs to those that
|
||||
* don't yet have one, based on the die/pkg/llc.
|
||||
*/
|
||||
cpu_reorder_by_locality(ha_cpu_topo, cpu_topo_maxcpus);
|
||||
|
||||
prev_gid = prev_lid = -2; // make sure it cannot match even unassigned ones
|
||||
curr_gid = curr_lid = -1;
|
||||
for (cpu = 0; cpu <= cpu_topo_lastcpu; cpu++) {
|
||||
/* renumber clusters and assign unassigned ones at the same
|
||||
* time. For this, we'll compare pkg/die/llc with the last
|
||||
* CPU's and verify if we need to create a new cluster ID.
|
||||
* Note that some platforms don't report cache. The locao value
|
||||
* is local to the pkg+node combination so that we reset it
|
||||
* when changing, contrary to the global one which grows.
|
||||
*/
|
||||
if (!cpu ||
|
||||
(ha_cpu_topo[cpu].pk_id != ha_cpu_topo[cpu-1].pk_id) ||
|
||||
(ha_cpu_topo[cpu].no_id != ha_cpu_topo[cpu-1].no_id)) {
|
||||
curr_gid++;
|
||||
curr_lid = 0;
|
||||
}
|
||||
else if (ha_cpu_topo[cpu].cl_gid != prev_gid ||
|
||||
ha_cpu_topo[cpu].ca_id[4] != ha_cpu_topo[cpu-1].ca_id[4] ||
|
||||
(ha_cpu_topo[cpu].ca_id[4] < 0 && // no l4 ? check L3
|
||||
((ha_cpu_topo[cpu].ca_id[3] != ha_cpu_topo[cpu-1].ca_id[3]) ||
|
||||
(ha_cpu_topo[cpu].ca_id[3] < 0 && // no l3 ? check L2
|
||||
(ha_cpu_topo[cpu].ca_id[2] != ha_cpu_topo[cpu-1].ca_id[2]))))) {
|
||||
curr_gid++;
|
||||
curr_lid++;
|
||||
}
|
||||
prev_gid = ha_cpu_topo[cpu].cl_gid;
|
||||
prev_lid = ha_cpu_topo[cpu].cl_lid;
|
||||
ha_cpu_topo[cpu].cl_gid = curr_gid;
|
||||
ha_cpu_topo[cpu].cl_lid = curr_lid;
|
||||
}
|
||||
|
||||
cpu_reorder_by_index(ha_cpu_topo, cpu_topo_maxcpus);
|
||||
}
|
||||
|
||||
/* CPU topology detection below, OS-specific */
|
||||
|
||||
#if defined(__linux__)
|
||||
|
@ -2065,6 +2065,9 @@ static void step_init_2(int argc, char** argv)
|
||||
|
||||
/* Now detect how CPUs are arranged */
|
||||
cpu_detect_topology();
|
||||
|
||||
/* compose clusters */
|
||||
cpu_compose_clusters();
|
||||
#endif
|
||||
|
||||
/* detect the optimal thread-groups and nbthreads if not set */
|
||||
|
Loading…
x
Reference in New Issue
Block a user