diff --git a/src/cpu_topo.c b/src/cpu_topo.c
index cd823272f..b44c1005b 100644
--- a/src/cpu_topo.c
+++ b/src/cpu_topo.c
@@ -973,6 +973,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
 	int cpu, cpu_start;
 	int cpu_count;
 	int cid, lcid;
+	int thr_per_grp, nb_grp;
 	int thr;
 
 	if (global.nbthread)
@@ -984,7 +985,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
 	/* iterate over each new cluster */
 	lcid = -1;
 	cpu_start = 0;
-	while (global.nbtgroups < MAX_TGROUPS) {
+	while (global.nbtgroups < MAX_TGROUPS && global.nbthread < MAX_THREADS) {
 		ha_cpuset_zero(&node_cpu_set);
 		cid = -1; cpu_count = 0;
 
@@ -1010,35 +1011,49 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
 		 * number of CPUs in this cluster, and cpu_start is the next
 		 * cpu to restart from to scan for new clusters.
 		 */
-		if (cid < 0)
+		if (cid < 0 || !cpu_count)
 			break;
 
-		/* check that we're still within limits */
-		if (cpu_count > MAX_THREADS_PER_GROUP)
-			cpu_count = MAX_THREADS_PER_GROUP;
+		/* check that we're still within limits. If there are too many
+		 * CPUs but enough groups left, we'll try to make more smaller
+		 * groups, of the closest size each.
+		 */
+		nb_grp = (cpu_count + MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP;
+		if (nb_grp > MAX_TGROUPS - global.nbtgroups)
+			nb_grp = MAX_TGROUPS - global.nbtgroups;
+		thr_per_grp = (cpu_count + nb_grp - 1) / nb_grp;
+		if (thr_per_grp > MAX_THREADS_PER_GROUP)
+			thr_per_grp = MAX_THREADS_PER_GROUP;
 
-		if (cpu_count + global.nbthread > MAX_THREADS)
-			cpu_count = MAX_THREADS - global.nbthread;
+		while (nb_grp && cpu_count > 0) {
+			/* create at most thr_per_grp threads */
+			if (thr_per_grp > cpu_count)
+				thr_per_grp = cpu_count;
 
-		if (cpu_count <= 0)
-			break;
+			if (thr_per_grp + global.nbthread > MAX_THREADS)
+				thr_per_grp = MAX_THREADS - global.nbthread;
 
-		/* let's create the new thread group */
-		ha_tgroup_info[global.nbtgroups].base = global.nbthread;
-		ha_tgroup_info[global.nbtgroups].count = cpu_count;
+			/* let's create the new thread group */
+			ha_tgroup_info[global.nbtgroups].base = global.nbthread;
+			ha_tgroup_info[global.nbtgroups].count = thr_per_grp;
 
-		/* assign to this group the required number of threads */
-		for (thr = 0; thr < cpu_count; thr++) {
-			ha_thread_info[thr + global.nbthread].tgid = global.nbtgroups + 1;
-			ha_thread_info[thr + global.nbthread].tg = &ha_tgroup_info[global.nbtgroups];
-			ha_thread_info[thr + global.nbthread].tg_ctx = &ha_tgroup_ctx[global.nbtgroups];
-			/* map these threads to all the CPUs */
-			ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
+			/* assign to this group the required number of threads */
+			for (thr = 0; thr < thr_per_grp; thr++) {
+				ha_thread_info[thr + global.nbthread].tgid = global.nbtgroups + 1;
+				ha_thread_info[thr + global.nbthread].tg = &ha_tgroup_info[global.nbtgroups];
+				ha_thread_info[thr + global.nbthread].tg_ctx = &ha_tgroup_ctx[global.nbtgroups];
+				/* map these threads to all the CPUs */
+				ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
+			}
+
+			cpu_count -= thr_per_grp;
+			global.nbthread += thr_per_grp;
+			global.nbtgroups++;
+			if (global.nbtgroups >= MAX_TGROUPS || global.nbthread >= MAX_THREADS)
+				break;
 		}
 
 		lcid = cid; // last cluster_id
-		global.nbthread += cpu_count;
-		global.nbtgroups++;
 	}
 
 	if (global.nbthread)
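
For readers following the arithmetic in the patch, below is a small standalone sketch (not part of the patch) of how the new ceiling divisions split one large CPU cluster into several thread groups of roughly equal size. The MAX_THREADS_PER_GROUP and MAX_TGROUPS values, the cpu_count of 96 and the used_tgroups variable are example assumptions for illustration, not necessarily HAProxy's build-time defaults or internal names.

#include <stdio.h>

#define MAX_THREADS_PER_GROUP 64   /* assumed limit, for illustration only */
#define MAX_TGROUPS           16   /* assumed limit, for illustration only */

int main(void)
{
	int cpu_count = 96;   /* example: CPUs found in one cluster */
	int used_tgroups = 0; /* example: thread groups already allocated */
	int nb_grp, thr_per_grp;

	/* same computation as the patch: number of groups needed for this
	 * cluster, capped by the groups still available
	 */
	nb_grp = (cpu_count + MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP;
	if (nb_grp > MAX_TGROUPS - used_tgroups)
		nb_grp = MAX_TGROUPS - used_tgroups;

	/* threads per group, spread as evenly as possible */
	thr_per_grp = (cpu_count + nb_grp - 1) / nb_grp;
	if (thr_per_grp > MAX_THREADS_PER_GROUP)
		thr_per_grp = MAX_THREADS_PER_GROUP;

	/* prints "96 CPUs -> 2 group(s) of up to 48 thread(s)" */
	printf("%d CPUs -> %d group(s) of up to %d thread(s)\n",
	       cpu_count, nb_grp, thr_per_grp);
	return 0;
}

With these assumed limits, a 96-CPU cluster becomes two balanced groups of 48 threads rather than a single group truncated to MAX_THREADS_PER_GROUP as with the previous logic.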