diff --git a/include/haproxy/cpu_topo.h b/include/haproxy/cpu_topo.h
index dadd900fd..ae86b02f2 100644
--- a/include/haproxy/cpu_topo.h
+++ b/include/haproxy/cpu_topo.h
@@ -66,5 +66,6 @@ void cpu_reorder_by_cluster(struct ha_cpu_topo *topo, int entries);
 int _cmp_cpu_index(const void *a, const void *b);
 int _cmp_cpu_locality(const void *a, const void *b);
 int _cmp_cpu_cluster(const void *a, const void *b);
+int _cmp_cpu_cluster_capa(const void *a, const void *b);
 
 #endif /* _HAPROXY_CPU_TOPO_H */
diff --git a/src/cpu_topo.c b/src/cpu_topo.c
index 1510a61f4..5c3479bbb 100644
--- a/src/cpu_topo.c
+++ b/src/cpu_topo.c
@@ -322,6 +322,95 @@ int _cmp_cpu_locality(const void *a, const void *b)
 	return 0;
 }
 
+/* function used by qsort to compare two hwcpus and arrange them by vicinity
+ * then capacity. -1 says a<b, 1 says a>b. The goal is to detect different
+ * CPU capacities among clusters.
+ */
+int _cmp_cpu_cluster_capa(const void *a, const void *b)
+{
+	const struct ha_cpu_topo *l = (const struct ha_cpu_topo *)a;
+	const struct ha_cpu_topo *r = (const struct ha_cpu_topo *)b;
+
+	/* first, online vs offline */
+	if (!(l->st & HA_CPU_F_EXCL_MASK) && (r->st & HA_CPU_F_EXCL_MASK))
+		return -1;
+
+	if (!(r->st & HA_CPU_F_EXCL_MASK) && (l->st & HA_CPU_F_EXCL_MASK))
+		return 1;
+
+	/* next, package ID */
+	if (l->pk_id >= 0 && l->pk_id < r->pk_id)
+		return -1;
+	if (l->pk_id > r->pk_id && r->pk_id >= 0)
+		return 1;
+
+	/* next, node ID */
+	if (l->no_id >= 0 && l->no_id < r->no_id)
+		return -1;
+	if (l->no_id > r->no_id && r->no_id >= 0)
+		return 1;
+
+	/* next, L4 */
+	if (l->ca_id[4] >= 0 && l->ca_id[4] < r->ca_id[4])
+		return -1;
+	if (l->ca_id[4] > r->ca_id[4] && r->ca_id[4] >= 0)
+		return 1;
+
+	/* next, L3 */
+	if (l->ca_id[3] >= 0 && l->ca_id[3] < r->ca_id[3])
+		return -1;
+	if (l->ca_id[3] > r->ca_id[3] && r->ca_id[3] >= 0)
+		return 1;
+
+	/* next, cluster */
+	if (l->cl_gid >= 0 && l->cl_gid < r->cl_gid)
+		return -1;
+	if (l->cl_gid > r->cl_gid && r->cl_gid >= 0)
+		return 1;
+
+	/* Same cluster. For CPU capacity, we tolerate a +/- 5% margin however
+	 * so that if some values come from measurement we don't end up
+	 * reorganizing everything.
+	 */
+	if (l->capa > 0 && (int)l->capa * 19 > (int)r->capa * 20)
+		return -1;
+	if (r->capa > 0 && (int)l->capa * 20 < (int)r->capa * 19)
+		return 1;
+
+	/* next, L2 */
+	if (l->ca_id[2] >= 0 && l->ca_id[2] < r->ca_id[2])
+		return -1;
+	if (l->ca_id[2] > r->ca_id[2] && r->ca_id[2] >= 0)
+		return 1;
+
+	/* next, thread set */
+	if (l->ts_id >= 0 && l->ts_id < r->ts_id)
+		return -1;
+	if (l->ts_id > r->ts_id && r->ts_id >= 0)
+		return 1;
+
+	/* next, L1 */
+	if (l->ca_id[1] >= 0 && l->ca_id[1] < r->ca_id[1])
+		return -1;
+	if (l->ca_id[1] > r->ca_id[1] && r->ca_id[1] >= 0)
+		return 1;
+
+	/* next, L0 */
+	if (l->ca_id[0] >= 0 && l->ca_id[0] < r->ca_id[0])
+		return -1;
+	if (l->ca_id[0] > r->ca_id[0] && r->ca_id[0] >= 0)
+		return 1;
+
+	/* next, IDX, so that SMT ordering is preserved */
+	if (l->idx >= 0 && l->idx < r->idx)
+		return -1;
+	if (l->idx > r->idx && r->idx >= 0)
+		return 1;
+
+	/* exactly the same */
+	return 0;
+}
+
 /* function used by qsort to compare two hwcpus and arrange them by cluster to
  * make sure no cluster crosses L3 boundaries. -1 says a<b, 1 says a>b. It's
  * only used during topology detection.
@@ -398,6 +487,12 @@ void cpu_reorder_by_cluster(struct ha_cpu_topo *topo, int entries)
 	qsort(topo, entries, sizeof(*topo), _cmp_cpu_cluster);
 }
 
+/* re-order a CPU topology array by locality and capacity to detect clusters. */
+void cpu_reorder_by_cluster_capa(struct ha_cpu_topo *topo, int entries)
+{
+	qsort(topo, entries, sizeof(*topo), _cmp_cpu_cluster_capa);
+}
+
 /* returns an optimal maxcpus for the current system. It will take into
  * account what is reported by the OS, if any, otherwise will fall back
  * to the cpuset size, which serves as an upper limit in any case.