From 253fba01a7adf20303ec65cbe256681df588f065 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 7 Feb 2025 17:20:48 +0100 Subject: [PATCH] IMPORT: plock: lower the slope of the exponential back-off Along many tests involving both haproxy's scheduler and forwarded traffic, various exponents and algorithms were attempted for the EBO and their effects were measured. It was found that a growth in 1.25^N limited to 128k cycles consistently gives a better latency than 1.5^N limited to 256k cycles, without degrading general performance. The measures of the time to grab a write lock on a 48-thread EPYC show that the number of occurrences of low times was roughly multiplied by 2-3 while the number of occurrences of times above 64us was reduced by similar factors, to even reach 300 at 64us and limiting the maximum time by a factor of 4. The other variants that were experimented with are: m = ((m + (m >> 1)) + 2) & 0x3ffff; // original m = ((m + (m >> 1) + (m >> 3)) + 2) & 0x3ffff; m = ((m + (m >> 1) + (m >> 4)) + 2) & 0x3ffff; m = ((m + (m >> 1) + (m >> 4)) + 2) & 0x1ffff; m = ((m + (m >> 1) + (m >> 4)) + 1) & 0x1ffff; m = ((m + (m >> 2) + (m >> 4)) + 1) & 0x1ffff; // lowest CPU on pl_wr test + good perf m = ((m + (m >> 2)) + 1) & 0x1ffff; // even lower CPU usage, lowest max m = ((m + (m >> 1) + (m >> 2)) + 1) & 0x1ffff; // correct but slightly higher maxes m = ((m + (m >> 1) + (m >> 3)) + 1) & 0x1ffff; // less good than m+m>>2 m = ((m + (m >> 2) + (m >> 3)) + 1) & 0x1ffff; // better but not as good as m+m>>2 m = ((m + (m >> 3) + (m >> 4)) + 1) & 0x1ffff; // less good, lower rates on small counts. m = ((m + (m >> 2) + (m >> 3) + (m >> 4)) + 1) & 0x1ffff; // less good as well m = ((m & 0x7fff) + (m >> 1) + (m >> 4)) + 2; m = ((m & 0xffff) + (m >> 1) + (m >> 4)) + 2; This is plock commit dddd9ee01c522da33c353e2e4d4fd743d8336ec3. 
--- include/import/plock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/import/plock.h b/include/import/plock.h index 5c2860561..d0126a7a3 100644 --- a/include/import/plock.h +++ b/include/import/plock.h @@ -107,7 +107,7 @@ static unsigned long __pl_wait_unlock_long(const unsigned long *lock, const unsi * values and still growing. This allows competing threads to * wait different times once the threshold is reached. */ - m = ((m + (m >> 1)) + 2) & 0x3ffff; + m = ((m + (m >> 2)) + 1) & 0x1ffff; } while (1); return ret; @@ -176,7 +176,7 @@ static unsigned int __pl_wait_unlock_int(const unsigned int *lock, const unsigne * values and still growing. This allows competing threads to * wait different times once the threshold is reached. */ - m = ((m + (m >> 1)) + 2) & 0x3ffff; + m = ((m + (m >> 2)) + 1) & 0x1ffff; } while (1); return ret;