MEDIUM: stick-tables: Avoid visiting all shards in process_table_expire

In process_table_expire(), only ever visit one shard at a time.
To know which one was last, we use the same variable as the one used for
stktable_trash_oldest(). In order to know when to wake up the task
again, we now remember what the nearest expire is for each shard, and
just go through all of them to find the smallest one.
This commit is contained in:
Olivier Houchard 2025-05-07 16:00:32 +00:00 committed by Olivier Houchard
parent 48eb925d72
commit 5ddd189a0b
2 changed files with 113 additions and 94 deletions

View File

@ -211,6 +211,7 @@ struct stktable {
struct {
struct eb_root keys; /* head of sticky session tree */
struct eb_root exps; /* head of sticky session expiration tree */
unsigned int min_exp; /* closest next expiration */
__decl_thread(HA_RWLOCK_T sh_lock); /* for the trees above */
} shards[CONFIG_HAP_TBL_BUCKETS];

View File

@ -885,17 +885,24 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
int updt_locked;
int expired;
int looped;
int exp_next;
int task_exp;
unsigned int cur_shard;
int shard;
task_exp = TICK_ETERNITY;
for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) {
updt_locked = 0;
cur_shard = t->last_exp_shard;
do {
shard = cur_shard + 1;
if (shard == CONFIG_HAP_TBL_BUCKETS)
shard = 0;
} while (_HA_ATOMIC_CAS(&t->last_exp_shard, &cur_shard, shard) != 0 && __ha_cpu_relax());
looped = 0;
HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK);
updt_locked = 0;
expired = 0;
while (1) {
@ -915,8 +922,7 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
if (likely(tick_is_lt(now_ms, eb->key))) {
/* timer not expired yet, revisit it later */
exp_next = eb->key;
goto out_unlock;
break;
}
/* timer looks expired, detach it from the queue */
@ -924,8 +930,11 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
eb = eb32_next(eb);
/* don't delete an entry which is currently referenced */
if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) {
if (tick_isset(ts->expire))
task_exp = tick_first(task_exp, ts->expire);
continue;
}
eb32_delete(&ts->exp);
@ -933,6 +942,7 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
if (!tick_isset(ts->expire))
continue;
task_exp = tick_first(task_exp, ts->expire);
ts->exp.key = ts->expire;
eb32_insert(&t->shards[shard].exps, &ts->exp);
@ -956,8 +966,8 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
expired++;
if (expired == STKTABLE_MAX_UPDATES_AT_ONCE) {
need_resched = 1;
exp_next = TICK_ETERNITY;
goto out_unlock;
task_exp = tick_first(task_exp, ts->expire);
break;
}
}
/* if the entry is in the update list, we must be extremely careful
@ -973,8 +983,10 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
/* now we're locked, new peers can't grab it anymore,
* existing ones already have the ref_cnt.
*/
if (HA_ATOMIC_LOAD(&ts->ref_cnt))
if (HA_ATOMIC_LOAD(&ts->ref_cnt)) {
task_exp = tick_first(task_exp, ts->expire);
continue;
}
/* session expired, trash it */
ebmb_delete(&ts->key);
@ -983,20 +995,26 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
__stksess_free(t, ts);
}
/* We have found no task to expire in any tree */
exp_next = TICK_ETERNITY;
out_unlock:
if (updt_locked)
HA_RWLOCK_WRUNLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
task_exp = tick_first(task_exp, exp_next);
t->shards[shard].min_exp = task_exp;
HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
}
if (need_resched) {
task_wakeup(task, TASK_WOKEN_OTHER);
} else {
int i;
task_exp = TICK_ETERNITY;
/*
* Lookup for the next wakeup date for each shard.
* It is okay to access this without a lock, this is only ever modified
* by the task currently running.
*/
for (i = 0; i < CONFIG_HAP_TBL_BUCKETS; i++) {
task_exp = tick_first(task_exp, t->shards[i].min_exp);
}
/* Reset the task's expiration. We do this under the lock so as not
* to ruin a call to task_queue() in stktable_requeue_exp() if we
* were to update with TICK_ETERNITY.