Compare commits

2 Commits: master...20250507-s

Author | SHA1 | Date
---|---|---
 | 5ddd189a0b |
 | 48eb925d72 |
@@ -211,12 +211,14 @@ struct stktable {
     struct {
         struct eb_root keys;     /* head of sticky session tree */
         struct eb_root exps;     /* head of sticky session expiration tree */
+        unsigned int min_exp;    /* closest next expiration */
         __decl_thread(HA_RWLOCK_T sh_lock); /* for the trees above */
     } shards[CONFIG_HAP_TBL_BUCKETS];
 
     unsigned int refcnt;         /* number of local peer over all peers sections
                                     attached to this table */
     unsigned int current;        /* number of sticky sessions currently in table */
+    unsigned int last_exp_shard; /* last shard we visited when expiring entries */
     __decl_thread(HA_RWLOCK_T lock); /* lock related to the table */
 
     THREAD_ALIGN(64);
@@ -296,119 +296,109 @@ int stktable_trash_oldest(struct stktable *t, int to_batch)
     struct stksess *ts;
     struct eb32_node *eb;
     int max_search; // no more than 50% misses
-    int max_per_shard;
-    int done_per_shard;
     int batched = 0;
-    int updt_locked;
-    int looped;
+    int updt_locked = 0;
+    int looped = 0;
+    unsigned int cur_shard;
     int shard;
 
-    shard = 0;
+    cur_shard = t->last_exp_shard;
 
+    do {
+        shard = cur_shard + 1;
+        if (shard == CONFIG_HAP_TBL_BUCKETS)
+            shard = 0;
+    } while (_HA_ATOMIC_CAS(&t->last_exp_shard, &cur_shard, shard) != 0 && __ha_cpu_relax());
+
     if (to_batch > STKTABLE_MAX_UPDATES_AT_ONCE)
         to_batch = STKTABLE_MAX_UPDATES_AT_ONCE;
 
     max_search = to_batch * 2; // no more than 50% misses
-    max_per_shard = (to_batch + CONFIG_HAP_TBL_BUCKETS - 1) / CONFIG_HAP_TBL_BUCKETS;
 
     while (batched < to_batch) {
-        done_per_shard = 0;
-        looped = 0;
-        updt_locked = 0;
 
         HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
 
         eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK);
-        while (batched < to_batch && done_per_shard < max_per_shard) {
-            if (unlikely(!eb)) {
-                /* we might have reached the end of the tree, typically because
-                 * <now_ms> is in the first half and we're first scanning the last
-                 * half. Let's loop back to the beginning of the tree now if we
-                 * have not yet visited it.
-                 */
-                if (looped)
-                    break;
-                looped = 1;
-                eb = eb32_first(&t->shards[shard].exps);
-                if (likely(!eb))
-                    break;
-            }
-
-            if (--max_search < 0)
-                break;
-
-            /* timer looks expired, detach it from the queue */
-            ts = eb32_entry(eb, struct stksess, exp);
-            eb = eb32_next(eb);
-
-            /* don't delete an entry which is currently referenced */
-            if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
-                continue;
-
-            eb32_delete(&ts->exp);
-
-            if (ts->expire != ts->exp.key) {
-                if (!tick_isset(ts->expire))
-                    continue;
-
-                ts->exp.key = ts->expire;
-                eb32_insert(&t->shards[shard].exps, &ts->exp);
-
-                /* the update might have jumped beyond the next element,
-                 * possibly causing a wrapping. We need to check whether
-                 * the next element should be used instead. If the next
-                 * element doesn't exist it means we're on the right
-                 * side and have to check the first one then. If it
-                 * exists and is closer, we must use it, otherwise we
-                 * use the current one.
-                 */
-                if (!eb)
-                    eb = eb32_first(&t->shards[shard].exps);
-                if (!eb || tick_is_lt(ts->exp.key, eb->key))
-                    eb = &ts->exp;
-                continue;
-            }
-
-            /* if the entry is in the update list, we must be extremely careful
-             * because peers can see it at any moment and start to use it. Peers
-             * will take the table's updt_lock for reading when doing that, and
-             * with that lock held, will grab a ref_cnt before releasing the
-             * lock. So we must take this lock as well and check the ref_cnt.
-             */
-            if (!updt_locked) {
-                updt_locked = 1;
-                HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
-            }
-            /* now we're locked, new peers can't grab it anymore,
-             * existing ones already have the ref_cnt.
-             */
-            if (HA_ATOMIC_LOAD(&ts->ref_cnt))
-                continue;
-
-            /* session expired, trash it */
-            ebmb_delete(&ts->key);
-            MT_LIST_DELETE(&ts->pend_updts);
-            eb32_delete(&ts->upd);
-            __stksess_free(t, ts);
-            batched++;
-            done_per_shard++;
-        }
-
-        if (updt_locked)
-            HA_RWLOCK_WRUNLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
-
-        HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
-
-        if (max_search <= 0)
-            break;
-
-        shard = (shard + 1) % CONFIG_HAP_TBL_BUCKETS;
-        if (!shard)
-            break;
+        if (unlikely(!eb)) {
+            /* we might have reached the end of the tree, typically because
+             * <now_ms> is in the first half and we're first scanning the last
+             * half. Let's loop back to the beginning of the tree now if we
+             * have not yet visited it.
+             */
+            if (looped)
+                break;
+            looped = 1;
+            eb = eb32_first(&t->shards[shard].exps);
+            if (likely(!eb))
+                break;
+        }
+
+        if (--max_search < 0)
+            break;
+
+        /* timer looks expired, detach it from the queue */
+        ts = eb32_entry(eb, struct stksess, exp);
+        eb = eb32_next(eb);
+
+        /* don't delete an entry which is currently referenced */
+        if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
+            continue;
+
+        eb32_delete(&ts->exp);
+
+        if (ts->expire != ts->exp.key) {
+            if (!tick_isset(ts->expire))
+                continue;
+
+            ts->exp.key = ts->expire;
+            eb32_insert(&t->shards[shard].exps, &ts->exp);
+
+            /* the update might have jumped beyond the next element,
+             * possibly causing a wrapping. We need to check whether
+             * the next element should be used instead. If the next
+             * element doesn't exist it means we're on the right
+             * side and have to check the first one then. If it
+             * exists and is closer, we must use it, otherwise we
+             * use the current one.
+             */
+            if (!eb)
+                eb = eb32_first(&t->shards[shard].exps);
+            if (!eb || tick_is_lt(ts->exp.key, eb->key))
+                eb = &ts->exp;
+            continue;
+        }
+
+        /* if the entry is in the update list, we must be extremely careful
+         * because peers can see it at any moment and start to use it. Peers
+         * will take the table's updt_lock for reading when doing that, and
+         * with that lock held, will grab a ref_cnt before releasing the
+         * lock. So we must take this lock as well and check the ref_cnt.
+         */
+        if (!updt_locked) {
+            updt_locked = 1;
+            HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
+        }
+        /* now we're locked, new peers can't grab it anymore,
+         * existing ones already have the ref_cnt.
+         */
+        if (HA_ATOMIC_LOAD(&ts->ref_cnt))
+            continue;
+
+        /* session expired, trash it */
+        ebmb_delete(&ts->key);
+        MT_LIST_DELETE(&ts->pend_updts);
+        eb32_delete(&ts->upd);
+        __stksess_free(t, ts);
+        batched++;
     }
 
+    if (updt_locked)
+        HA_RWLOCK_WRUNLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
+
+    HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+
     return batched;
 }
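A minimal sketch of the lock-free shard rotation that the updated stktable_trash_oldest() performs on t->last_exp_shard, written with standard C11 atomics in place of HAProxy's _HA_ATOMIC_CAS()/__ha_cpu_relax() macros; pick_next_shard() and NB_SHARDS are illustrative names, not part of this patch:

    #include <stdatomic.h>

    #define NB_SHARDS 64 /* stand-in for CONFIG_HAP_TBL_BUCKETS */

    /* Advance a shared "last visited shard" counter by one, wrapping at
     * NB_SHARDS, and return the shard this caller now owns for its pass.
     * The CAS loop retries whenever another thread advanced the counter
     * first, so concurrent expiration passes each start on a different
     * shard without taking any lock.
     */
    static unsigned int pick_next_shard(_Atomic unsigned int *last_shard)
    {
        unsigned int cur = atomic_load_explicit(last_shard, memory_order_relaxed);
        unsigned int next;

        do {
            next = cur + 1;
            if (next == NB_SHARDS)
                next = 0;
            /* on failure the CAS reloads the current value into cur */
        } while (!atomic_compare_exchange_weak(last_shard, &cur, next));

        return next;
    }

Each caller thus advances the shared counter by exactly one step, spreading concurrent trash/expire passes across different shards instead of having them all start at shard 0.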
@@ -895,118 +885,136 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
     int updt_locked;
     int expired;
     int looped;
-    int exp_next;
     int task_exp;
+    unsigned int cur_shard;
     int shard;
 
     task_exp = TICK_ETERNITY;
 
-    for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) {
-        updt_locked = 0;
-        looped = 0;
-        HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
-        eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK);
-        expired = 0;
-
-        while (1) {
-            if (unlikely(!eb)) {
-                /* we might have reached the end of the tree, typically because
-                 * <now_ms> is in the first half and we're first scanning the last
-                 * half. Let's loop back to the beginning of the tree now if we
-                 * have not yet visited it.
-                 */
-                if (looped)
-                    break;
-                looped = 1;
-                eb = eb32_first(&t->shards[shard].exps);
-                if (likely(!eb))
-                    break;
-            }
-
-            if (likely(tick_is_lt(now_ms, eb->key))) {
-                /* timer not expired yet, revisit it later */
-                exp_next = eb->key;
-                goto out_unlock;
-            }
-
-            /* timer looks expired, detach it from the queue */
-            ts = eb32_entry(eb, struct stksess, exp);
-            eb = eb32_next(eb);
-
-            /* don't delete an entry which is currently referenced */
-            if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
-                continue;
-
-            eb32_delete(&ts->exp);
-
-            if (!tick_is_expired(ts->expire, now_ms)) {
-                if (!tick_isset(ts->expire))
-                    continue;
-
-                ts->exp.key = ts->expire;
-                eb32_insert(&t->shards[shard].exps, &ts->exp);
-
-                /* the update might have jumped beyond the next element,
-                 * possibly causing a wrapping. We need to check whether
-                 * the next element should be used instead. If the next
-                 * element doesn't exist it means we're on the right
-                 * side and have to check the first one then. If it
-                 * exists and is closer, we must use it, otherwise we
-                 * use the current one.
-                 */
-                if (!eb)
-                    eb = eb32_first(&t->shards[shard].exps);
-                if (!eb || tick_is_lt(ts->exp.key, eb->key))
-                    eb = &ts->exp;
-                continue;
-            }
-
-            if (updt_locked == 1) {
-                expired++;
-                if (expired == STKTABLE_MAX_UPDATES_AT_ONCE) {
-                    need_resched = 1;
-                    exp_next = TICK_ETERNITY;
-                    goto out_unlock;
-                }
-            }
-            /* if the entry is in the update list, we must be extremely careful
-             * because peers can see it at any moment and start to use it. Peers
-             * will take the table's updt_lock for reading when doing that, and
-             * with that lock held, will grab a ref_cnt before releasing the
-             * lock. So we must take this lock as well and check the ref_cnt.
-             */
-            if (!updt_locked) {
-                updt_locked = 1;
-                HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
-            }
-            /* now we're locked, new peers can't grab it anymore,
-             * existing ones already have the ref_cnt.
-             */
-            if (HA_ATOMIC_LOAD(&ts->ref_cnt))
-                continue;
-
-            /* session expired, trash it */
-            ebmb_delete(&ts->key);
-            MT_LIST_DELETE(&ts->pend_updts);
-            eb32_delete(&ts->upd);
-            __stksess_free(t, ts);
-        }
-
-        /* We have found no task to expire in any tree */
-        exp_next = TICK_ETERNITY;
-
-    out_unlock:
-        if (updt_locked)
-            HA_RWLOCK_WRUNLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
-
-        task_exp = tick_first(task_exp, exp_next);
-        HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+    cur_shard = t->last_exp_shard;
+
+    do {
+        shard = cur_shard + 1;
+        if (shard == CONFIG_HAP_TBL_BUCKETS)
+            shard = 0;
+    } while (_HA_ATOMIC_CAS(&t->last_exp_shard, &cur_shard, shard) != 0 && __ha_cpu_relax());
+
+    looped = 0;
+    HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+    eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK);
+    updt_locked = 0;
+    expired = 0;
+
+    while (1) {
+        if (unlikely(!eb)) {
+            /* we might have reached the end of the tree, typically because
+             * <now_ms> is in the first half and we're first scanning the last
+             * half. Let's loop back to the beginning of the tree now if we
+             * have not yet visited it.
+             */
+            if (looped)
+                break;
+            looped = 1;
+            eb = eb32_first(&t->shards[shard].exps);
+            if (likely(!eb))
+                break;
+        }
+
+        if (likely(tick_is_lt(now_ms, eb->key))) {
+            /* timer not expired yet, revisit it later */
+            break;
+        }
+
+        /* timer looks expired, detach it from the queue */
+        ts = eb32_entry(eb, struct stksess, exp);
+        eb = eb32_next(eb);
+
+        /* don't delete an entry which is currently referenced */
+        if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) {
+            if (tick_isset(ts->expire))
+                task_exp = tick_first(task_exp, ts->expire);
+            continue;
+        }
+
+        eb32_delete(&ts->exp);
+
+        if (!tick_is_expired(ts->expire, now_ms)) {
+            if (!tick_isset(ts->expire))
+                continue;
+
+            task_exp = tick_first(task_exp, ts->expire);
+            ts->exp.key = ts->expire;
+            eb32_insert(&t->shards[shard].exps, &ts->exp);
+
+            /* the update might have jumped beyond the next element,
+             * possibly causing a wrapping. We need to check whether
+             * the next element should be used instead. If the next
+             * element doesn't exist it means we're on the right
+             * side and have to check the first one then. If it
+             * exists and is closer, we must use it, otherwise we
+             * use the current one.
+             */
+            if (!eb)
+                eb = eb32_first(&t->shards[shard].exps);
+            if (!eb || tick_is_lt(ts->exp.key, eb->key))
+                eb = &ts->exp;
+            continue;
+        }
+
+        if (updt_locked == 1) {
+            expired++;
+            if (expired == STKTABLE_MAX_UPDATES_AT_ONCE) {
+                need_resched = 1;
+                task_exp = tick_first(task_exp, ts->expire);
+                break;
+            }
+        }
+        /* if the entry is in the update list, we must be extremely careful
+         * because peers can see it at any moment and start to use it. Peers
+         * will take the table's updt_lock for reading when doing that, and
+         * with that lock held, will grab a ref_cnt before releasing the
+         * lock. So we must take this lock as well and check the ref_cnt.
+         */
+        if (!updt_locked) {
+            updt_locked = 1;
+            HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
+        }
+        /* now we're locked, new peers can't grab it anymore,
+         * existing ones already have the ref_cnt.
+         */
+        if (HA_ATOMIC_LOAD(&ts->ref_cnt)) {
+            task_exp = tick_first(task_exp, ts->expire);
+            continue;
+        }
+
+        /* session expired, trash it */
+        ebmb_delete(&ts->key);
+        MT_LIST_DELETE(&ts->pend_updts);
+        eb32_delete(&ts->upd);
+        __stksess_free(t, ts);
     }
 
+    if (updt_locked)
+        HA_RWLOCK_WRUNLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
+
+    t->shards[shard].min_exp = task_exp;
+    HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+
     if (need_resched) {
         task_wakeup(task, TASK_WOKEN_OTHER);
     } else {
+        int i;
+
+        task_exp = TICK_ETERNITY;
+        /*
+         * Lookup for the next wakeup date for each shard.
+         * It is okay to access this without a lock, this is only ever modified
+         * by the task currently running.
+         */
+        for (i = 0; i < CONFIG_HAP_TBL_BUCKETS; i++) {
+            task_exp = tick_first(task_exp, t->shards[i].min_exp);
+        }
         /* Reset the task's expiration. We do this under the lock so as not
          * to ruin a call to task_queue() in stktable_requeue_exp() if we
          * were to update with TICK_ETERNITY.
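For reference, a simplified sketch of how the task's next wakeup date can be derived from the per-shard min_exp fields, as the new else branch above does. TICK_ETERNITY and tick_first() are reimplemented here as stand-ins for HAProxy's ticks API, and NB_SHARDS stands in for CONFIG_HAP_TBL_BUCKETS:

    #define TICK_ETERNITY 0
    #define NB_SHARDS     64 /* stand-in for CONFIG_HAP_TBL_BUCKETS */

    static inline int tick_isset(unsigned int t)
    {
        return t != TICK_ETERNITY;
    }

    /* earlier of two dates, ignoring "eternity"; wrapping-aware comparison */
    static inline unsigned int tick_first(unsigned int t1, unsigned int t2)
    {
        if (!tick_isset(t1))
            return t2;
        if (!tick_isset(t2))
            return t1;
        return ((int)(t1 - t2) <= 0) ? t1 : t2;
    }

    /* Each shard publishes its own closest expiration in min_exp; the expiry
     * task's wakeup date is simply the earliest of them all.
     */
    unsigned int next_table_wakeup(const unsigned int min_exp[NB_SHARDS])
    {
        unsigned int exp = TICK_ETERNITY;
        int i;

        for (i = 0; i < NB_SHARDS; i++)
            exp = tick_first(exp, min_exp[i]);
        return exp;
    }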