MDEV-35897 vector index search allocates too much memory for large ef_search
Never estimate that a graph search will visit more nodes than there are in the graph. In fact, let's reduce the graph size by ~30% when capping the estimate (the code divides it by 1.3): this increases the false positive rate of the bloom filter by 2% when visiting the whole graph, but it doesn't affect recall noticeably. The shared graph size must be read under a lock, so let's store it, while the share lock is already held, in the thread-local and otherwise unused TABLE::used_stat_records member.
This commit is contained in:
parent
395db6f1d5
commit
82867e07e3
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2024, MariaDB plc
|
Copyright (c) 2024, 2025, MariaDB plc
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -753,6 +753,8 @@ MHNSW_Share *MHNSW_Share::get_from_share(TABLE_SHARE *share, TABLE *table)
|
|||||||
}
|
}
|
||||||
if (ctx)
|
if (ctx)
|
||||||
ctx->refcnt++;
|
ctx->refcnt++;
|
||||||
|
if (table) // hijack TABLE::used_stat_records
|
||||||
|
table->hlindex->used_stat_records= ctx->node_cache.size();
|
||||||
share->unlock_share();
|
share->unlock_share();
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
@ -1144,8 +1146,9 @@ static int search_layer(MHNSW_Share *ctx, TABLE *graph, const FVector *target,
|
|||||||
|
|
||||||
// WARNING! heuristic here
|
// WARNING! heuristic here
|
||||||
const double est_heuristic= 8 * std::sqrt(ctx->max_neighbors(layer));
|
const double est_heuristic= 8 * std::sqrt(ctx->max_neighbors(layer));
|
||||||
const uint est_size= static_cast<uint>(est_heuristic * std::pow(ef, ctx->ef_power));
|
double est_size= est_heuristic * std::pow(ef, ctx->ef_power);
|
||||||
VisitedSet visited(root, target, est_size);
|
set_if_smaller(est_size, graph->used_stat_records/1.3);
|
||||||
|
VisitedSet visited(root, target, static_cast<uint>(est_size));
|
||||||
|
|
||||||
candidates.init(max_ef, false, Visited::cmp);
|
candidates.init(max_ef, false, Visited::cmp);
|
||||||
best.init(ef, true, Visited::cmp);
|
best.init(ef, true, Visited::cmp);
|
||||||
@ -1213,9 +1216,9 @@ static int search_layer(MHNSW_Share *ctx, TABLE *graph, const FVector *target,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
set_if_bigger(ctx->diameter, max_distance); // not atomic, but it's ok
|
set_if_bigger(ctx->diameter, max_distance); // not atomic, but it's ok
|
||||||
if (ef > 1 && visited.count*2 > est_size)
|
if (ef > 1 && visited.count > est_size)
|
||||||
{
|
{
|
||||||
double ef_power= std::log(visited.count*2/est_heuristic) / std::log(ef);
|
double ef_power= std::log(visited.count/est_heuristic) / std::log(ef);
|
||||||
set_if_bigger(ctx->ef_power, ef_power); // not atomic, but it's ok
|
set_if_bigger(ctx->ef_power, ef_power); // not atomic, but it's ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user