mhnsw: make the search less greedy
Introduced a generosity factor that makes the search less greedy. It dramatically improves recall at the cost of making the search a bit slower (for the same recall one can use half the M and a smaller ef). Had to add a Queue::safe_push() method that, when the queue is full, removes one of the furthest elements (not necessarily the furthest) to keep the queue from overflowing.
This commit is contained in:
parent
885eb19823
commit
26e599cd32
@ -44,6 +44,11 @@ public:
|
|||||||
Element *top() const { return (Element*)queue_top(&m_queue); }
|
Element *top() const { return (Element*)queue_top(&m_queue); }
|
||||||
|
|
||||||
void push(const Element *element) { queue_insert(&m_queue, (uchar*)element); }
|
void push(const Element *element) { queue_insert(&m_queue, (uchar*)element); }
|
||||||
|
void safe_push(const Element *element)
|
||||||
|
{
|
||||||
|
if (is_full()) m_queue.elements--; // queue is full: decrementing the element count discards one of the furthest elements (not necessarily the furthest) so the insert below cannot overflow the queue
|
||||||
|
queue_insert(&m_queue, (uchar*)element);
|
||||||
|
}
|
||||||
Element *pop() { return (Element *)queue_remove_top(&m_queue); }
|
Element *pop() { return (Element *)queue_remove_top(&m_queue); }
|
||||||
void clear() { queue_remove_all(&m_queue); }
|
void clear() { queue_remove_all(&m_queue); }
|
||||||
void propagate_top() { queue_replace_top(&m_queue); }
|
void propagate_top() { queue_replace_top(&m_queue); }
|
||||||
|
@ -27,6 +27,7 @@ ulonglong mhnsw_cache_size;
|
|||||||
|
|
||||||
// Algorithm parameters
|
// Algorithm parameters
|
||||||
static constexpr float alpha = 1.1f;
|
static constexpr float alpha = 1.1f;
|
||||||
|
static constexpr float generosity = 1.1f;
|
||||||
static constexpr uint ef_construction= 10;
|
static constexpr uint ef_construction= 10;
|
||||||
|
|
||||||
enum Graph_table_fields {
|
enum Graph_table_fields {
|
||||||
@ -928,7 +929,8 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
|
|||||||
best.push(v);
|
best.push(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
float furthest_best= FLT_MAX;
|
float furthest_best= best.is_empty() ? FLT_MAX
|
||||||
|
: best.top()->distance_to_target * generosity;
|
||||||
while (candidates.elements())
|
while (candidates.elements())
|
||||||
{
|
{
|
||||||
const Visited &cur= *candidates.pop();
|
const Visited &cur= *candidates.pop();
|
||||||
@ -958,15 +960,18 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
|
|||||||
if (skip_deleted && v->node->deleted)
|
if (skip_deleted && v->node->deleted)
|
||||||
continue;
|
continue;
|
||||||
best.push(v);
|
best.push(v);
|
||||||
furthest_best= best.top()->distance_to_target;
|
furthest_best= best.top()->distance_to_target * generosity;
|
||||||
}
|
}
|
||||||
else if (v->distance_to_target < furthest_best)
|
else if (v->distance_to_target < furthest_best)
|
||||||
{
|
{
|
||||||
candidates.push(v);
|
candidates.safe_push(v);
|
||||||
if (skip_deleted && v->node->deleted)
|
if (skip_deleted && v->node->deleted)
|
||||||
continue;
|
continue;
|
||||||
|
if (v->distance_to_target < best.top()->distance_to_target)
|
||||||
|
{
|
||||||
best.replace_top(v);
|
best.replace_top(v);
|
||||||
furthest_best= best.top()->distance_to_target;
|
furthest_best= best.top()->distance_to_target * generosity;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user