cleanup
This commit is contained in:
parent
28242134c6
commit
0dbeb960d4
@ -90,27 +90,7 @@ struct FVector
|
|||||||
static size_t data_to_value_size(size_t data_size)
|
static size_t data_to_value_size(size_t data_size)
|
||||||
{ return (data_size - data_header)*2; }
|
{ return (data_size - data_header)*2; }
|
||||||
|
|
||||||
static const FVector *create(metric_type metric, void *mem, const void *src, size_t src_len)
|
static const FVector *create(metric_type metric, void *mem, const void *src, size_t src_len);
|
||||||
{
|
|
||||||
float scale=0, *v= (float *)src;
|
|
||||||
size_t vec_len= src_len / sizeof(float);
|
|
||||||
for (size_t i= 0; i < vec_len; i++)
|
|
||||||
if (std::abs(scale) < std::abs(get_float(v + i)))
|
|
||||||
scale= get_float(v + i);
|
|
||||||
|
|
||||||
FVector *vec= align_ptr(mem);
|
|
||||||
vec->scale= scale ? scale/32767 : 1;
|
|
||||||
for (size_t i= 0; i < vec_len; i++)
|
|
||||||
vec->dims[i] = static_cast<int16_t>(std::round(get_float(v + i) / vec->scale));
|
|
||||||
vec->postprocess(vec_len);
|
|
||||||
if (metric == COSINE)
|
|
||||||
{
|
|
||||||
if (vec->abs2 > 0.0f)
|
|
||||||
vec->scale/= std::sqrt(2*vec->abs2);
|
|
||||||
vec->abs2= 0.5f;
|
|
||||||
}
|
|
||||||
return vec;
|
|
||||||
}
|
|
||||||
|
|
||||||
void postprocess(size_t vec_len)
|
void postprocess(size_t vec_len)
|
||||||
{
|
{
|
||||||
@ -442,8 +422,7 @@ public:
|
|||||||
metric_type metric;
|
metric_type metric;
|
||||||
|
|
||||||
MHNSW_Share(TABLE *t)
|
MHNSW_Share(TABLE *t)
|
||||||
: tref_len(t->file->ref_length),
|
: tref_len(t->file->ref_length), gref_len(t->hlindex->file->ref_length),
|
||||||
gref_len(t->hlindex->file->ref_length),
|
|
||||||
M(static_cast<uint>(t->s->key_info[t->s->keys].option_struct->M)),
|
M(static_cast<uint>(t->s->key_info[t->s->keys].option_struct->M)),
|
||||||
metric(t->s->key_info[t->s->keys].option_struct->metric)
|
metric(t->s->key_info[t->s->keys].option_struct->metric)
|
||||||
{
|
{
|
||||||
@ -596,7 +575,7 @@ public:
|
|||||||
{
|
{
|
||||||
node_cache.clear();
|
node_cache.clear();
|
||||||
free_root(&root, MYF(0));
|
free_root(&root, MYF(0));
|
||||||
start= 0;
|
start= nullptr;
|
||||||
list_of_nodes_is_lost= true;
|
list_of_nodes_is_lost= true;
|
||||||
}
|
}
|
||||||
void release(bool, TABLE_SHARE *) override
|
void release(bool, TABLE_SHARE *) override
|
||||||
@ -792,6 +771,28 @@ int MHNSW_Share::acquire(MHNSW_Share **ctx, TABLE *table, bool for_update)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const FVector *FVector::create(metric_type metric, void *mem, const void *src, size_t src_len)
|
||||||
|
{
|
||||||
|
float scale=0, *v= (float *)src;
|
||||||
|
size_t vec_len= src_len / sizeof(float);
|
||||||
|
for (size_t i= 0; i < vec_len; i++)
|
||||||
|
if (std::abs(scale) < std::abs(get_float(v + i)))
|
||||||
|
scale= get_float(v + i);
|
||||||
|
|
||||||
|
FVector *vec= align_ptr(mem);
|
||||||
|
vec->scale= scale ? scale/32767 : 1;
|
||||||
|
for (size_t i= 0; i < vec_len; i++)
|
||||||
|
vec->dims[i] = static_cast<int16_t>(std::round(get_float(v + i) / vec->scale));
|
||||||
|
vec->postprocess(vec_len);
|
||||||
|
if (metric == COSINE)
|
||||||
|
{
|
||||||
|
if (vec->abs2 > 0.0f)
|
||||||
|
vec->scale/= std::sqrt(2*vec->abs2);
|
||||||
|
vec->abs2= 0.5f;
|
||||||
|
}
|
||||||
|
return vec;
|
||||||
|
}
|
||||||
|
|
||||||
/* copy the vector, preprocessed as needed */
|
/* copy the vector, preprocessed as needed */
|
||||||
const FVector *FVectorNode::make_vec(const void *v)
|
const FVector *FVectorNode::make_vec(const void *v)
|
||||||
{
|
{
|
||||||
@ -981,7 +982,7 @@ class VisitedSet
|
|||||||
the Neighborhood candidates, which might be already at its max size.
|
the Neighborhood candidates, which might be already at its max size.
|
||||||
*/
|
*/
|
||||||
static int select_neighbors(MHNSW_Share *ctx, TABLE *graph, size_t layer,
|
static int select_neighbors(MHNSW_Share *ctx, TABLE *graph, size_t layer,
|
||||||
FVectorNode &target, const Neighborhood &candidates,
|
FVectorNode *target, const Neighborhood &candidates,
|
||||||
FVectorNode *extra_candidate,
|
FVectorNode *extra_candidate,
|
||||||
size_t max_neighbor_connections)
|
size_t max_neighbor_connections)
|
||||||
{
|
{
|
||||||
@ -993,17 +994,17 @@ static int select_neighbors(MHNSW_Share *ctx, TABLE *graph, size_t layer,
|
|||||||
MEM_ROOT * const root= graph->in_use->mem_root;
|
MEM_ROOT * const root= graph->in_use->mem_root;
|
||||||
auto discarded= (Visited**)my_safe_alloca(sizeof(Visited**)*max_neighbor_connections);
|
auto discarded= (Visited**)my_safe_alloca(sizeof(Visited**)*max_neighbor_connections);
|
||||||
size_t discarded_num= 0;
|
size_t discarded_num= 0;
|
||||||
Neighborhood &neighbors= target.neighbors[layer];
|
Neighborhood &neighbors= target->neighbors[layer];
|
||||||
|
|
||||||
for (size_t i=0; i < candidates.num; i++)
|
for (size_t i=0; i < candidates.num; i++)
|
||||||
{
|
{
|
||||||
FVectorNode *node= candidates.links[i];
|
FVectorNode *node= candidates.links[i];
|
||||||
if (int err= node->load(graph))
|
if (int err= node->load(graph))
|
||||||
return err;
|
return err;
|
||||||
pq.push(new (root) Visited(node, node->distance_to(target.vec)));
|
pq.push(new (root) Visited(node, node->distance_to(target->vec)));
|
||||||
}
|
}
|
||||||
if (extra_candidate)
|
if (extra_candidate)
|
||||||
pq.push(new (root) Visited(extra_candidate, extra_candidate->distance_to(target.vec)));
|
pq.push(new (root) Visited(extra_candidate, extra_candidate->distance_to(target->vec)));
|
||||||
|
|
||||||
DBUG_ASSERT(pq.elements());
|
DBUG_ASSERT(pq.elements());
|
||||||
neighbors.num= 0;
|
neighbors.num= 0;
|
||||||
@ -1018,13 +1019,13 @@ static int select_neighbors(MHNSW_Share *ctx, TABLE *graph, size_t layer,
|
|||||||
if ((discard= node->distance_to(neighbors.links[i]->vec) <= target_dista))
|
if ((discard= node->distance_to(neighbors.links[i]->vec) <= target_dista))
|
||||||
break;
|
break;
|
||||||
if (!discard)
|
if (!discard)
|
||||||
target.push_neighbor(layer, node);
|
target->push_neighbor(layer, node);
|
||||||
else if (discarded_num + neighbors.num < max_neighbor_connections)
|
else if (discarded_num + neighbors.num < max_neighbor_connections)
|
||||||
discarded[discarded_num++]= vec;
|
discarded[discarded_num++]= vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i=0; i < discarded_num && neighbors.num < max_neighbor_connections; i++)
|
for (size_t i=0; i < discarded_num && neighbors.num < max_neighbor_connections; i++)
|
||||||
target.push_neighbor(layer, discarded[i]->node);
|
target->push_neighbor(layer, discarded[i]->node);
|
||||||
|
|
||||||
my_safe_afree(discarded, sizeof(Visited**)*max_neighbor_connections);
|
my_safe_afree(discarded, sizeof(Visited**)*max_neighbor_connections);
|
||||||
return 0;
|
return 0;
|
||||||
@ -1096,7 +1097,7 @@ static int update_second_degree_neighbors(MHNSW_Share *ctx, TABLE *graph,
|
|||||||
if (neighneighbors.num < max_neighbors)
|
if (neighneighbors.num < max_neighbors)
|
||||||
neigh->push_neighbor(layer, node);
|
neigh->push_neighbor(layer, node);
|
||||||
else
|
else
|
||||||
if (int err= select_neighbors(ctx, graph, layer, *neigh, neighneighbors,
|
if (int err= select_neighbors(ctx, graph, layer, neigh, neighneighbors,
|
||||||
node, max_neighbors))
|
node, max_neighbors))
|
||||||
return err;
|
return err;
|
||||||
if (int err= neigh->save(graph))
|
if (int err= neigh->save(graph))
|
||||||
@ -1129,7 +1130,7 @@ static int search_layer(MHNSW_Share *ctx, TABLE *graph, const FVector *target,
|
|||||||
Queue<Visited> candidates, best;
|
Queue<Visited> candidates, best;
|
||||||
bool skip_deleted;
|
bool skip_deleted;
|
||||||
uint ef= result_size;
|
uint ef= result_size;
|
||||||
float generosity= 1.1f + ctx->M/500.0f;
|
const float generosity= 1.1f + ctx->M/500.0f;
|
||||||
|
|
||||||
if (construction)
|
if (construction)
|
||||||
{
|
{
|
||||||
@ -1309,7 +1310,7 @@ int mhnsw_insert(TABLE *table, KEY *keyinfo)
|
|||||||
max_neighbors, cur_layer, &candidates, true))
|
max_neighbors, cur_layer, &candidates, true))
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
if (int err= select_neighbors(ctx, graph, cur_layer, *target, candidates,
|
if (int err= select_neighbors(ctx, graph, cur_layer, target, candidates,
|
||||||
0, max_neighbors))
|
0, max_neighbors))
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user