diff --git a/sql/bloom_filters.h b/sql/bloom_filters.h
index 0dafaf1b6f3..728e5d9bf2f 100644
--- a/sql/bloom_filters.h
+++ b/sql/bloom_filters.h
@@ -53,6 +53,10 @@ SOFTWARE.
 #define NEON_IMPLEMENTATION
 #endif
 #endif
+#if defined __powerpc64__ && defined __VSX__
+#include <altivec.h>
+#define POWER_IMPLEMENTATION
+#endif
 
 template <typename T>
 struct PatternedSimdBloomFilter
diff --git a/sql/vector_mhnsw.cc b/sql/vector_mhnsw.cc
index d8a63a7558c..db6bdb09277 100644
--- a/sql/vector_mhnsw.cc
+++ b/sql/vector_mhnsw.cc
@@ -229,6 +229,82 @@ struct FVector
   }
 #endif
 
+#ifdef POWER_IMPLEMENTATION
+  /************* POWERPC *****************************************************/
+  static constexpr size_t POWER_bytes= 128 / 8; // Assume 128-bit vector width
+  static constexpr size_t POWER_dims= POWER_bytes / sizeof(int16_t);
+
+  /*
+    Dot product of two int16_t vectors, accumulated in 64-bit integers.
+
+    The loop consumes whole POWER_dims-element vectors: len is rounded up,
+    so the elements between len and the next multiple of POWER_dims are
+    read too and must be zero (see fix_tail()) to leave the sum unchanged.
+    vec_ld() is an aligned load, so v1 and v2 are expected to be
+    POWER_bytes-aligned.
+  */
+  static float dot_product(const int16_t *v1, const int16_t *v2, size_t len)
+  {
+    // Using vector long long for int64_t accumulation
+    vector long long ll_sum= {0, 0};
+    // Round up to process full vector, including padding
+    size_t base= ((len + POWER_dims - 1) / POWER_dims) * POWER_dims;
+
+    for (size_t i= 0; i < base; i+= POWER_dims)
+    {
+      vector short x= vec_ld(0, &v1[i]);
+      vector short y= vec_ld(0, &v2[i]);
+
+      // Widening multiplication: vec_mule() multiplies the even-numbered
+      // elements and vec_mulo() the odd-numbered ones, int16 -> int32
+      vector int product_hi= vec_mule(x, y);
+      vector int product_lo= vec_mulo(x, y);
+
+      // Extend vector int to vector long long for accumulation
+      vector long long llhi1= vec_unpackh(product_hi);
+      vector long long llhi2= vec_unpackl(product_hi);
+      vector long long lllo1= vec_unpackh(product_lo);
+      vector long long lllo2= vec_unpackl(product_lo);
+
+      ll_sum+= llhi1 + llhi2 + lllo1 + lllo2;
+    }
+
+    // Add up the two 64-bit lanes and convert the total to float
+    return static_cast<float>(static_cast<int64_t>(ll_sum[0]) +
+                              static_cast<int64_t>(ll_sum[1]));
+  }
+
+  /*
+    Allocation size in bytes for n int16_t dimensions: the header, the data
+    rounded up to whole POWER_bytes blocks, and POWER_bytes - 1 spare bytes
+    (presumably the room align_ptr() needs to shift the start).
+  */
+  static size_t alloc_size(size_t n)
+  {
+    return alloc_header + MY_ALIGN(n * 2, POWER_bytes) + POWER_bytes - 1;
+  }
+
+  /*
+    Position the FVector inside the buffer at ptr so that the address
+    alloc_header bytes past its start is POWER_bytes-aligned.
+  */
+  static FVector *align_ptr(void *ptr)
+  {
+    return (FVector*)(MY_ALIGN(((intptr)ptr) + alloc_header, POWER_bytes)
+                      - alloc_header);
+  }
+
+  /*
+    Zero the elements of dims between vec_len and the next multiple of
+    POWER_dims, i.e. the padding that dot_product() reads.
+  */
+  void fix_tail(size_t vec_len)
+  {
+    bzero(dims + vec_len, (MY_ALIGN(vec_len, POWER_dims) - vec_len) * 2);
+  }
+#undef DEFAULT_IMPLEMENTATION
+#endif
+
 /************* no-SIMD default ******************************************/
 #ifdef DEFAULT_IMPLEMENTATION
   DEFAULT_IMPLEMENTATION