diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp index 21962c3bad3..2332f2bea4e 100644 --- a/deps/simdutf/simdutf.cpp +++ b/deps/simdutf/simdutf.cpp @@ -1,7 +1,9 @@ -/* auto-generated on 2025-01-08 17:51:07 -0500. Do not edit! */ +/* auto-generated on 2025-03-17 16:12:36 -0400. Do not edit! */ /* begin file src/simdutf.cpp */ #include "simdutf.h" -// We include base64_tables once. + +#if SIMDUTF_FEATURE_BASE64 + // We include base64_tables once. /* begin file src/tables/base64_tables.h */ #ifndef SIMDUTF_BASE64_TABLES_H #define SIMDUTF_BASE64_TABLES_H @@ -692,11830 +694,8 @@ static_assert(to_base64_url_value[uint8_t('_')] == 63, #endif // SIMDUTF_BASE64_TABLES_H /* end file src/tables/base64_tables.h */ -/* begin file src/implementation.cpp */ -#include -#include -#include +#endif // SIMDUTF_FEATURE_BASE64 -static_assert(sizeof(uint8_t) == sizeof(char), - "simdutf requires that uint8_t be a char"); -static_assert(sizeof(uint16_t) == sizeof(char16_t), - "simdutf requires that char16_t be 16 bits"); -static_assert(sizeof(uint32_t) == sizeof(char32_t), - "simdutf requires that char32_t be 32 bits"); -// next line is redundant, but it is kept to catch defective systems. -static_assert(CHAR_BIT == 8, "simdutf requires 8-bit bytes"); - -// Useful for debugging purposes -namespace simdutf { -namespace { - -template std::string toBinaryString(T b) { - std::string binary = ""; - T mask = T(1) << (sizeof(T) * CHAR_BIT - 1); - while (mask > 0) { - binary += ((b & mask) == 0) ? '0' : '1'; - mask >>= 1; - } - return binary; -} -} // namespace -} // namespace simdutf - -// Implementations -// The best choice should always come first! 
-/* begin file src/simdutf/arm64.h */ -#ifndef SIMDUTF_ARM64_H -#define SIMDUTF_ARM64_H - -#ifdef SIMDUTF_FALLBACK_H - #error "arm64.h must be included before fallback.h" -#endif - - -#ifndef SIMDUTF_IMPLEMENTATION_ARM64 - #define SIMDUTF_IMPLEMENTATION_ARM64 (SIMDUTF_IS_ARM64) -#endif -#if SIMDUTF_IMPLEMENTATION_ARM64 && SIMDUTF_IS_ARM64 - #define SIMDUTF_CAN_ALWAYS_RUN_ARM64 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_ARM64 0 -#endif - - -#if SIMDUTF_IMPLEMENTATION_ARM64 - -namespace simdutf { -/** - * Implementation for NEON (ARMv8). - */ -namespace arm64 {} // namespace arm64 -} // namespace simdutf - -/* begin file src/simdutf/arm64/implementation.h */ -#ifndef SIMDUTF_ARM64_IMPLEMENTATION_H -#define SIMDUTF_ARM64_IMPLEMENTATION_H - - -namespace simdutf { -namespace arm64 { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("arm64", "ARM NEON", - internal::instruction_set::NEON) {} - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused 
bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char 
*buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept 
final; - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result 
convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t length) 
const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -}; - -} // namespace arm64 -} // namespace simdutf - -#endif // SIMDUTF_ARM64_IMPLEMENTATION_H -/* end file src/simdutf/arm64/implementation.h */ - -/* begin file src/simdutf/arm64/begin.h */ -// redefining 
SIMDUTF_IMPLEMENTATION to "arm64" -// #define SIMDUTF_IMPLEMENTATION arm64 -/* end file src/simdutf/arm64/begin.h */ - - // Declarations -/* begin file src/simdutf/arm64/intrinsics.h */ -#ifndef SIMDUTF_ARM64_INTRINSICS_H -#define SIMDUTF_ARM64_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -#endif // SIMDUTF_ARM64_INTRINSICS_H -/* end file src/simdutf/arm64/intrinsics.h */ -/* begin file src/simdutf/arm64/bitmanipulation.h */ -#ifndef SIMDUTF_ARM64_BITMANIPULATION_H -#define SIMDUTF_ARM64_BITMANIPULATION_H - -namespace simdutf { -namespace arm64 { -namespace { - -/* result might be undefined when input_num is zero */ -simdutf_really_inline int count_ones(uint64_t input_num) { - return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); -} - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_really_inline int trailing_zeroes(uint64_t input_num) { - #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). 
- _BitScanForward64(&ret, input_num); - return (int)ret; - #else // SIMDUTF_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); - #endif // SIMDUTF_REGULAR_VISUAL_STUDIO -} -#endif - -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf - -#endif // SIMDUTF_ARM64_BITMANIPULATION_H -/* end file src/simdutf/arm64/bitmanipulation.h */ -/* begin file src/simdutf/arm64/simd.h */ -#ifndef SIMDUTF_ARM64_SIMD_H -#define SIMDUTF_ARM64_SIMD_H - -#include - -namespace simdutf { -namespace arm64 { -namespace { -namespace simd { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -namespace { - // Start of private section with Visual Studio workaround - - #ifndef simdutf_make_uint8x16_t - #define simdutf_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, \ - x11, x12, x13, x14, x15, x16) \ - ([=]() { \ - uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ - x9, x10, x11, x12, x13, x14, x15, x16}; \ - return vld1q_u8(array); \ - }()) - #endif - #ifndef simdutf_make_int8x16_t - #define simdutf_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, \ - x11, x12, x13, x14, x15, x16) \ - ([=]() { \ - int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ - x9, x10, x11, x12, x13, x14, x15, x16}; \ - return vld1q_s8(array); \ - }()) - #endif - - #ifndef simdutf_make_uint8x8_t - #define simdutf_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1_u8(array); \ - }()) - #endif - #ifndef simdutf_make_int8x8_t - #define simdutf_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1_s8(array); \ - }()) - #endif - #ifndef simdutf_make_uint16x8_t - #define simdutf_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1q_u16(array); \ - }()) - #endif - #ifndef simdutf_make_int16x8_t - #define simdutf_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, 
x8) \ - ([=]() { \ - int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1q_s16(array); \ - }()) - #endif - -// End of private section with Visual Studio workaround -} // namespace -#endif // SIMDUTF_REGULAR_VISUAL_STUDIO - -template struct simd8; - -// -// Base class of simd8 and simd8, both of which use uint8x16_t -// internally. -// -template > struct base_u8 { - uint8x16_t value; - static const int SIZE = sizeof(value); - - // Conversion from/to SIMD register - simdutf_really_inline base_u8(const uint8x16_t _value) : value(_value) {} - simdutf_really_inline operator const uint8x16_t &() const { - return this->value; - } - simdutf_really_inline operator uint8x16_t &() { return this->value; } - simdutf_really_inline T first() const { return vgetq_lane_u8(*this, 0); } - simdutf_really_inline T last() const { return vgetq_lane_u8(*this, 15); } - - // Bit operations - simdutf_really_inline simd8 operator|(const simd8 other) const { - return vorrq_u8(*this, other); - } - simdutf_really_inline simd8 operator&(const simd8 other) const { - return vandq_u8(*this, other); - } - simdutf_really_inline simd8 operator^(const simd8 other) const { - return veorq_u8(*this, other); - } - simdutf_really_inline simd8 bit_andnot(const simd8 other) const { - return vbicq_u8(*this, other); - } - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - simdutf_really_inline simd8 &operator|=(const simd8 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdutf_really_inline simd8 &operator&=(const simd8 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdutf_really_inline simd8 &operator^=(const simd8 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } - - friend simdutf_really_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return vceqq_u8(lhs, rhs); - } - - 
template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - return vextq_u8(prev_chunk, *this, 16 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base_u8 { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - static simdutf_really_inline simd8 splat(bool _value) { - return vmovq_n_u8(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8(const uint8x16_t _value) - : base_u8(_value) {} - // False constructor - simdutf_really_inline simd8() : simd8(vdupq_n_u8(0)) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} - simdutf_really_inline void store(uint8_t dst[16]) const { - return vst1q_u8(dst, *this); - } - - // We return uint32_t instead of uint16_t because that seems to be more - // efficient for most purposes (cutting it down to uint16_t costs performance - // in some compilers). - simdutf_really_inline uint32_t to_bitmask() const { -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = - simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - auto minput = *this & bit_mask; - uint8x16_t tmp = vpaddq_u8(minput, minput); - tmp = vpaddq_u8(tmp, tmp); - tmp = vpaddq_u8(tmp, tmp); - return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); - } - - // Returns 4-bit out of each byte, alternating between the high 4 bits and low - // bits result it is 64 bit. This method is expected to be faster than none() - // and is equivalent when the vector register is the result of a comparison, - // with byte values 0xff and 0x00. 
- simdutf_really_inline uint64_t to_bitmask64() const { - return vget_lane_u64( - vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); - } - - simdutf_really_inline bool any() const { - return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; - } - simdutf_really_inline bool none() const { - return vmaxvq_u32(vreinterpretq_u32_u8(*this)) == 0; - } - simdutf_really_inline bool all() const { - return vminvq_u32(vreinterpretq_u32_u8(*this)) == 0xFFFFF; - } -}; - -// Unsigned bytes -template <> struct simd8 : base_u8 { - static simdutf_really_inline simd8 splat(uint8_t _value) { - return vmovq_n_u8(_value); - } - static simdutf_really_inline simd8 zero() { return vdupq_n_u8(0); } - static simdutf_really_inline simd8 load(const uint8_t *values) { - return vld1q_u8(values); - } - simdutf_really_inline simd8(const uint8x16_t _value) - : base_u8(_value) {} - // Zero constructor - simdutf_really_inline simd8() : simd8(zero()) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Member-by-member initialization -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8(simdutf_make_uint8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15)) {} -#else - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8(uint8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} -#endif - - // Repeat 16 values as many times as necessary (usually for lookup tables) - 
simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Store to array - simdutf_really_inline void store(uint8_t dst[16]) const { - return vst1q_u8(dst, *this); - } - - // Saturated math - simdutf_really_inline simd8 - saturating_add(const simd8 other) const { - return vqaddq_u8(*this, other); - } - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return vqsubq_u8(*this, other); - } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd8 - operator+(const simd8 other) const { - return vaddq_u8(*this, other); - } - simdutf_really_inline simd8 - operator-(const simd8 other) const { - return vsubq_u8(*this, other); - } - simdutf_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *this; - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *this; - } - - // Order-specific operations - simdutf_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); } - simdutf_really_inline uint8_t min_val() const { return vminvq_u8(*this); } - simdutf_really_inline simd8 - max_val(const simd8 other) const { - return vmaxq_u8(*this, other); - } - simdutf_really_inline simd8 - min_val(const simd8 other) const { - return vminq_u8(*this, other); - } - simdutf_really_inline simd8 - operator<=(const simd8 other) const { - return vcleq_u8(*this, other); - } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return vcgeq_u8(*this, other); - } - simdutf_really_inline simd8 - operator<(const simd8 other) const { - return vcltq_u8(*this, other); - } - simdutf_really_inline simd8 - operator>(const simd8 other) const { - return 
vcgtq_u8(*this, other); - } - // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true - // = nonzero. For ARM, returns all 1's. - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return simd8(*this > other); - } - // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true - // = nonzero. For ARM, returns all 1's. - simdutf_really_inline simd8 - lt_bits(const simd8 other) const { - return simd8(*this < other); - } - - // Bit-specific operations - simdutf_really_inline simd8 any_bits_set(simd8 bits) const { - return vtstq_u8(*this, bits); - } - simdutf_really_inline bool is_ascii() const { - return this->max_val() < 0b10000000u; - } - - simdutf_really_inline bool any_bits_set_anywhere() const { - return this->max_val() != 0; - } - simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { - return (*this & bits).any_bits_set_anywhere(); - } - template simdutf_really_inline simd8 shr() const { - return vshrq_n_u8(*this, N); - } - template simdutf_really_inline simd8 shl() const { - return vshlq_n_u8(*this, N); - } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } - - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - return vqtbl1q_u8(*this, simd8(original)); - } -}; - -// Signed bytes -template 
<> struct simd8 { - int8x16_t value; - - static simdutf_really_inline simd8 splat(int8_t _value) { - return vmovq_n_s8(_value); - } - static simdutf_really_inline simd8 zero() { return vdupq_n_s8(0); } - static simdutf_really_inline simd8 load(const int8_t values[16]) { - return vld1q_s8(values); - } - - // Use ST2 instead of UXTL+UXTL2 to interleave zeroes. UXTL is actually a - // USHLL #0, and shifting in NEON is actually quite slow. - // - // While this needs the registers to be in a specific order, bigger cores can - // interleave these with no overhead, and it still performs decently on little - // cores. - // movi v1.3d, #0 - // mov v0.16b, value[0] - // st2 {v0.16b, v1.16b}, [ptr], #32 - // mov v0.16b, value[1] - // st2 {v0.16b, v1.16b}, [ptr], #32 - // ... - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { - int8x16x2_t pair = match_system(big_endian) - ? int8x16x2_t{{this->value, vmovq_n_s8(0)}} - : int8x16x2_t{{vmovq_n_s8(0), this->value}}; - vst2q_s8(reinterpret_cast(p), pair); - } - - // currently unused - // Technically this could be done with ST4 like in store_ascii_as_utf16, but - // it is very much not worth it, as explicitly mentioned in the ARM Cortex-X1 - // Core Software Optimization Guide: - // 4.18 Complex ASIMD instructions - // The bandwidth of [ST4 with element size less than 64b] is limited by - // decode constraints and it is advisable to avoid them when high - // performing code is desired. - // Instead, it is better to use ZIP1+ZIP2 and two ST2. 
- simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { - const uint16x8_t low = - vreinterpretq_u16_s8(vzip1q_s8(this->value, vmovq_n_s8(0))); - const uint16x8_t high = - vreinterpretq_u16_s8(vzip2q_s8(this->value, vmovq_n_s8(0))); - const uint16x8x2_t low_pair{{low, vmovq_n_u16(0)}}; - vst2q_u16(reinterpret_cast(p), low_pair); - const uint16x8x2_t high_pair{{high, vmovq_n_u16(0)}}; - vst2q_u16(reinterpret_cast(p + 8), high_pair); - } - - // In places where the table can be reused, which is most uses in simdutf, it - // is worth it to do 4 table lookups, as there is no direct zero extension - // from u8 to u32. - simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { - const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, - 2, 255, 255, 255, 3, 255, 255, 255}; - const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, - 6, 255, 255, 255, 7, 255, 255, 255}; - const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, - 10, 255, 255, 255, 11, 255, 255, 255}; - const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, - 14, 255, 255, 255, 15, 255, 255, 255}; - - // encourage store pairing and interleaving - const auto shuf1 = this->apply_lookup_16_to(tb1); - const auto shuf2 = this->apply_lookup_16_to(tb2); - shuf1.store(reinterpret_cast(p)); - shuf2.store(reinterpret_cast(p + 4)); - - const auto shuf3 = this->apply_lookup_16_to(tb3); - const auto shuf4 = this->apply_lookup_16_to(tb4); - shuf3.store(reinterpret_cast(p + 8)); - shuf4.store(reinterpret_cast(p + 12)); - } - // Conversion from/to SIMD register - simdutf_really_inline simd8(const int8x16_t _value) : value{_value} {} - simdutf_really_inline operator const int8x16_t &() const { - return this->value; - } -#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline operator const uint8x16_t() const { - return vreinterpretq_u8_s8(this->value); - } -#endif - simdutf_really_inline operator int8x16_t &() { return this->value; } - - // Zero constructor - simdutf_really_inline simd8() : 
simd8(zero()) {} - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8(simdutf_make_int8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15)) {} -#else - simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8(int8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} -#endif - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Store to array - simdutf_really_inline void store(int8_t dst[16]) const { - return vst1q_s8(dst, value); - } - // Explicit conversion to/from unsigned - // - // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same - // type. In theory, we could check this occurrence with std::same_as and - // std::enabled_if but it is C++14 and relatively ugly and hard to read. 
-#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline explicit simd8(const uint8x16_t other) - : simd8(vreinterpretq_s8_u8(other)) {} -#endif - simdutf_really_inline operator simd8() const { - return vreinterpretq_u8_s8(this->value); - } - - simdutf_really_inline simd8 - operator|(const simd8 other) const { - return vorrq_s8(value, other.value); - } - simdutf_really_inline simd8 - operator&(const simd8 other) const { - return vandq_s8(value, other.value); - } - simdutf_really_inline simd8 - operator^(const simd8 other) const { - return veorq_s8(value, other.value); - } - simdutf_really_inline simd8 - bit_andnot(const simd8 other) const { - return vbicq_s8(value, other.value); - } - - // Math - simdutf_really_inline simd8 - operator+(const simd8 other) const { - return vaddq_s8(value, other.value); - } - simdutf_really_inline simd8 - operator-(const simd8 other) const { - return vsubq_s8(value, other.value); - } - simdutf_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *this; - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *this; - } - - simdutf_really_inline int8_t max_val() const { return vmaxvq_s8(value); } - simdutf_really_inline int8_t min_val() const { return vminvq_s8(value); } - simdutf_really_inline bool is_ascii() const { return this->min_val() >= 0; } - - // Order-sensitive comparisons - simdutf_really_inline simd8 max_val(const simd8 other) const { - return vmaxq_s8(value, other.value); - } - simdutf_really_inline simd8 min_val(const simd8 other) const { - return vminq_s8(value, other.value); - } - simdutf_really_inline simd8 operator>(const simd8 other) const { - return vcgtq_s8(value, other.value); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return vcltq_s8(value, other.value); - } - simdutf_really_inline simd8 - operator==(const simd8 other) const { - return vceqq_s8(value, other.value); - } - - template - 
simdutf_really_inline simd8 - prev(const simd8 prev_chunk) const { - return vextq_s8(prev_chunk, *this, 16 - N); - } - - // Perform a lookup assuming no value is larger than 16 - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } - - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - return vqtbl1q_s8(*this, simd8(original)); - } -}; - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "ARM kernel should use four registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); - 
this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - this->chunks[2] |= other.chunks[2]; - this->chunks[3] |= other.chunks[3]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - this->chunks[2].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 2); - this->chunks[3].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 3); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); - this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); - this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = - simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - // Add each of the elements next to each other, successively, to stuff each - // 8 byte mask into one. 
- uint8x16_t sum0 = - vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[0]), bit_mask), - vandq_u8(uint8x16_t(this->chunks[1]), bit_mask)); - uint8x16_t sum1 = - vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[2]), bit_mask), - vandq_u8(uint8x16_t(this->chunks[3]), bit_mask)); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - - return simd8x64( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), - (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), - (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - return simd8x64( - (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), - (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), - (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), - (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, 
this->chunks[3] < mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, - this->chunks[2] > mask, this->chunks[3] > mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask, - this->chunks[2] >= mask, this->chunks[3] >= mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(simd8(uint8x16_t(this->chunks[0])) >= mask, - simd8(uint8x16_t(this->chunks[1])) >= mask, - simd8(uint8x16_t(this->chunks[2])) >= mask, - simd8(uint8x16_t(this->chunks[3])) >= mask) - .to_bitmask(); - } -}; // struct simd8x64 -/* begin file src/simdutf/arm64/simd16-inl.h */ -template struct simd16; - -template > struct base_u16 { - uint16x8_t value; - static const int SIZE = sizeof(value); - - // Conversion from/to SIMD register - simdutf_really_inline base_u16() = default; - simdutf_really_inline base_u16(const uint16x8_t _value) : value(_value) {} - simdutf_really_inline operator const uint16x8_t &() const { - return this->value; - } - simdutf_really_inline operator uint16x8_t &() { return this->value; } - // Bit operations - simdutf_really_inline simd16 operator|(const simd16 other) const { - return vorrq_u16(*this, other); - } - simdutf_really_inline simd16 operator&(const simd16 other) const { - return vandq_u16(*this, other); - } - simdutf_really_inline simd16 operator^(const simd16 other) const { - return veorq_u16(*this, other); - } - simdutf_really_inline simd16 bit_andnot(const simd16 other) const { - return vbicq_u16(*this, other); - } - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } - simdutf_really_inline simd16 &operator|=(const simd16 other) { - auto this_cast = static_cast *>(this); - *this_cast = 
*this_cast | other; - return *this_cast; - } - simdutf_really_inline simd16 &operator&=(const simd16 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdutf_really_inline simd16 &operator^=(const simd16 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } - - friend simdutf_really_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return vceqq_u16(lhs, rhs); - } - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return vextq_u18(prev_chunk, *this, 8 - N); - } -}; - -template > -struct base16 : base_u16 { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdutf_really_inline base16() : base_u16() {} - simdutf_really_inline base16(const uint16x8_t _value) : base_u16(_value) {} - template - simdutf_really_inline base16(const Pointer *ptr) : base16(vld1q_u16(ptr)) {} - - static const int SIZE = sizeof(base_u16::value); - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return vextq_u18(prev_chunk, *this, 8 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return vmovq_n_u16(uint16_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const uint16x8_t _value) - : base16(_value) {} - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} -}; - -template struct base16_numeric : base16 { - static simdutf_really_inline simd16 splat(T _value) { - return vmovq_n_u16(_value); - } - static simdutf_really_inline simd16 zero() { return vdupq_n_u16(0); } - static simdutf_really_inline simd16 load(const T values[8]) { - return vld1q_u16(reinterpret_cast(values)); - } - - simdutf_really_inline base16_numeric() : base16() {} - simdutf_really_inline 
base16_numeric(const uint16x8_t _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return vst1q_u16(dst, *this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd16 operator+(const simd16 other) const { - return vaddq_u8(*this, other); - } - simdutf_really_inline simd16 operator-(const simd16 other) const { - return vsubq_u8(*this, other); - } - simdutf_really_inline simd16 &operator+=(const simd16 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdutf_really_inline simd16 &operator-=(const simd16 other) { - *this = *this - other; - return *static_cast *>(this); - } -}; - -// Signed code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} -#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline simd16(const uint16x8_t _value) - : base16_numeric(_value) {} -#endif - simdutf_really_inline simd16(const int16x8_t _value) - : base16_numeric(vreinterpretq_u16_s16(_value)) {} - - // Splat constructor - simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - simdutf_really_inline operator simd16() const; - simdutf_really_inline operator const uint16x8_t &() const { - return this->value; - } - simdutf_really_inline operator const int16x8_t() const { - return vreinterpretq_s16_u16(this->value); - } - - simdutf_really_inline int16_t max_val() const { - return vmaxvq_s16(vreinterpretq_s16_u16(this->value)); - } - simdutf_really_inline int16_t min_val() const { - return vminvq_s16(vreinterpretq_s16_u16(this->value)); - } - // Order-sensitive 
comparisons - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return vmaxq_s16(vreinterpretq_s16_u16(this->value), - vreinterpretq_s16_u16(other.value)); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return vmaxq_s16(vreinterpretq_s16_u16(this->value), - vreinterpretq_s16_u16(other.value)); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return vcgtq_s16(vreinterpretq_s16_u16(this->value), - vreinterpretq_s16_u16(other.value)); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return vcltq_s16(vreinterpretq_s16_u16(this->value), - vreinterpretq_s16_u16(other.value)); - } -}; - -// Unsigned code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const uint16x8_t _value) - : base16_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - simdutf_really_inline int16_t max_val() const { return vmaxvq_u16(*this); } - simdutf_really_inline int16_t min_val() const { return vminvq_u16(*this); } - // Saturated math - simdutf_really_inline simd16 - saturating_add(const simd16 other) const { - return vqaddq_u16(*this, other); - } - simdutf_really_inline simd16 - saturating_sub(const simd16 other) const { - return vqsubq_u16(*this, other); - } - - // Order-specific operations - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return vmaxq_u16(*this, other); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return vminq_u16(*this, other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - gt_bits(const simd16 other) 
const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - lt_bits(const simd16 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd16 - operator<=(const simd16 other) const { - return vcleq_u16(*this, other); - } - simdutf_really_inline simd16 - operator>=(const simd16 other) const { - return vcgeq_u16(*this, other); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return vcgtq_u16(*this, other); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return vcltq_u16(*this, other); - } - - // Bit-specific operations - simdutf_really_inline simd16 bits_not_set() const { - return *this == uint16_t(0); - } - template simdutf_really_inline simd16 shr() const { - return simd16(vshrq_n_u16(*this, N)); - } - template simdutf_really_inline simd16 shl() const { - return simd16(vshlq_n_u16(*this, N)); - } - - // logical operations - simdutf_really_inline simd16 - operator|(const simd16 other) const { - return vorrq_u16(*this, other); - } - simdutf_really_inline simd16 - operator&(const simd16 other) const { - return vandq_u16(*this, other); - } - simdutf_really_inline simd16 - operator^(const simd16 other) const { - return veorq_u16(*this, other); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - return vqmovn_high_u16(vqmovn_u16(v0), v1); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(*this))); - } -}; -simdutf_really_inline simd16::operator simd16() const { - return this->value; -} - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 4, - "ARM kernel should use four registers per 64-byte 
block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline - simd16x32(const simd16 chunk0, const simd16 chunk1, - const simd16 chunk2, const simd16 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); - } - - simdutf_really_inline simd16 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } - - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); - this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); - this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); - this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = - simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - // Add each of the elements next to each other, successively, to stuff each - // 8 
byte mask into one. - uint8x16_t sum0 = vpaddq_u8( - vreinterpretq_u8_u16(this->chunks[0] & vreinterpretq_u16_u8(bit_mask)), - vreinterpretq_u8_u16(this->chunks[1] & vreinterpretq_u16_u8(bit_mask))); - uint8x16_t sum1 = vpaddq_u8( - vreinterpretq_u8_u16(this->chunks[2] & vreinterpretq_u16_u8(bit_mask)), - vreinterpretq_u8_u16(this->chunks[3] & vreinterpretq_u16_u8(bit_mask))); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - this->chunks[2] = this->chunks[2].swap_bytes(); - this->chunks[3] = this->chunks[3].swap_bytes(); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - - return simd16x32( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), - (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), - (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - return simd16x32( - (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), - (this->chunks[1] > 
mask_high) | (this->chunks[1] < mask_low), - (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), - (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } - -}; // struct simd16x32 -template <> -simdutf_really_inline uint64_t simd16x32::not_in_range( - const uint16_t low, const uint16_t high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - simd16x32 x(simd16((this->chunks[0] > mask_high) | - (this->chunks[0] < mask_low)), - simd16((this->chunks[1] > mask_high) | - (this->chunks[1] < mask_low)), - simd16((this->chunks[2] > mask_high) | - (this->chunks[2] < mask_low)), - simd16((this->chunks[3] > mask_high) | - (this->chunks[3] < mask_low))); - return x.to_bitmask(); -} -/* end file src/simdutf/arm64/simd16-inl.h */ -} // namespace simd -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf - -#endif // SIMDUTF_ARM64_SIMD_H -/* end file src/simdutf/arm64/simd.h */ - -/* begin file src/simdutf/arm64/end.h */ -/* end file src/simdutf/arm64/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_ARM64 - -#endif // SIMDUTF_ARM64_H -/* end file src/simdutf/arm64.h */ -/* begin file src/simdutf/icelake.h */ -#ifndef SIMDUTF_ICELAKE_H -#define SIMDUTF_ICELAKE_H - - -#ifdef __has_include - // How do we detect that a compiler supports vbmi2? - // For sure if the following header is found, we are ok? - #if __has_include() - #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 - #endif -#endif - -#ifdef _MSC_VER - #if _MSC_VER >= 1930 - // Visual Studio 2022 and up support VBMI2 under x64 even if the header - // avx512vbmi2intrin.h is not found. - // Visual Studio 2019 technically supports VBMI2, but the implementation - // might be unreliable. 
Search for visualstudio2019icelakeissue in our - // tests. - #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 - #endif -#endif - -// We allow icelake on x64 as long as the compiler is known to support VBMI2. -#ifndef SIMDUTF_IMPLEMENTATION_ICELAKE - #define SIMDUTF_IMPLEMENTATION_ICELAKE \ - ((SIMDUTF_IS_X86_64) && (SIMDUTF_COMPILER_SUPPORTS_VBMI2)) -#endif - -// To see why (__BMI__) && (__LZCNT__) are not part of this next line, see -// https://github.com/simdutf/simdutf/issues/1247 -#if ((SIMDUTF_IMPLEMENTATION_ICELAKE) && (SIMDUTF_IS_X86_64) && (__AVX2__) && \ - (SIMDUTF_HAS_AVX512F && SIMDUTF_HAS_AVX512DQ && SIMDUTF_HAS_AVX512VL && \ - SIMDUTF_HAS_AVX512VBMI2) && \ - (!SIMDUTF_IS_32BITS)) - #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE 0 -#endif - -#if SIMDUTF_IMPLEMENTATION_ICELAKE - #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE - #define SIMDUTF_TARGET_ICELAKE - #else - #define SIMDUTF_TARGET_ICELAKE \ - SIMDUTF_TARGET_REGION( \ - "avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2," \ - "avx512vl,avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512vpopcntdq") - #endif - -namespace simdutf { -namespace icelake {} // namespace icelake -} // namespace simdutf - - // - // These two need to be included outside SIMDUTF_TARGET_REGION - // -/* begin file src/simdutf/icelake/intrinsics.h */ -#ifndef SIMDUTF_ICELAKE_INTRINSICS_H -#define SIMDUTF_ICELAKE_INTRINSICS_H - - -#ifdef SIMDUTF_VISUAL_STUDIO - // under clang within visual studio, this will include - #include // visual studio or clang - #include -#else - - #if SIMDUTF_GCC11ORMORE -// We should not get warnings while including yet we do -// under some versions of GCC. -// If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnings. 
-SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) - #endif - - #include // elsewhere - - #if SIMDUTF_GCC11ORMORE -// cancels the suppression of the -Wuninitialized -SIMDUTF_POP_DISABLE_WARNINGS - #endif - - #ifndef _tzcnt_u64 - #define _tzcnt_u64(x) __tzcnt_u64(x) - #endif // _tzcnt_u64 -#endif // SIMDUTF_VISUAL_STUDIO - -#ifdef SIMDUTF_CLANG_VISUAL_STUDIO - /** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdutf, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ - #include // for _blsr_u64 - #include // for _pext_u64, _pdep_u64 - #include // for __lzcnt64 - #include // for most things (AVX2, AVX512, _popcnt64) - #include - #include - #include - #include - // Important: we need the AVX-512 headers: - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - // unfortunately, we may not get _blsr_u64, but, thankfully, clang - // has it as a macro. 
- #ifndef _blsr_u64 - // we roll our own - #define _blsr_u64(n) ((n - 1) & n) - #endif // _blsr_u64 -#endif // SIMDUTF_CLANG_VISUAL_STUDIO - -#if defined(__GNUC__) && !defined(__clang__) - - #if __GNUC__ == 8 - #define SIMDUTF_GCC8 1 - #elif __GNUC__ == 9 - #define SIMDUTF_GCC9 1 - #endif // __GNUC__ == 8 || __GNUC__ == 9 - -#endif // defined(__GNUC__) && !defined(__clang__) - -#if SIMDUTF_GCC8 - #pragma GCC push_options - #pragma GCC target("avx512f") -/** - * GCC 8 fails to provide _mm512_set_epi8. We roll our own. - */ -inline __m512i -_mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, - uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, - uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, - uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, - uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, - uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, - uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, - uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, - uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, - uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, - uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, - uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, - uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { - return _mm512_set_epi64( - uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + - (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + - (uint64_t(a1) << 48) + (uint64_t(a0) << 56), - uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + - (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + - (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), - uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + - (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + - (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), - 
uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + - (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + - (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), - uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + - (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + - (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), - uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + - (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + - (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), - uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + - (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + - (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), - uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + - (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + - (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + - (uint64_t(a56) << 56)); -} - #pragma GCC pop_options -#endif // SIMDUTF_GCC8 - -#endif // SIMDUTF_HASWELL_INTRINSICS_H -/* end file src/simdutf/icelake/intrinsics.h */ -/* begin file src/simdutf/icelake/implementation.h */ -#ifndef SIMDUTF_ICELAKE_IMPLEMENTATION_H -#define SIMDUTF_ICELAKE_IMPLEMENTATION_H - - -namespace simdutf { -namespace icelake { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation( - "icelake", - "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 " - "extensions)", - internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | - internal::instruction_set::BMI2 | - internal::instruction_set::AVX512BW | - internal::instruction_set::AVX512CD | - internal::instruction_set::AVX512VL | - internal::instruction_set::AVX512VBMI2 | - internal::instruction_set::AVX512VPOPCNTDQ) {} - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; - simdutf_warn_unused bool 
validate_utf8(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused size_t 
convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t 
len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result 
convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, 
size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused result - base64_to_binary(const 
char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -}; - -} // namespace icelake -} // namespace simdutf - -#endif // SIMDUTF_ICELAKE_IMPLEMENTATION_H -/* end file src/simdutf/icelake/implementation.h */ - - // - // The rest need to be inside the region - // -/* begin file src/simdutf/icelake/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "icelake" -// #define SIMDUTF_IMPLEMENTATION icelake - -#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE -// nothing needed. 
-#else -SIMDUTF_TARGET_ICELAKE -#endif - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -// clang-format off -SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) -// clang-format on -#endif // end of workaround -/* end file src/simdutf/icelake/begin.h */ - // Declarations -/* begin file src/simdutf/icelake/bitmanipulation.h */ -#ifndef SIMDUTF_ICELAKE_BITMANIPULATION_H -#define SIMDUTF_ICELAKE_BITMANIPULATION_H - -namespace simdutf { -namespace icelake { -namespace { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdutf_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_really_inline int trailing_zeroes(uint64_t input_num) { - #if SIMDUTF_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); - #else // SIMDUTF_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); - #endif // SIMDUTF_REGULAR_VISUAL_STUDIO -} -#endif - -} // unnamed namespace -} // namespace icelake -} // namespace simdutf - -#endif // SIMDUTF_ICELAKE_BITMANIPULATION_H -/* end file src/simdutf/icelake/bitmanipulation.h */ -/* begin file src/simdutf/icelake/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE -// nothing needed. 
-#else -SIMDUTF_UNTARGET_REGION -#endif - - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_POP_DISABLE_WARNINGS -#endif // end of workaround -/* end file src/simdutf/icelake/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_ICELAKE -#endif // SIMDUTF_ICELAKE_H -/* end file src/simdutf/icelake.h */ -/* begin file src/simdutf/haswell.h */ -#ifndef SIMDUTF_HASWELL_H -#define SIMDUTF_HASWELL_H - -#ifdef SIMDUTF_WESTMERE_H - #error "haswell.h must be included before westmere.h" -#endif -#ifdef SIMDUTF_FALLBACK_H - #error "haswell.h must be included before fallback.h" -#endif - - -// Default Haswell to on if this is x86-64. Even if we are not compiled for it, -// it could be selected at runtime. -#ifndef SIMDUTF_IMPLEMENTATION_HASWELL - // - // You do not want to restrict it like so: SIMDUTF_IS_X86_64 && __AVX2__ - // because we want to rely on *runtime dispatch*. - // - #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE - #define SIMDUTF_IMPLEMENTATION_HASWELL 0 - #else - #define SIMDUTF_IMPLEMENTATION_HASWELL (SIMDUTF_IS_X86_64) - #endif - -#endif -// To see why (__BMI__) && (__LZCNT__) are not part of this next line, see -// https://github.com/simdutf/simdutf/issues/1247 -#if ((SIMDUTF_IMPLEMENTATION_HASWELL) && (SIMDUTF_IS_X86_64) && (__AVX2__)) - #define SIMDUTF_CAN_ALWAYS_RUN_HASWELL 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_HASWELL 0 -#endif - -#if SIMDUTF_IMPLEMENTATION_HASWELL - - #define SIMDUTF_TARGET_HASWELL SIMDUTF_TARGET_REGION("avx2,bmi,lzcnt,popcnt") - -namespace simdutf { -/** - * Implementation for Haswell (Intel AVX2). 
- */ -namespace haswell {} // namespace haswell -} // namespace simdutf - - // - // These two need to be included outside SIMDUTF_TARGET_REGION - // -/* begin file src/simdutf/haswell/implementation.h */ -#ifndef SIMDUTF_HASWELL_IMPLEMENTATION_H -#define SIMDUTF_HASWELL_IMPLEMENTATION_H - - -// The constructor may be executed on any host, so we take care not to use -// SIMDUTF_TARGET_REGION -namespace simdutf { -namespace haswell { - -using namespace simdutf; - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("haswell", "Intel/AMD AVX2", - internal::instruction_set::AVX2 | - internal::instruction_set::BMI1 | - internal::instruction_set::BMI2) {} - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char 
*utf8_output) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char 
*buf, size_t len, char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - 
simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - 
simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t length) const noexcept; 
- simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; - simdutf_warn_unused virtual size_t - maximal_binary_length_from_base64(const char *input, - size_t length) const noexcept; - simdutf_warn_unused virtual result - base64_to_binary(const char *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual size_t - maximal_binary_length_from_base64(const char16_t *input, - size_t length) const noexcept; - simdutf_warn_unused virtual result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual size_t - base64_length_from_binary(size_t length, - base64_options options) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -}; - -} // namespace haswell -} // namespace simdutf - -#endif // SIMDUTF_HASWELL_IMPLEMENTATION_H -/* end file src/simdutf/haswell/implementation.h */ -/* begin file src/simdutf/haswell/intrinsics.h */ -#ifndef SIMDUTF_HASWELL_INTRINSICS_H -#define SIMDUTF_HASWELL_INTRINSICS_H - - -#ifdef SIMDUTF_VISUAL_STUDIO - // under clang within visual studio, this will include - #include // visual studio or clang -#else - - #if 
SIMDUTF_GCC11ORMORE -// We should not get warnings while including yet we do -// under some versions of GCC. -// If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnings. -SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) - #endif - - #include // elsewhere - - #if SIMDUTF_GCC11ORMORE -// cancels the suppression of the -Wuninitialized -SIMDUTF_POP_DISABLE_WARNINGS - #endif - -#endif // SIMDUTF_VISUAL_STUDIO - -#ifdef SIMDUTF_CLANG_VISUAL_STUDIO - /** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdutf, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ - #include // for _blsr_u64 - #include // for __lzcnt64 - #include // for most things (AVX2, AVX512, _popcnt64) - #include - #include - #include - #include - // unfortunately, we may not get _blsr_u64, but, thankfully, clang - // has it as a macro. 
- #ifndef _blsr_u64 - // we roll our own - #define _blsr_u64(n) ((n - 1) & n) - #endif // _blsr_u64 -#endif // SIMDUTF_CLANG_VISUAL_STUDIO - -#endif // SIMDUTF_HASWELL_INTRINSICS_H -/* end file src/simdutf/haswell/intrinsics.h */ - - // - // The rest need to be inside the region - // -/* begin file src/simdutf/haswell/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "haswell" -// #define SIMDUTF_IMPLEMENTATION haswell - -#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL -// nothing needed. -#else -SIMDUTF_TARGET_HASWELL -#endif - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -// clang-format off -SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) -// clang-format on -#endif // end of workaround -/* end file src/simdutf/haswell/begin.h */ - // Declarations -/* begin file src/simdutf/haswell/bitmanipulation.h */ -#ifndef SIMDUTF_HASWELL_BITMANIPULATION_H -#define SIMDUTF_HASWELL_BITMANIPULATION_H - -namespace simdutf { -namespace haswell { -namespace { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdutf_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_inline int trailing_zeroes(uint64_t input_num) { - #if SIMDUTF_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); - #else // SIMDUTF_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); - #endif // SIMDUTF_REGULAR_VISUAL_STUDIO -} -#endif - -} // unnamed namespace -} // namespace haswell -} // namespace simdutf - -#endif // SIMDUTF_HASWELL_BITMANIPULATION_H -/* end file src/simdutf/haswell/bitmanipulation.h */ -/* begin file src/simdutf/haswell/simd.h */ -#ifndef SIMDUTF_HASWELL_SIMD_H -#define SIMDUTF_HASWELL_SIMD_H - -namespace simdutf { -namespace haswell { 
-namespace { -namespace simd { - -// Forward-declared so they can be used by splat and friends. -template struct base { - __m256i value; - - // Zero constructor - simdutf_really_inline base() : value{__m256i()} {} - - // Conversion from SIMD register - simdutf_really_inline base(const __m256i _value) : value(_value) {} - // Conversion to SIMD register - simdutf_really_inline operator const __m256i &() const { return this->value; } - simdutf_really_inline operator __m256i &() { return this->value; } - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - __m256i first = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(*this)); - __m256i second = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(*this, 1)); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - first = _mm256_shuffle_epi8(first, swap); - second = _mm256_shuffle_epi8(second, swap); - } - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), first); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 16), second); - } - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), - _mm256_cvtepu8_epi32(_mm256_castsi256_si128(*this))); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 8), - _mm256_cvtepu8_epi32(_mm256_castsi256_si128( - _mm256_srli_si256(*this, 8)))); - _mm256_storeu_si256( - reinterpret_cast<__m256i *>(ptr + 16), - _mm256_cvtepu8_epi32(_mm256_extractf128_si256(*this, 1))); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 24), - _mm256_cvtepu8_epi32(_mm_srli_si128( - _mm256_extractf128_si256(*this, 1), 8))); - } - // Bit operations - simdutf_really_inline Child operator|(const Child other) const { - return _mm256_or_si256(*this, other); - } - simdutf_really_inline Child operator&(const Child other) const { - return _mm256_and_si256(*this, other); - } - 
simdutf_really_inline Child operator^(const Child other) const { - return _mm256_xor_si256(*this, other); - } - simdutf_really_inline Child bit_andnot(const Child other) const { - return _mm256_andnot_si256(other, *this); - } - simdutf_really_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdutf_really_inline Child &operator&=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdutf_really_inline Child &operator^=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } -}; - -// Forward-declared so they can be used by splat and friends. -template struct simd8; - -template > -struct base8 : base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; - - simdutf_really_inline base8() : base>() {} - simdutf_really_inline base8(const __m256i _value) : base>(_value) {} - simdutf_really_inline T first() const { - return _mm256_extract_epi8(*this, 0); - } - simdutf_really_inline T last() const { - return _mm256_extract_epi8(*this, 31); - } - friend simdutf_always_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return _mm256_cmpeq_epi8(lhs, rhs); - } - - static const int SIZE = sizeof(base::value); - - template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - return _mm256_alignr_epi8( - *this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdutf_really_inline simd8 splat(bool _value) { - return _mm256_set1_epi8(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8() : base8() {} - simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} - - simdutf_really_inline 
uint32_t to_bitmask() const { - return uint32_t(_mm256_movemask_epi8(*this)); - } - simdutf_really_inline bool any() const { - return !_mm256_testz_si256(*this, *this); - } - simdutf_really_inline bool none() const { - return _mm256_testz_si256(*this, *this); - } - simdutf_really_inline bool all() const { - return static_cast(_mm256_movemask_epi8(*this)) == 0xFFFFFFFF; - } - simdutf_really_inline simd8 operator~() const { return *this ^ true; } -}; - -template struct base8_numeric : base8 { - static simdutf_really_inline simd8 splat(T _value) { - return _mm256_set1_epi8(_value); - } - static simdutf_really_inline simd8 zero() { - return _mm256_setzero_si256(); - } - static simdutf_really_inline simd8 load(const T values[32]) { - return _mm256_loadu_si256(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15); - } - - simdutf_really_inline base8_numeric() : base8() {} - simdutf_really_inline base8_numeric(const __m256i _value) - : base8(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[32]) const { - return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); - } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd8 operator+(const simd8 other) const { - return _mm256_add_epi8(*this, other); - } - simdutf_really_inline simd8 operator-(const simd8 other) const { - return _mm256_sub_epi8(*this, other); - } - simdutf_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return 
*static_cast *>(this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm256_shuffle_epi8(lookup_table, *this); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} - simdutf_really_inline operator simd8() const; - // Member-by-member initialization - simdutf_really_inline - simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, - int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, - int8_t v30, int8_t v31) - : simd8(_mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, - v22, v23, v24, v25, v26, v27, v28, v29, v30, - v31)) {} - // Repeat 16 values as many 
times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15); - } - simdutf_really_inline bool is_ascii() const { - return _mm256_movemask_epi8(*this) == 0; - } - // Order-sensitive comparisons - simdutf_really_inline simd8 max_val(const simd8 other) const { - return _mm256_max_epi8(*this, other); - } - simdutf_really_inline simd8 min_val(const simd8 other) const { - return _mm256_min_epi8(*this, other); - } - simdutf_really_inline simd8 operator>(const simd8 other) const { - return _mm256_cmpgt_epi8(*this, other); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return _mm256_cmpgt_epi8(other, *this); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, - uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, - uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, - uint8_t v31) - : simd8(_mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, 
v14, v15, v16, v17, v18, v19, v20, v21, - v22, v23, v24, v25, v26, v27, v28, v29, v30, - v31)) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15); - } - - // Saturated math - simdutf_really_inline simd8 - saturating_add(const simd8 other) const { - return _mm256_adds_epu8(*this, other); - } - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return _mm256_subs_epu8(*this, other); - } - - // Order-specific operations - simdutf_really_inline simd8 - max_val(const simd8 other) const { - return _mm256_max_epu8(*this, other); - } - simdutf_really_inline simd8 - min_val(const simd8 other) const { - return _mm256_min_epu8(other, *this); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd8 - operator<=(const simd8 other) const { - return other.max_val(*this) == other; - } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - simdutf_really_inline simd8 - operator>(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdutf_really_inline simd8 - operator<(const simd8 other) const { - return this->lt_bits(other).any_bits_set(); - } - - // Bit-specific operations - 
simdutf_really_inline simd8 bits_not_set() const { - return *this == uint8_t(0); - } - simdutf_really_inline simd8 bits_not_set(simd8 bits) const { - return (*this & bits).bits_not_set(); - } - simdutf_really_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdutf_really_inline simd8 any_bits_set(simd8 bits) const { - return ~this->bits_not_set(bits); - } - simdutf_really_inline bool is_ascii() const { - return _mm256_movemask_epi8(*this) == 0; - } - simdutf_really_inline bool bits_not_set_anywhere() const { - return _mm256_testz_si256(*this, *this); - } - simdutf_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdutf_really_inline bool bits_not_set_anywhere(simd8 bits) const { - return _mm256_testz_si256(*this, bits); - } - simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { - return !bits_not_set_anywhere(bits); - } - template simdutf_really_inline simd8 shr() const { - return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); - } - template simdutf_really_inline simd8 shl() const { - return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); - } - // Get one of the bits and make a bitmask out of it. - // e.g. 
value.get_bit<7>() gets the high bit - template simdutf_really_inline int get_bit() const { - return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7 - N)); - } -}; -simdutf_really_inline simd8::operator simd8() const { - return this->value; -} - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, - "Haswell kernel should use two registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) - : chunks{chunk0, chunk1} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) 
* 0); - this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); - } - - simdutf_really_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] | mask, this->chunks[1] | mask); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1]) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - - return simd8x64( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - return simd8x64( - (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), - (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= 
mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), - (simd8(__m256i(this->chunks[1])) >= mask)) - .to_bitmask(); - } -}; // struct simd8x64 - -/* begin file src/simdutf/haswell/simd16-inl.h */ -#ifdef __GNUC__ - #if __GNUC__ < 8 - #define _mm256_set_m128i(xmm1, xmm2) \ - _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \ - _mm256_castsi128_si256(xmm2), 2) - #define _mm256_setr_m128i(xmm2, xmm1) \ - _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \ - _mm256_castsi128_si256(xmm2), 2) - #endif -#endif - -template struct simd16; - -template > -struct base16 : base> { - using bitmask_type = uint32_t; - - simdutf_really_inline base16() : base>() {} - simdutf_really_inline base16(const __m256i _value) - : base>(_value) {} - template - simdutf_really_inline base16(const Pointer *ptr) - : base16(_mm256_loadu_si256(reinterpret_cast(ptr))) {} - friend simdutf_always_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return _mm256_cmpeq_epi16(lhs, rhs); - } - - /// the size of vector in bytes - static const int SIZE = sizeof(base>::value); - - /// the number of elements of type T a vector can hold - static const int ELEMENTS = SIZE / sizeof(T); - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return _mm256_alignr_epi8(*this, prev_chunk, 16 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return _mm256_set1_epi16(uint16_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} - - simdutf_really_inline bitmask_type to_bitmask() const { - return 
_mm256_movemask_epi8(*this); - } - simdutf_really_inline bool any() const { - return !_mm256_testz_si256(*this, *this); - } - simdutf_really_inline simd16 operator~() const { return *this ^ true; } -}; - -template struct base16_numeric : base16 { - static simdutf_really_inline simd16 splat(T _value) { - return _mm256_set1_epi16(_value); - } - static simdutf_really_inline simd16 zero() { - return _mm256_setzero_si256(); - } - static simdutf_really_inline simd16 load(const T values[8]) { - return _mm256_loadu_si256(reinterpret_cast(values)); - } - - simdutf_really_inline base16_numeric() : base16() {} - simdutf_really_inline base16_numeric(const __m256i _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd16 operator+(const simd16 other) const { - return _mm256_add_epi16(*this, other); - } - simdutf_really_inline simd16 operator-(const simd16 other) const { - return _mm256_sub_epi16(*this, other); - } - simdutf_really_inline simd16 &operator+=(const simd16 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdutf_really_inline simd16 &operator-=(const simd16 other) { - *this = *this - other; - return *static_cast *>(this); - } -}; - -// Signed code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m256i _value) - : base16_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : 
simd16(load(reinterpret_cast(values))) {} - // Order-sensitive comparisons - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return _mm256_max_epi16(*this, other); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return _mm256_min_epi16(*this, other); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return _mm256_cmpgt_epi16(*this, other); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return _mm256_cmpgt_epi16(other, *this); - } -}; - -// Unsigned code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m256i _value) - : base16_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - // Saturated math - simdutf_really_inline simd16 - saturating_add(const simd16 other) const { - return _mm256_adds_epu16(*this, other); - } - simdutf_really_inline simd16 - saturating_sub(const simd16 other) const { - return _mm256_subs_epu16(*this, other); - } - - // Order-specific operations - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return _mm256_max_epu16(*this, other); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return _mm256_min_epu16(*this, other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - gt_bits(const simd16 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - lt_bits(const simd16 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd16 - 
operator<=(const simd16 other) const { - return other.max_val(*this) == other; - } - simdutf_really_inline simd16 - operator>=(const simd16 other) const { - return other.min_val(*this) == other; - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return this->gt_bits(other).any_bits_set(); - } - - // Bit-specific operations - simdutf_really_inline simd16 bits_not_set() const { - return *this == uint16_t(0); - } - simdutf_really_inline simd16 bits_not_set(simd16 bits) const { - return (*this & bits).bits_not_set(); - } - simdutf_really_inline simd16 any_bits_set() const { - return ~this->bits_not_set(); - } - simdutf_really_inline simd16 any_bits_set(simd16 bits) const { - return ~this->bits_not_set(bits); - } - - simdutf_really_inline bool bits_not_set_anywhere() const { - return _mm256_testz_si256(*this, *this); - } - simdutf_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdutf_really_inline bool - bits_not_set_anywhere(simd16 bits) const { - return _mm256_testz_si256(*this, bits); - } - simdutf_really_inline bool - any_bits_set_anywhere(simd16 bits) const { - return !bits_not_set_anywhere(bits); - } - template simdutf_really_inline simd16 shr() const { - return simd16(_mm256_srli_epi16(*this, N)); - } - template simdutf_really_inline simd16 shl() const { - return simd16(_mm256_slli_epi16(*this, N)); - } - // Get one of the bits and make a bitmask out of it. - // e.g. 
value.get_bit<7>() gets the high bit - template simdutf_really_inline int get_bit() const { - return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 15 - N)); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - return _mm256_shuffle_epi8(*this, swap); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - // Note: the AVX2 variant of pack operates on 128-bit lanes, thus - // we have to shuffle lanes in order to produce bytes in the - // correct order. - - // get the 0th lanes - const __m128i lo_0 = _mm256_extracti128_si256(v0, 0); - const __m128i lo_1 = _mm256_extracti128_si256(v1, 0); - - // get the 1st lanes - const __m128i hi_0 = _mm256_extracti128_si256(v0, 1); - const __m128i hi_1 = _mm256_extracti128_si256(v1, 1); - - // build new vectors (shuffle lanes) - const __m256i t0 = _mm256_set_m128i(lo_1, lo_0); - const __m256i t1 = _mm256_set_m128i(hi_1, hi_0); - - // pack code units in linear order from v0 and v1 - return _mm256_packus_epi16(t0, t1); - } -}; - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 2, - "Haswell kernel should use two registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline simd16x32(const simd16 chunk0, - const simd16 chunk1) - : chunks{chunk0, chunk1} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T 
*ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } - - simdutf_really_inline simd16 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); - this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16)); - } - - simdutf_really_inline simd16x32 bit_or(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] | mask, this->chunks[1] | mask); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t eq(const simd16x32 &other) const { - return simd16x32(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1]) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - - return simd16x32( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline 
uint64_t not_in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(static_cast(low - 1)); - const simd16 mask_high = simd16::splat(static_cast(high + 1)); - return simd16x32( - (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), - (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask) - .to_bitmask(); - } -}; // struct simd16x32 -/* end file src/simdutf/haswell/simd16-inl.h */ - -} // namespace simd - -} // unnamed namespace -} // namespace haswell -} // namespace simdutf - -#endif // SIMDUTF_HASWELL_SIMD_H -/* end file src/simdutf/haswell/simd.h */ - -/* begin file src/simdutf/haswell/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_POP_DISABLE_WARNINGS -#endif // end of workaround -/* end file src/simdutf/haswell/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_HASWELL -#endif // SIMDUTF_HASWELL_COMMON_H -/* end file src/simdutf/haswell.h */ -/* begin file src/simdutf/westmere.h */ -#ifndef SIMDUTF_WESTMERE_H -#define SIMDUTF_WESTMERE_H - -#ifdef SIMDUTF_FALLBACK_H - #error "westmere.h must be included before fallback.h" -#endif - - -// Default Westmere to on if this is x86-64, unless we'll always select Haswell. -#ifndef SIMDUTF_IMPLEMENTATION_WESTMERE - // - // You do not want to set it to (SIMDUTF_IS_X86_64 && - // !SIMDUTF_REQUIRES_HASWELL) because you want to rely on runtime dispatch! 
- // - #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || SIMDUTF_CAN_ALWAYS_RUN_HASWELL - #define SIMDUTF_IMPLEMENTATION_WESTMERE 0 - #else - #define SIMDUTF_IMPLEMENTATION_WESTMERE (SIMDUTF_IS_X86_64) - #endif - -#endif - -#if (SIMDUTF_IMPLEMENTATION_WESTMERE && SIMDUTF_IS_X86_64 && __SSE4_2__) - #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE 0 -#endif - -#if SIMDUTF_IMPLEMENTATION_WESTMERE - - #define SIMDUTF_TARGET_WESTMERE SIMDUTF_TARGET_REGION("sse4.2,popcnt") - -namespace simdutf { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere {} // namespace westmere -} // namespace simdutf - - // - // These two need to be included outside SIMDUTF_TARGET_REGION - // -/* begin file src/simdutf/westmere/implementation.h */ -#ifndef SIMDUTF_WESTMERE_IMPLEMENTATION_H -#define SIMDUTF_WESTMERE_IMPLEMENTATION_H - - -// The constructor may be executed on any host, so we take care not to use -// SIMDUTF_TARGET_REGION -namespace simdutf { -namespace westmere { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("westmere", "Intel/AMD SSE4.2", - internal::instruction_set::SSE42) {} - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - 
simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t 
len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - 
simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - 
convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; - 
simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept; - size_t binary_to_base64(const char *input, 
size_t length, char *output, - base64_options options) const noexcept; -}; - -} // namespace westmere -} // namespace simdutf - -#endif // SIMDUTF_WESTMERE_IMPLEMENTATION_H -/* end file src/simdutf/westmere/implementation.h */ -/* begin file src/simdutf/westmere/intrinsics.h */ -#ifndef SIMDUTF_WESTMERE_INTRINSICS_H -#define SIMDUTF_WESTMERE_INTRINSICS_H - -#ifdef SIMDUTF_VISUAL_STUDIO - // under clang within visual studio, this will include - #include // visual studio or clang -#else - - #if SIMDUTF_GCC11ORMORE -// We should not get warnings while including yet we do -// under some versions of GCC. -// If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnings. -SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) - #endif - - #include // elsewhere - - #if SIMDUTF_GCC11ORMORE -// cancels the suppression of the -Wuninitialized -SIMDUTF_POP_DISABLE_WARNINGS - #endif - -#endif // SIMDUTF_VISUAL_STUDIO - -#ifdef SIMDUTF_CLANG_VISUAL_STUDIO - /** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ - #include // for _mm_alignr_epi8 -#endif - -#endif // SIMDUTF_WESTMERE_INTRINSICS_H -/* end file src/simdutf/westmere/intrinsics.h */ - - // - // The rest need to be inside the region - // -/* begin file src/simdutf/westmere/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "westmere" -// #define SIMDUTF_IMPLEMENTATION westmere - -#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE -// nothing needed. 
-#else -SIMDUTF_TARGET_WESTMERE -#endif -/* end file src/simdutf/westmere/begin.h */ - - // Declarations -/* begin file src/simdutf/westmere/bitmanipulation.h */ -#ifndef SIMDUTF_WESTMERE_BITMANIPULATION_H -#define SIMDUTF_WESTMERE_BITMANIPULATION_H - -namespace simdutf { -namespace westmere { -namespace { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdutf_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_really_inline int trailing_zeroes(uint64_t input_num) { - #if SIMDUTF_REGULAR_VISUAL_STUDIO - unsigned long ret; - _BitScanForward64(&ret, input_num); - return (int)ret; - #else // SIMDUTF_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); - #endif // SIMDUTF_REGULAR_VISUAL_STUDIO -} -#endif - -} // unnamed namespace -} // namespace westmere -} // namespace simdutf - -#endif // SIMDUTF_WESTMERE_BITMANIPULATION_H -/* end file src/simdutf/westmere/bitmanipulation.h */ -/* begin file src/simdutf/westmere/simd.h */ -#ifndef SIMDUTF_WESTMERE_SIMD_H -#define SIMDUTF_WESTMERE_SIMD_H - -namespace simdutf { -namespace westmere { -namespace { -namespace simd { - -template struct base { - __m128i value; - - // Zero constructor - simdutf_really_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdutf_really_inline base(const __m128i _value) : value(_value) {} - // Conversion to SIMD register - simdutf_really_inline operator const __m128i &() const { return this->value; } - simdutf_really_inline operator __m128i &() { return this->value; } - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { - __m128i first = _mm_cvtepu8_epi16(*this); - __m128i second = _mm_cvtepu8_epi16(_mm_srli_si128(*this, 8)); - if (big_endian) { - 
const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - first = _mm_shuffle_epi8(first, swap); - second = _mm_shuffle_epi8(second, swap); - } - _mm_storeu_si128(reinterpret_cast<__m128i *>(p), first); - _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8), second); - } - simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { - _mm_storeu_si128(reinterpret_cast<__m128i *>(p), _mm_cvtepu8_epi32(*this)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 4), - _mm_cvtepu8_epi32(_mm_srli_si128(*this, 4))); - _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8), - _mm_cvtepu8_epi32(_mm_srli_si128(*this, 8))); - _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 12), - _mm_cvtepu8_epi32(_mm_srli_si128(*this, 12))); - } - // Bit operations - simdutf_really_inline Child operator|(const Child other) const { - return _mm_or_si128(*this, other); - } - simdutf_really_inline Child operator&(const Child other) const { - return _mm_and_si128(*this, other); - } - simdutf_really_inline Child operator^(const Child other) const { - return _mm_xor_si128(*this, other); - } - simdutf_really_inline Child bit_andnot(const Child other) const { - return _mm_andnot_si128(other, *this); - } - simdutf_really_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdutf_really_inline Child &operator&=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdutf_really_inline Child &operator^=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } -}; - -// Forward-declared so they can be used by splat and friends. 
-template struct simd8; - -template > -struct base8 : base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdutf_really_inline T first() const { return _mm_extract_epi8(*this, 0); } - simdutf_really_inline T last() const { return _mm_extract_epi8(*this, 15); } - simdutf_really_inline base8() : base>() {} - simdutf_really_inline base8(const __m128i _value) : base>(_value) {} - - friend simdutf_really_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return _mm_cmpeq_epi8(lhs, rhs); - } - - static const int SIZE = sizeof(base>::value); - - template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdutf_really_inline simd8 splat(bool _value) { - return _mm_set1_epi8(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8() : base8() {} - simdutf_really_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} - - simdutf_really_inline int to_bitmask() const { - return _mm_movemask_epi8(*this); - } - simdutf_really_inline bool any() const { - return !_mm_testz_si128(*this, *this); - } - simdutf_really_inline bool none() const { - return _mm_testz_si128(*this, *this); - } - simdutf_really_inline bool all() const { - return _mm_movemask_epi8(*this) == 0xFFFF; - } - simdutf_really_inline simd8 operator~() const { return *this ^ true; } -}; - -template struct base8_numeric : base8 { - static simdutf_really_inline simd8 splat(T _value) { - return _mm_set1_epi8(_value); - } - static simdutf_really_inline simd8 zero() { return _mm_setzero_si128(); } - static simdutf_really_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static 
simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15); - } - - simdutf_really_inline base8_numeric() : base8() {} - simdutf_really_inline base8_numeric(const __m128i _value) - : base8(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[16]) const { - return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd8 operator+(const simd8 other) const { - return _mm_add_epi8(*this, other); - } - simdutf_really_inline simd8 operator-(const simd8 other) const { - return _mm_sub_epi8(*this, other); - } - simdutf_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *static_cast *>(this); - } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline 
simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8(_mm_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15)) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - simdutf_really_inline operator simd8() const; - simdutf_really_inline bool is_ascii() const { - return _mm_movemask_epi8(*this) == 0; - } - - // Order-sensitive comparisons - simdutf_really_inline simd8 max_val(const simd8 other) const { - return _mm_max_epi8(*this, other); - } - simdutf_really_inline simd8 min_val(const simd8 other) const { - return _mm_min_epi8(*this, other); - } - simdutf_really_inline simd8 operator>(const simd8 other) const { - return _mm_cmpgt_epi8(*this, other); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return _mm_cmpgt_epi8(other, *this); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - 
simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8(_mm_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15)) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Saturated math - simdutf_really_inline simd8 - saturating_add(const simd8 other) const { - return _mm_adds_epu8(*this, other); - } - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return _mm_subs_epu8(*this, other); - } - - // Order-specific operations - simdutf_really_inline simd8 - max_val(const simd8 other) const { - return _mm_max_epu8(*this, other); - } - simdutf_really_inline simd8 - min_val(const simd8 other) const { - return _mm_min_epu8(*this, other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd8 - operator<=(const simd8 other) const { - return other.max_val(*this) == other; - } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - simdutf_really_inline 
simd8 - operator>(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdutf_really_inline simd8 - operator<(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - - // Bit-specific operations - simdutf_really_inline simd8 bits_not_set() const { - return *this == uint8_t(0); - } - simdutf_really_inline simd8 bits_not_set(simd8 bits) const { - return (*this & bits).bits_not_set(); - } - simdutf_really_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdutf_really_inline simd8 any_bits_set(simd8 bits) const { - return ~this->bits_not_set(bits); - } - simdutf_really_inline bool is_ascii() const { - return _mm_movemask_epi8(*this) == 0; - } - - simdutf_really_inline bool bits_not_set_anywhere() const { - return _mm_testz_si128(*this, *this); - } - simdutf_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdutf_really_inline bool bits_not_set_anywhere(simd8 bits) const { - return _mm_testz_si128(*this, bits); - } - simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { - return !bits_not_set_anywhere(bits); - } - template simdutf_really_inline simd8 shr() const { - return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); - } - template simdutf_really_inline simd8 shl() const { - return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); - } - // Get one of the bits and make a bitmask out of it. - // e.g. 
value.get_bit<7>() gets the high bit - template simdutf_really_inline int get_bit() const { - return _mm_movemask_epi8(_mm_slli_epi16(*this, 7 - N)); - } -}; -simdutf_really_inline simd8::operator simd8() const { - return this->value; -} - -// Unsigned bytes -template <> struct simd8 : base { - static simdutf_really_inline simd8 splat(uint16_t _value) { - return _mm_set1_epi16(_value); - } - static simdutf_really_inline simd8 load(const uint16_t values[8]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - - simdutf_really_inline simd8() : base() {} - simdutf_really_inline simd8(const __m128i _value) : base(_value) {} - // Splat constructor - simdutf_really_inline simd8(uint16_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const uint16_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline simd8(uint16_t v0, uint16_t v1, uint16_t v2, - uint16_t v3, uint16_t v4, uint16_t v5, - uint16_t v6, uint16_t v7) - : simd8(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {} - - // Saturated math - simdutf_really_inline simd8 - saturating_add(const simd8 other) const { - return _mm_adds_epu16(*this, other); - } - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return _mm_subs_epu16(*this, other); - } - - // Order-specific operations - simdutf_really_inline simd8 - max_val(const simd8 other) const { - return _mm_max_epu16(*this, other); - } - simdutf_really_inline simd8 - min_val(const simd8 other) const { - return _mm_min_epu16(*this, other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd8 - operator<=(const 
simd8 other) const { - return other.max_val(*this) == other; - } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - simdutf_really_inline simd8 - operator==(const simd8 other) const { - return _mm_cmpeq_epi16(*this, other); - } - simdutf_really_inline simd8 - operator&(const simd8 other) const { - return _mm_and_si128(*this, other); - } - simdutf_really_inline simd8 - operator|(const simd8 other) const { - return _mm_or_si128(*this, other); - } - - // Bit-specific operations - simdutf_really_inline simd8 bits_not_set() const { - return *this == uint16_t(0); - } - simdutf_really_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - - simdutf_really_inline bool bits_not_set_anywhere() const { - return _mm_testz_si128(*this, *this); - } - simdutf_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdutf_really_inline bool bits_not_set_anywhere(simd8 bits) const { - return _mm_testz_si128(*this, bits); - } - simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { - return !bits_not_set_anywhere(bits); - } -}; -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "Westmere kernel should use four registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void 
store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - this->chunks[2] |= other.chunks[2]; - this->chunks[3] |= other.chunks[3]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - this->chunks[2].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 2); - this->chunks[3].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 3); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); - this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8) * 2); - this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] 
== mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3]) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - - return simd8x64( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), - (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), - (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low - 1); - const simd8 mask_high = simd8::splat(high + 1); - return simd8x64( - (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), - (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low), - (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low), - (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, - this->chunks[2] > mask, this->chunks[3] > mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq(const T m) 
const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask, - this->chunks[2] >= mask, this->chunks[3] >= mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(simd8(__m128i(this->chunks[0])) >= mask, - simd8(__m128i(this->chunks[1])) >= mask, - simd8(__m128i(this->chunks[2])) >= mask, - simd8(__m128i(this->chunks[3])) >= mask) - .to_bitmask(); - } -}; // struct simd8x64 - -/* begin file src/simdutf/westmere/simd16-inl.h */ -template struct simd16; - -template > -struct base16 : base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdutf_really_inline base16() : base>() {} - simdutf_really_inline base16(const __m128i _value) - : base>(_value) {} - template - simdutf_really_inline base16(const Pointer *ptr) - : base16(_mm_loadu_si128(reinterpret_cast(ptr))) {} - - friend simdutf_really_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return _mm_cmpeq_epi16(lhs, rhs); - } - - static const int SIZE = sizeof(base>::value); - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return _mm_set1_epi16(uint16_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} - - simdutf_really_inline int to_bitmask() const { - return _mm_movemask_epi8(*this); - } - simdutf_really_inline bool any() const { - return !_mm_testz_si128(*this, *this); - } - simdutf_really_inline simd16 operator~() const { return *this ^ true; } -}; - -template struct base16_numeric : base16 { - 
static simdutf_really_inline simd16 splat(T _value) { - return _mm_set1_epi16(_value); - } - static simdutf_really_inline simd16 zero() { return _mm_setzero_si128(); } - static simdutf_really_inline simd16 load(const T values[8]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - - simdutf_really_inline base16_numeric() : base16() {} - simdutf_really_inline base16_numeric(const __m128i _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd16 operator+(const simd16 other) const { - return _mm_add_epi16(*this, other); - } - simdutf_really_inline simd16 operator-(const simd16 other) const { - return _mm_sub_epi16(*this, other); - } - simdutf_really_inline simd16 &operator+=(const simd16 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdutf_really_inline simd16 &operator-=(const simd16 other) { - *this = *this - other; - return *static_cast *>(this); - } -}; - -// Signed code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m128i _value) - : base16_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - // Member-by-member initialization - simdutf_really_inline simd16(int16_t v0, int16_t v1, int16_t v2, int16_t v3, - int16_t v4, int16_t v5, int16_t v6, int16_t v7) - : simd16(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {} - 
simdutf_really_inline operator simd16() const; - - // Order-sensitive comparisons - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return _mm_max_epi16(*this, other); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return _mm_min_epi16(*this, other); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return _mm_cmpgt_epi16(*this, other); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return _mm_cmpgt_epi16(other, *this); - } -}; - -// Unsigned code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m128i _value) - : base16_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - // Member-by-member initialization - simdutf_really_inline simd16(uint16_t v0, uint16_t v1, uint16_t v2, - uint16_t v3, uint16_t v4, uint16_t v5, - uint16_t v6, uint16_t v7) - : simd16(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd16 - repeat_16(uint16_t v0, uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4, - uint16_t v5, uint16_t v6, uint16_t v7) { - return simd16(v0, v1, v2, v3, v4, v5, v6, v7); - } - - // Saturated math - simdutf_really_inline simd16 - saturating_add(const simd16 other) const { - return _mm_adds_epu16(*this, other); - } - simdutf_really_inline simd16 - saturating_sub(const simd16 other) const { - return _mm_subs_epu16(*this, other); - } - - // Order-specific operations - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return _mm_max_epu16(*this, other); - } - 
simdutf_really_inline simd16 - min_val(const simd16 other) const { - return _mm_min_epu16(*this, other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - gt_bits(const simd16 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - lt_bits(const simd16 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd16 - operator<=(const simd16 other) const { - return other.max_val(*this) == other; - } - simdutf_really_inline simd16 - operator>=(const simd16 other) const { - return other.min_val(*this) == other; - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return this->gt_bits(other).any_bits_set(); - } - - // Bit-specific operations - simdutf_really_inline simd16 bits_not_set() const { - return *this == uint16_t(0); - } - simdutf_really_inline simd16 bits_not_set(simd16 bits) const { - return (*this & bits).bits_not_set(); - } - simdutf_really_inline simd16 any_bits_set() const { - return ~this->bits_not_set(); - } - simdutf_really_inline simd16 any_bits_set(simd16 bits) const { - return ~this->bits_not_set(bits); - } - - simdutf_really_inline bool bits_not_set_anywhere() const { - return _mm_testz_si128(*this, *this); - } - simdutf_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdutf_really_inline bool - bits_not_set_anywhere(simd16 bits) const { - return _mm_testz_si128(*this, bits); - } - simdutf_really_inline bool - any_bits_set_anywhere(simd16 bits) const { - return !bits_not_set_anywhere(bits); - } - template simdutf_really_inline simd16 shr() const { - return simd16(_mm_srli_epi16(*this, N)); - } - template simdutf_really_inline simd16 shl() const { - return 
simd16(_mm_slli_epi16(*this, N)); - } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template simdutf_really_inline int get_bit() const { - return _mm_movemask_epi8(_mm_slli_epi16(*this, 7 - N)); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - return _mm_shuffle_epi8(*this, swap); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - return _mm_packus_epi16(v0, v1); - } -}; -simdutf_really_inline simd16::operator simd16() const { - return this->value; -} - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 4, - "Westmere kernel should use four registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline - simd16x32(const simd16 chunk0, const simd16 chunk1, - const simd16 chunk2, const simd16 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); - } - - simdutf_really_inline simd16 reduce_or() const { - return (this->chunks[0] 
| this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); - this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); - this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); - this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - this->chunks[2] = this->chunks[2].swap_bytes(); - this->chunks[3] = this->chunks[3].swap_bytes(); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t eq(const simd16x32 &other) const { - return simd16x32(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3]) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - - return simd16x32( - 
(this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), - (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), - (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(static_cast(low - 1)); - const simd16 mask_high = simd16::splat(static_cast(high + 1)); - return simd16x32( - (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), - (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low), - (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low), - (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } -}; // struct simd16x32 -/* end file src/simdutf/westmere/simd16-inl.h */ - -} // namespace simd -} // unnamed namespace -} // namespace westmere -} // namespace simdutf - -#endif // SIMDUTF_WESTMERE_SIMD_INPUT_H -/* end file src/simdutf/westmere/simd.h */ - -/* begin file src/simdutf/westmere/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE -// nothing needed. 
-#else -SIMDUTF_UNTARGET_REGION -#endif - -/* end file src/simdutf/westmere/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_WESTMERE -#endif // SIMDUTF_WESTMERE_COMMON_H -/* end file src/simdutf/westmere.h */ -/* begin file src/simdutf/ppc64.h */ -#ifndef SIMDUTF_PPC64_H -#define SIMDUTF_PPC64_H - -#ifdef SIMDUTF_FALLBACK_H - #error "ppc64.h must be included before fallback.h" -#endif - - -#ifndef SIMDUTF_IMPLEMENTATION_PPC64 - #define SIMDUTF_IMPLEMENTATION_PPC64 (SIMDUTF_IS_PPC64) -#endif -#define SIMDUTF_CAN_ALWAYS_RUN_PPC64 \ - SIMDUTF_IMPLEMENTATION_PPC64 &&SIMDUTF_IS_PPC64 - - -#if SIMDUTF_IMPLEMENTATION_PPC64 - -namespace simdutf { -/** - * Implementation for ALTIVEC (PPC64). - */ -namespace ppc64 {} // namespace ppc64 -} // namespace simdutf - -/* begin file src/simdutf/ppc64/implementation.h */ -#ifndef SIMDUTF_PPC64_IMPLEMENTATION_H -#define SIMDUTF_PPC64_IMPLEMENTATION_H - - -namespace simdutf { -namespace ppc64 { - -namespace { -using namespace simdutf; -} // namespace - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("ppc64", "PPC64 ALTIVEC", - internal::instruction_set::ALTIVEC) {} - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, 
size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - 
simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result 
convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - simdutf_warn_unused result base64_to_binary( - const char 
*input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -}; - -} // namespace ppc64 -} // namespace simdutf - -#endif // SIMDUTF_PPC64_IMPLEMENTATION_H -/* end file src/simdutf/ppc64/implementation.h */ - -/* begin file src/simdutf/ppc64/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "ppc64" -// #define SIMDUTF_IMPLEMENTATION ppc64 -/* end file src/simdutf/ppc64/begin.h */ - - // Declarations -/* begin file src/simdutf/ppc64/intrinsics.h */ -#ifndef SIMDUTF_PPC64_INTRINSICS_H -#define SIMDUTF_PPC64_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -// These are defined by altivec.h in GCC toolchain, it is safe to undef them. 
-#ifdef bool - #undef bool -#endif - -#ifdef vector - #undef vector -#endif - -#endif // SIMDUTF_PPC64_INTRINSICS_H -/* end file src/simdutf/ppc64/intrinsics.h */ -/* begin file src/simdutf/ppc64/bitmanipulation.h */ -#ifndef SIMDUTF_PPC64_BITMANIPULATION_H -#define SIMDUTF_PPC64_BITMANIPULATION_H - -namespace simdutf { -namespace ppc64 { -namespace { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -simdutf_really_inline int count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdutf_really_inline int count_ones(uint64_t input_num) { - return __builtin_popcountll(input_num); -} -#endif - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf - -#endif // SIMDUTF_PPC64_BITMANIPULATION_H -/* end file src/simdutf/ppc64/bitmanipulation.h */ -/* begin file src/simdutf/ppc64/simd.h */ -#ifndef SIMDUTF_PPC64_SIMD_H -#define SIMDUTF_PPC64_SIMD_H - -#include - -namespace simdutf { -namespace ppc64 { -namespace { -namespace simd { - -using __m128i = __vector unsigned char; - -template struct base { - __m128i value; - - // Zero constructor - simdutf_really_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdutf_really_inline base(const __m128i _value) : value(_value) {} - - // Conversion to SIMD register - simdutf_really_inline operator const __m128i &() const { return this->value; } - simdutf_really_inline operator __m128i &() { return this->value; } - - // Bit operations - simdutf_really_inline Child operator|(const Child other) const { - return vec_or(this->value, (__m128i)other); - } - simdutf_really_inline Child operator&(const Child other) const { - return vec_and(this->value, (__m128i)other); - } - simdutf_really_inline Child operator^(const Child other) const { - return vec_xor(this->value, (__m128i)other); - } - simdutf_really_inline Child bit_andnot(const Child other) const { - return vec_andc(this->value, 
(__m128i)other); - } - simdutf_really_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdutf_really_inline Child &operator&=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdutf_really_inline Child &operator^=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } -}; - -// Forward-declared so they can be used by splat and friends. -template struct simd8; - -template > -struct base8 : base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdutf_really_inline base8() : base>() {} - simdutf_really_inline base8(const __m128i _value) : base>(_value) {} - - friend simdutf_really_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); - } - - static const int SIZE = sizeof(base>::value); - - template - simdutf_really_inline simd8 prev(simd8 prev_chunk) const { - __m128i chunk = this->value; -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve(this->value); - prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); -#endif - chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve((__m128i)chunk); -#endif - return chunk; - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdutf_really_inline simd8 splat(bool _value) { - return (__m128i)vec_splats((unsigned char)(-(!!_value))); - } - - simdutf_really_inline simd8() : base8() {} - simdutf_really_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} - - simdutf_really_inline int to_bitmask() const { - __vector unsigned long long result; - const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 
0x50, 0x48, 0x40, - 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; - - result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, - (__m128i)perm_mask)); -#ifdef __LITTLE_ENDIAN__ - return static_cast(result[1]); -#else - return static_cast(result[0]); -#endif - } - simdutf_really_inline bool any() const { - return !vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdutf_really_inline simd8 operator~() const { - return this->value ^ (__m128i)splat(true); - } -}; - -template struct base8_numeric : base8 { - static simdutf_really_inline simd8 splat(T value) { - (void)value; - return (__m128i)vec_splats(value); - } - static simdutf_really_inline simd8 zero() { return splat(0); } - static simdutf_really_inline simd8 load(const T values[16]) { - return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15); - } - - simdutf_really_inline base8_numeric() : base8() {} - simdutf_really_inline base8_numeric(const __m128i _value) - : base8(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[16]) const { - vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); - } - - // Override to distinguish from bool version - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd8 operator+(const simd8 other) const { - return (__m128i)((__m128i)this->value + (__m128i)other); - } - simdutf_really_inline simd8 operator-(const simd8 other) const { - return (__m128i)((__m128i)this->value - (__m128i)other); - } - simdutf_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return 
*static_cast *>(this); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *static_cast *>(this); - } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, - this->value); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, - v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t 
v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Order-sensitive comparisons - simdutf_really_inline simd8 max_val(const simd8 other) const { - return (__m128i)vec_max((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdutf_really_inline simd8 min_val(const simd8 other) const { - return (__m128i)vec_min((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdutf_really_inline simd8 operator>(const simd8 other) const { - return (__m128i)vec_cmpgt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return (__m128i)vec_cmplt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - 
v13, v14, v15); - } - - // Saturated math - simdutf_really_inline simd8 - saturating_add(const simd8 other) const { - return (__m128i)vec_adds(this->value, (__m128i)other); - } - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return (__m128i)vec_subs(this->value, (__m128i)other); - } - - // Order-specific operations - simdutf_really_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max(this->value, (__m128i)other); - } - simdutf_really_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min(this->value, (__m128i)other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd8 - operator<=(const simd8 other) const { - return other.max_val(*this) == other; - } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - simdutf_really_inline simd8 - operator>(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdutf_really_inline simd8 - operator<(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - - // Bit-specific operations - simdutf_really_inline simd8 bits_not_set() const { - return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); - } - simdutf_really_inline simd8 bits_not_set(simd8 bits) const { - return (*this & bits).bits_not_set(); - } - simdutf_really_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdutf_really_inline simd8 any_bits_set(simd8 bits) const { - return ~this->bits_not_set(bits); - } - - simdutf_really_inline bool is_ascii() const { - return 
this->saturating_sub(0b01111111u).bits_not_set_anywhere(); - } - - simdutf_really_inline bool bits_not_set_anywhere() const { - return vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdutf_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdutf_really_inline bool bits_not_set_anywhere(simd8 bits) const { - return vec_all_eq(vec_and(this->value, (__m128i)bits), - (__m128i)vec_splats(0)); - } - simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { - return !bits_not_set_anywhere(bits); - } - template simdutf_really_inline simd8 shr() const { - return simd8( - (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); - } - template simdutf_really_inline simd8 shl() const { - return simd8( - (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); - } -}; - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "PPC64 kernel should use four registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 
&other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - this->chunks[2] |= other.chunks[2]; - this->chunks[3] |= other.chunks[3]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { - return input.reduce_or().is_ascii(); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3]) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - - return simd8x64( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), - (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), - (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T 
high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - return simd8x64( - (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), - (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), - (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), - (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, - this->chunks[2] > mask, this->chunks[3] > mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask, - this->chunks[2] >= mask, this->chunks[3] >= mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(simd8(this->chunks[0]) >= mask, - simd8(this->chunks[1]) >= mask, - simd8(this->chunks[2]) >= mask, - simd8(this->chunks[3]) >= mask) - .to_bitmask(); - } -}; // struct simd8x64 - -} // namespace simd -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf - -#endif // SIMDUTF_PPC64_SIMD_INPUT_H -/* end file src/simdutf/ppc64/simd.h */ - -/* begin file src/simdutf/ppc64/end.h */ -/* end file src/simdutf/ppc64/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_PPC64 - -#endif // SIMDUTF_PPC64_H -/* end file src/simdutf/ppc64.h */ -/* begin file src/simdutf/rvv.h */ -#ifndef SIMDUTF_RVV_H -#define SIMDUTF_RVV_H - -#ifdef SIMDUTF_FALLBACK_H - #error "rvv.h must be included before fallback.h" -#endif - - -#define SIMDUTF_CAN_ALWAYS_RUN_RVV SIMDUTF_IS_RVV - 
-#ifndef SIMDUTF_IMPLEMENTATION_RVV - #define SIMDUTF_IMPLEMENTATION_RVV \ - (SIMDUTF_CAN_ALWAYS_RUN_RVV || \ - (SIMDUTF_IS_RISCV64 && SIMDUTF_HAS_RVV_INTRINSICS && \ - SIMDUTF_HAS_RVV_TARGET_REGION)) -#endif - -#if SIMDUTF_IMPLEMENTATION_RVV - - #if SIMDUTF_CAN_ALWAYS_RUN_RVV - #define SIMDUTF_TARGET_RVV - #else - #define SIMDUTF_TARGET_RVV SIMDUTF_TARGET_REGION("arch=+v") - #endif - #if !SIMDUTF_IS_ZVBB && SIMDUTF_HAS_ZVBB_INTRINSICS - #define SIMDUTF_TARGET_ZVBB SIMDUTF_TARGET_REGION("arch=+v,+zvbb") - #endif - -namespace simdutf { -namespace rvv {} // namespace rvv -} // namespace simdutf - -/* begin file src/simdutf/rvv/implementation.h */ -#ifndef SIMDUTF_RVV_IMPLEMENTATION_H -#define SIMDUTF_RVV_IMPLEMENTATION_H - - -namespace simdutf { -namespace rvv { - -namespace { -using namespace simdutf; -} // namespace - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("rvv", "RISC-V Vector Extension", - internal::instruction_set::RVV), - _supports_zvbb(internal::detect_supported_architectures() & - internal::instruction_set::ZVBB) {} - simdutf_warn_unused int detect_encodings(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result 
validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, 
char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - 
simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - 
convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - void change_endianness_utf16(const char16_t *buf, size_t len, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t len) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t len) const noexcept; - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t len) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *buf, size_t len) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *buf, size_t len) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_utf16le(const char16_t *buf, size_t len) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_utf16be(const char16_t *buf, size_t len) const noexcept; - simdutf_warn_unused size_t utf16_length_from_utf8(const char *buf, - size_t len) const noexcept; - simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *buf, - size_t len) const noexcept; - simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *buf, - size_t len) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf8(const char *buf, - size_t len) const noexcept; - simdutf_warn_unused size_t latin1_length_from_utf8(const char *buf, - size_t len) const noexcept; 
- simdutf_warn_unused size_t - latin1_length_from_utf16(size_t len) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t len) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t len) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t len) const noexcept; - simdutf_warn_unused size_t utf8_length_from_latin1(const char *buf, - size_t len) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; - -private: - const bool _supports_zvbb; - -#if SIMDUTF_IS_ZVBB - bool supports_zvbb() const { return true; } -#elif SIMDUTF_HAS_ZVBB_INTRINSICS - bool 
supports_zvbb() const { return _supports_zvbb; } -#else - bool supports_zvbb() const { return false; } -#endif -}; - -} // namespace rvv -} // namespace simdutf - -#endif // SIMDUTF_RVV_IMPLEMENTATION_H -/* end file src/simdutf/rvv/implementation.h */ -/* begin file src/simdutf/rvv/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "rvv" -// #define SIMDUTF_IMPLEMENTATION rvv - -#if SIMDUTF_CAN_ALWAYS_RUN_RVV -// nothing needed. -#else -SIMDUTF_TARGET_RVV -#endif -/* end file src/simdutf/rvv/begin.h */ -/* begin file src/simdutf/rvv/intrinsics.h */ -#ifndef SIMDUTF_RVV_INTRINSICS_H -#define SIMDUTF_RVV_INTRINSICS_H - - -#include - -#if __riscv_v_intrinsic >= 1000000 || __GCC__ >= 14 - #define simdutf_vrgather_u8m1x2(tbl, idx) \ - __riscv_vcreate_v_u8m1_u8m2( \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 0), \ - __riscv_vsetvlmax_e8m1()), \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 1), \ - __riscv_vsetvlmax_e8m1())); - - #define simdutf_vrgather_u8m1x4(tbl, idx) \ - __riscv_vcreate_v_u8m1_u8m4( \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 0), \ - __riscv_vsetvlmax_e8m1()), \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 1), \ - __riscv_vsetvlmax_e8m1()), \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 2), \ - __riscv_vsetvlmax_e8m1()), \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 3), \ - __riscv_vsetvlmax_e8m1())); -#else - // This has worse codegen on gcc - #define simdutf_vrgather_u8m1x2(tbl, idx) \ - __riscv_vset_v_u8m1_u8m2( \ - __riscv_vlmul_ext_v_u8m1_u8m2(__riscv_vrgather_vv_u8m1( \ - tbl, __riscv_vget_v_u8m2_u8m1(idx, 0), __riscv_vsetvlmax_e8m1())), \ - 1, \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 1), \ - __riscv_vsetvlmax_e8m1())) - - #define simdutf_vrgather_u8m1x4(tbl, idx) \ - __riscv_vset_v_u8m1_u8m4( \ - __riscv_vset_v_u8m1_u8m4( \ - __riscv_vset_v_u8m1_u8m4( \ - __riscv_vlmul_ext_v_u8m1_u8m4(__riscv_vrgather_vv_u8m1( \ - 
tbl, __riscv_vget_v_u8m4_u8m1(idx, 0), \ - __riscv_vsetvlmax_e8m1())), \ - 1, \ - __riscv_vrgather_vv_u8m1(tbl, \ - __riscv_vget_v_u8m4_u8m1(idx, 1), \ - __riscv_vsetvlmax_e8m1())), \ - 2, \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 2), \ - __riscv_vsetvlmax_e8m1())), \ - 3, \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 3), \ - __riscv_vsetvlmax_e8m1())) -#endif - -/* Zvbb adds dedicated support for endianness swaps with vrev8, but if we can't - * use that, we have to emulate it with the standard V extension. - * Using LMUL=1 vrgathers could be faster than the srl+macc variant, but that - * would increase register pressure, and vrgather implementations performance - * varies a lot. */ -enum class simdutf_ByteFlip { NONE, V, ZVBB }; - -template -simdutf_really_inline static uint16_t simdutf_byteflip(uint16_t v) { - if (method != simdutf_ByteFlip::NONE) - return (uint16_t)((v * 1u) << 8 | (v * 1u) >> 8); - return v; -} - -#ifdef SIMDUTF_TARGET_ZVBB -SIMDUTF_UNTARGET_REGION -SIMDUTF_TARGET_ZVBB -#endif - -template -simdutf_really_inline static vuint16m1_t simdutf_byteflip(vuint16m1_t v, - size_t vl) { -#if SIMDUTF_HAS_ZVBB_INTRINSICS - if (method == simdutf_ByteFlip::ZVBB) - return __riscv_vrev8_v_u16m1(v, vl); -#endif - if (method == simdutf_ByteFlip::V) - return __riscv_vmacc_vx_u16m1(__riscv_vsrl_vx_u16m1(v, 8, vl), 0x100, v, - vl); - return v; -} - -template -simdutf_really_inline static vuint16m2_t simdutf_byteflip(vuint16m2_t v, - size_t vl) { -#if SIMDUTF_HAS_ZVBB_INTRINSICS - if (method == simdutf_ByteFlip::ZVBB) - return __riscv_vrev8_v_u16m2(v, vl); -#endif - if (method == simdutf_ByteFlip::V) - return __riscv_vmacc_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 8, vl), 0x100, v, - vl); - return v; -} - -template -simdutf_really_inline static vuint16m4_t simdutf_byteflip(vuint16m4_t v, - size_t vl) { -#if SIMDUTF_HAS_ZVBB_INTRINSICS - if (method == simdutf_ByteFlip::ZVBB) - return __riscv_vrev8_v_u16m4(v, vl); -#endif - if (method == 
simdutf_ByteFlip::V) - return __riscv_vmacc_vx_u16m4(__riscv_vsrl_vx_u16m4(v, 8, vl), 0x100, v, - vl); - return v; -} - -template -simdutf_really_inline static vuint16m8_t simdutf_byteflip(vuint16m8_t v, - size_t vl) { -#if SIMDUTF_HAS_ZVBB_INTRINSICS - if (method == simdutf_ByteFlip::ZVBB) - return __riscv_vrev8_v_u16m8(v, vl); -#endif - if (method == simdutf_ByteFlip::V) - return __riscv_vmacc_vx_u16m8(__riscv_vsrl_vx_u16m8(v, 8, vl), 0x100, v, - vl); - return v; -} - -#ifdef SIMDUTF_TARGET_ZVBB -SIMDUTF_UNTARGET_REGION -SIMDUTF_TARGET_RVV -#endif - -#endif // SIMDUTF_RVV_INTRINSICS_H -/* end file src/simdutf/rvv/intrinsics.h */ -/* begin file src/simdutf/rvv/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_RVV -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - -/* end file src/simdutf/rvv/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_RVV - -#endif // SIMDUTF_RVV_H -/* end file src/simdutf/rvv.h */ -/* begin file src/simdutf/lsx.h */ -#ifndef SIMDUTF_LSX_H -#define SIMDUTF_LSX_H - -#ifdef SIMDUTF_FALLBACK_H - #error "lsx.h must be included before fallback.h" -#endif - - -#ifndef SIMDUTF_IMPLEMENTATION_LSX - #define SIMDUTF_IMPLEMENTATION_LSX (SIMDUTF_IS_LSX) -#endif -#if SIMDUTF_IMPLEMENTATION_LSX && SIMDUTF_IS_LSX - #define SIMDUTF_CAN_ALWAYS_RUN_LSX 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_LSX 0 -#endif - -#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) - -#if SIMDUTF_IMPLEMENTATION_LSX - -namespace simdutf { -/** - * Implementation for LoongArch SX. 
- */ -namespace lsx {} // namespace lsx -} // namespace simdutf - -/* begin file src/simdutf/lsx/implementation.h */ -#ifndef SIMDUTF_LSX_IMPLEMENTATION_H -#define SIMDUTF_LSX_IMPLEMENTATION_H - - -namespace simdutf { -namespace lsx { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("lsx", "LOONGARCH SX", - internal::instruction_set::LSX) {} - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t 
*utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const 
char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const 
noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const 
noexcept final; - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - simdutf_warn_unused result - 
base64_to_binary(const char *input, size_t length, char *output, - base64_options options) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options) const noexcept; - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; - - simdutf_warn_unused virtual result - base64_to_binary(const char *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; -}; - -} // namespace lsx -} // namespace simdutf - -#endif // SIMDUTF_LSX_IMPLEMENTATION_H -/* end file src/simdutf/lsx/implementation.h */ - -/* begin file src/simdutf/lsx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lsx" -// #define SIMDUTF_IMPLEMENTATION lsx -/* end file src/simdutf/lsx/begin.h */ - - // Declarations -/* begin file src/simdutf/lsx/intrinsics.h */ -#ifndef SIMDUTF_LSX_INTRINSICS_H -#define 
SIMDUTF_LSX_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -#endif // SIMDUTF_LSX_INTRINSICS_H -/* end file src/simdutf/lsx/intrinsics.h */ -/* begin file src/simdutf/lsx/bitmanipulation.h */ -#ifndef SIMDUTF_LSX_BITMANIPULATION_H -#define SIMDUTF_LSX_BITMANIPULATION_H - -#include - -namespace simdutf { -namespace lsx { -namespace { - -simdutf_really_inline int count_ones(uint64_t input_num) { - return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); -} - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_really_inline int trailing_zeroes(uint64_t input_num) { - return __builtin_ctzll(input_num); -} -#endif - -} // unnamed namespace -} // namespace lsx -} // namespace simdutf - -#endif // SIMDUTF_LSX_BITMANIPULATION_H -/* end file src/simdutf/lsx/bitmanipulation.h */ -/* begin file src/simdutf/lsx/simd.h */ -#ifndef SIMDUTF_LSX_SIMD_H -#define SIMDUTF_LSX_SIMD_H - -#include - -namespace simdutf { -namespace lsx { -namespace { -namespace simd { - -template struct simd8; - -// -// Base class of simd8 and simd8, both of which use __m128i -// internally. 
-// -template > struct base_u8 { - __m128i value; - static const int SIZE = sizeof(value); - - // Conversion from/to SIMD register - simdutf_really_inline base_u8(const __m128i _value) : value(_value) {} - simdutf_really_inline operator const __m128i &() const { return this->value; } - simdutf_really_inline operator __m128i &() { return this->value; } - simdutf_really_inline T first() const { - return __lsx_vpickve2gr_bu(this->value, 0); - } - simdutf_really_inline T last() const { - return __lsx_vpickve2gr_bu(this->value, 15); - } - - // Bit operations - simdutf_really_inline simd8 operator|(const simd8 other) const { - return __lsx_vor_v(this->value, other); - } - simdutf_really_inline simd8 operator&(const simd8 other) const { - return __lsx_vand_v(this->value, other); - } - simdutf_really_inline simd8 operator^(const simd8 other) const { - return __lsx_vxor_v(this->value, other); - } - simdutf_really_inline simd8 bit_andnot(const simd8 other) const { - return __lsx_vandn_v(this->value, other); - } - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - simdutf_really_inline simd8 &operator|=(const simd8 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdutf_really_inline simd8 &operator&=(const simd8 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdutf_really_inline simd8 &operator^=(const simd8 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } - - friend simdutf_really_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return __lsx_vseq_b(lhs, rhs); - } - - template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(this->value, N), - __lsx_vbsrl_v(prev_chunk.value, 16 - N)); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base_u8 { - 
typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - static simdutf_really_inline simd8 splat(bool _value) { - return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8(const __m128i _value) : base_u8(_value) {} - // False constructor - simdutf_really_inline simd8() : simd8(__lsx_vldi(0)) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} - simdutf_really_inline void store(uint8_t dst[16]) const { - return __lsx_vst(this->value, dst, 0); - } - - simdutf_really_inline uint32_t to_bitmask() const { - return __lsx_vpickve2gr_wu(__lsx_vmsknz_b(*this), 0); - } - - simdutf_really_inline bool any() const { - return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) != 0; - } - simdutf_really_inline bool none() const { - return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) == 0; - } - simdutf_really_inline bool all() const { - return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) == 0xFFFF; - } -}; - -// Unsigned bytes -template <> struct simd8 : base_u8 { - static simdutf_really_inline simd8 splat(uint8_t _value) { - return __lsx_vreplgr2vr_b(_value); - } - static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } - static simdutf_really_inline simd8 load(const uint8_t *values) { - return __lsx_vld(values, 0); - } - simdutf_really_inline simd8(const __m128i _value) - : base_u8(_value) {} - // Zero constructor - simdutf_really_inline simd8() : simd8(zero()) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Member-by-member initialization - - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((__m128i)v16u8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, 
v10, v11, - v12, v13, v14, v15}) {} - - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Store to array - simdutf_really_inline void store(uint8_t dst[16]) const { - return __lsx_vst(this->value, dst, 0); - } - - // Saturated math - simdutf_really_inline simd8 - saturating_add(const simd8 other) const { - return __lsx_vsadd_bu(this->value, other); - } - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return __lsx_vssub_bu(this->value, other); - } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd8 - operator+(const simd8 other) const { - return __lsx_vadd_b(this->value, other); - } - simdutf_really_inline simd8 - operator-(const simd8 other) const { - return __lsx_vsub_b(this->value, other); - } - simdutf_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *this; - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *this; - } - - // Order-specific operations - simdutf_really_inline simd8 - max_val(const simd8 other) const { - return __lsx_vmax_bu(*this, other); - } - simdutf_really_inline simd8 - min_val(const simd8 other) const { - return __lsx_vmin_bu(*this, other); - } - simdutf_really_inline simd8 - operator<=(const simd8 other) const { - return __lsx_vsle_bu(*this, other); - } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return __lsx_vsle_bu(other, *this); - } - simdutf_really_inline simd8 - operator<(const simd8 other) const { - return __lsx_vslt_bu(*this, other); - } - simdutf_really_inline simd8 - 
operator>(const simd8 other) const { - return __lsx_vslt_bu(other, *this); - } - // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true - // = nonzero. For ARM, returns all 1's. - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return simd8(*this > other); - } - // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true - // = nonzero. For ARM, returns all 1's. - simdutf_really_inline simd8 - lt_bits(const simd8 other) const { - return simd8(*this < other); - } - - // Bit-specific operations - simdutf_really_inline simd8 any_bits_set(simd8 bits) const { - return __lsx_vslt_bu(__lsx_vldi(0), __lsx_vand_v(this->value, bits)); - } - simdutf_really_inline bool is_ascii() const { - return __lsx_vpickve2gr_hu(__lsx_vmskgez_b(this->value), 0) == 0xFFFF; - } - - simdutf_really_inline bool any_bits_set_anywhere() const { - return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(this->value), 0) > 0; - } - simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { - return (*this & bits).any_bits_set_anywhere(); - } - template simdutf_really_inline simd8 shr() const { - return __lsx_vsrli_b(this->value, N); - } - template simdutf_really_inline simd8 shl() const { - return __lsx_vslli_b(this->value, N); - } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } 
- - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); - return __lsx_vshuf_b(__lsx_vldi(0), *this, simd8(original_tmp)); - } -}; - -// Signed bytes -template <> struct simd8 { - __m128i value; - - static simdutf_really_inline simd8 splat(int8_t _value) { - return __lsx_vreplgr2vr_b(_value); - } - static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } - static simdutf_really_inline simd8 load(const int8_t values[16]) { - return __lsx_vld(values, 0); - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { - __m128i zero = __lsx_vldi(0); - if (match_system(big_endian)) { - __lsx_vst(__lsx_vilvl_b(zero, (__m128i)this->value), - reinterpret_cast(p), 0); - __lsx_vst(__lsx_vilvh_b(zero, (__m128i)this->value), - reinterpret_cast(p + 8), 0); - } else { - __lsx_vst(__lsx_vilvl_b((__m128i)this->value, zero), - reinterpret_cast(p), 0); - __lsx_vst(__lsx_vilvh_b((__m128i)this->value, zero), - reinterpret_cast(p + 8), 0); - } - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { - __m128i zero = __lsx_vldi(0); - __m128i in16low = __lsx_vilvl_b(zero, (__m128i)this->value); - __m128i in16high = __lsx_vilvh_b(zero, (__m128i)this->value); - __m128i in32_0 = __lsx_vilvl_h(zero, in16low); - __m128i in32_1 = __lsx_vilvh_h(zero, in16low); - __m128i in32_2 = __lsx_vilvl_h(zero, in16high); - __m128i in32_3 = __lsx_vilvh_h(zero, in16high); - __lsx_vst(in32_0, reinterpret_cast(p), 0); - __lsx_vst(in32_1, reinterpret_cast(p + 4), 0); - __lsx_vst(in32_2, reinterpret_cast(p + 8), 0); - __lsx_vst(in32_3, reinterpret_cast(p + 12), 0); - } - - // In places where the table can be reused, which is most uses in simdutf, it - // is worth it to do 4 table lookups, as there is no direct zero extension - // from u8 to u32. 
- simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { - const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, - 2, 255, 255, 255, 3, 255, 255, 255}; - const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, - 6, 255, 255, 255, 7, 255, 255, 255}; - const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, - 10, 255, 255, 255, 11, 255, 255, 255}; - const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, - 14, 255, 255, 255, 15, 255, 255, 255}; - - // encourage store pairing and interleaving - const auto shuf1 = this->apply_lookup_16_to(tb1); - const auto shuf2 = this->apply_lookup_16_to(tb2); - shuf1.store(reinterpret_cast(p)); - shuf2.store(reinterpret_cast(p + 4)); - - const auto shuf3 = this->apply_lookup_16_to(tb3); - const auto shuf4 = this->apply_lookup_16_to(tb4); - shuf3.store(reinterpret_cast(p + 8)); - shuf4.store(reinterpret_cast(p + 12)); - } - // Conversion from/to SIMD register - simdutf_really_inline simd8(const __m128i _value) : value(_value) {} - simdutf_really_inline operator const __m128i &() const { return this->value; } - - simdutf_really_inline operator const __m128i() const { return this->value; } - - simdutf_really_inline operator __m128i &() { return this->value; } - - // Zero constructor - simdutf_really_inline simd8() : simd8(zero()) {} - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - - simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8((__m128i)v16i8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15}) {} - - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t 
v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Store to array - simdutf_really_inline void store(int8_t dst[16]) const { - return __lsx_vst(value, dst, 0); - } - - simdutf_really_inline operator simd8() const { - return ((__m128i)this->value); - } - - simdutf_really_inline simd8 - operator|(const simd8 other) const { - return __lsx_vor_v((__m128i)value, (__m128i)other.value); - } - simdutf_really_inline simd8 - operator&(const simd8 other) const { - return __lsx_vand_v((__m128i)value, (__m128i)other.value); - } - simdutf_really_inline simd8 - operator^(const simd8 other) const { - return __lsx_vxor_v((__m128i)value, (__m128i)other.value); - } - simdutf_really_inline simd8 - bit_andnot(const simd8 other) const { - return __lsx_vandn_v((__m128i)other.value, (__m128i)value); - } - - // Math - simdutf_really_inline simd8 - operator+(const simd8 other) const { - return __lsx_vadd_b((__m128i)value, (__m128i)other.value); - } - simdutf_really_inline simd8 - operator-(const simd8 other) const { - return __lsx_vsub_b((__m128i)value, (__m128i)other.value); - } - simdutf_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *this; - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *this; - } - - simdutf_really_inline bool is_ascii() const { - return (__lsx_vpickve2gr_hu(__lsx_vmskgez_b((__m128i)this->value), 0) == - 0xffff); - } - - // Order-sensitive comparisons - simdutf_really_inline simd8 max_val(const simd8 other) const { - return __lsx_vmax_b((__m128i)value, (__m128i)other.value); - } - simdutf_really_inline simd8 min_val(const simd8 other) const { - return __lsx_vmin_b((__m128i)value, (__m128i)other.value); - } - simdutf_really_inline simd8 operator>(const simd8 other) const { - 
return __lsx_vslt_b((__m128i)other.value, (__m128i)value); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return __lsx_vslt_b((__m128i)value, (__m128i)other.value); - } - simdutf_really_inline simd8 - operator==(const simd8 other) const { - return __lsx_vseq_b((__m128i)value, (__m128i)other.value); - } - - template - simdutf_really_inline simd8 - prev(const simd8 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(this->value, N), - __lsx_vbsrl_v(prev_chunk.value, 16 - N)); - } - - // Perform a lookup assuming no value is larger than 16 - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } - - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); - return __lsx_vshuf_b(__lsx_vldi(0), (__m128i)this->value, - simd8(original_tmp)); - } -}; - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert( - NUM_CHUNKS == 4, - "LoongArch kernel should use four registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, 
chunk2, chunk3} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - this->chunks[2] |= other.chunks[2]; - this->chunks[3] |= other.chunks[3]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - this->chunks[2].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 2); - this->chunks[3].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 3); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); - this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); - this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[3]), 6); - mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[2]), 4)); - mask = 
__lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[1]), 2)); - mask = __lsx_vor_v(mask, __lsx_vmsknz_b(this->chunks[0])); - return __lsx_vpickve2gr_du(mask, 0); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - - return simd8x64( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), - (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), - (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - return simd8x64( - (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), - (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), - (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), - (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, - this->chunks[2] > 
mask, this->chunks[3] > mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask, - this->chunks[2] >= mask, this->chunks[3] >= mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(simd8(this->chunks[0].value) >= mask, - simd8(this->chunks[1].value) >= mask, - simd8(this->chunks[2].value) >= mask, - simd8(this->chunks[3].value) >= mask) - .to_bitmask(); - } -}; // struct simd8x64 -/* begin file src/simdutf/lsx/simd16-inl.h */ -template struct simd16; - -template > struct base_u16 { - __m128i value; - static const int SIZE = sizeof(value); - - // Conversion from/to SIMD register - simdutf_really_inline base_u16() = default; - simdutf_really_inline base_u16(const __m128i _value) : value(_value) {} - // Bit operations - simdutf_really_inline simd16 operator|(const simd16 other) const { - return __lsx_vor_v(this->value, other.value); - } - simdutf_really_inline simd16 operator&(const simd16 other) const { - return __lsx_vand_v(this->value, other.value); - } - simdutf_really_inline simd16 operator^(const simd16 other) const { - return __lsx_vxor_v(this->value, other.value); - } - simdutf_really_inline simd16 bit_andnot(const simd16 other) const { - return __lsx_vandn_v(this->value, other.value); - } - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } - simdutf_really_inline simd16 &operator|=(const simd16 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdutf_really_inline simd16 &operator&=(const simd16 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdutf_really_inline simd16 &operator^=(const simd16 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast ^ other; - 
return *this_cast; - } - - friend simdutf_really_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return __lsx_vseq_h(lhs.value, rhs.value); - } - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), - __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); - } -}; - -template > -struct base16 : base_u16 { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdutf_really_inline base16() : base_u16() {} - simdutf_really_inline base16(const __m128i _value) : base_u16(_value) {} - template - simdutf_really_inline base16(const Pointer *ptr) - : base16(__lsx_vld(ptr, 0)) {} - - static const int SIZE = sizeof(base_u16::value); - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), - __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return __lsx_vreplgr2vr_h(uint16_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} -}; - -template struct base16_numeric : base16 { - static simdutf_really_inline simd16 splat(T _value) { - return __lsx_vreplgr2vr_h(_value); - } - static simdutf_really_inline simd16 zero() { return __lsx_vldi(0); } - static simdutf_really_inline simd16 load(const T values[8]) { - return __lsx_vld(reinterpret_cast(values), 0); - } - - simdutf_really_inline base16_numeric() : base16() {} - simdutf_really_inline base16_numeric(const __m128i _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return __lsx_vst(this->value, dst, 0); - } - - // Override to distinguish from bool version - simdutf_really_inline 
simd16 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd16 operator+(const simd16 other) const { - return __lsx_vadd_b(*this, other); - } - simdutf_really_inline simd16 operator-(const simd16 other) const { - return __lsx_vsub_b(*this, other); - } - simdutf_really_inline simd16 &operator+=(const simd16 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdutf_really_inline simd16 &operator-=(const simd16 other) { - *this = *this - other; - return *static_cast *>(this); - } -}; - -// Signed code unitstemplate<> -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m128i _value) - : base16_numeric(_value) {} - simdutf_really_inline simd16(simd16 other) - : base16_numeric(other.value) {} - - // Splat constructor - simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - simdutf_really_inline operator simd16() const; - - // Order-sensitive comparisons - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return __lsx_vmax_h(this->value, other.value); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return __lsx_vmin_h(this->value, other.value); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return __lsx_vsle_h(other.value, this->value); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return __lsx_vslt_h(this->value, other.value); - } -}; - -// Unsigned code unitstemplate<> -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m128i _value) - : base16_numeric((__m128i)_value) 
{} - simdutf_really_inline simd16(simd16 other) - : base16_numeric(other.value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - // Saturated math - simdutf_really_inline simd16 - saturating_add(const simd16 other) const { - return __lsx_vsadd_hu(this->value, other.value); - } - simdutf_really_inline simd16 - saturating_sub(const simd16 other) const { - return __lsx_vssub_hu(this->value, other.value); - } - - // Order-specific operations - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return __lsx_vmax_hu(this->value, other.value); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return __lsx_vmin_hu(this->value, other.value); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - gt_bits(const simd16 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - lt_bits(const simd16 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd16 - operator<=(const simd16 other) const { - return __lsx_vsle_hu(this->value, other.value); - } - simdutf_really_inline simd16 - operator>=(const simd16 other) const { - return __lsx_vsle_hu(other.value, this->value); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return __lsx_vslt_hu(other.value, this->value); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return __lsx_vslt_hu(this->value, other.value); - } - - // Bit-specific operations - simdutf_really_inline simd16 bits_not_set() const { - return *this == uint16_t(0); - } - template simdutf_really_inline simd16 shr() 
const { - return simd16(__lsx_vsrli_h(this->value, N)); - } - template simdutf_really_inline simd16 shl() const { - return simd16(__lsx_vslli_h(this->value, N)); - } - - // logical operations - simdutf_really_inline simd16 - operator|(const simd16 other) const { - return __lsx_vor_v(this->value, other.value); - } - simdutf_really_inline simd16 - operator&(const simd16 other) const { - return __lsx_vand_v(this->value, other.value); - } - simdutf_really_inline simd16 - operator^(const simd16 other) const { - return __lsx_vxor_v(this->value, other.value); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - return __lsx_vssrlni_bu_h(v1.value, v0.value, 0); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - return __lsx_vshuf4i_b(this->value, 0b10110001); - } -}; - -simdutf_really_inline simd16::operator simd16() const { - return this->value; -} - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert( - NUM_CHUNKS == 4, - "LOONGARCH kernel should use four registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline - simd16x32(const simd16 chunk0, const simd16 chunk1, - const simd16 chunk2, const simd16 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - 
this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); - } - - simdutf_really_inline simd16 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } - - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); - this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); - this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); - this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b((this->chunks[3]).value), 6); - mask = __lsx_vor_v( - mask, __lsx_vbsll_v(__lsx_vmsknz_b((this->chunks[2]).value), 4)); - mask = __lsx_vor_v( - mask, __lsx_vbsll_v(__lsx_vmsknz_b((this->chunks[1]).value), 2)); - mask = __lsx_vor_v(mask, __lsx_vmsknz_b((this->chunks[0]).value)); - return __lsx_vpickve2gr_du(mask, 0); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - this->chunks[2] = this->chunks[2].swap_bytes(); - this->chunks[3] = this->chunks[3].swap_bytes(); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - 
const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - - return simd16x32( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), - (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), - (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - return simd16x32( - (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), - (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), - (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), - (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } - -}; // struct simd16x32 - -template <> -simdutf_really_inline uint64_t simd16x32::not_in_range( - const uint16_t low, const uint16_t high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - simd16x32 x(simd16((this->chunks[0] > mask_high) | - (this->chunks[0] < mask_low)), - simd16((this->chunks[1] > mask_high) | - (this->chunks[1] < mask_low)), - simd16((this->chunks[2] > mask_high) | - (this->chunks[2] < mask_low)), - simd16((this->chunks[3] > mask_high) | - (this->chunks[3] < mask_low))); - return x.to_bitmask(); -} -/* end file src/simdutf/lsx/simd16-inl.h */ -} // namespace simd -} // unnamed namespace -} // namespace lsx -} // namespace simdutf - -#endif // SIMDUTF_LSX_SIMD_H -/* end file src/simdutf/lsx/simd.h */ - -/* begin file src/simdutf/lsx/end.h */ -/* end file src/simdutf/lsx/end.h */ - -#endif // 
SIMDUTF_IMPLEMENTATION_LSX - -#endif // SIMDUTF_LSX_H -/* end file src/simdutf/lsx.h */ -/* begin file src/simdutf/lasx.h */ -#ifndef SIMDUTF_LASX_H -#define SIMDUTF_LASX_H - -#ifdef SIMDUTF_FALLBACK_H - #error "lasx.h must be included before fallback.h" -#endif - - -#ifndef SIMDUTF_IMPLEMENTATION_LASX - #define SIMDUTF_IMPLEMENTATION_LASX (SIMDUTF_IS_LASX) -#endif -#if SIMDUTF_IMPLEMENTATION_LASX && SIMDUTF_IS_LASX - #define SIMDUTF_CAN_ALWAYS_RUN_LASX 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_LASX 0 -#endif - -#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) - -#if SIMDUTF_IMPLEMENTATION_LASX - -namespace simdutf { -/** - * Implementation for LoongArch ASX. - */ -namespace lasx {} // namespace lasx -} // namespace simdutf - -/* begin file src/simdutf/lasx/implementation.h */ -#ifndef SIMDUTF_LASX_IMPLEMENTATION_H -#define SIMDUTF_LASX_IMPLEMENTATION_H - - -namespace simdutf { -namespace lasx { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("lasx", "LOONGARCH ASX", - internal::instruction_set::LSX | - internal::instruction_set::LASX) {} - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const 
char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - 
simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const 
char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) 
const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const 
noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char *input, size_t length, char *output, - base64_options options) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options) const noexcept; - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; - - simdutf_warn_unused virtual result - base64_to_binary(const char *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - 
last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused virtual full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; -}; - -} // namespace lasx -} // namespace simdutf - -#endif // SIMDUTF_LASX_IMPLEMENTATION_H -/* end file src/simdutf/lasx/implementation.h */ - -/* begin file src/simdutf/lasx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lasx" -// #define SIMDUTF_IMPLEMENTATION lasx -/* end file src/simdutf/lasx/begin.h */ - - // Declarations -/* begin file src/simdutf/lasx/intrinsics.h */ -#ifndef SIMDUTF_LASX_INTRINSICS_H -#define SIMDUTF_LASX_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include -#include - -#if defined(__loongarch_asx) - #ifdef __clang__ - #define VREGS_PREFIX "$vr" - #define XREGS_PREFIX "$xr" - #else // GCC - #define VREGS_PREFIX "$f" - #define XREGS_PREFIX "$f" - #endif - #define __ALL_REGS \ - "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26," \ - "27,28,29,30,31" -// Convert __m128i to __m256i -static inline __m256i ____m256i(__m128i in) { - __m256i out = __lasx_xvldi(0); - __asm__ volatile(".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " XREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[in], " VREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - : [out] "+f"(out) - : [in] "f"(in)); - return out; -} -// Convert two __m128i to __m256i -static inline __m256i lasx_set_q(__m128i inhi, __m128i inlo) { - __m256i out; - __asm__ volatile(".irp i," __ALL_REGS "\n\t" - " .ifc %[hi], " VREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[lo], " VREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x20 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr 
\n\t" - ".ifnc %[out], %[hi] \n\t" - ".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " XREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[hi], " VREGS_PREFIX "\\j \n\t" - " xvori.b $xr\\i, $xr\\j, 0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".endif \n\t" - : [out] "=f"(out), [hi] "+f"(inhi) - : [lo] "f"(inlo)); - return out; -} -// Convert __m256i low part to __m128i -static inline __m128i lasx_extracti128_lo(__m256i in) { - __m128i out; - __asm__ volatile(".ifnc %[out], %[in] \n\t" - ".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " VREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[in], " XREGS_PREFIX "\\j \n\t" - " vori.b $vr\\i, $vr\\j, 0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".endif \n\t" - : [out] "=f"(out) - : [in] "f"(in)); - return out; -} -// Convert __m256i high part to __m128i -static inline __m128i lasx_extracti128_hi(__m256i in) { - __m128i out; - __asm__ volatile(".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " VREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[in], " XREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x11 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - : [out] "=f"(out) - : [in] "f"(in)); - return out; -} -#endif - -#endif // SIMDUTF_LASX_INTRINSICS_H -/* end file src/simdutf/lasx/intrinsics.h */ -/* begin file src/simdutf/lasx/bitmanipulation.h */ -#ifndef SIMDUTF_LASX_BITMANIPULATION_H -#define SIMDUTF_LASX_BITMANIPULATION_H - -#include - -namespace simdutf { -namespace lasx { -namespace { - -simdutf_really_inline int count_ones(uint64_t input_num) { - return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); -} - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_really_inline int trailing_zeroes(uint64_t input_num) { - return __builtin_ctzll(input_num); -} -#endif - -} // unnamed namespace -} // namespace lasx -} // namespace simdutf - -#endif // SIMDUTF_LASX_BITMANIPULATION_H -/* end file 
src/simdutf/lasx/bitmanipulation.h */ -/* begin file src/simdutf/lasx/simd.h */ -#ifndef SIMDUTF_LASX_SIMD_H -#define SIMDUTF_LASX_SIMD_H - -#include - -namespace simdutf { -namespace lasx { -namespace { -namespace simd { - -__attribute__((aligned(32))) static const uint8_t prev_shuf_table[32][32] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, - {0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, - {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, - {0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, - {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, - 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, - 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1}, - {0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0}, - {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0}, - {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0}, - {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0}, - {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0}, - {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0}, - {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0}, - {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15}, -}; - -__attribute__((aligned(32))) static const uint8_t bitsel_mask_table[32][32] = { - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, - 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0}}; - -// Forward-declared 
so they can be used by splat and friends. -template struct base { - __m256i value; - - // Zero constructor - simdutf_really_inline base() : value{__m256i()} {} - - // Conversion from SIMD register - simdutf_really_inline base(const __m256i _value) : value(_value) {} - // Conversion to SIMD register - simdutf_really_inline operator const __m256i &() const { return this->value; } - simdutf_really_inline operator __m256i &() { return this->value; } - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - if (big_endian) { - __m256i zero = __lasx_xvldi(0); - __m256i in8 = __lasx_xvpermi_d(this->value, 0b11011000); - __m256i inlow = __lasx_xvilvl_b(in8, zero); - __m256i inhigh = __lasx_xvilvh_b(in8, zero); - __lasx_xvst(inlow, reinterpret_cast(ptr), 0); - __lasx_xvst(inhigh, reinterpret_cast(ptr), 32); - } else { - __m256i inlow = __lasx_vext2xv_hu_bu(this->value); - __m256i inhigh = __lasx_vext2xv_hu_bu( - __lasx_xvpermi_q(this->value, this->value, 0b00000001)); - __lasx_xvst(inlow, reinterpret_cast<__m256i *>(ptr), 0); - __lasx_xvst(inhigh, reinterpret_cast<__m256i *>(ptr), 32); - } - } - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - __m256i in32_0 = __lasx_vext2xv_wu_bu(this->value); - __lasx_xvst(in32_0, reinterpret_cast(ptr), 0); - - __m256i in8_1 = __lasx_xvpermi_d(this->value, 0b00000001); - __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); - __lasx_xvst(in32_1, reinterpret_cast(ptr), 32); - - __m256i in8_2 = __lasx_xvpermi_d(this->value, 0b00000010); - __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); - __lasx_xvst(in32_2, reinterpret_cast(ptr), 64); - - __m256i in8_3 = __lasx_xvpermi_d(this->value, 0b00000011); - __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); - __lasx_xvst(in32_3, reinterpret_cast(ptr), 96); - } - // Bit operations - simdutf_really_inline Child operator|(const Child other) const { - return __lasx_xvor_v(this->value, other); - } - simdutf_really_inline Child operator&(const Child other) const { - 
return __lasx_xvand_v(this->value, other); - } - simdutf_really_inline Child operator^(const Child other) const { - return __lasx_xvxor_v(this->value, other); - } - simdutf_really_inline Child bit_andnot(const Child other) const { - return __lasx_xvandn_v(this->value, other); - } - simdutf_really_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdutf_really_inline Child &operator&=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdutf_really_inline Child &operator^=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } -}; - -template struct simd8; - -template > -struct base8 : base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; - - simdutf_really_inline base8() : base>() {} - simdutf_really_inline base8(const __m256i _value) : base>(_value) {} - simdutf_really_inline T first() const { - return __lasx_xvpickve2gr_wu(this->value, 0); - } - simdutf_really_inline T last() const { - return __lasx_xvpickve2gr_wu(this->value, 7); - } - friend simdutf_really_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return __lasx_xvseq_b(lhs, rhs); - } - - static const int SIZE = sizeof(base::value); - - template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - if (!N) - return this->value; - - __m256i zero = __lasx_xvldi(0); - __m256i result, shuf; - if (N < 16) { - shuf = __lasx_xvld(prev_shuf_table[N], 0); - - result = __lasx_xvshuf_b( - __lasx_xvpermi_q(this->value, this->value, 0b00000001), this->value, - shuf); - __m256i srl_prev = __lasx_xvbsrl_v( - __lasx_xvpermi_q(zero, prev_chunk.value, 0b00110001), (16 - N)); - __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); - result = __lasx_xvbitsel_v(result, srl_prev, mask); - - return result; - } else if (N == 16) { - return 
__lasx_xvpermi_q(this->value, prev_chunk.value, 0b00100001); - } /*else { - __m256i sll_value = __lasx_xvbsll_v( - __lasx_xvpermi_q(zero, this->value, 0b00000011), (N - 16) % 32); - __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); - shuf = __lasx_xvld(prev_shuf_table[N], 0); - result = __lasx_xvshuf_b( - __lasx_xvpermi_q(prev_chunk.value, prev_chunk.value, 0b00000001), - prev_chunk.value, shuf); - result = __lasx_xvbitsel_v(sll_value, result, mask); - return result; - }*/ - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdutf_really_inline simd8 splat(bool _value) { - return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8() : base8() {} - simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} - - simdutf_really_inline uint32_t to_bitmask() const { - __m256i mask = __lasx_xvmsknz_b(this->value); - uint32_t mask0 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t mask1 = __lasx_xvpickve2gr_wu(mask, 4); - return (mask0 | (mask1 << 16)); - } - simdutf_really_inline bool any() const { - if (__lasx_xbz_b(this->value)) - return false; - return true; - } - simdutf_really_inline bool none() const { - if (__lasx_xbz_b(this->value)) - return true; - return false; - } - simdutf_really_inline bool all() const { - if (__lasx_xbnz_b(this->value)) - return true; - return false; - } - simdutf_really_inline simd8 operator~() const { return *this ^ true; } -}; - -template struct base8_numeric : base8 { - static simdutf_really_inline simd8 splat(T _value) { - return __lasx_xvreplgr2vr_b(_value); - } - static simdutf_really_inline simd8 zero() { return __lasx_xvldi(0); } - static simdutf_really_inline simd8 load(const T values[32]) { - return __lasx_xvld(reinterpret_cast(values), 0); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static 
simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15); - } - - simdutf_really_inline base8_numeric() : base8() {} - simdutf_really_inline base8_numeric(const __m256i _value) - : base8(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[32]) const { - return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); - } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd8 operator+(const simd8 other) const { - return __lasx_xvadd_b(this->value, other); - } - simdutf_really_inline simd8 operator-(const simd8 other) const { - return __lasx_xvsub_b(this->value, other); - } - simdutf_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *static_cast *>(this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - __m256i origin = __lasx_xvand_v(this->value, __lasx_xvldi(0x1f)); - return __lasx_xvshuf_b(__lasx_xvldi(0), lookup_table, origin); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, 
replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} - simdutf_really_inline operator simd8() const; - // Member-by-member initialization - simdutf_really_inline - simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, - int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, - int8_t v30, int8_t v31) - : simd8((__m256i)v32i8{v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, v15, - v16, v17, v18, v19, v20, v21, v22, v23, - v24, v25, v26, v27, v28, v29, v30, v31}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15); - } - simdutf_really_inline bool is_ascii() const { - __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); - if (__lasx_xbnz_v(ascii_mask)) - return false; - return true; - } - // Order-sensitive comparisons - simdutf_really_inline simd8 max_val(const simd8 other) const { - return __lasx_xvmax_b(this->value, other); - } - simdutf_really_inline simd8 
min_val(const simd8 other) const { - return __lasx_xvmin_b(this->value, other); - } - simdutf_really_inline simd8 operator>(const simd8 other) const { - return __lasx_xvslt_b(other, this->value); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return __lasx_xvslt_b(this->value, other); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, - uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, - uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, - uint8_t v31) - : simd8((__m256i)v32u8{v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, v15, - v16, v17, v18, v19, v20, v21, v22, v23, - v24, v25, v26, v27, v28, v29, v30, v31}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15); - } - - // Saturated math - simdutf_really_inline simd8 - saturating_add(const simd8 other) const { - return 
__lasx_xvsadd_bu(this->value, other); - } - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return __lasx_xvssub_bu(this->value, other); - } - - // Order-specific operations - simdutf_really_inline simd8 - max_val(const simd8 other) const { - return __lasx_xvmax_bu(*this, other); - } - simdutf_really_inline simd8 - min_val(const simd8 other) const { - return __lasx_xvmin_bu(*this, other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd8 - operator<=(const simd8 other) const { - return __lasx_xvsle_bu(*this, other); - } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return __lasx_xvsle_bu(other, *this); - } - simdutf_really_inline simd8 - operator>(const simd8 other) const { - return __lasx_xvslt_bu(*this, other); - } - simdutf_really_inline simd8 - operator<(const simd8 other) const { - return __lasx_xvslt_bu(other, *this); - } - - // Bit-specific operations - simdutf_really_inline simd8 bits_not_set() const { - return *this == uint8_t(0); - } - simdutf_really_inline simd8 bits_not_set(simd8 bits) const { - return (*this & bits).bits_not_set(); - } - simdutf_really_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdutf_really_inline simd8 any_bits_set(simd8 bits) const { - return ~this->bits_not_set(bits); - } - simdutf_really_inline bool is_ascii() const { - __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); - if (__lasx_xbnz_v(ascii_mask)) - return false; - return true; - } - simdutf_really_inline bool any_bits_set_anywhere() const { - if (__lasx_xbnz_v(this->value)) - return true; - return false; - } - simdutf_really_inline bool 
any_bits_set_anywhere(simd8 bits) const { - return (*this & bits).any_bits_set_anywhere(); - } - template simdutf_really_inline simd8 shr() const { - return __lasx_xvsrli_b(this->value, N); - } - template simdutf_really_inline simd8 shl() const { - return __lasx_xvslli_b(this->value, N); - } -}; -simdutf_really_inline simd8::operator simd8() const { - return this->value; -} - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, - "LASX kernel should use two registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) - : chunks{chunk0, chunk1} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - } - - 
simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); - } - - simdutf_really_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] | mask, this->chunks[1] | mask); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1]) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - - return simd8x64( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - return simd8x64( - (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), - (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) - .to_bitmask(); - } - simdutf_really_inline 
uint64_t gteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), - (simd8(__m256i(this->chunks[1])) >= mask)) - .to_bitmask(); - } -}; // struct simd8x64 - -/* begin file src/simdutf/lasx/simd16-inl.h */ -template struct simd16; - -template > -struct base16 : base> { - using bitmask_type = uint32_t; - - simdutf_really_inline base16() : base>() {} - simdutf_really_inline base16(const __m256i _value) - : base>(_value) {} - template - simdutf_really_inline base16(const Pointer *ptr) - : base16(__lasx_xvld(reinterpret_cast(ptr), 0)) {} - friend simdutf_really_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return __lasx_xvseq_h(lhs.value, rhs.value); - } - - /// the size of vector in bytes - static const int SIZE = sizeof(base>::value); - - /// the number of elements of type T a vector can hold - static const int ELEMENTS = SIZE / sizeof(T); - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - if (!N) - return this->value; - - __m256i zero = __lasx_xvldi(0); - __m256i result, shuf; - if (N < 8) { - shuf = __lasx_xvld(prev_shuf_table[N * 2], 0); - - result = __lasx_xvshuf_b( - __lasx_xvpermi_q(this->value, this->value, 0b00000001), this->value, - shuf); - __m256i srl_prev = __lasx_xvbsrl_v( - __lasx_xvpermi_q(zero, prev_chunk, 0b00110001), (16 - N * 2)); - __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); - result = __lasx_xvbitsel_v(result, srl_prev, mask); - - return result; - } else if (N == 8) { - return __lasx_xvpermi_q(this->value, prev_chunk, 0b00100001); - } else { - __m256i sll_value = __lasx_xvbsll_v( - __lasx_xvpermi_q(zero, this->value, 0b00000011), (N * 2 - 16)); - __m256i mask = __lasx_xvld(bitsel_mask_table[N * 2], 0); - shuf = 
__lasx_xvld(prev_shuf_table[N * 2], 0); - result = - __lasx_xvshuf_b(__lasx_xvpermi_q(prev_chunk, prev_chunk, 0b00000001), - prev_chunk, shuf); - result = __lasx_xvbitsel_v(sll_value, result, mask); - return result; - } - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return __lasx_xvreplgr2vr_h(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} - - simdutf_really_inline bitmask_type to_bitmask() const { - __m256i mask = __lasx_xvmsknz_b(this->value); - bitmask_type mask0 = __lasx_xvpickve2gr_wu(mask, 0); - bitmask_type mask1 = __lasx_xvpickve2gr_wu(mask, 4); - return (mask0 | (mask1 << 16)); - } - simdutf_really_inline bool any() const { - if (__lasx_xbz_v(this->value)) - return false; - return true; - } - simdutf_really_inline simd16 operator~() const { return *this ^ true; } -}; - -template struct base16_numeric : base16 { - static simdutf_really_inline simd16 splat(T _value) { - return __lasx_xvreplgr2vr_h((uint16_t)_value); - } - static simdutf_really_inline simd16 zero() { return __lasx_xvldi(0); } - static simdutf_really_inline simd16 load(const T values[8]) { - return __lasx_xvld(reinterpret_cast(values), 0); - } - - simdutf_really_inline base16_numeric() : base16() {} - simdutf_really_inline base16_numeric(const __m256i _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd16 operator+(const simd16 other) const 
{ - return __lasx_xvadd_h(*this, other); - } - simdutf_really_inline simd16 operator-(const simd16 other) const { - return __lasx_xvsub_h(*this, other); - } - simdutf_really_inline simd16 &operator+=(const simd16 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdutf_really_inline simd16 &operator-=(const simd16 other) { - *this = *this - other; - return *static_cast *>(this); - } -}; - -// Signed code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m256i _value) - : base16_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - // Order-sensitive comparisons - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return __lasx_xvmax_h(*this, other); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return __lasx_xvmin_h(*this, other); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return __lasx_xvsle_h(other.value, this->value); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return __lasx_xvslt_h(this->value, other.value); - } -}; - -// Unsigned code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m256i _value) - : base16_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - // Saturated math - simdutf_really_inline 
simd16 - saturating_add(const simd16 other) const { - return __lasx_xvsadd_hu(this->value, other.value); - } - simdutf_really_inline simd16 - saturating_sub(const simd16 other) const { - return __lasx_xvssub_hu(this->value, other.value); - } - - // Order-specific operations - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return __lasx_xvmax_hu(this->value, other.value); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return __lasx_xvmin_hu(this->value, other.value); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - gt_bits(const simd16 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - lt_bits(const simd16 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd16 - operator<=(const simd16 other) const { - return __lasx_xvsle_hu(this->value, other.value); - } - simdutf_really_inline simd16 - operator>=(const simd16 other) const { - return __lasx_xvsle_hu(other.value, this->value); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return __lasx_xvslt_hu(other.value, this->value); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return __lasx_xvslt_hu(this->value, other.value); - } - - // Bit-specific operations - simdutf_really_inline simd16 bits_not_set() const { - return *this == uint16_t(0); - } - simdutf_really_inline simd16 bits_not_set(simd16 bits) const { - return (*this & bits).bits_not_set(); - } - simdutf_really_inline simd16 any_bits_set() const { - return ~this->bits_not_set(); - } - simdutf_really_inline simd16 any_bits_set(simd16 bits) const { - return ~this->bits_not_set(bits); - } - - simdutf_really_inline bool any_bits_set_anywhere() const { - if (__lasx_xbnz_v(this->value)) - return true; - return false; - } - simdutf_really_inline bool - 
any_bits_set_anywhere(simd16 bits) const { - return (*this & bits).any_bits_set_anywhere(); - } - - template simdutf_really_inline simd16 shr() const { - return simd16(__lasx_xvsrli_h(this->value, N)); - } - template simdutf_really_inline simd16 shl() const { - return simd16(__lasx_xvslli_h(this->value, N)); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - return __lasx_xvshuf4i_b(this->value, 0b10110001); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - return __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(v1.value, v0.value, 0), - 0b11011000); - } -}; - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 2, - "LASX kernel should use two registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline simd16x32(const simd16 chunk0, - const simd16 chunk1) - : chunks{chunk0, chunk1} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } - - simdutf_really_inline simd16 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - simdutf_really_inline void 
store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); - this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16)); - } - - simdutf_really_inline simd16x32 bit_or(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] | mask, this->chunks[1] | mask); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - } - - simdutf_really_inline uint64_t eq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t eq(const simd16x32 &other) const { - return simd16x32(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1]) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - - return simd16x32( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(static_cast(low - 1)); - const simd16 mask_high = simd16::splat(static_cast(high + 1)); - return simd16x32( - (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), - (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low)) - .to_bitmask(); - } - simdutf_really_inline uint64_t lt(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask) - .to_bitmask(); - } -}; // struct simd16x32 
-/* end file src/simdutf/lasx/simd16-inl.h */ -} // namespace simd -} // unnamed namespace -} // namespace lasx -} // namespace simdutf - -#endif // SIMDUTF_LASX_SIMD_H -/* end file src/simdutf/lasx/simd.h */ - -/* begin file src/simdutf/lasx/end.h */ -/* end file src/simdutf/lasx/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_LASX - -#endif // SIMDUTF_LASX_H -/* end file src/simdutf/lasx.h */ -/* begin file src/simdutf/fallback.h */ -#ifndef SIMDUTF_FALLBACK_H -#define SIMDUTF_FALLBACK_H - - -// Note that fallback.h is always imported last. - -// Default Fallback to on unless a builtin implementation has already been -// selected. -#ifndef SIMDUTF_IMPLEMENTATION_FALLBACK - #if SIMDUTF_CAN_ALWAYS_RUN_ARM64 || SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || \ - SIMDUTF_CAN_ALWAYS_RUN_HASWELL || SIMDUTF_CAN_ALWAYS_RUN_WESTMERE || \ - SIMDUTF_CAN_ALWAYS_RUN_PPC64 || SIMDUTF_CAN_ALWAYS_RUN_RVV || \ - SIMDUTF_CAN_ALWAYS_RUN_LSX || SIMDUTF_CAN_ALWAYS_RUN_LASX - #define SIMDUTF_IMPLEMENTATION_FALLBACK 0 - #else - #define SIMDUTF_IMPLEMENTATION_FALLBACK 1 - #endif -#endif - -#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) - -#if SIMDUTF_IMPLEMENTATION_FALLBACK - -namespace simdutf { -/** - * Fallback implementation (runs on any machine). 
- */ -namespace fallback {} // namespace fallback -} // namespace simdutf - -/* begin file src/simdutf/fallback/implementation.h */ -#ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H -#define SIMDUTF_FALLBACK_IMPLEMENTATION_H - - -namespace simdutf { -namespace fallback { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("fallback", "Generic fallback implementation", - 0) {} - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, 
size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - 
convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char 
*latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - 
char32_t *utf32_buffer) const noexcept final; - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t length) const noexcept; - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t length) const noexcept; - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - 
simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused result base64_to_binary( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept; - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -}; -} // namespace fallback -} // namespace simdutf - -#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H -/* end file src/simdutf/fallback/implementation.h */ - -/* begin file src/simdutf/fallback/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "fallback" -// #define SIMDUTF_IMPLEMENTATION fallback -/* end file src/simdutf/fallback/begin.h */ - - // Declarations -/* begin file src/simdutf/fallback/bitmanipulation.h */ -#ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H -#define SIMDUTF_FALLBACK_BITMANIPULATION_H - -#include - -namespace simdutf { -namespace fallback { -namespace {} // unnamed namespace -} // namespace fallback -} // namespace simdutf - -#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H -/* end file src/simdutf/fallback/bitmanipulation.h */ - -/* begin file 
src/simdutf/fallback/end.h */ -/* end file src/simdutf/fallback/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_FALLBACK -#endif // SIMDUTF_FALLBACK_H -/* end file src/simdutf/fallback.h */ - -/* begin file src/scalar/utf8.h */ -#ifndef SIMDUTF_UTF8_H -#define SIMDUTF_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8 { -#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_RVV -// only used by the fallback kernel. -// credit: based on code from Google Fuchsia (Apache Licensed) -inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 16 bytes are ascii. - uint64_t next_pos = pos + 16; - if (next_pos <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; - - while (byte < 0b10000000) { - if (++pos == len) { - return true; - } - byte = data[pos]; - } - - if ((byte & 0b11100000) == 0b11000000) { - next_pos = pos + 2; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if ((code_point < 0x80) || (0x7ff < code_point)) { - return false; - } - } else if ((byte & 0b11110000) == 0b11100000) { - next_pos = pos + 3; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - 
if ((code_point < 0x800) || (0xffff < code_point) || - (0xd7ff < code_point && code_point < 0xe000)) { - return false; - } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return false; - } - } else { - // we may have a continuation - return false; - } - pos = next_pos; - } - return true; -} -#endif - -inline simdutf_warn_unused result validate_with_errors(const char *buf, - size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 16 bytes are ascii. 
- size_t next_pos = pos + 16; - if (next_pos <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; - - while (byte < 0b10000000) { - if (++pos == len) { - return result(error_code::SUCCESS, len); - } - byte = data[pos]; - } - - if ((byte & 0b11100000) == 0b11000000) { - next_pos = pos + 2; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if ((code_point < 0x80) || (0x7ff < code_point)) { - return result(error_code::OVERLONG, pos); - } - } else if ((byte & 0b11110000) == 0b11100000) { - next_pos = pos + 3; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point)) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); 
- } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } else { - return result(error_code::HEADER_BITS, pos); - } - } - pos = next_pos; - } - return result(error_code::SUCCESS, len); -} - -// Finds the previous leading byte starting backward from buf and validates with -// errors from there Used to pinpoint the location of an error when an invalid -// chunk is detected We assume that the stream starts with a leading byte, and -// to check that it is the case, we ask that you pass a pointer to the start of -// the stream (start). -inline simdutf_warn_unused result rewind_and_validate_with_errors( - const char *start, const char *buf, size_t len) noexcept { - // First check that we start with a leading byte - if ((*start & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, 0); - } - size_t extra_len{0}; - // A leading byte cannot be further than 4 bytes away - for (int i = 0; i < 5; i++) { - unsigned char byte = *buf; - if ((byte & 0b11000000) != 0b10000000) { - break; - } else { - buf--; - extra_len++; - } - } - - result res = validate_with_errors(buf, len + extra_len); - res.count -= extra_len; - return res; -} - -inline size_t count_code_points(const char *buf, size_t len) { - const int8_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - // -65 is 0b10111111, anything larger in two-complement's should start a new - // code point. 
- if (p[i] > -65) { - counter++; - } - } - return counter; -} - -inline size_t utf16_length_from_utf8(const char *buf, size_t len) { - const int8_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - if (p[i] > -65) { - counter++; - } - if (uint8_t(p[i]) >= 240) { - counter++; - } - } - return counter; -} - -simdutf_warn_unused inline size_t trim_partial_utf8(const char *input, - size_t length) { - if (length < 3) { - switch (length) { - case 2: - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 2]) >= 0xe0) { - return length - 2; - } // 3- and 4-byte characters with only 2 bytes left - return length; - case 1: - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - return length; - case 0: - return length; - } - } - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 2]) >= 0xe0) { - return length - 2; - } // 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 3]) >= 0xf0) { - return length - 3; - } // 4-byte characters with only 3 bytes left - return length; -} - -} // namespace utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8.h */ -/* begin file src/scalar/utf16.h */ -#ifndef SIMDUTF_UTF16_H -#define SIMDUTF_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16 { - -inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) { - return uint16_t((word >> 8) | (word << 8)); -} - -template -inline simdutf_warn_unused bool validate(const char16_t *data, - size_t len) noexcept { - uint64_t pos = 0; - while (pos < len) { - char16_t word = - !match_system(big_endian) ? 
swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) == 0xD800) { - if (pos + 1 >= len) { - return false; - } - char16_t diff = char16_t(word - 0xD800); - if (diff > 0x3FF) { - return false; - } - char16_t next_word = - !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1]; - char16_t diff2 = char16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return false; - } - pos += 2; - } else { - pos++; - } - } - return true; -} - -template -inline simdutf_warn_unused result validate_with_errors(const char16_t *data, - size_t len) noexcept { - size_t pos = 0; - while (pos < len) { - char16_t word = - !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) == 0xD800) { - if (pos + 1 >= len) { - return result(error_code::SURROGATE, pos); - } - char16_t diff = char16_t(word - 0xD800); - if (diff > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - char16_t next_word = - !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1]; - char16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - pos += 2; - } else { - pos++; - } - } - return result(error_code::SUCCESS, pos); -} - -template -inline size_t count_code_points(const char16_t *p, size_t len) { - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; - counter += ((word & 0xFC00) != 0xDC00); - } - return counter; -} - -template -inline size_t utf8_length_from_utf16(const char16_t *p, size_t len) { - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - char16_t word = !match_system(big_endian) ? 
swap_bytes(p[i]) : p[i]; - counter++; // ASCII - counter += static_cast( - word > - 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes - counter += static_cast((word > 0x7FF && word <= 0xD7FF) || - (word >= 0xE000)); // three-byte - } - return counter; -} - -template -inline size_t utf32_length_from_utf16(const char16_t *p, size_t len) { - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; - counter += ((word & 0xFC00) != 0xDC00); - } - return counter; -} - -inline size_t latin1_length_from_utf16(size_t len) { return len; } - -simdutf_really_inline void -change_endianness_utf16(const char16_t *input, size_t size, char16_t *output) { - for (size_t i = 0; i < size; i++) { - *output++ = char16_t(input[i] >> 8 | input[i] << 8); - } -} - -template -simdutf_warn_unused inline size_t trim_partial_utf16(const char16_t *input, - size_t length) { - if (length <= 1) { - return length; - } - uint16_t last_word = uint16_t(input[length - 1]); - last_word = !match_system(big_endian) ? 
swap_bytes(last_word) : last_word; - length -= ((last_word & 0xFC00) == 0xD800); - return length; -} - -} // namespace utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16.h */ -/* begin file src/scalar/utf32.h */ -#ifndef SIMDUTF_UTF32_H -#define SIMDUTF_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32 { - -inline simdutf_warn_unused bool validate(const char32_t *buf, - size_t len) noexcept { - const uint32_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - for (; pos < len; pos++) { - uint32_t word = data[pos]; - if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { - return false; - } - } - return true; -} - -inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, - size_t len) noexcept { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - for (; pos < len; pos++) { - uint32_t word = data[pos]; - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); - } - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); - } - } - return result(error_code::SUCCESS, pos); -} - -inline size_t utf8_length_from_utf32(const char32_t *buf, size_t len) { - // We are not BOM aware. - const uint32_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - // credit: @ttsugriy for the vectorizable approach - counter++; // ASCII - counter += static_cast(p[i] > 0x7F); // two-byte - counter += static_cast(p[i] > 0x7FF); // three-byte - counter += static_cast(p[i] > 0xFFFF); // four-bytes - } - return counter; -} - -inline size_t utf16_length_from_utf32(const char32_t *buf, size_t len) { - // We are not BOM aware. 
- const uint32_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - counter++; // non-surrogate word - counter += static_cast(p[i] > 0xFFFF); // surrogate pair - } - return counter; -} - -inline size_t latin1_length_from_utf32(size_t len) { - // We are not BOM aware. - return len; // a utf32 codepoint will always represent 1 latin1 character -} - -inline simdutf_warn_unused uint32_t swap_bytes(const uint32_t word) { - return ((word >> 24) & 0xff) | // move byte 3 to byte 0 - ((word << 8) & 0xff0000) | // move byte 1 to byte 2 - ((word >> 8) & 0xff00) | // move byte 2 to byte 1 - ((word << 24) & 0xff000000); // byte 0 to byte 3 -} - -} // namespace utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32.h */ -/* begin file src/scalar/base64.h */ -#ifndef SIMDUTF_BASE64_H -#define SIMDUTF_BASE64_H - -#include -#include -#include -#include - -namespace simdutf { -namespace scalar { -namespace { -namespace base64 { - -// This function is not expected to be fast. Do not use in long loops. -template bool is_ascii_white_space(char_type c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; -} - -template bool is_ascii_white_space_or_padding(char_type c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || - c == '='; -} - -template bool is_eight_byte(char_type c) { - if (sizeof(char_type) == 1) { - return true; - } - return uint8_t(c) == c; -} - -// Returns true upon success. The destination buffer must be large enough. -// This functions assumes that the padding (=) has been removed. -template -full_result -base64_tail_decode(char *dst, const char_type *src, size_t length, - size_t padded_characters, // number of padding characters - // '=', typically 0, 1, 2. 
- base64_options options, - last_chunk_handling_options last_chunk_options) { - // This looks like 5 branches, but we expect the compiler to resolve this to a - // single branch: - const uint8_t *to_base64 = (options & base64_url) - ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - const uint32_t *d0 = (options & base64_url) - ? tables::base64::base64_url::d0 - : tables::base64::base64_default::d0; - const uint32_t *d1 = (options & base64_url) - ? tables::base64::base64_url::d1 - : tables::base64::base64_default::d1; - const uint32_t *d2 = (options & base64_url) - ? tables::base64::base64_url::d2 - : tables::base64::base64_default::d2; - const uint32_t *d3 = (options & base64_url) - ? tables::base64::base64_url::d3 - : tables::base64::base64_default::d3; - - const char_type *srcend = src + length; - const char_type *srcinit = src; - const char *dstinit = dst; - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - - uint32_t x; - size_t idx; - uint8_t buffer[4]; - while (true) { - while (src + 4 <= srcend && is_eight_byte(src[0]) && - is_eight_byte(src[1]) && is_eight_byte(src[2]) && - is_eight_byte(src[3]) && - (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | - d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { - if (match_system(endianness::BIG)) { - x = scalar::utf32::swap_bytes(x); - } - std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes - dst += 3; - src += 4; - } - idx = 0; - // we need at least four characters. -#ifdef __clang__ - // If possible, we read four characters at a time. (It is an optimization.) 
- if (ignore_garbage && src + 4 <= srcend) { - char_type c0 = src[0]; - char_type c1 = src[1]; - char_type c2 = src[2]; - char_type c3 = src[3]; - uint8_t code0 = to_base64[uint8_t(c0)]; - uint8_t code1 = to_base64[uint8_t(c1)]; - uint8_t code2 = to_base64[uint8_t(c2)]; - uint8_t code3 = to_base64[uint8_t(c3)]; - buffer[idx] = code0; - idx += (is_eight_byte(c0) && code0 <= 63); - buffer[idx] = code1; - idx += (is_eight_byte(c1) && code1 <= 63); - buffer[idx] = code2; - idx += (is_eight_byte(c2) && code2 <= 63); - buffer[idx] = code3; - idx += (is_eight_byte(c3) && code3 <= 63); - src += 4; - } -#endif - while ((idx < 4) && (src < srcend)) { - char_type c = *src; - uint8_t code = to_base64[uint8_t(c)]; - buffer[idx] = uint8_t(code); - if (is_eight_byte(c) && code <= 63) { - idx++; - } else if (!ignore_garbage && - (code > 64 || !scalar::base64::is_eight_byte(c))) { - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else { - // We have a space or a newline or garbage. We ignore it. 
- } - src++; - } - if (idx != 4) { - if (!ignore_garbage && - last_chunk_options == last_chunk_handling_options::strict && - (idx != 1) && ((idx + padded_characters) & 3) != 0) { - // The partial chunk was at src - idx - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else if (!ignore_garbage && - last_chunk_options == - last_chunk_handling_options::stop_before_partial && - (idx != 1) && ((idx + padded_characters) & 3) != 0) { - // Rewind src to before partial chunk - src -= idx; - return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; - } else { - if (idx == 2) { - uint32_t triple = - (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xffff)) { - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 1); - } else { - triple = scalar::utf32::swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 1); - } - dst += 1; - } else if (idx == 3) { - uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + - (uint32_t(buffer[1]) << 2 * 6) + - (uint32_t(buffer[2]) << 1 * 6); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xff)) { - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 2); - } else { - triple = scalar::utf32::swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 2); - } - dst += 2; - } else if (!ignore_garbage && idx == 1) { - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; - } - } - - uint32_t triple = - (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + - (uint32_t(buffer[2]) << 1 * 6) + 
(uint32_t(buffer[3]) << 0 * 6); - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 3); - } else { - triple = scalar::utf32::swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 3); - } - dst += 3; - } -} - -// like base64_tail_decode, but it will not write past the end of the output -// buffer. The outlen paramter is modified to reflect the number of bytes -// written. This functions assumes that the padding (=) has been removed. -template -result base64_tail_decode_safe( - char *dst, size_t &outlen, const char_type *&srcr, size_t length, - size_t padded_characters, // number of padding characters '=', typically 0, - // 1, 2. - base64_options options, last_chunk_handling_options last_chunk_options) { - const char_type *src = srcr; - if (length == 0) { - outlen = 0; - return {SUCCESS, 0}; - } - // This looks like 5 branches, but we expect the compiler to resolve this to a - // single branch: - const uint8_t *to_base64 = (options & base64_url) - ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - const uint32_t *d0 = (options & base64_url) - ? tables::base64::base64_url::d0 - : tables::base64::base64_default::d0; - const uint32_t *d1 = (options & base64_url) - ? tables::base64::base64_url::d1 - : tables::base64::base64_default::d1; - const uint32_t *d2 = (options & base64_url) - ? tables::base64::base64_url::d2 - : tables::base64::base64_default::d2; - const uint32_t *d3 = (options & base64_url) - ? 
tables::base64::base64_url::d3 - : tables::base64::base64_default::d3; - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - - const char_type *srcend = src + length; - const char_type *srcinit = src; - const char *dstinit = dst; - const char *dstend = dst + outlen; - - uint32_t x; - size_t idx; - uint8_t buffer[4]; - while (true) { - while (src + 4 <= srcend && is_eight_byte(src[0]) && - is_eight_byte(src[1]) && is_eight_byte(src[2]) && - is_eight_byte(src[3]) && - (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | - d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { - if (dstend - dst < 3) { - outlen = size_t(dst - dstinit); - srcr = src; - return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit)}; - } - if (match_system(endianness::BIG)) { - x = scalar::utf32::swap_bytes(x); - } - std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes - dst += 3; - src += 4; - } - idx = 0; - const char_type *srccur = src; - // We need at least four characters. -#ifdef __clang__ - // If possible, we read four characters at a time. (It is an optimization.) 
- if (ignore_garbage && src + 4 <= srcend) { - char_type c0 = src[0]; - char_type c1 = src[1]; - char_type c2 = src[2]; - char_type c3 = src[3]; - uint8_t code0 = to_base64[uint8_t(c0)]; - uint8_t code1 = to_base64[uint8_t(c1)]; - uint8_t code2 = to_base64[uint8_t(c2)]; - uint8_t code3 = to_base64[uint8_t(c3)]; - buffer[idx] = code0; - idx += (is_eight_byte(c0) && code0 <= 63); - buffer[idx] = code1; - idx += (is_eight_byte(c1) && code1 <= 63); - buffer[idx] = code2; - idx += (is_eight_byte(c2) && code2 <= 63); - buffer[idx] = code3; - idx += (is_eight_byte(c3) && code3 <= 63); - src += 4; - } -#endif - while (idx < 4 && src < srcend) { - char_type c = *src; - uint8_t code = to_base64[uint8_t(c)]; - - buffer[idx] = uint8_t(code); - if (is_eight_byte(c) && code <= 63) { - idx++; - } else if (!ignore_garbage && - (code > 64 || !scalar::base64::is_eight_byte(c))) { - outlen = size_t(dst - dstinit); - srcr = src; - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; - } else { - // We have a space or a newline or garbage. We ignore it. 
- } - src++; - } - if (idx != 4) { - if (!ignore_garbage && - last_chunk_options == last_chunk_handling_options::strict && - ((idx + padded_characters) & 3) != 0) { - outlen = size_t(dst - dstinit); - srcr = src; - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; - } else if (!ignore_garbage && - last_chunk_options == - last_chunk_handling_options::stop_before_partial && - ((idx + padded_characters) & 3) != 0) { - // Rewind src to before partial chunk - srcr = srccur; - outlen = size_t(dst - dstinit); - return {SUCCESS, size_t(dst - dstinit)}; - } else { // loose mode - if (idx == 0) { - // No data left; return success - outlen = size_t(dst - dstinit); - srcr = src; - return {SUCCESS, size_t(dst - dstinit)}; - } else if (!ignore_garbage && idx == 1) { - // Error: Incomplete chunk of length 1 is invalid in loose mode - outlen = size_t(dst - dstinit); - srcr = src; - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; - } else if (idx == 2 || idx == 3) { - // Check if there's enough space in the destination buffer - size_t required_space = (idx == 2) ? 
1 : 2; - if (size_t(dstend - dst) < required_space) { - outlen = size_t(dst - dstinit); - srcr = src; - return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; - } - uint32_t triple = 0; - if (idx == 2) { - triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xffff)) { - srcr = src; - return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; - } - // Extract the first byte - triple >>= 16; - dst[0] = static_cast(triple & 0xFF); - dst += 1; - } else if (idx == 3) { - triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12) + - (uint32_t(buffer[2]) << 6); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xff)) { - srcr = src; - return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; - } - // Extract the first two bytes - triple >>= 8; - dst[0] = static_cast((triple >> 8) & 0xFF); - dst[1] = static_cast(triple & 0xFF); - dst += 2; - } - outlen = size_t(dst - dstinit); - srcr = src; - return {SUCCESS, size_t(dst - dstinit)}; - } - } - } - - if (dstend - dst < 3) { - outlen = size_t(dst - dstinit); - srcr = src; - return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; - } - uint32_t triple = (uint32_t(buffer[0]) << 18) + - (uint32_t(buffer[1]) << 12) + (uint32_t(buffer[2]) << 6) + - (uint32_t(buffer[3])); - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 3); - } else { - triple = scalar::utf32::swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 3); - } - dst += 3; - } -} - -// Returns the number of bytes written. The destination buffer must be large -// enough. It will add padding (=) if needed. -size_t tail_encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // By default, we use padding if we are not using the URL variant. 
- // This is check with ((options & base64_url) == 0) which returns true if we - // are not using the URL variant. However, we also allow 'inversion' of the - // convention with the base64_reverse_padding option. If the - // base64_reverse_padding option is set, we use padding if we are using the - // URL variant, and we omit it if we are not using the URL variant. This is - // checked with - // ((options & base64_reverse_padding) == base64_reverse_padding). - bool use_padding = - ((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding); - // This looks like 3 branches, but we expect the compiler to resolve this to - // a single branch: - const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 - : tables::base64::base64_default::e0; - const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 - : tables::base64::base64_default::e1; - const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2 - : tables::base64::base64_default::e2; - char *out = dst; - size_t i = 0; - uint8_t t1, t2, t3; - for (; i + 2 < srclen; i += 3) { - t1 = uint8_t(src[i]); - t2 = uint8_t(src[i + 1]); - t3 = uint8_t(src[i + 2]); - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; - *out++ = e2[t3]; - } - switch (srclen - i) { - case 0: - break; - case 1: - t1 = uint8_t(src[i]); - *out++ = e0[t1]; - *out++ = e1[(t1 & 0x03) << 4]; - if (use_padding) { - *out++ = '='; - *out++ = '='; - } - break; - default: /* case 2 */ - t1 = uint8_t(src[i]); - t2 = uint8_t(src[i + 1]); - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - if (use_padding) { - *out++ = '='; - } - } - return (size_t)(out - dst); -} - -template -simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char_type *input, size_t length) noexcept { - // We follow 
https://infra.spec.whatwg.org/#forgiving-base64-decode - size_t padding = 0; - if (length > 0) { - if (input[length - 1] == '=') { - padding++; - if (length > 1 && input[length - 2] == '=') { - padding++; - } - } - } - size_t actual_length = length - padding; - if (actual_length % 4 <= 1) { - return actual_length / 4 * 3; - } - // if we have a valid input, then the remainder must be 2 or 3 adding one or - // two extra bytes. - return actual_length / 4 * 3 + (actual_length % 4) - 1; -} - -simdutf_warn_unused size_t -base64_length_from_binary(size_t length, base64_options options) noexcept { - // By default, we use padding if we are not using the URL variant. - // This is check with ((options & base64_url) == 0) which returns true if we - // are not using the URL variant. However, we also allow 'inversion' of the - // convention with the base64_reverse_padding option. If the - // base64_reverse_padding option is set, we use padding if we are using the - // URL variant, and we omit it if we are not using the URL variant. This is - // checked with - // ((options & base64_reverse_padding) == base64_reverse_padding). - bool use_padding = - ((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding); - if (!use_padding) { - return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0); - } - return (length + 2) / 3 * - 4; // We use padding to make the length a multiple of 4. 
-} - -} // namespace base64 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/base64.h */ -/* begin file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF8_H -#define SIMDUTF_LATIN1_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf8 { - -inline size_t convert(const char *buf, size_t len, char *utf8_output) { - const unsigned char *data = reinterpret_cast(buf); - size_t pos = 0; - size_t utf8_pos = 0; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. 
all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - utf8_output[utf8_pos++] = char(buf[pos]); - pos++; - } - continue; - } - } - - unsigned char byte = data[pos]; - if ((byte & 0x80) == 0) { // if ASCII - // will generate one UTF-8 bytes - utf8_output[utf8_pos++] = char(byte); - pos++; - } else { - // will generate two UTF-8 bytes - utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); - utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); - pos++; - } - } - return utf8_pos; -} - -inline size_t convert_safe(const char *buf, size_t len, char *utf8_output, - size_t utf8_len) { - const unsigned char *data = reinterpret_cast(buf); - size_t pos = 0; - size_t skip_pos = 0; - size_t utf8_pos = 0; - while (pos < len && utf8_pos < utf8_len) { - // try to convert the next block of 16 ASCII bytes - if (pos >= skip_pos && pos + 16 <= len && - utf8_pos + 16 <= utf8_len) { // if it is safe to read 16 more bytes, - // check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. 
all of them are zero, then - // everything is ASCII - ::memcpy(utf8_output + utf8_pos, buf + pos, 16); - utf8_pos += 16; - pos += 16; - } else { - // At least one of the next 16 bytes are not ASCII, we will process them - // one by one - skip_pos = pos + 16; - } - } else { - const auto byte = data[pos]; - if ((byte & 0x80) == 0) { // if ASCII - // will generate one UTF-8 bytes - utf8_output[utf8_pos++] = char(byte); - pos++; - } else if (utf8_pos + 2 <= utf8_len) { - // will generate two UTF-8 bytes - utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); - utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); - pos++; - } else { - break; - } - } - } - return utf8_pos; -} - -} // namespace latin1_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ - -namespace simdutf { -bool implementation::supported_by_runtime_system() const { - uint32_t required_instruction_sets = this->required_instruction_sets(); - uint32_t supported_instruction_sets = - internal::detect_supported_architectures(); - return ((supported_instruction_sets & required_instruction_sets) == - required_instruction_sets); -} - -simdutf_warn_unused encoding_type implementation::autodetect_encoding( - const char *input, size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - // UTF8 is common, it includes ASCII, and is commonly represented - // without a BOM, so if it fits, go with that. Note that it is still - // possible to get it wrong, we are only 'guessing'. If some has UTF-16 - // data without a BOM, it could pass as UTF-8. - // - // An interesting twist might be to check for UTF-16 ASCII first (every - // other byte is zero). 
- if (validate_utf8(input, length)) { - return encoding_type::UTF8; - } - // The next most common encoding that might appear without BOM is probably - // UTF-16LE, so try that next. - if ((length % 2) == 0) { - // important: we need to divide by two - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - return encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - return encoding_type::UTF32_LE; - } - } - return encoding_type::unspecified; -} - -namespace internal { -// When there is a single implementation, we should not pay a price -// for dispatching to the best implementation. We should just use the -// one we have. This is a compile-time check. -#define SIMDUTF_SINGLE_IMPLEMENTATION \ - (SIMDUTF_IMPLEMENTATION_ICELAKE + SIMDUTF_IMPLEMENTATION_HASWELL + \ - SIMDUTF_IMPLEMENTATION_WESTMERE + SIMDUTF_IMPLEMENTATION_ARM64 + \ - SIMDUTF_IMPLEMENTATION_PPC64 + SIMDUTF_IMPLEMENTATION_LSX + \ - SIMDUTF_IMPLEMENTATION_LASX + SIMDUTF_IMPLEMENTATION_FALLBACK == \ - 1) - -// Static array of known implementations. We are hoping these get baked into the -// executable without requiring a static initializer. 
- -#if SIMDUTF_IMPLEMENTATION_ICELAKE -static const icelake::implementation *get_icelake_singleton() { - static const icelake::implementation icelake_singleton{}; - return &icelake_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_HASWELL -static const haswell::implementation *get_haswell_singleton() { - static const haswell::implementation haswell_singleton{}; - return &haswell_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_WESTMERE -static const westmere::implementation *get_westmere_singleton() { - static const westmere::implementation westmere_singleton{}; - return &westmere_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_ARM64 -static const arm64::implementation *get_arm64_singleton() { - static const arm64::implementation arm64_singleton{}; - return &arm64_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_PPC64 -static const ppc64::implementation *get_ppc64_singleton() { - static const ppc64::implementation ppc64_singleton{}; - return &ppc64_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_RVV -static const rvv::implementation *get_rvv_singleton() { - static const rvv::implementation rvv_singleton{}; - return &rvv_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_LSX -static const lsx::implementation *get_lsx_singleton() { - static const lsx::implementation lsx_singleton{}; - return &lsx_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_LASX -static const lasx::implementation *get_lasx_singleton() { - static const lasx::implementation lasx_singleton{}; - return &lasx_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_FALLBACK -static const fallback::implementation *get_fallback_singleton() { - static const fallback::implementation fallback_singleton{}; - return &fallback_singleton; -} -#endif - -#if SIMDUTF_SINGLE_IMPLEMENTATION -static const implementation *get_single_implementation() { - return - #if SIMDUTF_IMPLEMENTATION_ICELAKE - get_icelake_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_HASWELL - get_haswell_singleton(); - #endif - #if 
SIMDUTF_IMPLEMENTATION_WESTMERE - get_westmere_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_ARM64 - get_arm64_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_PPC64 - get_ppc64_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_LSX - get_lsx_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_LASX - get_lasx_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_FALLBACK - get_fallback_singleton(); - #endif -} -#endif - -/** - * @private Detects best supported implementation on first use, and sets it - */ -class detect_best_supported_implementation_on_first_use final - : public implementation { -public: - std::string name() const noexcept final { return set_best()->name(); } - std::string description() const noexcept final { - return set_best()->description(); - } - uint32_t required_instruction_sets() const noexcept final { - return set_best()->required_instruction_sets(); - } - - simdutf_warn_unused int - detect_encodings(const char *input, size_t length) const noexcept override { - return set_best()->detect_encodings(input, length); - } - - simdutf_warn_unused bool - validate_utf8(const char *buf, size_t len) const noexcept final override { - return set_best()->validate_utf8(buf, len); - } - - simdutf_warn_unused result validate_utf8_with_errors( - const char *buf, size_t len) const noexcept final override { - return set_best()->validate_utf8_with_errors(buf, len); - } - - simdutf_warn_unused bool - validate_ascii(const char *buf, size_t len) const noexcept final override { - return set_best()->validate_ascii(buf, len); - } - - simdutf_warn_unused result validate_ascii_with_errors( - const char *buf, size_t len) const noexcept final override { - return set_best()->validate_ascii_with_errors(buf, len); - } - - simdutf_warn_unused bool - validate_utf16le(const char16_t *buf, - size_t len) const noexcept final override { - return set_best()->validate_utf16le(buf, len); - } - - simdutf_warn_unused bool - validate_utf16be(const char16_t *buf, - size_t len) const 
noexcept final override { - return set_best()->validate_utf16be(buf, len); - } - - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final override { - return set_best()->validate_utf16le_with_errors(buf, len); - } - - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final override { - return set_best()->validate_utf16be_with_errors(buf, len); - } - - simdutf_warn_unused bool - validate_utf32(const char32_t *buf, - size_t len) const noexcept final override { - return set_best()->validate_utf32(buf, len); - } - - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final override { - return set_best()->validate_utf32_with_errors(buf, len); - } - - simdutf_warn_unused size_t - convert_latin1_to_utf8(const char *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_latin1_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_latin1_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_latin1_to_utf16be(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, - char32_t *latin1_output) const noexcept final override { - return set_best()->convert_latin1_to_utf32(buf, len, latin1_output); - } - - simdutf_warn_unused size_t - convert_utf8_to_latin1(const char *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf8_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, 
size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf8_to_latin1_with_errors(buf, len, - latin1_output); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16be(buf, len, utf16_output); - } - - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16le_with_errors(buf, len, - utf16_output); - } - - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16be_with_errors(buf, len, - utf16_output); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_utf16be(buf, len, utf16_output); - } - - simdutf_warn_unused size_t - convert_utf8_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf8_to_utf32(buf, len, utf32_output); - } - - 
simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf8_to_utf32_with_errors(buf, len, - utf32_output); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output); - } - - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16le_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16be_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16le_to_latin1_with_errors(buf, len, - latin1_output); - } - - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16be_to_latin1_with_errors(buf, len, - latin1_output); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_valid_utf16le_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_valid_utf16be_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused size_t - convert_utf16le_to_utf8(const char16_t *buf, size_t len, - char *utf8_output) 
const noexcept final override { - return set_best()->convert_utf16le_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused size_t - convert_utf16be_to_utf8(const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf8_with_errors(buf, len, - utf8_output); - } - - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf8_with_errors(buf, len, - utf8_output); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_valid_utf16le_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_valid_utf16be_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused result convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf32_to_latin1_with_errors(buf, len, - latin1_output); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused size_t - 
convert_utf32_to_utf8(const char32_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf32_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - } - - simdutf_warn_unused size_t - convert_valid_utf32_to_utf8(const char32_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_valid_utf32_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16be(buf, len, utf16_output); - } - - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16le_with_errors(buf, len, - utf16_output); - } - - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16be_with_errors(buf, len, - utf16_output); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf32_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return 
set_best()->convert_valid_utf32_to_utf16be(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_utf16le_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf32(buf, len, utf32_output); - } - - simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf32(buf, len, utf32_output); - } - - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf32_with_errors(buf, len, - utf32_output); - } - - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf32_with_errors(buf, len, - utf32_output); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_valid_utf16le_to_utf32(buf, len, utf32_output); - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_valid_utf16be_to_utf32(buf, len, utf32_output); - } - - void change_endianness_utf16(const char16_t *buf, size_t len, - char16_t *output) const noexcept final override { - set_best()->change_endianness_utf16(buf, len, output); - } - - simdutf_warn_unused size_t - count_utf16le(const char16_t *buf, size_t len) const noexcept final override { - return set_best()->count_utf16le(buf, len); - } - - simdutf_warn_unused size_t - count_utf16be(const char16_t *buf, size_t len) const noexcept final override { - return set_best()->count_utf16be(buf, len); - 
} - - simdutf_warn_unused size_t - count_utf8(const char *buf, size_t len) const noexcept final override { - return set_best()->count_utf8(buf, len); - } - - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *buf, size_t len) const noexcept override { - return set_best()->latin1_length_from_utf8(buf, len); - } - - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t len) const noexcept override { - return set_best()->latin1_length_from_utf16(len); - } - - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t len) const noexcept override { - return set_best()->latin1_length_from_utf32(len); - } - - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_latin1(buf, len); - } - - simdutf_warn_unused size_t utf8_length_from_utf16le( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_utf16le(buf, len); - } - - simdutf_warn_unused size_t utf8_length_from_utf16be( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_utf16be(buf, len); - } - - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t len) const noexcept override { - return set_best()->utf16_length_from_latin1(len); - } - - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t len) const noexcept override { - return set_best()->utf32_length_from_latin1(len); - } - - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf32_length_from_utf16le(buf, len); - } - - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf32_length_from_utf16be(buf, len); - } - - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *buf, size_t len) const noexcept override { - return set_best()->utf16_length_from_utf8(buf, len); - } - - 
simdutf_warn_unused size_t utf8_length_from_utf32( - const char32_t *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_utf32(buf, len); - } - - simdutf_warn_unused size_t utf16_length_from_utf32( - const char32_t *buf, size_t len) const noexcept override { - return set_best()->utf16_length_from_utf32(buf, len); - } - - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *buf, size_t len) const noexcept override { - return set_best()->utf32_length_from_utf8(buf, len); - } - - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept override { - return set_best()->maximal_binary_length_from_base64(input, length); - } - - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary(input, length, output, options, - last_chunk_handling_options); - } - - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary_details(input, length, output, options, - last_chunk_handling_options); - } - - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept override { - return set_best()->maximal_binary_length_from_base64(input, length); - } - - simdutf_warn_unused result base64_to_binary( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary(input, length, output, options, - 
last_chunk_handling_options); - } - - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary_details(input, length, output, options, - last_chunk_handling_options); - } - - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept override { - return set_best()->base64_length_from_binary(length, options); - } - - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept override { - return set_best()->binary_to_base64(input, length, output, options); - } - - simdutf_really_inline - detect_best_supported_implementation_on_first_use() noexcept - : implementation("best_supported_detector", - "Detects the best supported implementation and sets it", - 0) {} - -private: - const implementation *set_best() const noexcept; -}; - -static_assert(std::is_trivially_destructible< - detect_best_supported_implementation_on_first_use>::value, - "detect_best_supported_implementation_on_first_use should be " - "trivially destructible"); - -static const std::initializer_list & -get_available_implementation_pointers() { - static const std::initializer_list - available_implementation_pointers{ -#if SIMDUTF_IMPLEMENTATION_ICELAKE - get_icelake_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_HASWELL - get_haswell_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_WESTMERE - get_westmere_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_ARM64 - get_arm64_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_PPC64 - get_ppc64_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_RVV - get_rvv_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_LSX - get_lsx_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_LASX - get_lasx_singleton(), -#endif -#if 
SIMDUTF_IMPLEMENTATION_FALLBACK - get_fallback_singleton(), -#endif - }; // available_implementation_pointers - return available_implementation_pointers; -} - -// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no -// support -class unsupported_implementation final : public implementation { -public: - simdutf_warn_unused int detect_encodings(const char *, - size_t) const noexcept override { - return encoding_type::unspecified; - } - - simdutf_warn_unused bool validate_utf8(const char *, - size_t) const noexcept final override { - return false; // Just refuse to validate. Given that we have a fallback - // implementation - // it seems unlikely that unsupported_implementation will ever be used. If - // it is used, then it will flag all strings as invalid. The alternative is - // to return an error_code from which the user has to figure out whether the - // string is valid UTF-8... which seems like a lot of work just to handle - // the very unlikely case that we have an unsupported implementation. And, - // when it does happen (that we have an unsupported implementation), what - // are the chances that the programmer has a fallback? Given that *we* - // provide the fallback, it implies that the programmer would need a - // fallback for our fallback. 
- } - - simdutf_warn_unused result validate_utf8_with_errors( - const char *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused bool - validate_ascii(const char *, size_t) const noexcept final override { - return false; - } - - simdutf_warn_unused result validate_ascii_with_errors( - const char *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused bool - validate_utf16le(const char16_t *, size_t) const noexcept final override { - return false; - } - - simdutf_warn_unused bool - validate_utf16be(const char16_t *, size_t) const noexcept final override { - return false; - } - - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused bool - validate_utf32(const char32_t *, size_t) const noexcept final override { - return false; - } - - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *, size_t, char *) const noexcept final override { - return 0; - } 
- - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *, size_t, char32_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16le_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16be_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - 
simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf32_to_latin1( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf32_to_latin1_with_errors( - const char32_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_latin1( - const char32_t *, size_t, char 
*) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16le_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *, size_t, char32_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result 
convert_utf16be_to_utf32_with_errors( - const char16_t *, size_t, char32_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } - - void change_endianness_utf16(const char16_t *, size_t, - char16_t *) const noexcept final override {} - - simdutf_warn_unused size_t - count_utf16le(const char16_t *, size_t) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t - count_utf16be(const char16_t *, size_t) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t count_utf8(const char *, - size_t) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t) const noexcept override { - return 0; - } - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - utf32_length_from_utf16le(const char16_t *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - utf32_length_from_utf16be(const char16_t *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t) const noexcept override { - return 0; - } - - 
simdutf_warn_unused size_t - utf16_length_from_utf8(const char *, size_t) const noexcept override { - return 0; - } - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t) const noexcept override { - return 0; - } - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused result - base64_to_binary(const char *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused full_result base64_to_binary_details( - const char *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return full_result(error_code::OTHER, 0, 0); - } - - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused result - base64_to_binary(const char16_t *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return full_result(error_code::OTHER, 0, 0); - } - - simdutf_warn_unused size_t - base64_length_from_binary(size_t, base64_options) const noexcept override { - return 0; - } - - size_t binary_to_base64(const char *, size_t, char *, - base64_options) const noexcept override { - return 0; - } - - unsupported_implementation() - : 
implementation("unsupported", - "Unsupported CPU (no detected SIMD instructions)", 0) {} -}; - -const unsupported_implementation *get_unsupported_singleton() { - static const unsupported_implementation unsupported_singleton{}; - return &unsupported_singleton; -} -static_assert(std::is_trivially_destructible::value, - "unsupported_singleton should be trivially destructible"); - -size_t available_implementation_list::size() const noexcept { - return internal::get_available_implementation_pointers().size(); -} -const implementation *const * -available_implementation_list::begin() const noexcept { - return internal::get_available_implementation_pointers().begin(); -} -const implementation *const * -available_implementation_list::end() const noexcept { - return internal::get_available_implementation_pointers().end(); -} -const implementation * -available_implementation_list::detect_best_supported() const noexcept { - // They are prelisted in priority order, so we just go down the list - uint32_t supported_instruction_sets = - internal::detect_supported_architectures(); - for (const implementation *impl : - internal::get_available_implementation_pointers()) { - uint32_t required_instruction_sets = impl->required_instruction_sets(); - if ((supported_instruction_sets & required_instruction_sets) == - required_instruction_sets) { - return impl; - } - } - return get_unsupported_singleton(); // this should never happen? 
-} - -const implementation * -detect_best_supported_implementation_on_first_use::set_best() const noexcept { - SIMDUTF_PUSH_DISABLE_WARNINGS - SIMDUTF_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: - // manually verified this is safe - char *force_implementation_name = getenv("SIMDUTF_FORCE_IMPLEMENTATION"); - SIMDUTF_POP_DISABLE_WARNINGS - - if (force_implementation_name) { - auto force_implementation = - get_available_implementations()[force_implementation_name]; - if (force_implementation) { - return get_active_implementation() = force_implementation; - } else { - // Note: abort() and stderr usage within the library is forbidden. - return get_active_implementation() = get_unsupported_singleton(); - } - } - return get_active_implementation() = - get_available_implementations().detect_best_supported(); -} - -} // namespace internal - -/** - * The list of available implementations compiled into simdutf. - */ -SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list & -get_available_implementations() { - static const internal::available_implementation_list - available_implementations{}; - return available_implementations; -} - -/** - * The active implementation. 
- */ -SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr & -get_active_implementation() { -#if SIMDUTF_SINGLE_IMPLEMENTATION - // skip runtime detection - static internal::atomic_ptr active_implementation{ - internal::get_single_implementation()}; - return active_implementation; -#else - static const internal::detect_best_supported_implementation_on_first_use - detect_best_supported_implementation_on_first_use_singleton; - static internal::atomic_ptr active_implementation{ - &detect_best_supported_implementation_on_first_use_singleton}; - return active_implementation; -#endif -} - -#if SIMDUTF_SINGLE_IMPLEMENTATION -const implementation *get_default_implementation() { - return internal::get_single_implementation(); -} -#else -internal::atomic_ptr &get_default_implementation() { - return get_active_implementation(); -} -#endif -#define SIMDUTF_GET_CURRENT_IMPLEMENTION - -simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { - return get_default_implementation()->validate_utf8(buf, len); -} -simdutf_warn_unused result validate_utf8_with_errors(const char *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf8_with_errors(buf, len); -} -simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept { - return get_default_implementation()->validate_ascii(buf, len); -} -simdutf_warn_unused result validate_ascii_with_errors(const char *buf, - size_t len) noexcept { - return get_default_implementation()->validate_ascii_with_errors(buf, len); -} -simdutf_warn_unused size_t convert_utf8_to_utf16( - const char *input, size_t length, char16_t *utf16_output) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf8_to_utf16be(input, length, utf16_output); -#else - return convert_utf8_to_utf16le(input, length, utf16_output); -#endif -} -simdutf_warn_unused size_t convert_latin1_to_utf8(const char *buf, size_t len, - char *utf8_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf8(buf, 
len, - utf8_output); -} -simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf16le(buf, len, - utf16_output); -} -simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf16be(buf, len, - utf16_output); -} -simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *latin1_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf32(buf, len, - latin1_output); -} -simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) noexcept { - return get_default_implementation()->convert_utf8_to_latin1(buf, len, - latin1_output); -} -simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) noexcept { - return get_default_implementation()->convert_utf8_to_latin1_with_errors( - buf, len, latin1_output); -} -simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) noexcept { - return get_default_implementation()->convert_valid_utf8_to_latin1( - buf, len, latin1_output); -} -simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16le(input, length, - utf16_output); -} -simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16be(input, length, - utf16_output); -} -simdutf_warn_unused result convert_utf8_to_utf16_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf8_to_utf16be_with_errors(input, length, 
utf16_output); -#else - return convert_utf8_to_utf16le_with_errors(input, length, utf16_output); -#endif -} -simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16le_with_errors( - input, length, utf16_output); -} -simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16be_with_errors( - input, length, utf16_output); -} -simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *input, size_t length, char32_t *utf32_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf32(input, length, - utf32_output); -} -simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *input, size_t length, char32_t *utf32_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf32_with_errors( - input, length, utf32_output); -} -simdutf_warn_unused bool validate_utf16(const char16_t *buf, - size_t len) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return validate_utf16be(buf, len); -#else - return validate_utf16le(buf, len); -#endif -} -simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16le(buf, len); -} -simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16be(buf, len); -} -simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, - size_t len) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return validate_utf16be_with_errors(buf, len); -#else - return validate_utf16le_with_errors(buf, len); -#endif -} -simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, - size_t len) noexcept { - return 
get_default_implementation()->validate_utf16le_with_errors(buf, len); -} -simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16be_with_errors(buf, len); -} -simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf32(buf, len); -} -simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf32_with_errors(buf, len); -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf16( - const char *input, size_t length, char16_t *utf16_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf8_to_utf16be(input, length, utf16_buffer); -#else - return convert_valid_utf8_to_utf16le(input, length, utf16_buffer); -#endif -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *input, size_t length, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf8_to_utf16le( - input, length, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *input, size_t length, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf8_to_utf16be( - input, length, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *input, size_t length, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_valid_utf8_to_utf32( - input, length, utf32_buffer); -} -simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *buf, - size_t len, - char *utf8_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf8(buf, len, utf8_buffer); -#else - return convert_utf16le_to_utf8(buf, len, utf8_buffer); -#endif -} -simdutf_warn_unused size_t convert_utf16_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept 
{ -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_latin1(buf, len, latin1_buffer); -#else - return convert_utf16le_to_latin1(buf, len, latin1_buffer); -#endif -} -simdutf_warn_unused size_t convert_latin1_to_utf16( - const char *buf, size_t len, char16_t *utf16_output) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_latin1_to_utf16be(buf, len, utf16_output); -#else - return convert_latin1_to_utf16le(buf, len, utf16_output); -#endif -} -simdutf_warn_unused size_t convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_latin1(buf, len, - latin1_buffer); -} -simdutf_warn_unused size_t convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_latin1(buf, len, - latin1_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16be_to_latin1( - buf, len, latin1_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16le_to_latin1( - buf, len, latin1_buffer); -} -simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_latin1_with_errors( - buf, len, latin1_buffer); -} -simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_latin1_with_errors( - buf, len, latin1_buffer); -} -simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *buf, - size_t len, - char *utf8_buffer) noexcept { - return 
get_default_implementation()->convert_utf16le_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *buf, - size_t len, - char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused result convert_utf16_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer); -#else - return convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer); -#endif -} -simdutf_warn_unused result convert_utf16_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer); -#else - return convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer); -#endif -} -simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf8_with_errors( - buf, len, utf8_buffer); -} -simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf8_with_errors( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer); -#else - return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer); -#endif -} -simdutf_warn_unused size_t convert_valid_utf16_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf16be_to_latin1(buf, len, latin1_buffer); -#else - return convert_valid_utf16le_to_latin1(buf, len, latin1_buffer); -#endif -} 
-simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16le_to_utf8( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16be_to_utf8( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *buf, - size_t len, - char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf8_with_errors( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_valid_utf32_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused size_t convert_utf32_to_utf16( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf32_to_utf16be(buf, len, utf16_buffer); -#else - return convert_utf32_to_utf16le(buf, len, utf16_buffer); -#endif -} -simdutf_warn_unused size_t convert_utf32_to_latin1( - const char32_t *input, size_t length, char *latin1_output) noexcept { - return get_default_implementation()->convert_utf32_to_latin1(input, length, - latin1_output); -} -simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16le(buf, len, - utf16_buffer); -} -simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return 
get_default_implementation()->convert_utf32_to_utf16be(buf, len, - utf16_buffer); -} -simdutf_warn_unused result convert_utf32_to_utf16_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer); -#else - return convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer); -#endif -} -simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16le_with_errors( - buf, len, utf16_buffer); -} -simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16be_with_errors( - buf, len, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf16( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf32_to_utf16be(buf, len, utf16_buffer); -#else - return convert_valid_utf32_to_utf16le(buf, len, utf16_buffer); -#endif -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf32_to_utf16le( - buf, len, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf32_to_utf16be( - buf, len, utf16_buffer); -} -simdutf_warn_unused size_t convert_utf16_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf32(buf, len, utf32_buffer); -#else - return convert_utf16le_to_utf32(buf, len, utf32_buffer); -#endif -} -simdutf_warn_unused size_t convert_utf16le_to_utf32( - 
const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf32(buf, len, - utf32_buffer); -} -simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf32(buf, len, - utf32_buffer); -} -simdutf_warn_unused result convert_utf16_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer); -#else - return convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer); -#endif -} -simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf32_with_errors( - buf, len, utf32_buffer); -} -simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf32_with_errors( - buf, len, utf32_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf16be_to_utf32(buf, len, utf32_buffer); -#else - return convert_valid_utf16le_to_utf32(buf, len, utf32_buffer); -#endif -} -simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16le_to_utf32( - buf, len, utf32_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16be_to_utf32( - buf, len, utf32_buffer); -} -void change_endianness_utf16(const char16_t 
*input, size_t length, - char16_t *output) noexcept { - get_default_implementation()->change_endianness_utf16(input, length, output); -} -simdutf_warn_unused size_t count_utf16(const char16_t *input, - size_t length) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return count_utf16be(input, length); -#else - return count_utf16le(input, length); -#endif -} -simdutf_warn_unused size_t count_utf16le(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->count_utf16le(input, length); -} -simdutf_warn_unused size_t count_utf16be(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->count_utf16be(input, length); -} -simdutf_warn_unused size_t count_utf8(const char *input, - size_t length) noexcept { - return get_default_implementation()->count_utf8(input, length); -} -simdutf_warn_unused size_t latin1_length_from_utf8(const char *buf, - size_t len) noexcept { - return get_default_implementation()->latin1_length_from_utf8(buf, len); -} -simdutf_warn_unused size_t latin1_length_from_utf16(size_t len) noexcept { - return get_default_implementation()->latin1_length_from_utf16(len); -} -simdutf_warn_unused size_t latin1_length_from_utf32(size_t len) noexcept { - return get_default_implementation()->latin1_length_from_utf32(len); -} -simdutf_warn_unused size_t utf8_length_from_latin1(const char *buf, - size_t len) noexcept { - return get_default_implementation()->utf8_length_from_latin1(buf, len); -} -simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, - size_t length) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return utf8_length_from_utf16be(input, length); -#else - return utf8_length_from_utf16le(input, length); -#endif -} -simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf8_length_from_utf16le(input, length); -} -simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, - size_t 
length) noexcept { - return get_default_implementation()->utf8_length_from_utf16be(input, length); -} -simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, - size_t length) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return utf32_length_from_utf16be(input, length); -#else - return utf32_length_from_utf16le(input, length); -#endif -} -simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf32_length_from_utf16le(input, length); -} -simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf32_length_from_utf16be(input, length); -} -simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, - size_t length) noexcept { - return get_default_implementation()->utf16_length_from_utf8(input, length); -} -simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept { - return get_default_implementation()->utf16_length_from_latin1(length); -} -simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, - size_t length) noexcept { - return get_default_implementation()->utf8_length_from_utf32(input, length); -} -simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, - size_t length) noexcept { - return get_default_implementation()->utf16_length_from_utf32(input, length); -} -simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, - size_t length) noexcept { - return get_default_implementation()->utf32_length_from_utf8(input, length); -} - -simdutf_warn_unused size_t -maximal_binary_length_from_base64(const char *input, size_t length) noexcept { - return get_default_implementation()->maximal_binary_length_from_base64( - input, length); -} - -simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options) 
noexcept { - return get_default_implementation()->base64_to_binary( - input, length, output, options, last_chunk_handling_options); -} - -simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) noexcept { - return get_default_implementation()->maximal_binary_length_from_base64( - input, length); -} - -simdutf_warn_unused result base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return get_default_implementation()->base64_to_binary( - input, length, output, options, last_chunk_handling_options); -} - -template -simdutf_warn_unused result base64_to_binary_safe_impl( - const chartype *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - static_assert(std::is_same::value || - std::is_same::value, - "Only char and char16_t are supported."); - // The implementation could be nicer, but we expect that most times, the user - // will provide us with a buffer that is large enough. - size_t max_length = maximal_binary_length_from_base64(input, length); - if (outlen >= max_length) { - // fast path - full_result r = get_default_implementation()->base64_to_binary_details( - input, length, output, options, last_chunk_handling_options); - if (r.error != error_code::INVALID_BASE64_CHARACTER && - r.error != error_code::BASE64_EXTRA_BITS) { - outlen = r.output_count; - if (last_chunk_handling_options == stop_before_partial) { - if ((r.output_count % 3) != 0) { - bool empty_trail = true; - for (size_t i = r.input_count; i < length; i++) { - if (!scalar::base64::is_ascii_white_space_or_padding(input[i])) { - empty_trail = false; - break; - } - } - if (empty_trail) { - r.input_count = length; - } - } - return {r.error, r.input_count}; - } - return {r.error, length}; - } - return r; - } - // The output buffer is maybe too small. 
We will decode a truncated version of - // the input. - size_t outlen3 = outlen / 3 * 3; // round down to multiple of 3 - size_t safe_input = base64_length_from_binary(outlen3, options); - full_result r = get_default_implementation()->base64_to_binary_details( - input, safe_input, output, options, loose); - if (r.error == error_code::INVALID_BASE64_CHARACTER) { - return r; - } - size_t offset = - (r.error == error_code::BASE64_INPUT_REMAINDER) - ? 1 - : ((r.output_count % 3) == 0 ? 0 : (r.output_count % 3) + 1); - size_t output_index = r.output_count - (r.output_count % 3); - size_t input_index = safe_input; - // offset is a value that is no larger than 3. We backtrack - // by up to offset characters + an undetermined number of - // white space characters. It is expected that the next loop - // runs at most 3 times + the number of white space characters - // in between them, so we are not worried about performance. - while (offset > 0 && input_index > 0) { - chartype c = input[--input_index]; - if (scalar::base64::is_ascii_white_space(c)) { - // skipping - } else { - offset--; - } - } - size_t remaining_out = outlen - output_index; - const chartype *tail_input = input + input_index; - size_t tail_length = length - input_index; - while (tail_length > 0 && - scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { - tail_length--; - } - size_t padding_characts = 0; - if (tail_length > 0 && tail_input[tail_length - 1] == '=') { - tail_length--; - padding_characts++; - while (tail_length > 0 && - scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { - tail_length--; - } - if (tail_length > 0 && tail_input[tail_length - 1] == '=') { - tail_length--; - padding_characts++; - } - } - // this will advance tail_input and tail_length - result rr = scalar::base64::base64_tail_decode_safe( - output + output_index, remaining_out, tail_input, tail_length, - padding_characts, options, last_chunk_handling_options); - outlen = output_index + remaining_out; 
- if (last_chunk_handling_options != stop_before_partial && - rr.error == error_code::SUCCESS && padding_characts > 0) { - // additional checks - if ((outlen % 3 == 0) || ((outlen % 3) + 1 + padding_characts != 4)) { - rr.error = error_code::INVALID_BASE64_CHARACTER; - } - } - if (rr.error == error_code::SUCCESS && - last_chunk_handling_options == stop_before_partial) { - if (tail_input > input + input_index) { - rr.count = tail_input - input; - } else if (r.input_count > 0) { - rr.count = r.input_count + rr.count; - } - return rr; - } - rr.count += input_index; - return rr; -} - -simdutf_warn_unused size_t convert_latin1_to_utf8_safe( - const char *buf, size_t len, char *utf8_output, size_t utf8_len) noexcept { - const auto start{utf8_output}; - - while (true) { - // convert_latin1_to_utf8 will never write more than input length * 2 - auto read_len = std::min(len, utf8_len >> 1); - if (read_len <= 16) { - break; - } - - const auto write_len = - simdutf::convert_latin1_to_utf8(buf, read_len, utf8_output); - - utf8_output += write_len; - utf8_len -= write_len; - buf += read_len; - len -= read_len; - } - - utf8_output += - scalar::latin1_to_utf8::convert_safe(buf, len, utf8_output, utf8_len); - - return utf8_output - start; -} - -simdutf_warn_unused result base64_to_binary_safe( - const char *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return base64_to_binary_safe_impl(input, length, output, outlen, - options, last_chunk_handling_options); -} -simdutf_warn_unused result base64_to_binary_safe( - const char16_t *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return base64_to_binary_safe_impl( - input, length, output, outlen, options, last_chunk_handling_options); -} - -simdutf_warn_unused size_t -base64_length_from_binary(size_t length, base64_options options) 
noexcept { - return get_default_implementation()->base64_length_from_binary(length, - options); -} - -size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) noexcept { - return get_default_implementation()->binary_to_base64(input, length, output, - options); -} - -simdutf_warn_unused simdutf::encoding_type -autodetect_encoding(const char *buf, size_t length) noexcept { - return get_default_implementation()->autodetect_encoding(buf, length); -} -simdutf_warn_unused int detect_encodings(const char *buf, - size_t length) noexcept { - return get_default_implementation()->detect_encodings(buf, length); -} -const implementation *builtin_implementation() { - static const implementation *builtin_impl = - get_available_implementations()[SIMDUTF_STRINGIFY( - SIMDUTF_BUILTIN_IMPLEMENTATION)]; - return builtin_impl; -} - -simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length) { - return scalar::utf8::trim_partial_utf8(input, length); -} - -simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, - size_t length) { - return scalar::utf16::trim_partial_utf16(input, length); -} - -simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, - size_t length) { - return scalar::utf16::trim_partial_utf16(input, length); -} - -simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, - size_t length) { -#if SIMDUTF_IS_BIG_ENDIAN - return trim_partial_utf16be(input, length); -#else - return trim_partial_utf16le(input, length); -#endif -} - -} // namespace simdutf -/* end file src/implementation.cpp */ /* begin file src/encoding_types.cpp */ namespace simdutf { @@ -14200,7 +2380,10590 @@ const uint8_t pack_1_2_3_utf8_bytes[256][17] = { /* end file src/tables/utf16_to_utf8_tables.h */ // End of tables. +// Implementations: they need to be setup before including +// scalar/* code, as the scalar code is sometimes enabled +// only for peculiar build targets. 
+ +// The best choice should always come first! +/* begin file src/simdutf/arm64.h */ +#ifndef SIMDUTF_ARM64_H +#define SIMDUTF_ARM64_H + +#ifdef SIMDUTF_FALLBACK_H + #error "arm64.h must be included before fallback.h" +#endif + + +#ifndef SIMDUTF_IMPLEMENTATION_ARM64 + #define SIMDUTF_IMPLEMENTATION_ARM64 (SIMDUTF_IS_ARM64) +#endif +#if SIMDUTF_IMPLEMENTATION_ARM64 && SIMDUTF_IS_ARM64 + #define SIMDUTF_CAN_ALWAYS_RUN_ARM64 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_ARM64 0 +#endif + + +#if SIMDUTF_IMPLEMENTATION_ARM64 + +namespace simdutf { +/** + * Implementation for NEON (ARMv8). + */ +namespace arm64 {} // namespace arm64 +} // namespace simdutf + +/* begin file src/simdutf/arm64/implementation.h */ +#ifndef SIMDUTF_ARM64_IMPLEMENTATION_H +#define SIMDUTF_ARM64_IMPLEMENTATION_H + + +namespace simdutf { +namespace arm64 { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("arm64", "ARM NEON", + internal::instruction_set::NEON) {} +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + 
simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + 
simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + 
convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + 
simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + 
char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused 
size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t 
binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace arm64 +} // namespace simdutf + +#endif // SIMDUTF_ARM64_IMPLEMENTATION_H +/* end file src/simdutf/arm64/implementation.h */ + +/* begin file src/simdutf/arm64/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "arm64" +// #define SIMDUTF_IMPLEMENTATION arm64 +/* end file src/simdutf/arm64/begin.h */ + + // Declarations +/* begin file src/simdutf/arm64/intrinsics.h */ +#ifndef SIMDUTF_ARM64_INTRINSICS_H +#define SIMDUTF_ARM64_INTRINSICS_H + + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +#endif // SIMDUTF_ARM64_INTRINSICS_H +/* end file src/simdutf/arm64/intrinsics.h */ +/* begin file src/simdutf/arm64/bitmanipulation.h */ +#ifndef SIMDUTF_ARM64_BITMANIPULATION_H +#define SIMDUTF_ARM64_BITMANIPULATION_H + +namespace simdutf { +namespace arm64 { +namespace { + +/* result might be undefined when input_num is zero */ +simdutf_really_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; + #else // SIMDUTF_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); + #endif // SIMDUTF_REGULAR_VISUAL_STUDIO +} +#endif + +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf + +#endif // SIMDUTF_ARM64_BITMANIPULATION_H +/* end file src/simdutf/arm64/bitmanipulation.h */ +/* begin file src/simdutf/arm64/simd.h */ +#ifndef SIMDUTF_ARM64_SIMD_H +#define SIMDUTF_ARM64_SIMD_H + +#include + +namespace simdutf { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +namespace { + // Start of private section with Visual Studio workaround + + #ifndef simdutf_make_uint8x16_t + #define simdutf_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, \ + x11, x12, x13, x14, x15, x16) \ + ([=]() { \ + uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) + #endif + #ifndef simdutf_make_int8x16_t + #define simdutf_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, \ + x11, x12, x13, x14, x15, x16) \ + ([=]() { \ + int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_s8(array); \ + }()) + #endif + + #ifndef simdutf_make_uint8x8_t + #define simdutf_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_u8(array); \ + }()) + #endif + #ifndef simdutf_make_int8x8_t + #define simdutf_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_s8(array); \ + }()) + #endif + #ifndef simdutf_make_uint16x8_t + #define simdutf_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_u16(array); \ + }()) + #endif + #ifndef simdutf_make_int16x8_t + #define simdutf_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, 
x8) \ + ([=]() { \ + int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_s16(array); \ + }()) + #endif + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDUTF_REGULAR_VISUAL_STUDIO + +template struct simd8; + +// +// Base class of simd8 and simd8, both of which use uint8x16_t +// internally. +// +template > struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdutf_really_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdutf_really_inline operator const uint8x16_t &() const { + return this->value; + } + simdutf_really_inline operator uint8x16_t &() { return this->value; } + simdutf_really_inline T first() const { return vgetq_lane_u8(*this, 0); } + simdutf_really_inline T last() const { return vgetq_lane_u8(*this, 15); } + + // Bit operations + simdutf_really_inline simd8 operator|(const simd8 other) const { + return vorrq_u8(*this, other); + } + simdutf_really_inline simd8 operator&(const simd8 other) const { + return vandq_u8(*this, other); + } + simdutf_really_inline simd8 operator^(const simd8 other) const { + return veorq_u8(*this, other); + } + simdutf_really_inline simd8 bit_andnot(const simd8 other) const { + return vbicq_u8(*this, other); + } + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdutf_really_inline simd8 &operator|=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdutf_really_inline simd8 &operator&=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdutf_really_inline simd8 &operator^=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } + + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return vceqq_u8(lhs, rhs); + } + + 
template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdutf_really_inline simd8 splat(bool _value) { + return vmovq_n_u8(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8(const uint8x16_t _value) + : base_u8(_value) {} + // False constructor + simdutf_really_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} + simdutf_really_inline void store(uint8_t dst[16]) const { + return vst1q_u8(dst, *this); + } + + // We return uint32_t instead of uint16_t because that seems to be more + // efficient for most purposes (cutting it down to uint16_t costs performance + // in some compilers). + simdutf_really_inline uint32_t to_bitmask() const { +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = + simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. This method is expected to be faster than none() + // and is equivalent when the vector register is the result of a comparison, + // with byte values 0xff and 0x00. 
+ simdutf_really_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } + + simdutf_really_inline bool any() const { + return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; + } + simdutf_really_inline bool none() const { + return vmaxvq_u32(vreinterpretq_u32_u8(*this)) == 0; + } + // True only when every byte of the mask is 0xFF: the smallest 32-bit lane + // must then be 0xFFFFFFFF (a 20-bit 0xFFFFF constant can never match a + // comparison mask, which made this always return false). + simdutf_really_inline bool all() const { + return vminvq_u32(vreinterpretq_u32_u8(*this)) == 0xFFFFFFFFu; + } +}; + +// Unsigned bytes +template <> struct simd8 : base_u8 { + static simdutf_really_inline simd8 splat(uint8_t _value) { + return vmovq_n_u8(_value); + } + static simdutf_really_inline simd8 zero() { return vdupq_n_u8(0); } + static simdutf_really_inline simd8 load(const uint8_t *values) { + return vld1q_u8(values); + } + simdutf_really_inline simd8(const uint8x16_t _value) + : base_u8(_value) {} + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8(simdutf_make_uint8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15)) {} +#else + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8(uint8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + 
simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Store to array + simdutf_really_inline void store(uint8_t dst[16]) const { + return vst1q_u8(dst, *this); + } + + // Saturated math + simdutf_really_inline simd8 + saturating_add(const simd8 other) const { + return vqaddq_u8(*this, other); + } + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return vqsubq_u8(*this, other); + } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 + operator+(const simd8 other) const { + return vaddq_u8(*this, other); + } + simdutf_really_inline simd8 + operator-(const simd8 other) const { + return vsubq_u8(*this, other); + } + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *this; + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *this; + } + + // Order-specific operations + simdutf_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdutf_really_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdutf_really_inline simd8 + max_val(const simd8 other) const { + return vmaxq_u8(*this, other); + } + simdutf_really_inline simd8 + min_val(const simd8 other) const { + return vminq_u8(*this, other); + } + simdutf_really_inline simd8 + operator<=(const simd8 other) const { + return vcleq_u8(*this, other); + } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return vcgeq_u8(*this, other); + } + simdutf_really_inline simd8 + operator<(const simd8 other) const { + return vcltq_u8(*this, other); + } + simdutf_really_inline simd8 + operator>(const simd8 other) const { + return 
vcgtq_u8(*this, other); + } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true + // = nonzero. For ARM, returns all 1's. + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return simd8(*this > other); + } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true + // = nonzero. For ARM, returns all 1's. + simdutf_really_inline simd8 + lt_bits(const simd8 other) const { + return simd8(*this < other); + } + + // Bit-specific operations + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return vtstq_u8(*this, bits); + } + simdutf_really_inline bool is_ascii() const { + return this->max_val() < 0b10000000u; + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + return this->max_val() != 0; + } + simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { + return (*this & bits).any_bits_set_anywhere(); + } + template simdutf_really_inline simd8 shr() const { + return vshrq_n_u8(*this, N); + } + template simdutf_really_inline simd8 shl() const { + return vshlq_n_u8(*this, N); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } + + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + return vqtbl1q_u8(*this, simd8(original)); + } +}; + +// Signed bytes +template 
<> struct simd8 { + int8x16_t value; + + static simdutf_really_inline simd8 splat(int8_t _value) { + return vmovq_n_s8(_value); + } + static simdutf_really_inline simd8 zero() { return vdupq_n_s8(0); } + static simdutf_really_inline simd8 load(const int8_t values[16]) { + return vld1q_s8(values); + } + + // Use ST2 instead of UXTL+UXTL2 to interleave zeroes. UXTL is actually a + // USHLL #0, and shifting in NEON is actually quite slow. + // + // While this needs the registers to be in a specific order, bigger cores can + // interleave these with no overhead, and it still performs decently on little + // cores. + // movi v1.3d, #0 + // mov v0.16b, value[0] + // st2 {v0.16b, v1.16b}, [ptr], #32 + // mov v0.16b, value[1] + // st2 {v0.16b, v1.16b}, [ptr], #32 + // ... + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + int8x16x2_t pair = match_system(big_endian) + ? int8x16x2_t{{this->value, vmovq_n_s8(0)}} + : int8x16x2_t{{vmovq_n_s8(0), this->value}}; + vst2q_s8(reinterpret_cast(p), pair); + } + + // currently unused + // Technically this could be done with ST4 like in store_ascii_as_utf16, but + // it is very much not worth it, as explicitly mentioned in the ARM Cortex-X1 + // Core Software Optimization Guide: + // 4.18 Complex ASIMD instructions + // The bandwidth of [ST4 with element size less than 64b] is limited by + // decode constraints and it is advisable to avoid them when high + // performing code is desired. + // Instead, it is better to use ZIP1+ZIP2 and two ST2. 
+ simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { + const uint16x8_t low = + vreinterpretq_u16_s8(vzip1q_s8(this->value, vmovq_n_s8(0))); + const uint16x8_t high = + vreinterpretq_u16_s8(vzip2q_s8(this->value, vmovq_n_s8(0))); + const uint16x8x2_t low_pair{{low, vmovq_n_u16(0)}}; + vst2q_u16(reinterpret_cast(p), low_pair); + const uint16x8x2_t high_pair{{high, vmovq_n_u16(0)}}; + vst2q_u16(reinterpret_cast(p + 8), high_pair); + } + + // In places where the table can be reused, which is most uses in simdutf, it + // is worth it to do 4 table lookups, as there is no direct zero extension + // from u8 to u32. + simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { + const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, + 2, 255, 255, 255, 3, 255, 255, 255}; + const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, + 6, 255, 255, 255, 7, 255, 255, 255}; + const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, + 10, 255, 255, 255, 11, 255, 255, 255}; + const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, + 14, 255, 255, 255, 15, 255, 255, 255}; + + // encourage store pairing and interleaving + const auto shuf1 = this->apply_lookup_16_to(tb1); + const auto shuf2 = this->apply_lookup_16_to(tb2); + shuf1.store(reinterpret_cast(p)); + shuf2.store(reinterpret_cast(p + 4)); + + const auto shuf3 = this->apply_lookup_16_to(tb3); + const auto shuf4 = this->apply_lookup_16_to(tb4); + shuf3.store(reinterpret_cast(p + 8)); + shuf4.store(reinterpret_cast(p + 12)); + } + // Conversion from/to SIMD register + simdutf_really_inline simd8(const int8x16_t _value) : value{_value} {} + simdutf_really_inline operator const int8x16_t &() const { + return this->value; + } +#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline operator const uint8x16_t() const { + return vreinterpretq_u8_s8(this->value); + } +#endif + simdutf_really_inline operator int8x16_t &() { return this->value; } + + // Zero constructor + simdutf_really_inline simd8() : 
simd8(zero()) {} + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8(simdutf_make_int8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15)) {} +#else + simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8(int8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Store to array + simdutf_really_inline void store(int8_t dst[16]) const { + return vst1q_s8(dst, value); + } + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same + // type. In theory, we could check this occurrence with std::same_as and + // std::enabled_if but it is C++14 and relatively ugly and hard to read. 
+#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline explicit simd8(const uint8x16_t other) + : simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdutf_really_inline operator simd8() const { + return vreinterpretq_u8_s8(this->value); + } + + simdutf_really_inline simd8 + operator|(const simd8 other) const { + return vorrq_s8(value, other.value); + } + simdutf_really_inline simd8 + operator&(const simd8 other) const { + return vandq_s8(value, other.value); + } + simdutf_really_inline simd8 + operator^(const simd8 other) const { + return veorq_s8(value, other.value); + } + simdutf_really_inline simd8 + bit_andnot(const simd8 other) const { + return vbicq_s8(value, other.value); + } + + // Math + simdutf_really_inline simd8 + operator+(const simd8 other) const { + return vaddq_s8(value, other.value); + } + simdutf_really_inline simd8 + operator-(const simd8 other) const { + return vsubq_s8(value, other.value); + } + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *this; + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *this; + } + + simdutf_really_inline int8_t max_val() const { return vmaxvq_s8(value); } + simdutf_really_inline int8_t min_val() const { return vminvq_s8(value); } + simdutf_really_inline bool is_ascii() const { return this->min_val() >= 0; } + + // Order-sensitive comparisons + simdutf_really_inline simd8 max_val(const simd8 other) const { + return vmaxq_s8(value, other.value); + } + simdutf_really_inline simd8 min_val(const simd8 other) const { + return vminq_s8(value, other.value); + } + simdutf_really_inline simd8 operator>(const simd8 other) const { + return vcgtq_s8(value, other.value); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return vcltq_s8(value, other.value); + } + simdutf_really_inline simd8 + operator==(const simd8 other) const { + return vceqq_s8(value, other.value); + } + + template + 
simdutf_really_inline simd8 + prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } + + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + return vqtbl1q_s8(*this, simd8(original)); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "ARM kernel should use four registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); + 
this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = + simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + // Add each of the elements next to each other, successively, to stuff each + // 8 byte mask into one. 
+ uint8x16_t sum0 = + vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[0]), bit_mask), + vandq_u8(uint8x16_t(this->chunks[1]), bit_mask)); + uint8x16_t sum1 = + vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[2]), bit_mask), + vandq_u8(uint8x16_t(this->chunks[3]), bit_mask)); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + + return simd8x64( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), + (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), + (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + return simd8x64( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), + (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), + (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, 
this->chunks[3] < mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask, + this->chunks[2] >= mask, this->chunks[3] >= mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(simd8(uint8x16_t(this->chunks[0])) >= mask, + simd8(uint8x16_t(this->chunks[1])) >= mask, + simd8(uint8x16_t(this->chunks[2])) >= mask, + simd8(uint8x16_t(this->chunks[3])) >= mask) + .to_bitmask(); + } +}; // struct simd8x64 +/* begin file src/simdutf/arm64/simd16-inl.h */ +template struct simd16; + +template > struct base_u16 { + uint16x8_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdutf_really_inline base_u16() = default; + simdutf_really_inline base_u16(const uint16x8_t _value) : value(_value) {} + simdutf_really_inline operator const uint16x8_t &() const { + return this->value; + } + simdutf_really_inline operator uint16x8_t &() { return this->value; } + // Bit operations + simdutf_really_inline simd16 operator|(const simd16 other) const { + return vorrq_u16(*this, other); + } + simdutf_really_inline simd16 operator&(const simd16 other) const { + return vandq_u16(*this, other); + } + simdutf_really_inline simd16 operator^(const simd16 other) const { + return veorq_u16(*this, other); + } + simdutf_really_inline simd16 bit_andnot(const simd16 other) const { + return vbicq_u16(*this, other); + } + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } + simdutf_really_inline simd16 &operator|=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = 
*this_cast | other; + return *this_cast; + } + simdutf_really_inline simd16 &operator&=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdutf_really_inline simd16 &operator^=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } + + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return vceqq_u16(lhs, rhs); + } + + // Returns this vector shifted right by N lanes, with the last N lanes of + // prev_chunk shifted in. Lanes are 16 bits wide, hence vextq_u16. + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return vextq_u16(prev_chunk, *this, 8 - N); + } +}; + +template > +struct base16 : base_u16 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdutf_really_inline base16() : base_u16() {} + simdutf_really_inline base16(const uint16x8_t _value) : base_u16(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) : base16(vld1q_u16(ptr)) {} + + static const int SIZE = sizeof(base_u16::value); + + // Returns this vector shifted right by N lanes, with the last N lanes of + // prev_chunk shifted in. Lanes are 16 bits wide, hence vextq_u16. + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return vextq_u16(prev_chunk, *this, 8 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return vmovq_n_u16(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + simdutf_really_inline simd16(const uint16x8_t _value) + : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return vmovq_n_u16(_value); + } + static simdutf_really_inline simd16 zero() { return vdupq_n_u16(0); } + static simdutf_really_inline simd16 load(const T values[8]) { + return vld1q_u16(reinterpret_cast(values)); + } + + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline 
base16_numeric(const uint16x8_t _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return vst1q_u16(dst, *this); + } + + // Override to distinguish from bool version. The XOR constant must cover + // all 16 bits of a lane; 0xFFu would only invert the low byte. + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } + + // Addition/subtraction are the same for signed and unsigned, but must be + // done on 16-bit lanes so that carries/borrows cross the byte boundary. + simdutf_really_inline simd16 operator+(const simd16 other) const { + return vaddq_u16(*this, other); + } + simdutf_really_inline simd16 operator-(const simd16 other) const { + return vsubq_u16(*this, other); + } + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd16 &operator-=(const simd16 other) { + *this = *this - other; + return *static_cast *>(this); + } +}; + +// Signed code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} +#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline simd16(const uint16x8_t _value) + : base16_numeric(_value) {} +#endif + simdutf_really_inline simd16(const int16x8_t _value) + : base16_numeric(vreinterpretq_u16_s16(_value)) {} + + // Splat constructor + simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + simdutf_really_inline operator simd16() const; + simdutf_really_inline operator const uint16x8_t &() const { + return this->value; + } + simdutf_really_inline operator const int16x8_t() const { + return vreinterpretq_s16_u16(this->value); + } + + simdutf_really_inline int16_t max_val() const { + return vmaxvq_s16(vreinterpretq_s16_u16(this->value)); + } + simdutf_really_inline int16_t min_val() const { + return vminvq_s16(vreinterpretq_s16_u16(this->value)); + } + // Order-sensitive 
comparisons + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return vmaxq_s16(vreinterpretq_s16_u16(this->value), + vreinterpretq_s16_u16(other.value)); + } + // Lane-wise signed minimum (must use vminq_s16, not vmaxq_s16). + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return vminq_s16(vreinterpretq_s16_u16(this->value), + vreinterpretq_s16_u16(other.value)); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return vcgtq_s16(vreinterpretq_s16_u16(this->value), + vreinterpretq_s16_u16(other.value)); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return vcltq_s16(vreinterpretq_s16_u16(this->value), + vreinterpretq_s16_u16(other.value)); + } +}; + +// Unsigned code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const uint16x8_t _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + // Horizontal max/min over the eight lanes. The result is unsigned: returning + // int16_t would turn any value >= 0x8000 into a negative number. + simdutf_really_inline uint16_t max_val() const { return vmaxvq_u16(*this); } + simdutf_really_inline uint16_t min_val() const { return vminvq_u16(*this); } + // Saturated math + simdutf_really_inline simd16 + saturating_add(const simd16 other) const { + return vqaddq_u16(*this, other); + } + simdutf_really_inline simd16 + saturating_sub(const simd16 other) const { + return vqsubq_u16(*this, other); + } + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return vmaxq_u16(*this, other); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return vminq_u16(*this, other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + gt_bits(const simd16 other) 
const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + lt_bits(const simd16 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return vcleq_u16(*this, other); + } + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return vcgeq_u16(*this, other); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return vcgtq_u16(*this, other); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return vcltq_u16(*this, other); + } + + // Bit-specific operations + simdutf_really_inline simd16 bits_not_set() const { + return *this == uint16_t(0); + } + template simdutf_really_inline simd16 shr() const { + return simd16(vshrq_n_u16(*this, N)); + } + template simdutf_really_inline simd16 shl() const { + return simd16(vshlq_n_u16(*this, N)); + } + + // logical operations + simdutf_really_inline simd16 + operator|(const simd16 other) const { + return vorrq_u16(*this, other); + } + simdutf_really_inline simd16 + operator&(const simd16 other) const { + return vandq_u16(*this, other); + } + simdutf_really_inline simd16 + operator^(const simd16 other) const { + return veorq_u16(*this, other); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + return vqmovn_high_u16(vqmovn_u16(v0), v1); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(*this))); + } +}; +simdutf_really_inline simd16::operator simd16() const { + return this->value; +} + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 4, + "ARM kernel should use four registers per 64-byte 
block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); + } + + simdutf_really_inline simd16 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); + this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); + this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = + simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + // Add each of the elements next to each other, successively, to stuff each + // 8 
byte mask into one. + uint8x16_t sum0 = vpaddq_u8( + vreinterpretq_u8_u16(this->chunks[0] & vreinterpretq_u16_u8(bit_mask)), + vreinterpretq_u8_u16(this->chunks[1] & vreinterpretq_u16_u8(bit_mask))); + uint8x16_t sum1 = vpaddq_u8( + vreinterpretq_u8_u16(this->chunks[2] & vreinterpretq_u16_u8(bit_mask)), + vreinterpretq_u8_u16(this->chunks[3] & vreinterpretq_u16_u8(bit_mask))); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + + return simd16x32( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), + (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), + (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + return simd16x32( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > 
mask_high) | (this->chunks[1] < mask_low), + (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), + (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } + +}; // struct simd16x32 +template <> +simdutf_really_inline uint64_t simd16x32::not_in_range( + const uint16_t low, const uint16_t high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + simd16x32 x(simd16((this->chunks[0] > mask_high) | + (this->chunks[0] < mask_low)), + simd16((this->chunks[1] > mask_high) | + (this->chunks[1] < mask_low)), + simd16((this->chunks[2] > mask_high) | + (this->chunks[2] < mask_low)), + simd16((this->chunks[3] > mask_high) | + (this->chunks[3] < mask_low))); + return x.to_bitmask(); +} +/* end file src/simdutf/arm64/simd16-inl.h */ +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf + +#endif // SIMDUTF_ARM64_SIMD_H +/* end file src/simdutf/arm64/simd.h */ + +/* begin file src/simdutf/arm64/end.h */ +/* end file src/simdutf/arm64/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_ARM64 + +#endif // SIMDUTF_ARM64_H +/* end file src/simdutf/arm64.h */ +/* begin file src/simdutf/icelake.h */ +#ifndef SIMDUTF_ICELAKE_H +#define SIMDUTF_ICELAKE_H + + +#ifdef __has_include + // How do we detect that a compiler supports vbmi2? + // For sure if the following header is found, we are ok? + #if __has_include() + #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 + #endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1930 + // Visual Studio 2022 and up support VBMI2 under x64 even if the header + // avx512vbmi2intrin.h is not found. + // Visual Studio 2019 technically supports VBMI2, but the implementation + // might be unreliable. 
Search for visualstudio2019icelakeissue in our + // tests. + #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 + #endif +#endif + +// We allow icelake on x64 as long as the compiler is known to support VBMI2. +#ifndef SIMDUTF_IMPLEMENTATION_ICELAKE + #define SIMDUTF_IMPLEMENTATION_ICELAKE \ + ((SIMDUTF_IS_X86_64) && (SIMDUTF_COMPILER_SUPPORTS_VBMI2)) +#endif + +// To see why (__BMI__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdutf/simdutf/issues/1247 +#if ((SIMDUTF_IMPLEMENTATION_ICELAKE) && (SIMDUTF_IS_X86_64) && (__AVX2__) && \ + (SIMDUTF_HAS_AVX512F && SIMDUTF_HAS_AVX512DQ && SIMDUTF_HAS_AVX512VL && \ + SIMDUTF_HAS_AVX512VBMI2) && \ + (!SIMDUTF_IS_32BITS)) + #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + +#if SIMDUTF_IMPLEMENTATION_ICELAKE + #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE + #define SIMDUTF_TARGET_ICELAKE + #else + #define SIMDUTF_TARGET_ICELAKE \ + SIMDUTF_TARGET_REGION( \ + "avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2," \ + "avx512vl,avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512vpopcntdq") + #endif + +namespace simdutf { +namespace icelake {} // namespace icelake +} // namespace simdutf + + // + // These two need to be included outside SIMDUTF_TARGET_REGION + // +/* begin file src/simdutf/icelake/intrinsics.h */ +#ifndef SIMDUTF_ICELAKE_INTRINSICS_H +#define SIMDUTF_ICELAKE_INTRINSICS_H + + +#ifdef SIMDUTF_VISUAL_STUDIO + // under clang within visual studio, this will include + #include // visual studio or clang + #include +#else + + #if SIMDUTF_GCC11ORMORE +// We should not get warnings while including yet we do +// under some versions of GCC. +// If the x86intrin.h header has uninitialized values that are problematic, +// it is a GCC issue, we want to ignore these warnings. 
+SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) + #endif + + #include // elsewhere + + #if SIMDUTF_GCC11ORMORE +// cancels the suppression of the -Wuninitialized +SIMDUTF_POP_DISABLE_WARNINGS + #endif + + #ifndef _tzcnt_u64 + #define _tzcnt_u64(x) __tzcnt_u64(x) + #endif // _tzcnt_u64 +#endif // SIMDUTF_VISUAL_STUDIO + +#ifdef SIMDUTF_CLANG_VISUAL_STUDIO + /** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdutf, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ + #include // for _blsr_u64 + #include // for _pext_u64, _pdep_u64 + #include // for __lzcnt64 + #include // for most things (AVX2, AVX512, _popcnt64) + #include + #include + #include + #include + // Important: we need the AVX-512 headers: + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + // unfortunately, we may not get _blsr_u64, but, thankfully, clang + // has it as a macro. 
+ #ifndef _blsr_u64 + // we roll our own + #define _blsr_u64(n) ((n - 1) & n) + #endif // _blsr_u64 +#endif // SIMDUTF_CLANG_VISUAL_STUDIO + +#if defined(__GNUC__) && !defined(__clang__) + + #if __GNUC__ == 8 + #define SIMDUTF_GCC8 1 + #elif __GNUC__ == 9 + #define SIMDUTF_GCC9 1 + #endif // __GNUC__ == 8 || __GNUC__ == 9 + +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDUTF_GCC8 + #pragma GCC push_options + #pragma GCC target("avx512f") +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i +_mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, + uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, + uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, + uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, + uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, + uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, + uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, + uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, + uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, + uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, + uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, + uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, + uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64( + uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + 
uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + + (uint64_t(a56) << 56)); +} + #pragma GCC pop_options +#endif // SIMDUTF_GCC8 + +#endif // SIMDUTF_ICELAKE_INTRINSICS_H +/* end file src/simdutf/icelake/intrinsics.h */ +/* begin file src/simdutf/icelake/implementation.h */ +#ifndef SIMDUTF_ICELAKE_IMPLEMENTATION_H +#define SIMDUTF_ICELAKE_IMPLEMENTATION_H + + +namespace simdutf { +namespace icelake { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation( + "icelake", + "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 " + "extensions)", + internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | + internal::instruction_set::BMI2 | + internal::instruction_set::AVX512BW | + internal::instruction_set::AVX512CD | + internal::instruction_set::AVX512VL | + internal::instruction_set::AVX512VBMI2 | + internal::instruction_set::AVX512VPOPCNTDQ) {} + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const 
noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if 
SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept 
final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + 
simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + 
simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const 
noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + 
last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace icelake +} // namespace simdutf + +#endif // SIMDUTF_ICELAKE_IMPLEMENTATION_H +/* end file src/simdutf/icelake/implementation.h */ + + // + // The rest need to be inside the region + // +/* begin file src/simdutf/icelake/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "icelake" +// #define SIMDUTF_IMPLEMENTATION icelake + +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. 
+#else +SIMDUTF_TARGET_ICELAKE +#endif + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +// clang-format off +SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) +// clang-format on +#endif // end of workaround +/* end file src/simdutf/icelake/begin.h */ + // Declarations +/* begin file src/simdutf/icelake/bitmanipulation.h */ +#ifndef SIMDUTF_ICELAKE_BITMANIPULATION_H +#define SIMDUTF_ICELAKE_BITMANIPULATION_H + +namespace simdutf { +namespace icelake { +namespace { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdutf_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +#if SIMDUTF_NEED_TRAILING_ZEROES +// simdutf_really_inline int trailing_zeroes(uint64_t input_num) { +// #if SIMDUTF_REGULAR_VISUAL_STUDIO +// return (int)_tzcnt_u64(input_num); +// #else // SIMDUTF_REGULAR_VISUAL_STUDIO +// return __builtin_ctzll(input_num); +// #endif // SIMDUTF_REGULAR_VISUAL_STUDIO +// } +#endif + +} // unnamed namespace +} // namespace icelake +} // namespace simdutf + +#endif // SIMDUTF_ICELAKE_BITMANIPULATION_H +/* end file src/simdutf/icelake/bitmanipulation.h */ +/* begin file src/simdutf/icelake/simd.h */ +#ifndef SIMDUTF_ICELAKE_SIMD_H +#define SIMDUTF_ICELAKE_SIMD_H + +namespace simdutf { +namespace icelake { +namespace { +namespace simd { + +/* begin file src/simdutf/icelake/simd32-inl.h */ +template struct simd32; + +template <> struct simd32 { + static const size_t SIZE = sizeof(__m512i); + static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m512i value; + + simdutf_really_inline simd32(const __m512i v) : value(v) {} + + template + simdutf_really_inline simd32(const Pointer *ptr) + : value(_mm512_loadu_si512(reinterpret_cast(ptr))) {} + + uint64_t sum() 
const { + const __m512i mask = _mm512_set1_epi64(0xffffffff); + const __m512i t0 = _mm512_and_si512(value, mask); + const __m512i t1 = _mm512_srli_epi64(value, 32); + const __m512i t2 = _mm512_add_epi64(t0, t1); + return _mm512_reduce_add_epi64(t2); + } + + // operators + simdutf_really_inline simd32 &operator+=(const simd32 other) { + value = _mm512_add_epi32(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd32 zero() { + return _mm512_setzero_si512(); + } + + simdutf_really_inline static simd32 splat(uint32_t v) { + return _mm512_set1_epi32(v); + } +}; + +simdutf_really_inline simd32 min(const simd32 b, + const simd32 a) { + return _mm512_min_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 operator&(const simd32 b, + const simd32 a) { + return _mm512_and_si512(a.value, b.value); +} +/* end file src/simdutf/icelake/simd32-inl.h */ + +} // namespace simd +} // unnamed namespace +} // namespace icelake +} // namespace simdutf + +#endif // SIMDUTF_ICELAKE_SIMD_H +/* end file src/simdutf/icelake/simd.h */ + +/* begin file src/simdutf/icelake/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif + + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_POP_DISABLE_WARNINGS +#endif // end of workaround +/* end file src/simdutf/icelake/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_ICELAKE +#endif // SIMDUTF_ICELAKE_H +/* end file src/simdutf/icelake.h */ +/* begin file src/simdutf/haswell.h */ +#ifndef SIMDUTF_HASWELL_H +#define SIMDUTF_HASWELL_H + +#ifdef SIMDUTF_WESTMERE_H + #error "haswell.h must be included before westmere.h" +#endif +#ifdef SIMDUTF_FALLBACK_H + #error "haswell.h must be included before fallback.h" +#endif + + +// Default Haswell to on if this is x86-64. Even if we are not compiled for it, +// it could be selected at runtime. 
+#ifndef SIMDUTF_IMPLEMENTATION_HASWELL + // + // You do not want to restrict it like so: SIMDUTF_IS_X86_64 && __AVX2__ + // because we want to rely on *runtime dispatch*. + // + #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE + #define SIMDUTF_IMPLEMENTATION_HASWELL 0 + #else + #define SIMDUTF_IMPLEMENTATION_HASWELL (SIMDUTF_IS_X86_64) + #endif + +#endif +// To see why (__BMI__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdutf/simdutf/issues/1247 +#if ((SIMDUTF_IMPLEMENTATION_HASWELL) && (SIMDUTF_IS_X86_64) && (__AVX2__)) + #define SIMDUTF_CAN_ALWAYS_RUN_HASWELL 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_HASWELL 0 +#endif + +#if SIMDUTF_IMPLEMENTATION_HASWELL + + #define SIMDUTF_TARGET_HASWELL SIMDUTF_TARGET_REGION("avx2,bmi,lzcnt,popcnt") + +namespace simdutf { +/** + * Implementation for Haswell (Intel AVX2). + */ +namespace haswell {} // namespace haswell +} // namespace simdutf + + // + // These two need to be included outside SIMDUTF_TARGET_REGION + // +/* begin file src/simdutf/haswell/implementation.h */ +#ifndef SIMDUTF_HASWELL_IMPLEMENTATION_H +#define SIMDUTF_HASWELL_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use +// SIMDUTF_TARGET_REGION +namespace simdutf { +namespace haswell { + +using namespace simdutf; + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("haswell", "Intel/AMD AVX2", + internal::instruction_set::AVX2 | + internal::instruction_set::BMI1 | + internal::instruction_set::BMI2) {} + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + 
+#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t 
*utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result 
convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; 
+ simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept 
final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, 
size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + 
simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace haswell +} // namespace simdutf + +#endif // SIMDUTF_HASWELL_IMPLEMENTATION_H +/* end file src/simdutf/haswell/implementation.h */ +/* begin file src/simdutf/haswell/intrinsics.h */ +#ifndef SIMDUTF_HASWELL_INTRINSICS_H +#define SIMDUTF_HASWELL_INTRINSICS_H + + +#ifdef SIMDUTF_VISUAL_STUDIO + // under clang within visual studio, this will include + #include // visual studio or clang +#else + + #if SIMDUTF_GCC11ORMORE +// We should not get warnings while including yet we do +// under some versions of GCC. +// If the x86intrin.h header has uninitialized values that are problematic, +// it is a GCC issue, we want to ignore these warnings. +SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) + #endif + + #include // elsewhere + + #if SIMDUTF_GCC11ORMORE +// cancels the suppression of the -Wuninitialized +SIMDUTF_POP_DISABLE_WARNINGS + #endif + +#endif // SIMDUTF_VISUAL_STUDIO + +#ifdef SIMDUTF_CLANG_VISUAL_STUDIO + /** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... 
in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdutf, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ + #include // for _blsr_u64 + #include // for __lzcnt64 + #include // for most things (AVX2, AVX512, _popcnt64) + #include + #include + #include + #include + // unfortunately, we may not get _blsr_u64, but, thankfully, clang + // has it as a macro. + #ifndef _blsr_u64 + // we roll our own + #define _blsr_u64(n) (((n) - 1) & (n)) + #endif // _blsr_u64 + // Same issue with _blsmsk_u32: + #ifndef _blsmsk_u32 + // we roll our own + #define _blsmsk_u32(n) (((n) - 1) ^ (n)) + #endif // _blsmsk_u32 +#endif // SIMDUTF_CLANG_VISUAL_STUDIO + +#endif // SIMDUTF_HASWELL_INTRINSICS_H +/* end file src/simdutf/haswell/intrinsics.h */ + + // + // The rest need to be inside the region + // +/* begin file src/simdutf/haswell/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "haswell" +// #define SIMDUTF_IMPLEMENTATION haswell +#define SIMDUTF_SIMD_HAS_BYTEMASK + +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. 
+#else +SIMDUTF_TARGET_HASWELL +#endif + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +// clang-format off +SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) +// clang-format on +#endif // end of workaround +/* end file src/simdutf/haswell/begin.h */ + // Declarations +/* begin file src/simdutf/haswell/bitmanipulation.h */ +#ifndef SIMDUTF_HASWELL_BITMANIPULATION_H +#define SIMDUTF_HASWELL_BITMANIPULATION_H + +namespace simdutf { +namespace haswell { +namespace { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdutf_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + #if SIMDUTF_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); + #else // SIMDUTF_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); + #endif // SIMDUTF_REGULAR_VISUAL_STUDIO +} +#endif + +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + +} // unnamed namespace +} // namespace haswell +} // namespace simdutf + +#endif // SIMDUTF_HASWELL_BITMANIPULATION_H +/* end file src/simdutf/haswell/bitmanipulation.h */ +/* begin file src/simdutf/haswell/simd.h */ +#ifndef SIMDUTF_HASWELL_SIMD_H +#define SIMDUTF_HASWELL_SIMD_H + +namespace simdutf { +namespace haswell { +namespace { +namespace simd { + +// Forward-declared so they can be used by splat and friends. 
+template struct base { + __m256i value; + + // Zero constructor + simdutf_really_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdutf_really_inline base(const __m256i _value) : value(_value) {} + // Conversion to SIMD register + simdutf_really_inline operator const __m256i &() const { return this->value; } + simdutf_really_inline operator __m256i &() { return this->value; } + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + __m256i first = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(*this)); + __m256i second = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(*this, 1)); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + first = _mm256_shuffle_epi8(first, swap); + second = _mm256_shuffle_epi8(second, swap); + } + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), first); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 16), second); + } + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), + _mm256_cvtepu8_epi32(_mm256_castsi256_si128(*this))); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 8), + _mm256_cvtepu8_epi32(_mm256_castsi256_si128( + _mm256_srli_si256(*this, 8)))); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(ptr + 16), + _mm256_cvtepu8_epi32(_mm256_extractf128_si256(*this, 1))); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 24), + _mm256_cvtepu8_epi32(_mm_srli_si128( + _mm256_extractf128_si256(*this, 1), 8))); + } + // Bit operations + simdutf_really_inline Child operator|(const Child other) const { + return _mm256_or_si256(*this, other); + } + simdutf_really_inline Child operator&(const Child other) const { + return _mm256_and_si256(*this, other); + } + simdutf_really_inline Child operator^(const Child other) const { + return _mm256_xor_si256(*this, 
other); + } + simdutf_really_inline Child bit_andnot(const Child other) const { + return _mm256_andnot_si256(other, *this); + } + simdutf_really_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdutf_really_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdutf_really_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +// Forward-declared so they can be used by splat and friends. +template struct simd8; + +template > +struct base8 : base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdutf_really_inline base8() : base>() {} + simdutf_really_inline base8(const __m256i _value) : base>(_value) {} + simdutf_really_inline T first() const { + return _mm256_extract_epi8(*this, 0); + } + simdutf_really_inline T last() const { + return _mm256_extract_epi8(*this, 31); + } + friend simdutf_always_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return _mm256_cmpeq_epi8(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8( + *this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdutf_really_inline simd8 splat(bool _value) { + return _mm256_set1_epi8(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8() : base8() {} + simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} + + simdutf_really_inline uint32_t to_bitmask() const { + return uint32_t(_mm256_movemask_epi8(*this)); + } + 
simdutf_really_inline bool any() const { + return !_mm256_testz_si256(*this, *this); + } + simdutf_really_inline bool none() const { + return _mm256_testz_si256(*this, *this); + } + simdutf_really_inline bool all() const { + return static_cast(_mm256_movemask_epi8(*this)) == 0xFFFFFFFF; + } + simdutf_really_inline simd8 operator~() const { return *this ^ true; } +}; + +template struct base8_numeric : base8 { + static simdutf_really_inline simd8 splat(T _value) { + return _mm256_set1_epi8(_value); + } + static simdutf_really_inline simd8 zero() { + return _mm256_setzero_si256(); + } + static simdutf_really_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15); + } + + simdutf_really_inline base8_numeric() : base8() {} + simdutf_really_inline base8_numeric(const __m256i _value) + : base8(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[32]) const { + return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); + } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 operator+(const simd8 other) const { + return _mm256_add_epi8(*this, other); + } + simdutf_really_inline simd8 operator-(const simd8 other) const { + return _mm256_sub_epi8(*this, other); + } + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Override to distinguish from bool version + 
simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + simdutf_really_inline operator simd8() const; + // Member-by-member initialization + simdutf_really_inline + simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, + int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, + int8_t v30, int8_t v31) + : simd8(_mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, + v22, v23, v24, v25, v26, v27, v28, v29, v30, + v31)) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static 
simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15); + } + simdutf_really_inline bool is_ascii() const { + return _mm256_movemask_epi8(*this) == 0; + } + // Order-sensitive comparisons + simdutf_really_inline simd8 max_val(const simd8 other) const { + return _mm256_max_epi8(*this, other); + } + simdutf_really_inline simd8 min_val(const simd8 other) const { + return _mm256_min_epi8(*this, other); + } + simdutf_really_inline simd8 operator>(const simd8 other) const { + return _mm256_cmpgt_epi8(*this, other); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return _mm256_cmpgt_epi8(other, *this); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, + uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, + uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, + uint8_t v31) + : simd8(_mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, + v22, v23, v24, v25, v26, v27, v28, 
v29, v30, + v31)) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15); + } + + // Saturated math + simdutf_really_inline simd8 + saturating_add(const simd8 other) const { + return _mm256_adds_epu8(*this, other); + } + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return _mm256_subs_epu8(*this, other); + } + + // Order-specific operations + simdutf_really_inline simd8 + max_val(const simd8 other) const { + return _mm256_max_epu8(*this, other); + } + simdutf_really_inline simd8 + min_val(const simd8 other) const { + return _mm256_min_epu8(other, *this); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdutf_really_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdutf_really_inline simd8 + operator<(const simd8 other) const { + return this->lt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdutf_really_inline simd8 bits_not_set() const { + return *this == uint8_t(0); + } + 
simdutf_really_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdutf_really_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdutf_really_inline bool is_ascii() const { + return _mm256_movemask_epi8(*this) == 0; + } + simdutf_really_inline bool bits_not_set_anywhere() const { + return _mm256_testz_si256(*this, *this); + } + simdutf_really_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdutf_really_inline bool bits_not_set_anywhere(simd8 bits) const { + return _mm256_testz_si256(*this, bits); + } + simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdutf_really_inline simd8 shr() const { + return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); + } + template simdutf_really_inline simd8 shl() const { + return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); + } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template simdutf_really_inline int get_bit() const { + return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7 - N)); + } + + simdutf_really_inline uint64_t sum_bytes() const { + const auto tmp = _mm256_sad_epu8(value, _mm256_setzero_si256()); + + return _mm256_extract_epi64(tmp, 0) + _mm256_extract_epi64(tmp, 1) + + _mm256_extract_epi64(tmp, 2) + _mm256_extract_epi64(tmp, 3); + } +}; +simdutf_really_inline simd8::operator simd8() const { + return this->value; +} + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, + "Haswell kernel should use two registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template 
store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); + } + + simdutf_really_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] | mask, this->chunks[1] | mask); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1]) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + + return simd8x64( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + return simd8x64( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = 
simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), + (simd8(__m256i(this->chunks[1])) >= mask)) + .to_bitmask(); + } +}; // struct simd8x64 + +/* begin file src/simdutf/haswell/simd16-inl.h */ +#ifdef __GNUC__ + #if __GNUC__ < 8 + #define _mm256_set_m128i(xmm1, xmm2) \ + _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \ + _mm256_castsi128_si256(xmm2), 2) + #define _mm256_setr_m128i(xmm2, xmm1) \ + _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \ + _mm256_castsi128_si256(xmm2), 2) + #endif +#endif + +template struct simd16; + +template > +struct base16 : base> { + using bitmask_type = uint32_t; + + simdutf_really_inline base16() : base>() {} + simdutf_really_inline base16(const __m256i _value) + : base>(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) + : base16(_mm256_loadu_si256(reinterpret_cast(ptr))) {} + friend simdutf_always_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return _mm256_cmpeq_epi16(lhs, rhs); + } + + /// the size of vector in bytes + static const int SIZE = sizeof(base>::value); + + /// the number of elements of type T a vector can hold + static const int ELEMENTS = SIZE / sizeof(T); + + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return _mm256_alignr_epi8(*this, prev_chunk, 16 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return _mm256_set1_epi16(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + 
simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} + + simdutf_really_inline bitmask_type to_bitmask() const { + return _mm256_movemask_epi8(*this); + } + simdutf_really_inline bool any() const { + return !_mm256_testz_si256(*this, *this); + } + simdutf_really_inline simd16 operator~() const { return *this ^ true; } +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return _mm256_set1_epi16(_value); + } + static simdutf_really_inline simd16 zero() { + return _mm256_setzero_si256(); + } + // Unaligned load of one full 256-bit register: 32 bytes, i.e. 16 code units + // of type T (the array bound documents how much memory is read). + static simdutf_really_inline simd16 load(const T values[16]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const __m256i _value) + : base16(_value) {} + + // Store to array: unaligned store of the full register, writing 32 bytes + // (16 code units of type T) to dst. + simdutf_really_inline void store(T dst[16]) const { + return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); + } + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd16 operator+(const simd16 other) const { + return _mm256_add_epi16(*this, other); + } + simdutf_really_inline simd16 operator-(const simd16 other) const { + return _mm256_sub_epi16(*this, other); + } + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd16 &operator-=(const simd16 other) { + *this = *this - other; + return *static_cast *>(this); + } +}; + +// Signed code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m256i _value) + : base16_numeric(_value) {} + // Splat constructor + 
simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + // Order-sensitive comparisons + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return _mm256_max_epi16(*this, other); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return _mm256_min_epi16(*this, other); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return _mm256_cmpgt_epi16(*this, other); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return _mm256_cmpgt_epi16(other, *this); + } +}; + +// Unsigned code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m256i _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + simdutf_really_inline simd16(const simd16 bm) : simd16(bm.value) {} + + // Saturated math + simdutf_really_inline simd16 + saturating_add(const simd16 other) const { + return _mm256_adds_epu16(*this, other); + } + simdutf_really_inline simd16 + saturating_sub(const simd16 other) const { + return _mm256_subs_epu16(*this, other); + } + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return _mm256_max_epu16(*this, other); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return _mm256_min_epu16(*this, other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + 
gt_bits(const simd16 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + lt_bits(const simd16 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return other.max_val(*this) == other; + } + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return other.min_val(*this) == other; + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return this->gt_bits(other).any_bits_set(); + } + // Strict unsigned less-than. lt_bits (other - *this, saturated) is nonzero + // exactly in the lanes where *this < other; using gt_bits here would make + // this operator a duplicate of operator>. + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return this->lt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdutf_really_inline simd16 bits_not_set() const { + return *this == uint16_t(0); + } + simdutf_really_inline simd16 bits_not_set(simd16 bits) const { + return (*this & bits).bits_not_set(); + } + simdutf_really_inline simd16 any_bits_set() const { + return ~this->bits_not_set(); + } + simdutf_really_inline simd16 any_bits_set(simd16 bits) const { + return ~this->bits_not_set(bits); + } + + simdutf_really_inline bool bits_not_set_anywhere() const { + return _mm256_testz_si256(*this, *this); + } + simdutf_really_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdutf_really_inline bool + bits_not_set_anywhere(simd16 bits) const { + return _mm256_testz_si256(*this, bits); + } + simdutf_really_inline bool + any_bits_set_anywhere(simd16 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdutf_really_inline simd16 shr() const { + return simd16(_mm256_srli_epi16(*this, N)); + } + template simdutf_really_inline simd16 shl() const { + return simd16(_mm256_slli_epi16(*this, N)); + } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template simdutf_really_inline int get_bit() const { + return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 15 - N)); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + return _mm256_shuffle_epi8(*this, swap); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + // Note: the AVX2 variant of pack operates on 128-bit lanes, thus + // we have to shuffle lanes in order to produce bytes in the + // correct order. + + // get the 0th lanes + const __m128i lo_0 = _mm256_extracti128_si256(v0, 0); + const __m128i lo_1 = _mm256_extracti128_si256(v1, 0); + + // get the 1st lanes + const __m128i hi_0 = _mm256_extracti128_si256(v0, 1); + const __m128i hi_1 = _mm256_extracti128_si256(v1, 1); + + // build new vectors (shuffle lanes) + const __m256i t0 = _mm256_set_m128i(lo_1, lo_0); + const __m256i t1 = _mm256_set_m128i(hi_1, hi_0); + + // pack code units in linear order from v0 and v1 + return _mm256_packus_epi16(t0, t1); + } + + simdutf_really_inline uint64_t sum() const { + const auto lo_u16 = _mm256_and_si256(value, _mm256_set1_epi32(0x0000ffff)); + const auto hi_u16 = _mm256_srli_epi32(value, 16); + const auto sum_u32 = _mm256_add_epi32(lo_u16, hi_u16); + + const auto lo_u32 = + _mm256_and_si256(sum_u32, _mm256_set1_epi64x(0xffffffff)); + const auto hi_u32 = _mm256_srli_epi64(sum_u32, 32); + const auto sum_u64 = _mm256_add_epi64(lo_u32, hi_u32); + + return uint64_t(_mm256_extract_epi64(sum_u64, 0)) + + uint64_t(_mm256_extract_epi64(sum_u64, 1)) + + uint64_t(_mm256_extract_epi64(sum_u64, 2)) + + uint64_t(_mm256_extract_epi64(sum_u64, 3)); + } +}; + +template struct simd16x32 { + static constexpr int NUM_CHUNKS 
= 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 2, + "Haswell kernel should use two registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline simd16x32(const simd16 chunk0, + const simd16 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdutf_really_inline simd16 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16)); + } + + simdutf_really_inline simd16x32 bit_or(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] | mask, this->chunks[1] | mask); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const simd16x32 &other) const { + return simd16x32(this->chunks[0] == 
other.chunks[0], + this->chunks[1] == other.chunks[1]) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + + return simd16x32( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(static_cast(low - 1)); + const simd16 mask_high = simd16::splat(static_cast(high + 1)); + return simd16x32( + (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), + (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask) + .to_bitmask(); + } +}; // struct simd16x32 + +simd16 min(const simd16 a, simd16 b) { + return _mm256_min_epu16(a.value, b.value); +} +/* end file src/simdutf/haswell/simd16-inl.h */ +/* begin file src/simdutf/haswell/simd32-inl.h */ +template struct simd32; + +template <> struct simd32 { + static const size_t SIZE = sizeof(__m256i); + static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m256i value; + + simdutf_really_inline simd32(const __m256i v) : value(v) {} + + template + simdutf_really_inline simd32(const Pointer *ptr) + : value(_mm256_loadu_si256(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { + const __m256i mask = _mm256_set1_epi64x(0xffffffff); + const __m256i t0 = _mm256_and_si256(value, mask); + const __m256i t1 = _mm256_srli_epi64(value, 32); + const __m256i t2 = 
_mm256_add_epi64(t0, t1); + + return uint64_t(_mm256_extract_epi64(t2, 0)) + + uint64_t(_mm256_extract_epi64(t2, 1)) + + uint64_t(_mm256_extract_epi64(t2, 2)) + + uint64_t(_mm256_extract_epi64(t2, 3)); + } + + // Change the endianness: reverse the four bytes of every 32-bit element. + // _mm256_shuffle_epi8 selects bytes within each 128-bit half independently, + // so the same 16 indices are listed twice; every group of four indices must + // be reversed (an ascending group would leave that element unswapped). + simdutf_really_inline simd32 swap_bytes() const { + const __m256i shuffle = + _mm256_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, + 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + + return _mm256_shuffle_epi8(value, shuffle); + } + + // operators + simdutf_really_inline simd32 &operator+=(const simd32 other) { + value = _mm256_add_epi32(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd32 zero() { + return _mm256_setzero_si256(); + } + + simdutf_really_inline static simd32 splat(uint32_t v) { + return _mm256_set1_epi32(v); + } +}; + +//---------------------------------------------------------------------- + +template <> struct simd32 { + // static const size_t SIZE = sizeof(__m128i); + // static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m256i value; + + simdutf_really_inline simd32(const __m256i v) : value(v) {} + + simdutf_really_inline bool any() const { + return _mm256_movemask_epi8(value) != 0; + } +}; + +//---------------------------------------------------------------------- + +template +simdutf_really_inline simd32 operator|(const simd32 a, + const simd32 b) { + return _mm256_or_si256(a.value, b.value); +} + +simdutf_really_inline simd32 min(const simd32 b, + const simd32 a) { + return _mm256_min_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 max(const simd32 a, + const simd32 b) { + return _mm256_max_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 operator&(const simd32 b, + const simd32 a) { + return _mm256_and_si256(a.value, b.value); +} + +simdutf_really_inline simd32 operator+(const simd32 a, + const simd32 b) { + return _mm256_add_epi32(a.value, b.value); +} + +simdutf_really_inline simd32 operator>=(const simd32 a, + const simd32 b) 
{ + return _mm256_cmpeq_epi32(_mm256_max_epu32(a.value, b.value), a.value); +} + +simdutf_really_inline simd32 operator!(const simd32 v) { + return _mm256_xor_si256(v.value, _mm256_set1_epi8(-1)); +} + +simdutf_really_inline simd32 operator>(const simd32 a, + const simd32 b) { + return !(b >= a); +} +/* end file src/simdutf/haswell/simd32-inl.h */ +/* begin file src/simdutf/haswell/simd64-inl.h */ +template struct simd64; + +template <> struct simd64 { + // static const size_t SIZE = sizeof(__m256i); + // static const size_t ELEMENTS = SIZE / sizeof(uint64_t); + + __m256i value; + + simdutf_really_inline simd64(const __m256i v) : value(v) {} + + template + simdutf_really_inline simd64(const Pointer *ptr) + : value(_mm256_loadu_si256(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { + return _mm256_extract_epi64(value, 0) + _mm256_extract_epi64(value, 1) + + _mm256_extract_epi64(value, 2) + _mm256_extract_epi64(value, 3); + } + + // operators + simdutf_really_inline simd64 &operator+=(const simd64 other) { + value = _mm256_add_epi64(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd64 zero() { + return _mm256_setzero_si256(); + } + + simdutf_really_inline static simd64 splat(uint64_t v) { + return _mm256_set1_epi64x(v); + } +}; +/* end file src/simdutf/haswell/simd64-inl.h */ + +simdutf_really_inline simd64 sum_8bytes(const simd8 v) { + return _mm256_sad_epu8(v.value, simd8::zero()); +} + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdutf + +#endif // SIMDUTF_HASWELL_SIMD_H +/* end file src/simdutf/haswell/simd.h */ + +/* begin file src/simdutf/haswell/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. 
+#else +SIMDUTF_UNTARGET_REGION +#endif + +#undef SIMDUTF_SIMD_HAS_BYTEMASK + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_POP_DISABLE_WARNINGS +#endif // end of workaround +/* end file src/simdutf/haswell/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_HASWELL +#endif // SIMDUTF_HASWELL_COMMON_H +/* end file src/simdutf/haswell.h */ +/* begin file src/simdutf/westmere.h */ +#ifndef SIMDUTF_WESTMERE_H +#define SIMDUTF_WESTMERE_H + +#ifdef SIMDUTF_FALLBACK_H + #error "westmere.h must be included before fallback.h" +#endif + + +// Default Westmere to on if this is x86-64, unless we'll always select Haswell. +#ifndef SIMDUTF_IMPLEMENTATION_WESTMERE + // + // You do not want to set it to (SIMDUTF_IS_X86_64 && + // !SIMDUTF_REQUIRES_HASWELL) because you want to rely on runtime dispatch! + // + #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || SIMDUTF_CAN_ALWAYS_RUN_HASWELL + #define SIMDUTF_IMPLEMENTATION_WESTMERE 0 + #else + #define SIMDUTF_IMPLEMENTATION_WESTMERE (SIMDUTF_IS_X86_64) + #endif + +#endif + +#if (SIMDUTF_IMPLEMENTATION_WESTMERE && SIMDUTF_IS_X86_64 && __SSE4_2__) + #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE 0 +#endif + +#if SIMDUTF_IMPLEMENTATION_WESTMERE + + #define SIMDUTF_TARGET_WESTMERE SIMDUTF_TARGET_REGION("sse4.2,popcnt") + +namespace simdutf { +/** + * Implementation for Westmere (Intel SSE4.2). 
+ */ +namespace westmere {} // namespace westmere +} // namespace simdutf + + // + // These two need to be included outside SIMDUTF_TARGET_REGION + // +/* begin file src/simdutf/westmere/implementation.h */ +#ifndef SIMDUTF_WESTMERE_IMPLEMENTATION_H +#define SIMDUTF_WESTMERE_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use +// SIMDUTF_TARGET_REGION +namespace simdutf { +namespace westmere { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("westmere", "Intel/AMD SSE4.2", + internal::instruction_set::SSE42) {} + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) 
const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && 
SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char 
*latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept 
final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + 
simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace westmere +} // namespace simdutf + +#endif // SIMDUTF_WESTMERE_IMPLEMENTATION_H +/* end file src/simdutf/westmere/implementation.h */ +/* begin file src/simdutf/westmere/intrinsics.h */ +#ifndef 
SIMDUTF_WESTMERE_INTRINSICS_H +#define SIMDUTF_WESTMERE_INTRINSICS_H + +#ifdef SIMDUTF_VISUAL_STUDIO + // under clang within visual studio, this will include + #include // visual studio or clang +#else + + #if SIMDUTF_GCC11ORMORE +// We should not get warnings while including yet we do +// under some versions of GCC. +// If the x86intrin.h header has uninitialized values that are problematic, +// it is a GCC issue, we want to ignore these warnings. +SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) + #endif + + #include // elsewhere + + #if SIMDUTF_GCC11ORMORE +// cancels the suppression of the -Wuninitialized +SIMDUTF_POP_DISABLE_WARNINGS + #endif + +#endif // SIMDUTF_VISUAL_STUDIO + +#ifdef SIMDUTF_CLANG_VISUAL_STUDIO + /** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ + #include // for _mm_alignr_epi8 +#endif + +#endif // SIMDUTF_WESTMERE_INTRINSICS_H +/* end file src/simdutf/westmere/intrinsics.h */ + + // + // The rest need to be inside the region + // +/* begin file src/simdutf/westmere/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "westmere" +// #define SIMDUTF_IMPLEMENTATION westmere +#define SIMDUTF_SIMD_HAS_BYTEMASK + +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. 
+#else +SIMDUTF_TARGET_WESTMERE +#endif +/* end file src/simdutf/westmere/begin.h */ + + // Declarations +/* begin file src/simdutf/westmere/bitmanipulation.h */ +#ifndef SIMDUTF_WESTMERE_BITMANIPULATION_H +#define SIMDUTF_WESTMERE_BITMANIPULATION_H + +namespace simdutf { +namespace westmere { +namespace { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdutf_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + #if SIMDUTF_REGULAR_VISUAL_STUDIO + unsigned long ret; + _BitScanForward64(&ret, input_num); + return (int)ret; + #else // SIMDUTF_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); + #endif // SIMDUTF_REGULAR_VISUAL_STUDIO +} +#endif + +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + +} // unnamed namespace +} // namespace westmere +} // namespace simdutf + +#endif // SIMDUTF_WESTMERE_BITMANIPULATION_H +/* end file src/simdutf/westmere/bitmanipulation.h */ +/* begin file src/simdutf/westmere/simd.h */ +#ifndef SIMDUTF_WESTMERE_SIMD_H +#define SIMDUTF_WESTMERE_SIMD_H + +namespace simdutf { +namespace westmere { +namespace { +namespace simd { + +template struct base { + __m128i value; + + // Zero constructor + simdutf_really_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdutf_really_inline base(const __m128i _value) : value(_value) {} + // Conversion to SIMD register + simdutf_really_inline operator const __m128i &() const { return this->value; } + simdutf_really_inline operator __m128i &() { return this->value; } + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + __m128i first = _mm_cvtepu8_epi16(*this); + __m128i second 
= _mm_cvtepu8_epi16(_mm_srli_si128(*this, 8)); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + first = _mm_shuffle_epi8(first, swap); + second = _mm_shuffle_epi8(second, swap); + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(p), first); + _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8), second); + } + simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { + _mm_storeu_si128(reinterpret_cast<__m128i *>(p), _mm_cvtepu8_epi32(*this)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 4), + _mm_cvtepu8_epi32(_mm_srli_si128(*this, 4))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8), + _mm_cvtepu8_epi32(_mm_srli_si128(*this, 8))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 12), + _mm_cvtepu8_epi32(_mm_srli_si128(*this, 12))); + } + // Bit operations + simdutf_really_inline Child operator|(const Child other) const { + return _mm_or_si128(*this, other); + } + simdutf_really_inline Child operator&(const Child other) const { + return _mm_and_si128(*this, other); + } + simdutf_really_inline Child operator^(const Child other) const { + return _mm_xor_si128(*this, other); + } + simdutf_really_inline Child bit_andnot(const Child other) const { + return _mm_andnot_si128(other, *this); + } + simdutf_really_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdutf_really_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdutf_really_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +// Forward-declared so they can be used by splat and friends. 
+template struct simd8; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdutf_really_inline T first() const { return _mm_extract_epi8(*this, 0); } + simdutf_really_inline T last() const { return _mm_extract_epi8(*this, 15); } + simdutf_really_inline base8() : base>() {} + simdutf_really_inline base8(const __m128i _value) : base>(_value) {} + + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return _mm_cmpeq_epi8(lhs, rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdutf_really_inline simd8 splat(bool _value) { + return _mm_set1_epi8(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8() : base8() {} + simdutf_really_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} + + simdutf_really_inline int to_bitmask() const { + return _mm_movemask_epi8(*this); + } + simdutf_really_inline bool any() const { + return !_mm_testz_si128(*this, *this); + } + simdutf_really_inline bool none() const { + return _mm_testz_si128(*this, *this); + } + simdutf_really_inline bool all() const { + return _mm_movemask_epi8(*this) == 0xFFFF; + } + simdutf_really_inline simd8 operator~() const { return *this ^ true; } +}; + +template struct base8_numeric : base8 { + static simdutf_really_inline simd8 splat(T _value) { + return _mm_set1_epi8(_value); + } + static simdutf_really_inline simd8 zero() { return _mm_setzero_si128(); } + static simdutf_really_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static 
simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdutf_really_inline base8_numeric() : base8() {} + simdutf_really_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[16]) const { + return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); + } + + // Override to distinguish from bool version + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 operator+(const simd8 other) const { + return _mm_add_epi8(*this, other); + } + simdutf_really_inline simd8 operator-(const simd8 other) const { + return _mm_sub_epi8(*this, other); + } + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline 
simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8(_mm_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15)) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + simdutf_really_inline operator simd8() const; + simdutf_really_inline bool is_ascii() const { + return _mm_movemask_epi8(*this) == 0; + } + + // Order-sensitive comparisons + simdutf_really_inline simd8 max_val(const simd8 other) const { + return _mm_max_epi8(*this, other); + } + simdutf_really_inline simd8 min_val(const simd8 other) const { + return _mm_min_epi8(*this, other); + } + simdutf_really_inline simd8 operator>(const simd8 other) const { + return _mm_cmpgt_epi8(*this, other); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return _mm_cmpgt_epi8(other, *this); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + 
simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8(_mm_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15)) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdutf_really_inline simd8 + saturating_add(const simd8 other) const { + return _mm_adds_epu8(*this, other); + } + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return _mm_subs_epu8(*this, other); + } + + // Order-specific operations + simdutf_really_inline simd8 + max_val(const simd8 other) const { + return _mm_max_epu8(*this, other); + } + simdutf_really_inline simd8 + min_val(const simd8 other) const { + return _mm_min_epu8(*this, other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdutf_really_inline 
simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdutf_really_inline simd8 + operator<(const simd8 other) const { + return this->lt_bits(other).any_bits_set(); // lt_bits is other.saturating_sub(*this): nonzero exactly in lanes where *this < other (gt_bits here would duplicate operator>) + } + + // Bit-specific operations + simdutf_really_inline simd8 bits_not_set() const { + return *this == uint8_t(0); + } + simdutf_really_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdutf_really_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdutf_really_inline bool is_ascii() const { + return _mm_movemask_epi8(*this) == 0; + } + + simdutf_really_inline bool bits_not_set_anywhere() const { + return _mm_testz_si128(*this, *this); + } + simdutf_really_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdutf_really_inline bool bits_not_set_anywhere(simd8 bits) const { + return _mm_testz_si128(*this, bits); + } + simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdutf_really_inline simd8 shr() const { + return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); + } + template simdutf_really_inline simd8 shl() const { + return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); + } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template simdutf_really_inline int get_bit() const { + return _mm_movemask_epi8(_mm_slli_epi16(*this, 7 - N)); + } + + simdutf_really_inline uint64_t sum_bytes() const { + const auto tmp = _mm_sad_epu8(value, _mm_setzero_si128()); + + return _mm_extract_epi64(tmp, 0) + _mm_extract_epi64(tmp, 1); + } +}; + +simdutf_really_inline simd8::operator simd8() const { + return this->value; +} + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "Westmere kernel should use four registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { + return 
this->reduce_or().is_ascii(); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + + return simd8x64( + 
(this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), + (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), + (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low - 1); + const simd8 mask_high = simd8::splat(high + 1); + return simd8x64( + (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), + (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low), + (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low), + (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask, + this->chunks[2] >= mask, this->chunks[3] >= mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(simd8(__m128i(this->chunks[0])) >= mask, + simd8(__m128i(this->chunks[1])) >= mask, + simd8(__m128i(this->chunks[2])) >= mask, + simd8(__m128i(this->chunks[3])) >= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +/* begin file src/simdutf/westmere/simd16-inl.h */ +template struct simd16; + +template > +struct base16 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + 
simdutf_really_inline base16() : base>() {} + simdutf_really_inline base16(const __m128i _value) + : base>(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) + : base16(_mm_loadu_si128(reinterpret_cast(ptr))) {} + + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return _mm_cmpeq_epi16(lhs, rhs); + } + + /// the size of vector in bytes + static const int SIZE = sizeof(base>::value); + + /// the number of elements of type T a vector can hold + static const int ELEMENTS = SIZE / sizeof(T); + + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return _mm_set1_epi16(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} + + simdutf_really_inline int to_bitmask() const { + return _mm_movemask_epi8(*this); + } + simdutf_really_inline bool any() const { + return !_mm_testz_si128(*this, *this); + } + simdutf_really_inline simd16 operator~() const { return *this ^ true; } +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return _mm_set1_epi16(_value); + } + static simdutf_really_inline simd16 zero() { return _mm_setzero_si128(); } + static simdutf_really_inline simd16 load(const T values[8]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const __m128i _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); + 
} + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd16 operator+(const simd16 other) const { + return _mm_add_epi16(*this, other); + } + simdutf_really_inline simd16 operator-(const simd16 other) const { + return _mm_sub_epi16(*this, other); + } + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd16 &operator-=(const simd16 other) { + *this = *this - other; + return *static_cast *>(this); + } +}; + +// Signed code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m128i _value) + : base16_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + // Member-by-member initialization + simdutf_really_inline simd16(int16_t v0, int16_t v1, int16_t v2, int16_t v3, + int16_t v4, int16_t v5, int16_t v6, int16_t v7) + : simd16(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {} + simdutf_really_inline operator simd16() const; + + // Order-sensitive comparisons + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return _mm_max_epi16(*this, other); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return _mm_min_epi16(*this, other); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return _mm_cmpgt_epi16(*this, other); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return _mm_cmpgt_epi16(other, *this); + } +}; + +// Unsigned code units +template <> struct simd16 
: base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m128i _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + simdutf_really_inline simd16(const simd16 bm) : simd16(bm.value) {} + // Member-by-member initialization + simdutf_really_inline simd16(uint16_t v0, uint16_t v1, uint16_t v2, + uint16_t v3, uint16_t v4, uint16_t v5, + uint16_t v6, uint16_t v7) + : simd16(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd16 + repeat_16(uint16_t v0, uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4, + uint16_t v5, uint16_t v6, uint16_t v7) { + return simd16(v0, v1, v2, v3, v4, v5, v6, v7); + } + + // Saturated math + simdutf_really_inline simd16 + saturating_add(const simd16 other) const { + return _mm_adds_epu16(*this, other); + } + simdutf_really_inline simd16 + saturating_sub(const simd16 other) const { + return _mm_subs_epu16(*this, other); + } + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return _mm_max_epu16(*this, other); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return _mm_min_epu16(*this, other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + gt_bits(const simd16 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + lt_bits(const simd16 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd16 + operator<=(const 
simd16 other) const { + return other.max_val(*this) == other; + } + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return other.min_val(*this) == other; + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return this->gt_bits(other).any_bits_set(); + } + // Lane-wise unsigned "less than": a lane is true where *this < other, i.e. + // where the saturating difference other - *this (lt_bits) is nonzero. + // The previous body reused gt_bits and therefore computed *this > other. + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return this->lt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdutf_really_inline simd16 bits_not_set() const { + return *this == uint16_t(0); + } + simdutf_really_inline simd16 bits_not_set(simd16 bits) const { + return (*this & bits).bits_not_set(); + } + simdutf_really_inline simd16 any_bits_set() const { + return ~this->bits_not_set(); + } + simdutf_really_inline simd16 any_bits_set(simd16 bits) const { + return ~this->bits_not_set(bits); + } + + simdutf_really_inline bool bits_not_set_anywhere() const { + return _mm_testz_si128(*this, *this); + } + simdutf_really_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdutf_really_inline bool + bits_not_set_anywhere(simd16 bits) const { + return _mm_testz_si128(*this, bits); + } + simdutf_really_inline bool + any_bits_set_anywhere(simd16 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdutf_really_inline simd16 shr() const { + return simd16(_mm_srli_epi16(*this, N)); + } + template simdutf_really_inline simd16 shl() const { + return simd16(_mm_slli_epi16(*this, N)); + } + // Get one of the bits and make a bitmask out of it. + // e.g.
value.get_bit<7>() gets the high bit + template simdutf_really_inline int get_bit() const { + return _mm_movemask_epi8(_mm_slli_epi16(*this, 7 - N)); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + return _mm_shuffle_epi8(*this, swap); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + return _mm_packus_epi16(v0, v1); + } + + simdutf_really_inline uint64_t sum() const { + const auto lo_u16 = _mm_and_si128(value, _mm_set1_epi32(0x0000ffff)); + const auto hi_u16 = _mm_srli_epi32(value, 16); + const auto sum_u32 = _mm_add_epi32(lo_u16, hi_u16); + + const auto lo_u32 = _mm_and_si128(sum_u32, _mm_set1_epi64x(0xffffffff)); + const auto hi_u32 = _mm_srli_epi64(sum_u32, 32); + const auto sum_u64 = _mm_add_epi64(lo_u32, hi_u32); + + return uint64_t(_mm_extract_epi64(sum_u64, 0)) + + uint64_t(_mm_extract_epi64(sum_u64, 1)); + } +}; + +simdutf_really_inline simd16::operator simd16() const { + return this->value; +} + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 4, + "Westmere kernel should use four registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) 
/ sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); + } + + simdutf_really_inline simd16 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); + this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); + this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const simd16x32 &other) const { + return simd16x32(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdutf_really_inline uint64_t 
lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + + return simd16x32( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), + (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), + (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(static_cast(low - 1)); + const simd16 mask_high = simd16::splat(static_cast(high + 1)); + return simd16x32( + (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), + (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low), + (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low), + (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } +}; // struct simd16x32 + +simd16 min(const simd16 a, simd16 b) { + return _mm_min_epu16(a.value, b.value); +} +/* end file src/simdutf/westmere/simd16-inl.h */ +/* begin file src/simdutf/westmere/simd32-inl.h */ +template struct simd32; + +template <> struct simd32 { + static const size_t SIZE = sizeof(__m128i); + static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m128i value; + + simdutf_really_inline simd32(const __m128i v) : value(v) {} + + template + simdutf_really_inline simd32(const Pointer *ptr) + : 
value(_mm_loadu_si128(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { + return uint64_t(_mm_extract_epi32(value, 0)) + + uint64_t(_mm_extract_epi32(value, 1)) + + uint64_t(_mm_extract_epi32(value, 2)) + + uint64_t(_mm_extract_epi32(value, 3)); + } + + // Change the endianness: reverse the four bytes inside each 32-bit lane. + // Every group of four shuffle indices must be descending; the third group + // used to be 8, 9, 10, 11, which left lane 2 in its original byte order. + simdutf_really_inline simd32 swap_bytes() const { + const __m128i shuffle = + _mm_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + + return _mm_shuffle_epi8(value, shuffle); + } + + void dump() const { + printf("[%08x, %08x, %08x, %08x]\n", uint32_t(_mm_extract_epi32(value, 0)), + uint32_t(_mm_extract_epi32(value, 1)), + uint32_t(_mm_extract_epi32(value, 2)), + uint32_t(_mm_extract_epi32(value, 3))); + } + + // operators + simdutf_really_inline simd32 &operator+=(const simd32 other) { + value = _mm_add_epi32(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd32 zero() { + return _mm_setzero_si128(); + } + + simdutf_really_inline static simd32 splat(uint32_t v) { + return _mm_set1_epi32(v); + } +}; + +//---------------------------------------------------------------------- + +template <> struct simd32 { + // static const size_t SIZE = sizeof(__m128i); + // static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m128i value; + + simdutf_really_inline simd32(const __m128i v) : value(v) {} + + simdutf_really_inline bool any() const { + return _mm_movemask_epi8(value) != 0; + } +}; + +//---------------------------------------------------------------------- + +template +simdutf_really_inline simd32 operator|(const simd32 a, + const simd32 b) { + return _mm_or_si128(a.value, b.value); +} + +simdutf_really_inline simd32 min(const simd32 a, + const simd32 b) { + return _mm_min_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 max(const simd32 a, + const simd32 b) { + return _mm_max_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 operator&(const simd32 a, + const simd32 b) { + return _mm_and_si128(a.value,
b.value); +} + +simdutf_really_inline simd32 operator+(const simd32 a, + const simd32 b) { + return _mm_add_epi32(a.value, b.value); +} + +simdutf_really_inline simd32 operator>=(const simd32 a, + const simd32 b) { + return _mm_cmpeq_epi32(_mm_max_epu32(a.value, b.value), a.value); +} + +simdutf_really_inline simd32 operator!(const simd32 v) { + return _mm_xor_si128(v.value, _mm_set1_epi8(-1)); +} + +simdutf_really_inline simd32 operator>(const simd32 a, + const simd32 b) { + return !(b >= a); +} +/* end file src/simdutf/westmere/simd32-inl.h */ +/* begin file src/simdutf/westmere/simd64-inl.h */ +template struct simd64; + +template <> struct simd64 { + // static const size_t SIZE = sizeof(__m128i); + // static const size_t ELEMENTS = SIZE / sizeof(uint64_t); + + __m128i value; + + simdutf_really_inline simd64(const __m128i v) : value(v) {} + + template + simdutf_really_inline simd64(const Pointer *ptr) + : value(_mm_loadu_si128(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { + return _mm_extract_epi64(value, 0) + _mm_extract_epi64(value, 1); + } + + // operators + simdutf_really_inline simd64 &operator+=(const simd64 other) { + value = _mm_add_epi64(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd64 zero() { + return _mm_setzero_si128(); + } + + simdutf_really_inline static simd64 splat(uint64_t v) { + return _mm_set1_epi64x(v); + } +}; +/* end file src/simdutf/westmere/simd64-inl.h */ + +simdutf_really_inline simd64 sum_8bytes(const simd8 v) { + return _mm_sad_epu8(v.value, simd8::zero()); +} + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdutf + +#endif // SIMDUTF_WESTMERE_SIMD_INPUT_H +/* end file src/simdutf/westmere/simd.h */ + +/* begin file src/simdutf/westmere/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. 
+#else +SIMDUTF_UNTARGET_REGION +#endif + +#undef SIMDUTF_SIMD_HAS_BYTEMASK +/* end file src/simdutf/westmere/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_WESTMERE +#endif // SIMDUTF_WESTMERE_COMMON_H +/* end file src/simdutf/westmere.h */ +/* begin file src/simdutf/ppc64.h */ +#ifndef SIMDUTF_PPC64_H +#define SIMDUTF_PPC64_H + +#ifdef SIMDUTF_FALLBACK_H + #error "ppc64.h must be included before fallback.h" +#endif + + +#ifndef SIMDUTF_IMPLEMENTATION_PPC64 + #define SIMDUTF_IMPLEMENTATION_PPC64 (SIMDUTF_IS_PPC64) +#endif +#define SIMDUTF_CAN_ALWAYS_RUN_PPC64 \ + SIMDUTF_IMPLEMENTATION_PPC64 &&SIMDUTF_IS_PPC64 + + +#if SIMDUTF_IMPLEMENTATION_PPC64 + +namespace simdutf { +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 {} // namespace ppc64 +} // namespace simdutf + +/* begin file src/simdutf/ppc64/implementation.h */ +#ifndef SIMDUTF_PPC64_IMPLEMENTATION_H +#define SIMDUTF_PPC64_IMPLEMENTATION_H + + +namespace simdutf { +namespace ppc64 { + +namespace { +using namespace simdutf; + +template simdutf_really_inline size_t align_down(size_t size) { + return N * (size / N); +} +} // namespace + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + 
size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const 
noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + 
simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + 
char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && 
SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept; + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options 
options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 + +#ifdef SIMDUTF_INTERNAL_TESTS + virtual std::vector internal_tests() const override; +#endif +}; + +} // namespace ppc64 +} // namespace simdutf + +#endif // SIMDUTF_PPC64_IMPLEMENTATION_H +/* end file src/simdutf/ppc64/implementation.h */ + +/* begin file src/simdutf/ppc64/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "ppc64" +// #define SIMDUTF_IMPLEMENTATION ppc64 +/* end file src/simdutf/ppc64/begin.h */ + + // Declarations +/* begin file src/simdutf/ppc64/intrinsics.h */ +#ifndef SIMDUTF_PPC64_INTRINSICS_H +#define SIMDUTF_PPC64_INTRINSICS_H + + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. 
+#ifdef bool + #undef bool +#endif + +#ifdef vector + #undef vector +#endif + +#endif // SIMDUTF_PPC64_INTRINSICS_H +/* end file src/simdutf/ppc64/intrinsics.h */ +/* begin file src/simdutf/ppc64/bitmanipulation.h */ +#ifndef SIMDUTF_PPC64_BITMANIPULATION_H +#define SIMDUTF_PPC64_BITMANIPULATION_H + +namespace simdutf { +namespace ppc64 { +namespace { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdutf_really_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} +#endif + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf + +#endif // SIMDUTF_PPC64_BITMANIPULATION_H +/* end file src/simdutf/ppc64/bitmanipulation.h */ +/* begin file src/simdutf/ppc64/simd.h */ +#ifndef SIMDUTF_PPC64_SIMD_H +#define SIMDUTF_PPC64_SIMD_H + +#include + +namespace simdutf { +namespace ppc64 { +namespace { +namespace simd { + +using vec_bool_t = __vector __bool char; +using vec_bool16_t = __vector __bool short; +using vec_bool32_t = __vector __bool int; +using vec_u8_t = __vector unsigned char; +using vec_i8_t = __vector signed char; +using vec_u16_t = __vector unsigned short; +using vec_i16_t = __vector signed short; +using vec_u32_t = __vector unsigned int; +using vec_i32_t = __vector signed int; +using vec_u64_t = __vector unsigned long long; +using vec_i64_t = __vector signed long long; + +// clang-format off +template struct vector_u8_type_for_element_aux { + using type = typename std::conditional::value, vec_bool_t, + typename std::conditional::value, vec_u8_t, + typename std::conditional::value, vec_i8_t, void>::type>::type>::type; + + static_assert(not std::is_same::value, + "accepted element 
types are 8 bit integers or bool"); +}; + +template struct vector_u16_type_for_element_aux { + using type = typename std::conditional::value, vec_bool16_t, + typename std::conditional::value, vec_u16_t, + typename std::conditional::value, vec_i16_t, void>::type>::type>::type; + + static_assert(not std::is_same::value, + "accepted element types are 16 bit integers or bool"); +}; + +template struct vector_u32_type_for_element_aux { + using type = typename std::conditional::value, vec_bool32_t, + typename std::conditional::value, vec_u32_t, + typename std::conditional::value, vec_i32_t, void>::type>::type>::type; + + static_assert(not std::is_same::value, + "accepted element types are 32 bit integers or bool"); +}; +// clang-format on + +template +using vector_u8_type_for_element = + typename vector_u8_type_for_element_aux::type; + +template +using vector_u16_type_for_element = + typename vector_u16_type_for_element_aux::type; + +template +using vector_u32_type_for_element = + typename vector_u32_type_for_element_aux::type; + +template uint16_t move_mask_u8(T vec) { + const vec_u8_t perm_mask = {15 * 8, 14 * 8, 13 * 8, 12 * 8, 11 * 8, 10 * 8, + 9 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, + 3 * 8, 2 * 8, 1 * 8, 0 * 8}; + + const auto result = (vec_u64_t)vec_vbpermq((vec_u8_t)vec, perm_mask); +#if SIMDUTF_IS_BIG_ENDIAN + return static_cast(result[0]); +#else + return static_cast(result[1]); +#endif +} + +/* begin file src/simdutf/ppc64/simd8-inl.h */ +// file included directly + +template struct base8 { + using vector_type = vector_u8_type_for_element; + vector_type value; + static const int SIZE = sizeof(vector_type); + static const int ELEMENTS = sizeof(vector_type) / sizeof(T); + + // Zero constructor + simdutf_really_inline base8() : value{vec_splats(T(0))} {} + + // Conversion from SIMD register + simdutf_really_inline base8(const vector_type _value) : value{_value} {} + + // Splat scalar + simdutf_really_inline base8(T v) : value{vec_splats(v)} {} + + // Conversion 
to SIMD register + simdutf_really_inline operator const vector_type &() const { + return this->value; + } + + template simdutf_really_inline void store(U *ptr) const { + vec_xst(value, 0, reinterpret_cast(ptr)); + } + + template void operator|=(const SIMD8 other) { + this->value = vec_or(this->value, other.value); + } + + template vector_type prev_aux(vector_type prev_chunk) const { + vector_type chunk = this->value; +#if !SIMDUTF_IS_BIG_ENDIAN + chunk = (vector_type)vec_reve(this->value); + prev_chunk = (vector_type)vec_reve((vector_type)prev_chunk); +#endif + chunk = (vector_type)vec_sld((vector_type)prev_chunk, (vector_type)chunk, + 16 - N); +#if !SIMDUTF_IS_BIG_ENDIAN + chunk = (vector_type)vec_reve((vector_type)chunk); +#endif + return chunk; + } + + simdutf_really_inline bool is_ascii() const { + return move_mask_u8(this->value) == 0; + } + + simdutf_really_inline uint16_t to_bitmask() const { + return move_mask_u8(value); + } + + template + simdutf_really_inline void store_bytes_as_utf16(char16_t *p) const { + const vector_type zero = vec_splats(T(0)); + + if (big_endian) { + const vec_u8_t perm_lo = {16, 0, 16, 1, 16, 2, 16, 3, + 16, 4, 16, 5, 16, 6, 16, 7}; + const vec_u8_t perm_hi = {16, 8, 16, 9, 16, 10, 16, 11, + 16, 12, 16, 13, 16, 14, 16, 15}; + + const vector_type v0 = vec_perm(value, zero, perm_lo); + const vector_type v1 = vec_perm(value, zero, perm_hi); + +#if defined(__clang__) + vec_xst(v0, 0, reinterpret_cast(p)); + vec_xst(v1, 16, reinterpret_cast(p)); +#else + vec_xst(v0, 0, reinterpret_cast(p)); + vec_xst(v1, 16, reinterpret_cast(p)); +#endif // defined(__clang__) + } else { + const vec_u8_t perm_lo = {0, 16, 1, 16, 2, 16, 3, 16, + 4, 16, 5, 16, 6, 16, 7, 16}; + const vec_u8_t perm_hi = {8, 16, 9, 16, 10, 16, 11, 16, + 12, 16, 13, 16, 14, 16, 15, 16}; + + const vector_type v0 = vec_perm(value, zero, perm_lo); + const vector_type v1 = vec_perm(value, zero, perm_hi); + +#if defined(__clang__) + vec_xst(v0, 0, reinterpret_cast(p)); + 
vec_xst(v1, 16, reinterpret_cast(p)); +#else + vec_xst(v0, 0, reinterpret_cast(p)); + vec_xst(v1, 16, reinterpret_cast(p)); +#endif // defined(__clang__) + } + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + store_bytes_as_utf16(p); + } + + simdutf_really_inline void store_bytes_as_utf32(char32_t *p) const { + const vector_type zero = vec_splats(T(0)); + +#if SIMDUTF_IS_BIG_ENDIAN + const vec_u8_t perm0 = {16, 16, 16, 0, 16, 16, 16, 1, + 16, 16, 16, 2, 16, 16, 16, 3}; + + const vec_u8_t perm1 = {16, 16, 16, 4, 16, 16, 16, 5, + 16, 16, 16, 6, 16, 16, 16, 7}; + + const vec_u8_t perm2 = {16, 16, 16, 8, 16, 16, 16, 9, + 16, 16, 16, 10, 16, 16, 16, 11}; + + const vec_u8_t perm3 = {16, 16, 16, 12, 16, 16, 16, 13, + 16, 16, 16, 14, 16, 16, 16, 15}; +#else + const vec_u8_t perm0 = {0, 16, 16, 16, 1, 16, 16, 16, + 2, 16, 16, 16, 3, 16, 16, 16}; + + const vec_u8_t perm1 = {4, 16, 16, 16, 5, 16, 16, 16, + 6, 16, 16, 16, 7, 16, 16, 16}; + + const vec_u8_t perm2 = {8, 16, 16, 16, 9, 16, 16, 16, + 10, 16, 16, 16, 11, 16, 16, 16}; + + const vec_u8_t perm3 = {12, 16, 16, 16, 13, 16, 16, 16, + 14, 16, 16, 16, 15, 16, 16, 16}; +#endif // SIMDUTF_IS_BIG_ENDIAN + + const vector_type v0 = vec_perm(value, zero, perm0); + const vector_type v1 = vec_perm(value, zero, perm1); + const vector_type v2 = vec_perm(value, zero, perm2); + const vector_type v3 = vec_perm(value, zero, perm3); + + constexpr size_t n = base8::SIZE; + +#if defined(__clang__) + vec_xst(v0, 0 * n, reinterpret_cast(p)); + vec_xst(v1, 1 * n, reinterpret_cast(p)); + vec_xst(v2, 2 * n, reinterpret_cast(p)); + vec_xst(v3, 3 * n, reinterpret_cast(p)); +#else + vec_xst(v0, 0 * n, reinterpret_cast(p)); + vec_xst(v1, 1 * n, reinterpret_cast(p)); + vec_xst(v2, 2 * n, reinterpret_cast(p)); + vec_xst(v3, 3 * n, reinterpret_cast(p)); +#endif // defined(__clang__) + } + + simdutf_really_inline void store_words_as_utf32(char32_t *p) const { + const vector_type zero = vec_splats(T(0)); + +#if 
SIMDUTF_IS_BIG_ENDIAN + const vec_u8_t perm0 = {16, 16, 0, 1, 16, 16, 2, 3, + 16, 16, 4, 5, 16, 16, 6, 7}; + const vec_u8_t perm1 = {16, 16, 8, 9, 16, 16, 10, 11, + 16, 16, 12, 13, 16, 16, 14, 15}; +#else + const vec_u8_t perm0 = {0, 1, 16, 16, 2, 3, 16, 16, + 4, 5, 16, 16, 6, 7, 16, 16}; + const vec_u8_t perm1 = {8, 9, 16, 16, 10, 11, 16, 16, + 12, 13, 16, 16, 14, 15, 16, 16}; +#endif // SIMDUTF_IS_BIG_ENDIAN + + const vector_type v0 = vec_perm(value, zero, perm0); + const vector_type v1 = vec_perm(value, zero, perm1); + + constexpr size_t n = base8::SIZE; + +#if defined(__clang__) + vec_xst(v0, 0 * n, reinterpret_cast(p)); + vec_xst(v1, 1 * n, reinterpret_cast(p)); +#else + vec_xst(v0, 0 * n, reinterpret_cast(p)); + vec_xst(v1, 1 * n, reinterpret_cast(p)); +#endif // defined(__clang__) + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { + store_bytes_as_utf32(p); + } +}; + +// Forward declaration +template struct simd8; + +template +simd8 operator==(const simd8 a, const simd8 b); + +template +simd8 operator!=(const simd8 a, const simd8 b); + +template simd8 operator&(const simd8 a, const simd8 b); + +template simd8 operator|(const simd8 a, const simd8 b); + +template simd8 operator^(const simd8 a, const simd8 b); + +template simd8 operator+(const simd8 a, const simd8 b); + +template simd8 operator<(const simd8 a, const simd8 b); + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + using super = base8; + + static simdutf_really_inline simd8 splat(bool _value) { + return (vector_type)vec_splats((unsigned char)(-(!!_value))); + } + + simdutf_really_inline simd8() : super(vector_type()) {} + simdutf_really_inline simd8(const vector_type _value) : super(_value) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} + + template + simdutf_really_inline simd8(simd8 other) + : simd8(vector_type(other.value)) {} + + simdutf_really_inline uint16_t to_bitmask() 
const { + return move_mask_u8(value); + } + + simdutf_really_inline bool any() const { + return !vec_all_eq(this->value, (vector_type)vec_splats(0)); + } + + simdutf_really_inline bool all() const { return to_bitmask() == 0xffff; } + + simdutf_really_inline simd8 operator~() const { + return this->value ^ (vector_type)splat(true); + } +}; + +template struct base8_numeric : base8 { + using super = base8; + using vector_type = typename super::vector_type; + + static simdutf_really_inline simd8 splat(T value) { + return (vector_type)vec_splats(value); + } + + static simdutf_really_inline simd8 zero() { return splat(0); } + + template + static simdutf_really_inline simd8 load(const U *values) { + return vec_xl(0, reinterpret_cast(values)); + } + + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdutf_really_inline base8_numeric() : base8() {} + simdutf_really_inline base8_numeric(const vector_type _value) + : base8(_value) {} + + // Override to distinguish from bool version + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + simdutf_really_inline simd8 &operator-=(const simd8 other) { + this->value = vec_sub(this->value, other.value); + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return (vector_type)vec_perm((vector_type)lookup_table, + (vector_type)lookup_table, this->value); + } + + template + simdutf_really_inline simd8 + lookup_32(const simd8 lookup_table_lo, + const simd8 lookup_table_hi) const { + return (vector_type)vec_perm(lookup_table_lo.value, lookup_table_hi.value, + 
this->value); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + using Self = simd8; + + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const vector_type _value) + : base8_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((vector_type){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + simdutf_really_inline bool is_ascii() const { + return move_mask_u8(this->value) == 0; + } + + template + simdutf_really_inline simd8(simd8 other) + : simd8(vector_type(other.value)) {} + + template + simdutf_really_inline Self prev(const Self prev_chunk) const { + return 
prev_aux(prev_chunk.value); + } + + // Saturated math + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return (vector_type)vec_subs(this->value, (vector_type)other); + } + + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + + // Bit-specific operations + simdutf_really_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (vector_type)vec_splats(0)); + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + + template simdutf_really_inline simd8 shr() const { + return simd8( + (vector_type)vec_sr(this->value, (vector_type)vec_splat_u8(N))); + } + + template simdutf_really_inline simd8 shl() const { + return simd8( + (vector_type)vec_sl(this->value, (vector_type)vec_splat_u8(N))); + } + + void dump() const { + uint8_t tmp[16]; + store(tmp); + for (int i = 0; i < 16; i++) { + if (i == 0) { + printf("[%02x", tmp[i]); + } else if (i == 15) { + printf(" %02x]", tmp[i]); + } else { + printf(" %02x", tmp[i]); + } + } + putchar('\n'); + } + + void dump_ascii() const { + uint8_t tmp[16]; + store(tmp); + for (int i = 0; i < 16; i++) { + if (i == 0) { + printf("[%c", tmp[i]); + } else if (i == 15) { + printf("%c]", tmp[i]); + } else { + printf("%c", tmp[i]); + } + } + putchar('\n'); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const vector_type _value) + : base8_numeric(_value) {} + + template + simdutf_really_inline simd8(simd8 other) + : simd8(vector_type(other.value)) {} + + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : 
simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + + simdutf_really_inline operator simd8() const; + + // Saturated math + simdutf_really_inline simd8 + saturating_add(const simd8 other) const { + return (vector_type)vec_adds(this->value, other.value); + } + + void dump() const { + int8_t tmp[16]; + store(tmp); + for (int i = 0; i < 16; i++) { + if (i == 0) { + printf("[%02x", tmp[i]); + } else if (i == 15) { + printf("%02x]", tmp[i]); + } else { + printf("%02x", tmp[i]); + } + } + putchar('\n'); + } +}; + +template +simd8 operator==(const simd8 a, const simd8 b) { + return vec_cmpeq(a.value, b.value); +} + +template +simd8 operator!=(const simd8 a, const simd8 b) { + return vec_cmpne(a.value, b.value); +} + +template simd8 operator&(const simd8 a, const simd8 b) { + return vec_and(a.value, b.value); +} + +template simd8 operator&(const simd8 a, U b) { + return vec_and(a.value, vec_splats(T(b))); +} + +template simd8 operator|(const simd8 a, const simd8 b) { + return vec_or(a.value, b.value); +} + +template simd8 operator^(const simd8 a, const simd8 b) { + return vec_xor(a.value, b.value); +} + +template simd8 operator^(const simd8 a, U b) { + return vec_xor(a.value, vec_splats(T(b))); +} + +template simd8 operator+(const simd8 a, const simd8 b) { + return vec_add(a.value, b.value); +} + +template simd8 operator+(const simd8 a, U b) { + return vec_add(a.value, vec_splats(T(b))); +} + +simdutf_really_inline simd8::operator simd8() const { + return (simd8::vector_type)value; +} + +template +simd8 operator<(const simd8 a, const simd8 b) { + return vec_cmplt(a.value, b.value); +} + +template +simd8 operator>(const simd8 a, const simd8 b) { + return vec_cmpgt(a.value, b.value); +} + +template +simd8 operator>=(const simd8 a, const simd8 b) { + return vec_cmpge(a.value, b.value); +} + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static constexpr size_t 
ELEMENTS = simd8::ELEMENTS; + + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + simd8x64(simd8x64 &&) = default; + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + ELEMENTS * 0); + this->chunks[1].store(ptr + ELEMENTS * 1); + this->chunks[2].store(ptr + ELEMENTS * 2); + this->chunks[3].store(ptr + ELEMENTS * 3); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + 
this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(simd8(this->chunks[0]) >= mask, + simd8(this->chunks[1]) >= mask, + simd8(this->chunks[2]) >= mask, + simd8(this->chunks[3]) >= mask) + .to_bitmask(); + } + + void dump() const { + puts(""); + for (int i = 0; i < 4; i++) { + printf("chunk[%d] = ", i); + this->chunks[i].dump(); + } + } +}; // struct simd8x64 + +simdutf_really_inline simd8 avg(const simd8 a, + const simd8 b) { + return vec_avg(a.value, b.value); +} +/* end file src/simdutf/ppc64/simd8-inl.h */ +/* begin file src/simdutf/ppc64/simd16-inl.h */ +// file included directly + +template struct simd16; + +template struct base16 { + using vector_type = vector_u16_type_for_element; + static const int SIZE = sizeof(vector_type); + static const int ELEMENTS = sizeof(vector_type) / sizeof(T); + + vector_type value; + + // Zero constructor + simdutf_really_inline base16() : 
value{vector_type()} {} + + // Conversion from SIMD register + simdutf_really_inline base16(const vector_type _value) : value{_value} {} + + void dump() const { + uint16_t tmp[8]; + vec_xst(value, 0, reinterpret_cast(tmp)); + for (int i = 0; i < 8; i++) { + if (i == 0) { + printf("[%04x", tmp[i]); + } else if (i == 8 - 1) { + printf(" %04x]", tmp[i]); + } else { + printf(" %04x", tmp[i]); + } + } + putchar('\n'); + } +}; + +// Forward declaration +template struct simd16; + +template +simd16 operator==(const simd16 a, const simd16 b); + +template +simd16 operator==(const simd16 a, U b); + +template simd16 operator&(const simd16 a, const simd16 b); + +template simd16 operator|(const simd16 a, const simd16 b); + +template simd16 operator|(const simd16 a, U b); + +template simd16 operator^(const simd16 a, U b); + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return (vector_type)vec_splats(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + + simdutf_really_inline simd16(const vector_type _value) + : base16(_value) {} + + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} + + simdutf_really_inline uint16_t to_bitmask() const { + return move_mask_u8(value); + } + + simdutf_really_inline bool any() const { + const auto tmp = vec_u64_t(value); + + return tmp[0] || tmp[1]; // Note: logical or, not binary one + } + + simdutf_really_inline bool is_zero() const { + const auto tmp = vec_u64_t(value); + + return (tmp[0] | tmp[1]) == 0; + } + + simdutf_really_inline simd16 &operator|=(const simd16 rhs) { + value = vec_or(this->value, rhs.value); + return *this; + } +}; + +template struct base16_numeric : base16 { + using vector_type = typename base16::vector_type; + + static simdutf_really_inline simd16 splat(T _value) { + return vec_splats(_value); + } + + static simdutf_really_inline simd16 zero() { 
return splat(0); } + + template + static simdutf_really_inline simd16 load(const U *ptr) { + return vec_xl(0, reinterpret_cast(ptr)); + } + + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const vector_type _value) + : base16(_value) {} + + // Store to array + template simdutf_really_inline void store(U *dst) const { +#if defined(__clang__) + return vec_xst(this->value, 0, reinterpret_cast(dst)); +#else + return vec_xst(this->value, 0, reinterpret_cast(dst)); +#endif // defined(__clang__) + } + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { + return vec_xor(this->value, vec_splats(T(0xffff))); + } +}; + +// Signed code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const vector_type _value) + : base16_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline operator simd16() const; +}; + +// Unsigned code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const vector_type _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + + // Array constructor + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + simdutf_really_inline bool is_ascii() const { + return vec_all_lt(value, vec_splats(uint16_t(128))); + } + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return vec_max(this->value, other.value); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return vec_min(this->value, other.value); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + 
simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return other.max_val(*this) == other; + } + + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return other.min_val(*this) == other; + } + + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return vec_cmplt(value, other.value); + } + + // Bit-specific operations + template simdutf_really_inline simd16 shr() const { + return vec_sr(value, vec_splats(uint16_t(N))); + } + + template simdutf_really_inline simd16 shl() const { + return vec_sl(value, vec_splats(uint16_t(N))); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return vec_revb(value); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + return vec_packs(v0.value, v1.value); + } +}; + +template +simd16 operator==(const simd16 a, const simd16 b) { + return vec_cmpeq(a.value, b.value); +} + +template +simd16 operator==(const simd16 a, U b) { + return vec_cmpeq(a.value, vec_splats(T(b))); +} + +template +simd16 operator&(const simd16 a, const simd16 b) { + return vec_and(a.value, b.value); +} + +template simd16 operator&(const simd16 a, U b) { + return vec_and(a.value, vec_splats(T(b))); +} + +template +simd16 operator|(const simd16 a, const simd16 b) { + return vec_or(a.value, b.value); +} + +template simd16 operator|(const simd16 a, U b) { + return vec_or(a.value, vec_splats(T(b))); +} + +template +simd16 operator^(const simd16 a, const simd16 b) { + return vec_xor(a.value, b.value); +} + +template simd16 operator^(const simd16 a, U b) { + return vec_xor(a.value, vec_splats(T(b))); +} + +simdutf_really_inline simd16::operator simd16() const { + return (vec_u16_t)(value); +} + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 4, + "AltiVec kernel should use four 
registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); + } + + simdutf_really_inline simd16 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); + this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); + this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = 
this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(static_cast(low - 1)); + const simd16 mask_high = simd16::splat(static_cast(high + 1)); + return simd16x32( + (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), + (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low), + (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low), + (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)) + .to_bitmask(); + } +}; // struct simd16x32 +/* end file src/simdutf/ppc64/simd16-inl.h */ +/* begin file src/simdutf/ppc64/simd32-inl.h */ +// file included directly + +template struct simd32; + +template struct base32 { + using vector_type = vector_u32_type_for_element; + static const int SIZE = sizeof(vector_type); + static const int ELEMENTS = sizeof(vector_type) / sizeof(T); + + vector_type value; + + // Zero constructor + simdutf_really_inline base32() : value{vector_type()} {} + + // Conversion from SIMD register + simdutf_really_inline base32(const vector_type _value) : value{_value} {} + + // Splat for scalar + simdutf_really_inline base32(T scalar) : value{vec_splats(scalar)} {} + + template + simdutf_really_inline base32(const Pointer *ptr) + : base32(vec_xl(0, reinterpret_cast(ptr))) {} + + // Store to array + template simdutf_really_inline void store(U *dst) const { +#if defined(__clang__) + return vec_xst(this->value, 0, reinterpret_cast(dst)); +#else + return vec_xst(this->value, 0, reinterpret_cast(dst)); +#endif // defined(__clang__) + } + + void dump(const char *name = nullptr) const { + if 
(name != nullptr) { + printf("%-10s = ", name); + } + + uint32_t tmp[4]; + vec_xst(value, 0, reinterpret_cast(tmp)); + for (int i = 0; i < 4; i++) { + if (i == 0) { + printf("[%08x", tmp[i]); + } else if (i == 4 - 1) { + printf(" %08x]", tmp[i]); + } else { + printf(" %08x", tmp[i]); + } + } + putchar('\n'); + } +}; + +template struct base32_numeric : base32 { + using super = base32; + using vector_type = typename super::vector_type; + + static simdutf_really_inline simd32 splat(T _value) { + return vec_splats(_value); + } + + static simdutf_really_inline simd32 zero() { return splat(0); } + + template + static simdutf_really_inline simd32 load(const U *values) { + return vec_xl(0, reinterpret_cast(values)); + } + + simdutf_really_inline base32_numeric() : base32() {} + + simdutf_really_inline base32_numeric(const vector_type _value) + : base32(_value) {} + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd32 operator+(const simd32 other) const { + return vec_add(this->value, other.value); + } + + simdutf_really_inline simd32 operator-(const simd32 other) const { + return vec_sub(this->value, other.value); + } + + simdutf_really_inline simd32 &operator+=(const simd32 other) { + *this = *this + other; + return *static_cast *>(this); + } + + simdutf_really_inline simd32 &operator-=(const simd32 other) { + *this = *this - other; + return *static_cast *>(this); + } +}; + +// Forward declaration +template struct simd32; + +template +simd32 operator==(const simd32 a, const simd32 b); + +template +simd32 operator!=(const simd32 a, const simd32 b); + +template +simd32 operator>(const simd32 a, const simd32 b); + +template simd32 operator==(const simd32 a, T b); + +template simd32 operator!=(const simd32 a, T b); + +template simd32 operator&(const simd32 a, const simd32 b); + +template simd32 operator|(const simd32 a, const simd32 b); + +template simd32 operator^(const simd32 a, const simd32 b); + +// SIMD byte mask type (returned by 
things like eq and gt) +template <> struct simd32 : base32 { + static simdutf_really_inline simd32 splat(bool _value) { + return (vector_type)vec_splats(uint32_t(-(!!_value))); + } + + simdutf_really_inline simd32(const vector_type _value) + : base32(_value) {} + + // Splat constructor + simdutf_really_inline simd32(bool _value) : base32(splat(_value)) {} + + simdutf_really_inline uint16_t to_bitmask() const { + return move_mask_u8(value); + } + + simdutf_really_inline bool any() const { + const vec_u64_t tmp = (vec_u64_t)value; + + return tmp[0] || tmp[1]; // Note: logical or, not binary one + } + + simdutf_really_inline bool is_zero() const { + const vec_u64_t tmp = (vec_u64_t)value; + + return (tmp[0] | tmp[1]) == 0; + } + + simdutf_really_inline simd32 operator~() const { + return (vec_bool32_t)vec_xor(this->value, vec_splats(uint32_t(0xffffffff))); + } +}; + +// Unsigned code units +template <> struct simd32 : base32_numeric { + simdutf_really_inline simd32() : base32_numeric() {} + + simdutf_really_inline simd32(const vector_type _value) + : base32_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd32(uint32_t _value) : simd32(splat(_value)) {} + + // Array constructor + simdutf_really_inline simd32(const char32_t *values) + : simd32(load(reinterpret_cast(values))) {} + + // Bit-specific operations + template simdutf_really_inline simd32 shr() const { + return vec_sr(value, vec_splats(uint32_t(N))); + } + + template simdutf_really_inline simd32 shl() const { + return vec_sl(value, vec_splats(uint32_t(N))); + } + + // Change the endianness + simdutf_really_inline simd32 swap_bytes() const { + return vec_revb(value); + } + + simdutf_really_inline uint64_t sum() const { + return uint64_t(value[0]) + uint64_t(value[1]) + uint64_t(value[2]) + + uint64_t(value[3]); + } + + static simdutf_really_inline simd16 + pack(const simd32 &v0, const simd32 &v1) { + return vec_packs(v0.value, v1.value); + } +}; + +template +simd32 operator==(const simd32 
a, const simd32 b) { + return vec_cmpeq(a.value, b.value); +} + +template +simd32 operator!=(const simd32 a, const simd32 b) { + return vec_cmpne(a.value, b.value); +} + +template simd32 operator==(const simd32 a, T b) { + return vec_cmpeq(a.value, vec_splats(b)); +} + +template simd32 operator!=(const simd32 a, T b) { + return vec_cmpne(a.value, vec_splats(b)); +} + +template +simd32 operator>(const simd32 a, const simd32 b) { + return vec_cmpgt(a.value, b.value); +} + +template +simd32 operator&(const simd32 a, const simd32 b) { + return vec_and(a.value, b.value); +} + +template simd32 operator&(const simd32 a, U b) { + return vec_and(a.value, vec_splats(T(b))); +} + +template +simd32 operator|(const simd32 a, const simd32 b) { + return vec_or(a.value, b.value); +} + +template +simd32 operator^(const simd32 a, const simd32 b) { + return vec_xor(a.value, b.value); +} + +template simd32 operator^(const simd32 a, U b) { + return vec_xor(a.value, vec_splats(T(b))); +} + +template simd32 max_val(const simd32 a, const simd32 b) { + return vec_max(a.value, b.value); +} + +template +simdutf_really_inline simd32 min(const simd32 b, const simd32 a) { + return vec_min(a.value, b.value); +} +/* end file src/simdutf/ppc64/simd32-inl.h */ + +template +simd8 select(const simd8 cond, const simd8 val_true, + const simd8 val_false) { + return vec_sel(val_false.value, val_true.value, cond.value); +} + +template +simd8 select(const T cond, const simd8 val_true, + const simd8 val_false) { + return vec_sel(val_false.value, val_true.value, vec_splats(cond)); +} + +template +simd16 select(const simd16 cond, const simd16 val_true, + const simd16 val_false) { + return vec_sel(val_false.value, val_true.value, cond.value); +} + +template +simd16 select(const T cond, const simd16 val_true, + const simd16 val_false) { + return vec_sel(val_false.value, val_true.value, vec_splats(cond)); +} + +template +simd32 select(const simd32 cond, const simd32 val_true, + const simd32 val_false) { + return 
vec_sel(val_false.value, val_true.value, cond.value); +} + +template +simd32 select(const T cond, const simd32 val_true, + const simd32 val_false) { + return vec_sel(val_false.value, val_true.value, vec_splats(cond)); +} + +using vector_u8 = simd8; +using vector_u16 = simd16; +using vector_u32 = simd32; +using vector_i8 = simd8; + +simdutf_really_inline vector_u8 as_vector_u8(const vector_u16 v) { + return vector_u8::vector_type(v.value); +} + +simdutf_really_inline vector_u8 as_vector_u8(const vector_u32 v) { + return vector_u8::vector_type(v.value); +} + +simdutf_really_inline vector_u8 as_vector_u8(const vector_i8 v) { + return vector_u8::vector_type(v.value); +} + +simdutf_really_inline vector_u8 as_vector_u8(const simd16 v) { + return vector_u8::vector_type(v.value); +} + +simdutf_really_inline vector_i8 as_vector_i8(const vector_u8 v) { + return vector_i8::vector_type(v.value); +} + +simdutf_really_inline vector_u16 as_vector_u16(const vector_u8 v) { + return vector_u16::vector_type(v.value); +} + +simdutf_really_inline vector_u16 as_vector_u16(const simd16 v) { + return vector_u16::vector_type(v.value); +} + +simdutf_really_inline vector_u32 as_vector_u32(const vector_u8 v) { + return vector_u32::vector_type(v.value); +} + +simdutf_really_inline vector_u32 as_vector_u32(const vector_u16 v) { + return vector_u32::vector_type(v.value); +} + +simdutf_really_inline vector_u32 max(vector_u32 a, vector_u32 b) { + return vec_max(a.value, b.value); +} + +simdutf_really_inline vector_u32 max(vector_u32 a, vector_u32 b, vector_u32 c) { + return max(max(a, b), c); +} + +simdutf_really_inline vector_u32 sum4bytes(vector_u8 bytes, vector_u32 acc) { + return vec_sum4s(bytes.value, acc.value); +} + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf + +#endif // SIMDUTF_PPC64_SIMD_INPUT_H +/* end file src/simdutf/ppc64/simd.h */ + +/* begin file src/simdutf/ppc64/end.h */ +/* end file src/simdutf/ppc64/end.h */ + +#endif // 
SIMDUTF_IMPLEMENTATION_PPC64 + +#endif // SIMDUTF_PPC64_H +/* end file src/simdutf/ppc64.h */ +/* begin file src/simdutf/rvv.h */ +#ifndef SIMDUTF_RVV_H +#define SIMDUTF_RVV_H + +#ifdef SIMDUTF_FALLBACK_H + #error "rvv.h must be included before fallback.h" +#endif + + +#define SIMDUTF_CAN_ALWAYS_RUN_RVV SIMDUTF_IS_RVV + +#ifndef SIMDUTF_IMPLEMENTATION_RVV + #define SIMDUTF_IMPLEMENTATION_RVV \ + (SIMDUTF_CAN_ALWAYS_RUN_RVV || \ + (SIMDUTF_IS_RISCV64 && SIMDUTF_HAS_RVV_INTRINSICS && \ + SIMDUTF_HAS_RVV_TARGET_REGION)) +#endif + +#if SIMDUTF_IMPLEMENTATION_RVV + + #if SIMDUTF_CAN_ALWAYS_RUN_RVV + #define SIMDUTF_TARGET_RVV + #else + #define SIMDUTF_TARGET_RVV SIMDUTF_TARGET_REGION("arch=+v") + #endif + #if !SIMDUTF_IS_ZVBB && SIMDUTF_HAS_ZVBB_INTRINSICS + #define SIMDUTF_TARGET_ZVBB SIMDUTF_TARGET_REGION("arch=+v,+zvbb") + #endif + +namespace simdutf { +namespace rvv {} // namespace rvv +} // namespace simdutf + +/* begin file src/simdutf/rvv/implementation.h */ +#ifndef SIMDUTF_RVV_IMPLEMENTATION_H +#define SIMDUTF_RVV_IMPLEMENTATION_H + + +namespace simdutf { +namespace rvv { + +namespace { +using namespace simdutf; +} // namespace + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("rvv", "RISC-V Vector Extension", + internal::instruction_set::RVV), + _supports_zvbb(internal::detect_supported_architectures() & + internal::instruction_set::ZVBB) {} +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; 
+#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; 
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char 
*buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t 
convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused 
size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const 
char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; 
+ simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +private: + const bool _supports_zvbb; + +#if SIMDUTF_IS_ZVBB + bool supports_zvbb() const { return true; } +#elif SIMDUTF_HAS_ZVBB_INTRINSICS + bool supports_zvbb() const { return _supports_zvbb; } +#else + bool supports_zvbb() const { return false; } +#endif +}; + +} // namespace rvv +} // namespace simdutf + +#endif // SIMDUTF_RVV_IMPLEMENTATION_H +/* end file src/simdutf/rvv/implementation.h */ +/* begin file src/simdutf/rvv/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "rvv" +// #define SIMDUTF_IMPLEMENTATION rvv + +#if SIMDUTF_CAN_ALWAYS_RUN_RVV +// nothing needed. 
+#else +SIMDUTF_TARGET_RVV +#endif +/* end file src/simdutf/rvv/begin.h */ +/* begin file src/simdutf/rvv/intrinsics.h */ +#ifndef SIMDUTF_RVV_INTRINSICS_H +#define SIMDUTF_RVV_INTRINSICS_H + + +#include + +#if __riscv_v_intrinsic >= 1000000 || __GCC__ >= 14 + #define simdutf_vrgather_u8m1x2(tbl, idx) \ + __riscv_vcreate_v_u8m1_u8m2( \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 0), \ + __riscv_vsetvlmax_e8m1()), \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 1), \ + __riscv_vsetvlmax_e8m1())); + + #define simdutf_vrgather_u8m1x4(tbl, idx) \ + __riscv_vcreate_v_u8m1_u8m4( \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 0), \ + __riscv_vsetvlmax_e8m1()), \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 1), \ + __riscv_vsetvlmax_e8m1()), \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 2), \ + __riscv_vsetvlmax_e8m1()), \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 3), \ + __riscv_vsetvlmax_e8m1())); +#else + // This has worse codegen on gcc + #define simdutf_vrgather_u8m1x2(tbl, idx) \ + __riscv_vset_v_u8m1_u8m2( \ + __riscv_vlmul_ext_v_u8m1_u8m2(__riscv_vrgather_vv_u8m1( \ + tbl, __riscv_vget_v_u8m2_u8m1(idx, 0), __riscv_vsetvlmax_e8m1())), \ + 1, \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 1), \ + __riscv_vsetvlmax_e8m1())) + + #define simdutf_vrgather_u8m1x4(tbl, idx) \ + __riscv_vset_v_u8m1_u8m4( \ + __riscv_vset_v_u8m1_u8m4( \ + __riscv_vset_v_u8m1_u8m4( \ + __riscv_vlmul_ext_v_u8m1_u8m4(__riscv_vrgather_vv_u8m1( \ + tbl, __riscv_vget_v_u8m4_u8m1(idx, 0), \ + __riscv_vsetvlmax_e8m1())), \ + 1, \ + __riscv_vrgather_vv_u8m1(tbl, \ + __riscv_vget_v_u8m4_u8m1(idx, 1), \ + __riscv_vsetvlmax_e8m1())), \ + 2, \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 2), \ + __riscv_vsetvlmax_e8m1())), \ + 3, \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 3), \ + __riscv_vsetvlmax_e8m1())) +#endif + +/* Zvbb adds dedicated 
support for endianness swaps with vrev8, but if we can't + * use that, we have to emulate it with the standard V extension. + * Using LMUL=1 vrgathers could be faster than the srl+macc variant, but that + * would increase register pressure, and vrgather implementations performance + * varies a lot. */ +enum class simdutf_ByteFlip { NONE, V, ZVBB }; + +template +simdutf_really_inline static uint16_t simdutf_byteflip(uint16_t v) { + if (method != simdutf_ByteFlip::NONE) + return (uint16_t)((v * 1u) << 8 | (v * 1u) >> 8); + return v; +} + +#ifdef SIMDUTF_TARGET_ZVBB +SIMDUTF_UNTARGET_REGION +SIMDUTF_TARGET_ZVBB +#endif + +template +simdutf_really_inline static vuint16m1_t simdutf_byteflip(vuint16m1_t v, + size_t vl) { +#if SIMDUTF_HAS_ZVBB_INTRINSICS + if (method == simdutf_ByteFlip::ZVBB) + return __riscv_vrev8_v_u16m1(v, vl); +#endif + if (method == simdutf_ByteFlip::V) + return __riscv_vmacc_vx_u16m1(__riscv_vsrl_vx_u16m1(v, 8, vl), 0x100, v, + vl); + return v; +} + +template +simdutf_really_inline static vuint16m2_t simdutf_byteflip(vuint16m2_t v, + size_t vl) { +#if SIMDUTF_HAS_ZVBB_INTRINSICS + if (method == simdutf_ByteFlip::ZVBB) + return __riscv_vrev8_v_u16m2(v, vl); +#endif + if (method == simdutf_ByteFlip::V) + return __riscv_vmacc_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 8, vl), 0x100, v, + vl); + return v; +} + +template +simdutf_really_inline static vuint16m4_t simdutf_byteflip(vuint16m4_t v, + size_t vl) { +#if SIMDUTF_HAS_ZVBB_INTRINSICS + if (method == simdutf_ByteFlip::ZVBB) + return __riscv_vrev8_v_u16m4(v, vl); +#endif + if (method == simdutf_ByteFlip::V) + return __riscv_vmacc_vx_u16m4(__riscv_vsrl_vx_u16m4(v, 8, vl), 0x100, v, + vl); + return v; +} + +template +simdutf_really_inline static vuint16m8_t simdutf_byteflip(vuint16m8_t v, + size_t vl) { +#if SIMDUTF_HAS_ZVBB_INTRINSICS + if (method == simdutf_ByteFlip::ZVBB) + return __riscv_vrev8_v_u16m8(v, vl); +#endif + if (method == simdutf_ByteFlip::V) + return 
__riscv_vmacc_vx_u16m8(__riscv_vsrl_vx_u16m8(v, 8, vl), 0x100, v, + vl); + return v; +} + +#ifdef SIMDUTF_TARGET_ZVBB +SIMDUTF_UNTARGET_REGION +SIMDUTF_TARGET_RVV +#endif + +#endif // SIMDUTF_RVV_INTRINSICS_H +/* end file src/simdutf/rvv/intrinsics.h */ +/* begin file src/simdutf/rvv/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_RVV +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif + +/* end file src/simdutf/rvv/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_RVV + +#endif // SIMDUTF_RVV_H +/* end file src/simdutf/rvv.h */ +/* begin file src/simdutf/lsx.h */ +#ifndef SIMDUTF_LSX_H +#define SIMDUTF_LSX_H + +#ifdef SIMDUTF_FALLBACK_H + #error "lsx.h must be included before fallback.h" +#endif + + +#ifndef SIMDUTF_IMPLEMENTATION_LSX + #define SIMDUTF_IMPLEMENTATION_LSX (SIMDUTF_IS_LSX) +#endif +#if SIMDUTF_IMPLEMENTATION_LSX && SIMDUTF_IS_LSX + #define SIMDUTF_CAN_ALWAYS_RUN_LSX 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_LSX 0 +#endif + +#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) + +#if SIMDUTF_IMPLEMENTATION_LSX + +namespace simdutf { +/** + * Implementation for LoongArch SX. 
+ */ +namespace lsx {} // namespace lsx +} // namespace simdutf + +/* begin file src/simdutf/lsx/implementation.h */ +#ifndef SIMDUTF_LSX_IMPLEMENTATION_H +#define SIMDUTF_LSX_IMPLEMENTATION_H + + +namespace simdutf { +namespace lsx { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("lsx", "LOONGARCH SX", + internal::instruction_set::LSX) {} +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || 
SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t 
len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + 
convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( 
+ const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 + void 
change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace lsx +} // namespace simdutf + +#endif // SIMDUTF_LSX_IMPLEMENTATION_H +/* end file src/simdutf/lsx/implementation.h */ + +/* begin file src/simdutf/lsx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lsx" +// #define SIMDUTF_IMPLEMENTATION lsx +/* end file src/simdutf/lsx/begin.h */ + + // Declarations +/* begin file src/simdutf/lsx/intrinsics.h */ +#ifndef SIMDUTF_LSX_INTRINSICS_H +#define SIMDUTF_LSX_INTRINSICS_H + + 
+// This should be the correct header whether +// you use visual studio or other compilers. +#include + +#endif // SIMDUTF_LSX_INTRINSICS_H +/* end file src/simdutf/lsx/intrinsics.h */ +/* begin file src/simdutf/lsx/bitmanipulation.h */ +#ifndef SIMDUTF_LSX_BITMANIPULATION_H +#define SIMDUTF_LSX_BITMANIPULATION_H + +#include + +namespace simdutf { +namespace lsx { +namespace { + +simdutf_really_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); +} + +#if SIMDUTF_NEED_TRAILING_ZEROES +// simdutf_really_inline int trailing_zeroes(uint64_t input_num) { +// return __builtin_ctzll(input_num); +// } +#endif + +} // unnamed namespace +} // namespace lsx +} // namespace simdutf + +#endif // SIMDUTF_LSX_BITMANIPULATION_H +/* end file src/simdutf/lsx/bitmanipulation.h */ +/* begin file src/simdutf/lsx/simd.h */ +#ifndef SIMDUTF_LSX_SIMD_H +#define SIMDUTF_LSX_SIMD_H + +#include + +namespace simdutf { +namespace lsx { +namespace { +namespace simd { + +template struct simd8; + +// +// Base class of simd8 and simd8, both of which use __m128i +// internally. 
+// +template > struct base_u8 { + __m128i value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdutf_really_inline base_u8(const __m128i _value) : value(_value) {} + simdutf_really_inline operator const __m128i &() const { return this->value; } + simdutf_really_inline operator __m128i &() { return this->value; } + simdutf_really_inline T first() const { + return __lsx_vpickve2gr_bu(this->value, 0); + } + simdutf_really_inline T last() const { + return __lsx_vpickve2gr_bu(this->value, 15); + } + + // Bit operations + simdutf_really_inline simd8 operator|(const simd8 other) const { + return __lsx_vor_v(this->value, other); + } + simdutf_really_inline simd8 operator&(const simd8 other) const { + return __lsx_vand_v(this->value, other); + } + simdutf_really_inline simd8 operator^(const simd8 other) const { + return __lsx_vxor_v(this->value, other); + } + simdutf_really_inline simd8 bit_andnot(const simd8 other) const { + return __lsx_vandn_v(this->value, other); + } + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdutf_really_inline simd8 &operator|=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdutf_really_inline simd8 &operator&=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdutf_really_inline simd8 &operator^=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } + + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return __lsx_vseq_b(lhs, rhs); + } + + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(this->value, N), + __lsx_vbsrl_v(prev_chunk.value, 16 - N)); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base_u8 { + 
typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdutf_really_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8(const __m128i _value) : base_u8(_value) {} + // False constructor + simdutf_really_inline simd8() : simd8(__lsx_vldi(0)) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} + simdutf_really_inline void store(uint8_t dst[16]) const { + return __lsx_vst(this->value, dst, 0); + } + + simdutf_really_inline uint32_t to_bitmask() const { + return __lsx_vpickve2gr_wu(__lsx_vmsknz_b(*this), 0); + } + + simdutf_really_inline bool any() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) != 0; + } + simdutf_really_inline bool none() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) == 0; + } + simdutf_really_inline bool all() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) == 0xFFFF; + } +}; + +// Unsigned bytes +template <> struct simd8 : base_u8 { + static simdutf_really_inline simd8 splat(uint8_t _value) { + return __lsx_vreplgr2vr_b(_value); + } + static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd8 load(const uint8_t *values) { + return __lsx_vld(values, 0); + } + simdutf_really_inline simd8(const __m128i _value) + : base_u8(_value) {} + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization + + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i)v16u8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, 
v10, v11, + v12, v13, v14, v15}) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Store to array + simdutf_really_inline void store(uint8_t dst[16]) const { + return __lsx_vst(this->value, dst, 0); + } + + // Saturated math + simdutf_really_inline simd8 + saturating_add(const simd8 other) const { + return __lsx_vsadd_bu(this->value, other); + } + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return __lsx_vssub_bu(this->value, other); + } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 + operator+(const simd8 other) const { + return __lsx_vadd_b(this->value, other); + } + simdutf_really_inline simd8 + operator-(const simd8 other) const { + return __lsx_vsub_b(this->value, other); + } + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *this; + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *this; + } + + // Order-specific operations + simdutf_really_inline simd8 + max_val(const simd8 other) const { + return __lsx_vmax_bu(*this, other); + } + simdutf_really_inline simd8 + min_val(const simd8 other) const { + return __lsx_vmin_bu(*this, other); + } + simdutf_really_inline simd8 + operator<=(const simd8 other) const { + return __lsx_vsle_bu(*this, other); + } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return __lsx_vsle_bu(other, *this); + } + simdutf_really_inline simd8 + operator<(const simd8 other) const { + return __lsx_vslt_bu(*this, other); + } + simdutf_really_inline simd8 + 
operator>(const simd8 other) const { + return __lsx_vslt_bu(other, *this); + } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true + // = nonzero. For ARM, returns all 1's. + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return simd8(*this > other); + } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true + // = nonzero. For ARM, returns all 1's. + simdutf_really_inline simd8 + lt_bits(const simd8 other) const { + return simd8(*this < other); + } + + // Bit-specific operations + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return __lsx_vslt_bu(__lsx_vldi(0), __lsx_vand_v(this->value, bits)); + } + simdutf_really_inline bool is_ascii() const { + return __lsx_vpickve2gr_hu(__lsx_vmskgez_b(this->value), 0) == 0xFFFF; + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(this->value), 0) > 0; + } + simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { + return (*this & bits).any_bits_set_anywhere(); + } + template simdutf_really_inline simd8 shr() const { + return __lsx_vsrli_b(this->value, N); + } + template simdutf_really_inline simd8 shl() const { + return __lsx_vslli_b(this->value, N); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } 
+ + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); + return __lsx_vshuf_b(__lsx_vldi(0), *this, simd8(original_tmp)); + } +}; + +// Signed bytes +template <> struct simd8 { + __m128i value; + + static simdutf_really_inline simd8 splat(int8_t _value) { + return __lsx_vreplgr2vr_b(_value); + } + static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd8 load(const int8_t values[16]) { + return __lsx_vld(values, 0); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + __m128i zero = __lsx_vldi(0); + if (match_system(big_endian)) { + __lsx_vst(__lsx_vilvl_b(zero, (__m128i)this->value), + reinterpret_cast(p), 0); + __lsx_vst(__lsx_vilvh_b(zero, (__m128i)this->value), + reinterpret_cast(p + 8), 0); + } else { + __lsx_vst(__lsx_vilvl_b((__m128i)this->value, zero), + reinterpret_cast(p), 0); + __lsx_vst(__lsx_vilvh_b((__m128i)this->value, zero), + reinterpret_cast(p + 8), 0); + } + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, (__m128i)this->value); + __m128i in16high = __lsx_vilvh_b(zero, (__m128i)this->value); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + __lsx_vst(in32_0, reinterpret_cast(p), 0); + __lsx_vst(in32_1, reinterpret_cast(p + 4), 0); + __lsx_vst(in32_2, reinterpret_cast(p + 8), 0); + __lsx_vst(in32_3, reinterpret_cast(p + 12), 0); + } + + // In places where the table can be reused, which is most uses in simdutf, it + // is worth it to do 4 table lookups, as there is no direct zero extension + // from u8 to u32. 
+ simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { + const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, + 2, 255, 255, 255, 3, 255, 255, 255}; + const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, + 6, 255, 255, 255, 7, 255, 255, 255}; + const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, + 10, 255, 255, 255, 11, 255, 255, 255}; + const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, + 14, 255, 255, 255, 15, 255, 255, 255}; + + // encourage store pairing and interleaving + const auto shuf1 = this->apply_lookup_16_to(tb1); + const auto shuf2 = this->apply_lookup_16_to(tb2); + shuf1.store(reinterpret_cast(p)); + shuf2.store(reinterpret_cast(p + 4)); + + const auto shuf3 = this->apply_lookup_16_to(tb3); + const auto shuf4 = this->apply_lookup_16_to(tb4); + shuf3.store(reinterpret_cast(p + 8)); + shuf4.store(reinterpret_cast(p + 12)); + } + // Conversion from/to SIMD register + simdutf_really_inline simd8(const __m128i _value) : value(_value) {} + simdutf_really_inline operator const __m128i &() const { return this->value; } + + simdutf_really_inline operator const __m128i() const { return this->value; } + + simdutf_really_inline operator __m128i &() { return this->value; } + + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + + simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)v16i8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15}) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t 
v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Store to array + simdutf_really_inline void store(int8_t dst[16]) const { + return __lsx_vst(value, dst, 0); + } + + simdutf_really_inline operator simd8() const { + return ((__m128i)this->value); + } + + simdutf_really_inline simd8 + operator|(const simd8 other) const { + return __lsx_vor_v((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + operator&(const simd8 other) const { + return __lsx_vand_v((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + operator^(const simd8 other) const { + return __lsx_vxor_v((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + bit_andnot(const simd8 other) const { + return __lsx_vandn_v((__m128i)other.value, (__m128i)value); + } + + // Math + simdutf_really_inline simd8 + operator+(const simd8 other) const { + return __lsx_vadd_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + operator-(const simd8 other) const { + return __lsx_vsub_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *this; + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *this; + } + + simdutf_really_inline bool is_ascii() const { + return (__lsx_vpickve2gr_hu(__lsx_vmskgez_b((__m128i)this->value), 0) == + 0xffff); + } + + // Order-sensitive comparisons + simdutf_really_inline simd8 max_val(const simd8 other) const { + return __lsx_vmax_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 min_val(const simd8 other) const { + return __lsx_vmin_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 operator>(const simd8 other) const { + 
return __lsx_vslt_b((__m128i)other.value, (__m128i)value); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return __lsx_vslt_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + operator==(const simd8 other) const { + return __lsx_vseq_b((__m128i)value, (__m128i)other.value); + } + + template + simdutf_really_inline simd8 + prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(this->value, N), + __lsx_vbsrl_v(prev_chunk.value, 16 - N)); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } + + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); + return __lsx_vshuf_b(__lsx_vldi(0), (__m128i)this->value, + simd8(original_tmp)); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert( + NUM_CHUNKS == 4, + "LoongArch kernel should use four registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, 
chunk2, chunk3} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[3]), 6); + mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[2]), 4)); + mask = 
__lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[1]), 2)); + mask = __lsx_vor_v(mask, __lsx_vmsknz_b(this->chunks[0])); + return __lsx_vpickve2gr_du(mask, 0); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + + return simd8x64( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), + (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), + (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + return simd8x64( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), + (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), + (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > 
mask, this->chunks[3] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask, + this->chunks[2] >= mask, this->chunks[3] >= mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(simd8(this->chunks[0].value) >= mask, + simd8(this->chunks[1].value) >= mask, + simd8(this->chunks[2].value) >= mask, + simd8(this->chunks[3].value) >= mask) + .to_bitmask(); + } +}; // struct simd8x64 +/* begin file src/simdutf/lsx/simd16-inl.h */ +template struct simd16; + +template > struct base_u16 { + __m128i value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdutf_really_inline base_u16() = default; + simdutf_really_inline base_u16(const __m128i _value) : value(_value) {} + // Bit operations + simdutf_really_inline simd16 operator|(const simd16 other) const { + return __lsx_vor_v(this->value, other.value); + } + simdutf_really_inline simd16 operator&(const simd16 other) const { + return __lsx_vand_v(this->value, other.value); + } + simdutf_really_inline simd16 operator^(const simd16 other) const { + return __lsx_vxor_v(this->value, other.value); + } + simdutf_really_inline simd16 bit_andnot(const simd16 other) const { + return __lsx_vandn_v(this->value, other.value); + } + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } + simdutf_really_inline simd16 &operator|=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdutf_really_inline simd16 &operator&=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdutf_really_inline simd16 &operator^=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast ^ other; + 
return *this_cast; + } + + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return __lsx_vseq_h(lhs.value, rhs.value); + } + + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), + __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); + } +}; + +template > +struct base16 : base_u16 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdutf_really_inline base16() : base_u16() {} + simdutf_really_inline base16(const __m128i _value) : base_u16(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) + : base16(__lsx_vld(ptr, 0)) {} + + static const int SIZE = sizeof(base_u16::value); + + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), + __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return __lsx_vreplgr2vr_h(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return __lsx_vreplgr2vr_h(_value); + } + static simdutf_really_inline simd16 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd16 load(const T values[8]) { + return __lsx_vld(reinterpret_cast(values), 0); + } + + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const __m128i _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return __lsx_vst(this->value, dst, 0); + } + + // Override to distinguish from bool version + simdutf_really_inline 
simd16 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd16 operator+(const simd16 other) const { + return __lsx_vadd_b(*this, other); + } + simdutf_really_inline simd16 operator-(const simd16 other) const { + return __lsx_vsub_b(*this, other); + } + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd16 &operator-=(const simd16 other) { + *this = *this - other; + return *static_cast *>(this); + } +}; + +// Signed code unitstemplate<> +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m128i _value) + : base16_numeric(_value) {} + simdutf_really_inline simd16(simd16 other) + : base16_numeric(other.value) {} + + // Splat constructor + simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + simdutf_really_inline operator simd16() const; + + // Order-sensitive comparisons + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return __lsx_vmax_h(this->value, other.value); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return __lsx_vmin_h(this->value, other.value); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return __lsx_vsle_h(other.value, this->value); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return __lsx_vslt_h(this->value, other.value); + } +}; + +// Unsigned code unitstemplate<> +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m128i _value) + : base16_numeric((__m128i)_value) 
{} + simdutf_really_inline simd16(simd16 other) + : base16_numeric(other.value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + // Saturated math + simdutf_really_inline simd16 + saturating_add(const simd16 other) const { + return __lsx_vsadd_hu(this->value, other.value); + } + simdutf_really_inline simd16 + saturating_sub(const simd16 other) const { + return __lsx_vssub_hu(this->value, other.value); + } + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return __lsx_vmax_hu(this->value, other.value); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return __lsx_vmin_hu(this->value, other.value); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + gt_bits(const simd16 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + lt_bits(const simd16 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return __lsx_vsle_hu(this->value, other.value); + } + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return __lsx_vsle_hu(other.value, this->value); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return __lsx_vslt_hu(other.value, this->value); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return __lsx_vslt_hu(this->value, other.value); + } + + // Bit-specific operations + simdutf_really_inline simd16 bits_not_set() const { + return *this == uint16_t(0); + } + template simdutf_really_inline simd16 shr() 
const { + return simd16(__lsx_vsrli_h(this->value, N)); + } + template simdutf_really_inline simd16 shl() const { + return simd16(__lsx_vslli_h(this->value, N)); + } + + // logical operations + simdutf_really_inline simd16 + operator|(const simd16 other) const { + return __lsx_vor_v(this->value, other.value); + } + simdutf_really_inline simd16 + operator&(const simd16 other) const { + return __lsx_vand_v(this->value, other.value); + } + simdutf_really_inline simd16 + operator^(const simd16 other) const { + return __lsx_vxor_v(this->value, other.value); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + return __lsx_vssrlni_bu_h(v1.value, v0.value, 0); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return __lsx_vshuf4i_b(this->value, 0b10110001); + } +}; + +simdutf_really_inline simd16::operator simd16() const { + return this->value; +} + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert( + NUM_CHUNKS == 4, + "LOONGARCH kernel should use four registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + 
this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); + } + + simdutf_really_inline simd16 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); + this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); + this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b((this->chunks[3]).value), 6); + mask = __lsx_vor_v( + mask, __lsx_vbsll_v(__lsx_vmsknz_b((this->chunks[2]).value), 4)); + mask = __lsx_vor_v( + mask, __lsx_vbsll_v(__lsx_vmsknz_b((this->chunks[1]).value), 2)); + mask = __lsx_vor_v(mask, __lsx_vmsknz_b((this->chunks[0]).value)); + return __lsx_vpickve2gr_du(mask, 0); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + 
const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + + return simd16x32( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), + (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), + (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + return simd16x32( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), + (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), + (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } + +}; // struct simd16x32 + +template <> +simdutf_really_inline uint64_t simd16x32::not_in_range( + const uint16_t low, const uint16_t high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + simd16x32 x(simd16((this->chunks[0] > mask_high) | + (this->chunks[0] < mask_low)), + simd16((this->chunks[1] > mask_high) | + (this->chunks[1] < mask_low)), + simd16((this->chunks[2] > mask_high) | + (this->chunks[2] < mask_low)), + simd16((this->chunks[3] > mask_high) | + (this->chunks[3] < mask_low))); + return x.to_bitmask(); +} +/* end file src/simdutf/lsx/simd16-inl.h */ +} // namespace simd +} // unnamed namespace +} // namespace lsx +} // namespace simdutf + +#endif // SIMDUTF_LSX_SIMD_H +/* end file src/simdutf/lsx/simd.h */ + +/* begin file src/simdutf/lsx/end.h */ +/* end file src/simdutf/lsx/end.h */ + +#endif // 
SIMDUTF_IMPLEMENTATION_LSX + +#endif // SIMDUTF_LSX_H +/* end file src/simdutf/lsx.h */ +/* begin file src/simdutf/lasx.h */ +#ifndef SIMDUTF_LASX_H +#define SIMDUTF_LASX_H + +#ifdef SIMDUTF_FALLBACK_H + #error "lasx.h must be included before fallback.h" +#endif + + +#ifndef SIMDUTF_IMPLEMENTATION_LASX + #define SIMDUTF_IMPLEMENTATION_LASX (SIMDUTF_IS_LASX) +#endif +#if SIMDUTF_IMPLEMENTATION_LASX && SIMDUTF_IS_LASX + #define SIMDUTF_CAN_ALWAYS_RUN_LASX 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_LASX 0 +#endif + +#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) + +#if SIMDUTF_IMPLEMENTATION_LASX + +namespace simdutf { +/** + * Implementation for LoongArch ASX. + */ +namespace lasx {} // namespace lasx +} // namespace simdutf + +/* begin file src/simdutf/lasx/implementation.h */ +#ifndef SIMDUTF_LASX_IMPLEMENTATION_H +#define SIMDUTF_LASX_IMPLEMENTATION_H + + +namespace simdutf { +namespace lasx { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("lasx", "LOONGARCH ASX", + internal::instruction_set::LSX | + internal::instruction_set::LASX) {} +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) 
const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + 
simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + 
convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + 
simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t 
*utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const 
noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options 
options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace lasx +} // namespace simdutf + +#endif // SIMDUTF_LASX_IMPLEMENTATION_H +/* end file src/simdutf/lasx/implementation.h */ + +/* begin file src/simdutf/lasx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lasx" +// #define SIMDUTF_IMPLEMENTATION lasx +/* end file src/simdutf/lasx/begin.h */ + + // Declarations +/* begin file src/simdutf/lasx/intrinsics.h */ +#ifndef SIMDUTF_LASX_INTRINSICS_H +#define SIMDUTF_LASX_INTRINSICS_H + + +// This should be the correct header whether +// you use visual studio or other compilers. +#include +#include + +#if defined(__loongarch_asx) + #ifdef __clang__ + #define VREGS_PREFIX "$vr" + #define XREGS_PREFIX "$xr" + #else // GCC + #define VREGS_PREFIX "$f" + #define XREGS_PREFIX "$f" + #endif + #define __ALL_REGS \ + "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26," \ + "27,28,29,30,31" +// Convert __m128i to __m256i +static inline __m256i ____m256i(__m128i in) { + __m256i out = __lasx_xvldi(0); + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " XREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " VREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + : [out] "+f"(out) + : [in] "f"(in)); + return out; +} +// Convert two __m128i to __m256i +static inline __m256i lasx_set_q(__m128i inhi, __m128i inlo) { + __m256i out; + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[hi], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[lo], " VREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x20 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".ifnc %[out], %[hi] \n\t" + ".irp 
i," __ALL_REGS "\n\t" + " .ifc %[out], " XREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[hi], " VREGS_PREFIX "\\j \n\t" + " xvori.b $xr\\i, $xr\\j, 0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".endif \n\t" + : [out] "=f"(out), [hi] "+f"(inhi) + : [lo] "f"(inlo)); + return out; +} +// Convert __m256i low part to __m128i +static inline __m128i lasx_extracti128_lo(__m256i in) { + __m128i out; + __asm__ volatile(".ifnc %[out], %[in] \n\t" + ".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " XREGS_PREFIX "\\j \n\t" + " vori.b $vr\\i, $vr\\j, 0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".endif \n\t" + : [out] "=f"(out) + : [in] "f"(in)); + return out; +} +// Convert __m256i high part to __m128i +static inline __m128i lasx_extracti128_hi(__m256i in) { + __m128i out; + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " XREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x11 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + : [out] "=f"(out) + : [in] "f"(in)); + return out; +} +#endif + +#endif // SIMDUTF_LASX_INTRINSICS_H +/* end file src/simdutf/lasx/intrinsics.h */ +/* begin file src/simdutf/lasx/bitmanipulation.h */ +#ifndef SIMDUTF_LASX_BITMANIPULATION_H +#define SIMDUTF_LASX_BITMANIPULATION_H + +#include + +namespace simdutf { +namespace lasx { +namespace { + +simdutf_really_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); +} + +#if SIMDUTF_NEED_TRAILING_ZEROES +// simdutf_really_inline int trailing_zeroes(uint64_t input_num) { +// return __builtin_ctzll(input_num); +// } +#endif + +} // unnamed namespace +} // namespace lasx +} // namespace simdutf + +#endif // SIMDUTF_LASX_BITMANIPULATION_H +/* end file src/simdutf/lasx/bitmanipulation.h 
*/ +/* begin file src/simdutf/lasx/simd.h */ +#ifndef SIMDUTF_LASX_SIMD_H +#define SIMDUTF_LASX_SIMD_H + +#include + +namespace simdutf { +namespace lasx { +namespace { +namespace simd { + +__attribute__((aligned(32))) static const uint8_t prev_shuf_table[32][32] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, + {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, + 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0}, + {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0}, + {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0}, + {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0}, + {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0}, + {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, +}; + 
// Byte-select masks, one 32-byte row per shift amount: row N has its first N
// bytes set to 0xff and the remaining (32 - N) bytes cleared. prev<N>() loads
// row N (or row 2 * N for 16-bit lanes) and feeds it to __lasx_xvbitsel_v.
// Each row is laid out as two 16-byte halves, one per source line.
alignas(32) static const uint8_t bitsel_mask_table[32][32] = {
    // N = 0
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 1
    {0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 2
    {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 3
    {0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 4
    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 5
    {0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 6
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 7
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 8
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 9
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 10
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 11
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 12
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 13
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 14
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 15
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 16
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 17
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 18
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 19
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 20
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 21
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 22
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 23
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 24
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 25
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 26
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 27
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
    // N = 28
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
    // N = 29
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
    // N = 30
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
    // N = 31
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00}};

// Forward-declared so they can be used
by splat and friends. +template struct base { + __m256i value; + + // Zero constructor + simdutf_really_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdutf_really_inline base(const __m256i _value) : value(_value) {} + // Conversion to SIMD register + simdutf_really_inline operator const __m256i &() const { return this->value; } + simdutf_really_inline operator __m256i &() { return this->value; } + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + if (big_endian) { + __m256i zero = __lasx_xvldi(0); + __m256i in8 = __lasx_xvpermi_d(this->value, 0b11011000); + __m256i inlow = __lasx_xvilvl_b(in8, zero); + __m256i inhigh = __lasx_xvilvh_b(in8, zero); + __lasx_xvst(inlow, reinterpret_cast(ptr), 0); + __lasx_xvst(inhigh, reinterpret_cast(ptr), 32); + } else { + __m256i inlow = __lasx_vext2xv_hu_bu(this->value); + __m256i inhigh = __lasx_vext2xv_hu_bu( + __lasx_xvpermi_q(this->value, this->value, 0b00000001)); + __lasx_xvst(inlow, reinterpret_cast<__m256i *>(ptr), 0); + __lasx_xvst(inhigh, reinterpret_cast<__m256i *>(ptr), 32); + } + } + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + __m256i in32_0 = __lasx_vext2xv_wu_bu(this->value); + __lasx_xvst(in32_0, reinterpret_cast(ptr), 0); + + __m256i in8_1 = __lasx_xvpermi_d(this->value, 0b00000001); + __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); + __lasx_xvst(in32_1, reinterpret_cast(ptr), 32); + + __m256i in8_2 = __lasx_xvpermi_d(this->value, 0b00000010); + __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); + __lasx_xvst(in32_2, reinterpret_cast(ptr), 64); + + __m256i in8_3 = __lasx_xvpermi_d(this->value, 0b00000011); + __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); + __lasx_xvst(in32_3, reinterpret_cast(ptr), 96); + } + // Bit operations + simdutf_really_inline Child operator|(const Child other) const { + return __lasx_xvor_v(this->value, other); + } + simdutf_really_inline Child operator&(const Child other) const { + return 
__lasx_xvand_v(this->value, other); + } + simdutf_really_inline Child operator^(const Child other) const { + return __lasx_xvxor_v(this->value, other); + } + simdutf_really_inline Child bit_andnot(const Child other) const { + return __lasx_xvandn_v(this->value, other); + } + simdutf_really_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdutf_really_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdutf_really_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template struct simd8; + +template > +struct base8 : base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdutf_really_inline base8() : base>() {} + simdutf_really_inline base8(const __m256i _value) : base>(_value) {} + simdutf_really_inline T first() const { + return __lasx_xvpickve2gr_wu(this->value, 0); + } + simdutf_really_inline T last() const { + return __lasx_xvpickve2gr_wu(this->value, 7); + } + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return __lasx_xvseq_b(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + if (!N) + return this->value; + + __m256i zero = __lasx_xvldi(0); + __m256i result, shuf; + if (N < 16) { + shuf = __lasx_xvld(prev_shuf_table[N], 0); + + result = __lasx_xvshuf_b( + __lasx_xvpermi_q(this->value, this->value, 0b00000001), this->value, + shuf); + __m256i srl_prev = __lasx_xvbsrl_v( + __lasx_xvpermi_q(zero, prev_chunk.value, 0b00110001), (16 - N)); + __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); + result = __lasx_xvbitsel_v(result, srl_prev, mask); + + return result; + } else if (N == 16) { + return 
__lasx_xvpermi_q(this->value, prev_chunk.value, 0b00100001); + } /*else { + __m256i sll_value = __lasx_xvbsll_v( + __lasx_xvpermi_q(zero, this->value, 0b00000011), (N - 16) % 32); + __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); + shuf = __lasx_xvld(prev_shuf_table[N], 0); + result = __lasx_xvshuf_b( + __lasx_xvpermi_q(prev_chunk.value, prev_chunk.value, 0b00000001), + prev_chunk.value, shuf); + result = __lasx_xvbitsel_v(sll_value, result, mask); + return result; + }*/ + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdutf_really_inline simd8 splat(bool _value) { + return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8() : base8() {} + simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} + + simdutf_really_inline uint32_t to_bitmask() const { + __m256i mask = __lasx_xvmsknz_b(this->value); + uint32_t mask0 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t mask1 = __lasx_xvpickve2gr_wu(mask, 4); + return (mask0 | (mask1 << 16)); + } + simdutf_really_inline bool any() const { + if (__lasx_xbz_b(this->value)) + return false; + return true; + } + simdutf_really_inline bool none() const { + if (__lasx_xbz_b(this->value)) + return true; + return false; + } + simdutf_really_inline bool all() const { + if (__lasx_xbnz_b(this->value)) + return true; + return false; + } + simdutf_really_inline simd8 operator~() const { return *this ^ true; } +}; + +template struct base8_numeric : base8 { + static simdutf_really_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdutf_really_inline simd8 zero() { return __lasx_xvldi(0); } + static simdutf_really_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static 
simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15); + } + + simdutf_really_inline base8_numeric() : base8() {} + simdutf_really_inline base8_numeric(const __m256i _value) + : base8(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[32]) const { + return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 operator+(const simd8 other) const { + return __lasx_xvadd_b(this->value, other); + } + simdutf_really_inline simd8 operator-(const simd8 other) const { + return __lasx_xvsub_b(this->value, other); + } + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Override to distinguish from bool version + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + __m256i origin = __lasx_xvand_v(this->value, __lasx_xvldi(0x1f)); + return __lasx_xvshuf_b(__lasx_xvldi(0), lookup_table, origin); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, 
replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + simdutf_really_inline operator simd8() const; + // Member-by-member initialization + simdutf_really_inline + simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, + int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, + int8_t v30, int8_t v31) + : simd8((__m256i)v32i8{v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15); + } + simdutf_really_inline bool is_ascii() const { + __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); + if (__lasx_xbnz_v(ascii_mask)) + return false; + return true; + } + // Order-sensitive comparisons + simdutf_really_inline simd8 max_val(const simd8 other) const { + return __lasx_xvmax_b(this->value, other); + } + simdutf_really_inline simd8 
min_val(const simd8 other) const { + return __lasx_xvmin_b(this->value, other); + } + simdutf_really_inline simd8 operator>(const simd8 other) const { + return __lasx_xvslt_b(other, this->value); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return __lasx_xvslt_b(this->value, other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, + uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, + uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, + uint8_t v31) + : simd8((__m256i)v32u8{v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15); + } + + // Saturated math + simdutf_really_inline simd8 + saturating_add(const simd8 other) const { + return 
__lasx_xvsadd_bu(this->value, other); + } + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return __lasx_xvssub_bu(this->value, other); + } + + // Order-specific operations + simdutf_really_inline simd8 + max_val(const simd8 other) const { + return __lasx_xvmax_bu(*this, other); + } + simdutf_really_inline simd8 + min_val(const simd8 other) const { + return __lasx_xvmin_bu(*this, other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd8 + operator<=(const simd8 other) const { + return __lasx_xvsle_bu(*this, other); + } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return __lasx_xvsle_bu(other, *this); + } + simdutf_really_inline simd8 + operator>(const simd8 other) const { + return __lasx_xvslt_bu(*this, other); + } + simdutf_really_inline simd8 + operator<(const simd8 other) const { + return __lasx_xvslt_bu(other, *this); + } + + // Bit-specific operations + simdutf_really_inline simd8 bits_not_set() const { + return *this == uint8_t(0); + } + simdutf_really_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdutf_really_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdutf_really_inline bool is_ascii() const { + __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); + if (__lasx_xbnz_v(ascii_mask)) + return false; + return true; + } + simdutf_really_inline bool any_bits_set_anywhere() const { + if (__lasx_xbnz_v(this->value)) + return true; + return false; + } + simdutf_really_inline bool 
any_bits_set_anywhere(simd8 bits) const { + return (*this & bits).any_bits_set_anywhere(); + } + template simdutf_really_inline simd8 shr() const { + return __lasx_xvsrli_b(this->value, N); + } + template simdutf_really_inline simd8 shl() const { + return __lasx_xvslli_b(this->value, N); + } +}; +simdutf_really_inline simd8::operator simd8() const { + return this->value; +} + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, + "LASX kernel should use two registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + } + + 
simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); + } + + simdutf_really_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] | mask, this->chunks[1] | mask); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1]) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + + return simd8x64( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + return simd8x64( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) + .to_bitmask(); + } + simdutf_really_inline 
uint64_t gteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), + (simd8(__m256i(this->chunks[1])) >= mask)) + .to_bitmask(); + } +}; // struct simd8x64 + +/* begin file src/simdutf/lasx/simd16-inl.h */ +template struct simd16; + +template > +struct base16 : base> { + using bitmask_type = uint32_t; + + simdutf_really_inline base16() : base>() {} + simdutf_really_inline base16(const __m256i _value) + : base>(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) + : base16(__lasx_xvld(reinterpret_cast(ptr), 0)) {} + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return __lasx_xvseq_h(lhs.value, rhs.value); + } + + /// the size of vector in bytes + static const int SIZE = sizeof(base>::value); + + /// the number of elements of type T a vector can hold + static const int ELEMENTS = SIZE / sizeof(T); + + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + if (!N) + return this->value; + + __m256i zero = __lasx_xvldi(0); + __m256i result, shuf; + if (N < 8) { + shuf = __lasx_xvld(prev_shuf_table[N * 2], 0); + + result = __lasx_xvshuf_b( + __lasx_xvpermi_q(this->value, this->value, 0b00000001), this->value, + shuf); + __m256i srl_prev = __lasx_xvbsrl_v( + __lasx_xvpermi_q(zero, prev_chunk, 0b00110001), (16 - N * 2)); + __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); + result = __lasx_xvbitsel_v(result, srl_prev, mask); + + return result; + } else if (N == 8) { + return __lasx_xvpermi_q(this->value, prev_chunk, 0b00100001); + } else { + __m256i sll_value = __lasx_xvbsll_v( + __lasx_xvpermi_q(zero, this->value, 0b00000011), (N * 2 - 16)); + __m256i mask = __lasx_xvld(bitsel_mask_table[N * 2], 0); + shuf = 
__lasx_xvld(prev_shuf_table[N * 2], 0); + result = + __lasx_xvshuf_b(__lasx_xvpermi_q(prev_chunk, prev_chunk, 0b00000001), + prev_chunk, shuf); + result = __lasx_xvbitsel_v(sll_value, result, mask); + return result; + } + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return __lasx_xvreplgr2vr_h(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} + + simdutf_really_inline bitmask_type to_bitmask() const { + __m256i mask = __lasx_xvmsknz_b(this->value); + bitmask_type mask0 = __lasx_xvpickve2gr_wu(mask, 0); + bitmask_type mask1 = __lasx_xvpickve2gr_wu(mask, 4); + return (mask0 | (mask1 << 16)); + } + simdutf_really_inline bool any() const { + if (__lasx_xbz_v(this->value)) + return false; + return true; + } + simdutf_really_inline simd16 operator~() const { return *this ^ true; } +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return __lasx_xvreplgr2vr_h((uint16_t)_value); + } + static simdutf_really_inline simd16 zero() { return __lasx_xvldi(0); } + static simdutf_really_inline simd16 load(const T values[8]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const __m256i _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); + } + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd16 operator+(const simd16 other) const 
{ + return __lasx_xvadd_h(*this, other); + } + simdutf_really_inline simd16 operator-(const simd16 other) const { + return __lasx_xvsub_h(*this, other); + } + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd16 &operator-=(const simd16 other) { + *this = *this - other; + return *static_cast *>(this); + } +}; + +// Signed code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m256i _value) + : base16_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + // Order-sensitive comparisons + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return __lasx_xvmax_h(*this, other); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return __lasx_xvmin_h(*this, other); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return __lasx_xvsle_h(other.value, this->value); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return __lasx_xvslt_h(this->value, other.value); + } +}; + +// Unsigned code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m256i _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + // Saturated math + simdutf_really_inline 
simd16 + saturating_add(const simd16 other) const { + return __lasx_xvsadd_hu(this->value, other.value); + } + simdutf_really_inline simd16 + saturating_sub(const simd16 other) const { + return __lasx_xvssub_hu(this->value, other.value); + } + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return __lasx_xvmax_hu(this->value, other.value); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return __lasx_xvmin_hu(this->value, other.value); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + gt_bits(const simd16 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + lt_bits(const simd16 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return __lasx_xvsle_hu(this->value, other.value); + } + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return __lasx_xvsle_hu(other.value, this->value); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return __lasx_xvslt_hu(other.value, this->value); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return __lasx_xvslt_hu(this->value, other.value); + } + + // Bit-specific operations + simdutf_really_inline simd16 bits_not_set() const { + return *this == uint16_t(0); + } + simdutf_really_inline simd16 bits_not_set(simd16 bits) const { + return (*this & bits).bits_not_set(); + } + simdutf_really_inline simd16 any_bits_set() const { + return ~this->bits_not_set(); + } + simdutf_really_inline simd16 any_bits_set(simd16 bits) const { + return ~this->bits_not_set(bits); + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + if (__lasx_xbnz_v(this->value)) + return true; + return false; + } + simdutf_really_inline bool + 
any_bits_set_anywhere(simd16 bits) const { + return (*this & bits).any_bits_set_anywhere(); + } + + template simdutf_really_inline simd16 shr() const { + return simd16(__lasx_xvsrli_h(this->value, N)); + } + template simdutf_really_inline simd16 shl() const { + return simd16(__lasx_xvslli_h(this->value, N)); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return __lasx_xvshuf4i_b(this->value, 0b10110001); + } + + template + static simdutf_really_inline simd8 + pack_shifted_right(const simd16 &v0, const simd16 &v1) { + return __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(v1.value, v0.value, N), + 0b11011000); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + + return pack_shifted_right<0>(v0, v1); + } +}; + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 2, + "LASX kernel should use two registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline simd16x32(const simd16 chunk0, + const simd16 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdutf_really_inline simd16 reduce_or() const { + return this->chunks[0] | 
this->chunks[1]; + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16)); + } + + simdutf_really_inline simd16x32 bit_or(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] | mask, this->chunks[1] | mask); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const simd16x32 &other) const { + return simd16x32(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1]) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + + return simd16x32( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(static_cast(low - 1)); + const simd16 mask_high = simd16::splat(static_cast(high + 1)); + return simd16x32( + (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), + (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const 
simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask) + .to_bitmask(); + } +}; // struct simd16x32 +/* end file src/simdutf/lasx/simd16-inl.h */ +} // namespace simd +} // unnamed namespace +} // namespace lasx +} // namespace simdutf + +#endif // SIMDUTF_LASX_SIMD_H +/* end file src/simdutf/lasx/simd.h */ + +/* begin file src/simdutf/lasx/end.h */ +/* end file src/simdutf/lasx/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_LASX + +#endif // SIMDUTF_LASX_H +/* end file src/simdutf/lasx.h */ +/* begin file src/simdutf/fallback.h */ +#ifndef SIMDUTF_FALLBACK_H +#define SIMDUTF_FALLBACK_H + + +// Note that fallback.h is always imported last. + +// Default Fallback to on unless a builtin implementation has already been +// selected. +#ifndef SIMDUTF_IMPLEMENTATION_FALLBACK + #if SIMDUTF_CAN_ALWAYS_RUN_ARM64 || SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || \ + SIMDUTF_CAN_ALWAYS_RUN_HASWELL || SIMDUTF_CAN_ALWAYS_RUN_WESTMERE || \ + SIMDUTF_CAN_ALWAYS_RUN_PPC64 || SIMDUTF_CAN_ALWAYS_RUN_RVV || \ + SIMDUTF_CAN_ALWAYS_RUN_LSX || SIMDUTF_CAN_ALWAYS_RUN_LASX + #define SIMDUTF_IMPLEMENTATION_FALLBACK 0 + #else + #define SIMDUTF_IMPLEMENTATION_FALLBACK 1 + #endif +#endif + +#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) + +#if SIMDUTF_IMPLEMENTATION_FALLBACK + +namespace simdutf { +/** + * Fallback implementation (runs on any machine). 
+ */ +namespace fallback {} // namespace fallback +} // namespace simdutf + +/* begin file src/simdutf/fallback/implementation.h */ +#ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H +#define SIMDUTF_FALLBACK_IMPLEMENTATION_H + + +namespace simdutf { +namespace fallback { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("fallback", "Generic fallback implementation", + 0) {} + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if 
SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t 
convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + 
simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + 
convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 
+ +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 
+ +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +}; +} // namespace fallback +} // namespace simdutf + +#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H +/* end file src/simdutf/fallback/implementation.h */ + +/* begin file src/simdutf/fallback/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "fallback" +// #define SIMDUTF_IMPLEMENTATION fallback +/* end file src/simdutf/fallback/begin.h */ + + // Declarations +/* begin file src/simdutf/fallback/bitmanipulation.h */ +#ifndef 
SIMDUTF_FALLBACK_BITMANIPULATION_H +#define SIMDUTF_FALLBACK_BITMANIPULATION_H + +#include + +namespace simdutf { +namespace fallback { +namespace {} // unnamed namespace +} // namespace fallback +} // namespace simdutf + +#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H +/* end file src/simdutf/fallback/bitmanipulation.h */ + +/* begin file src/simdutf/fallback/end.h */ +/* end file src/simdutf/fallback/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_FALLBACK +#endif // SIMDUTF_FALLBACK_H +/* end file src/simdutf/fallback.h */ + // The scalar routines should be included once. +/* begin file src/scalar/swap_bytes.h */ +#ifndef SIMDUTF_SWAP_BYTES_H +#define SIMDUTF_SWAP_BYTES_H + +namespace simdutf { +namespace scalar { + +inline simdutf_warn_unused uint16_t u16_swap_bytes(const uint16_t word) { + return uint16_t((word >> 8) | (word << 8)); +} + +inline simdutf_warn_unused uint32_t u32_swap_bytes(const uint32_t word) { + return ((word >> 24) & 0xff) | // move byte 3 to byte 0 + ((word << 8) & 0xff0000) | // move byte 1 to byte 2 + ((word >> 8) & 0xff00) | // move byte 2 to byte 1 + ((word << 24) & 0xff000000); // byte 0 to byte 3 +} + +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/swap_bytes.h */ +#if SIMDUTF_FEATURE_ASCII /* begin file src/scalar/ascii.h */ #ifndef SIMDUTF_ASCII_H #define SIMDUTF_ASCII_H @@ -14270,6 +13033,516 @@ inline simdutf_warn_unused result validate_with_errors(const char *buf, #endif /* end file src/scalar/ascii.h */ +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/scalar/utf8.h */ +#ifndef SIMDUTF_UTF8_H +#define SIMDUTF_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8 { +#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_RVV +// only used by the fallback kernel. 
+// credit: based on code from Google Fuchsia (Apache Licensed) +inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { + const uint8_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 16 bytes are ascii. + uint64_t next_pos = pos + 16; + if (next_pos <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + + while (byte < 0b10000000) { + if (++pos == len) { + return true; + } + byte = data[pos]; + } + + if ((byte & 0b11100000) == 0b11000000) { + next_pos = pos + 2; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if ((code_point < 0x80) || (0x7ff < code_point)) { + return false; + } + } else if ((byte & 0b11110000) == 0b11100000) { + next_pos = pos + 3; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = (byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point) || + (0xd7ff < code_point && code_point < 0xe000)) { + return false; + } + } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + 
return false; + } + // range check + code_point = + (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return false; + } + } else { + // we may have a continuation + return false; + } + pos = next_pos; + } + return true; +} +#endif + +inline simdutf_warn_unused result validate_with_errors(const char *buf, + size_t len) noexcept { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 16 bytes are ascii. + size_t next_pos = pos + 16; + if (next_pos <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + + while (byte < 0b10000000) { + if (++pos == len) { + return result(error_code::SUCCESS, len); + } + byte = data[pos]; + } + + if ((byte & 0b11100000) == 0b11000000) { + next_pos = pos + 2; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if ((code_point < 0x80) || (0x7ff < code_point)) { + return result(error_code::OVERLONG, pos); + } + } else if ((byte & 0b11110000) == 0b11100000) { + next_pos = pos + 3; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = (byte & 0b00001111) << 12 | + 
(data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point)) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = + (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + pos = next_pos; + } + return result(error_code::SUCCESS, len); +} + +// Finds the previous leading byte starting backward from buf and validates with +// errors from there Used to pinpoint the location of an error when an invalid +// chunk is detected We assume that the stream starts with a leading byte, and +// to check that it is the case, we ask that you pass a pointer to the start of +// the stream (start). 
+inline simdutf_warn_unused result rewind_and_validate_with_errors( + const char *start, const char *buf, size_t len) noexcept { + // First check that we start with a leading byte + if ((*start & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, 0); + } + size_t extra_len{0}; + // A leading byte cannot be further than 4 bytes away + for (int i = 0; i < 5; i++) { + unsigned char byte = *buf; + if ((byte & 0b11000000) != 0b10000000) { + break; + } else { + buf--; + extra_len++; + } + } + + result res = validate_with_errors(buf, len + extra_len); + res.count -= extra_len; + return res; +} + +inline size_t count_code_points(const char *buf, size_t len) { + const int8_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + // -65 is 0b10111111, anything larger in two-complement's should start a new + // code point. + if (p[i] > -65) { + counter++; + } + } + return counter; +} + +inline size_t utf16_length_from_utf8(const char *buf, size_t len) { + const int8_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + if (p[i] > -65) { + counter++; + } + if (uint8_t(p[i]) >= 240) { + counter++; + } + } + return counter; +} + +simdutf_warn_unused inline size_t trim_partial_utf8(const char *input, + size_t length) { + if (length < 3) { + switch (length) { + case 2: + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 2]) >= 0xe0) { + return length - 2; + } // 3- and 4-byte characters with only 2 bytes left + return length; + case 1: + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + return length; + case 0: + return length; + } + } + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 2]) >= 0xe0) { + return length - 2; + } // 
3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 3]) >= 0xf0) { + return length - 3; + } // 4-byte characters with only 3 bytes left + return length; +} + +} // namespace utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || \ + (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) +/* begin file src/scalar/utf16.h */ +#ifndef SIMDUTF_UTF16_H +#define SIMDUTF_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16 { + +template +inline simdutf_warn_unused bool validate(const char16_t *data, + size_t len) noexcept { + uint64_t pos = 0; + while (pos < len) { + char16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) == 0xD800) { + if (pos + 1 >= len) { + return false; + } + char16_t diff = char16_t(word - 0xD800); + if (diff > 0x3FF) { + return false; + } + char16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + char16_t diff2 = char16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return false; + } + pos += 2; + } else { + pos++; + } + } + return true; +} + +template +inline simdutf_warn_unused result validate_with_errors(const char16_t *data, + size_t len) noexcept { + size_t pos = 0; + while (pos < len) { + char16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) == 0xD800) { + if (pos + 1 >= len) { + return result(error_code::SURROGATE, pos); + } + char16_t diff = char16_t(word - 0xD800); + if (diff > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + char16_t next_word = !match_system(big_endian) + ? 
u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + char16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + pos += 2; + } else { + pos++; + } + } + return result(error_code::SUCCESS, pos); +} + +template +inline size_t count_code_points(const char16_t *p, size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + char16_t word = !match_system(big_endian) ? u16_swap_bytes(p[i]) : p[i]; + counter += ((word & 0xFC00) != 0xDC00); + } + return counter; +} + +template +inline size_t utf8_length_from_utf16(const char16_t *p, size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + char16_t word = !match_system(big_endian) ? u16_swap_bytes(p[i]) : p[i]; + counter++; // ASCII + counter += static_cast( + word > + 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes + counter += static_cast((word > 0x7FF && word <= 0xD7FF) || + (word >= 0xE000)); // three-byte + } + return counter; +} + +template +inline size_t utf32_length_from_utf16(const char16_t *p, size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + char16_t word = !match_system(big_endian) ? u16_swap_bytes(p[i]) : p[i]; + counter += ((word & 0xFC00) != 0xDC00); + } + return counter; +} + +simdutf_really_inline void +change_endianness_utf16(const char16_t *input, size_t size, char16_t *output) { + for (size_t i = 0; i < size; i++) { + *output++ = char16_t(input[i] >> 8 | input[i] << 8); + } +} + +template +simdutf_warn_unused inline size_t trim_partial_utf16(const char16_t *input, + size_t length) { + if (length <= 1) { + return length; + } + uint16_t last_word = uint16_t(input[length - 1]); + last_word = !match_system(big_endian) ? 
u16_swap_bytes(last_word) : last_word; + length -= ((last_word & 0xFC00) == 0xD800); + return length; +} + +} // namespace utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || + // (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/scalar/utf32.h */ +#ifndef SIMDUTF_UTF32_H +#define SIMDUTF_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32 { + +inline simdutf_warn_unused bool validate(const char32_t *buf, + size_t len) noexcept { + const uint32_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + for (; pos < len; pos++) { + uint32_t word = data[pos]; + if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { + return false; + } + } + return true; +} + +inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, + size_t len) noexcept { + const uint32_t *data = reinterpret_cast(buf); + size_t pos = 0; + for (; pos < len; pos++) { + uint32_t word = data[pos]; + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + } + return result(error_code::SUCCESS, pos); +} + +inline size_t utf8_length_from_utf32(const char32_t *buf, size_t len) { + // We are not BOM aware. + const uint32_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + // credit: @ttsugriy for the vectorizable approach + counter++; // ASCII + counter += static_cast(p[i] > 0x7F); // two-byte + counter += static_cast(p[i] > 0x7FF); // three-byte + counter += static_cast(p[i] > 0xFFFF); // four-bytes + } + return counter; +} + +inline size_t utf16_length_from_utf32(const char32_t *buf, size_t len) { + // We are not BOM aware. 
+ const uint32_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + counter++; // non-surrogate word + counter += static_cast(p[i] > 0xFFFF); // surrogate pair + } + return counter; +} + +} // namespace utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_LATIN1 /* begin file src/scalar/latin1.h */ #ifndef SIMDUTF_LATIN1_H #define SIMDUTF_LATIN1_H @@ -14279,12 +13552,8 @@ namespace scalar { namespace { namespace latin1 { -inline size_t utf32_length_from_latin1(size_t len) { - // We are not BOM aware. - return len; // a utf32 unit will always represent 1 latin1 character -} - -inline size_t utf8_length_from_latin1(const char *buf, size_t len) { +simdutf_really_inline size_t utf8_length_from_latin1(const char *buf, + size_t len) { const uint8_t *c = reinterpret_cast(buf); size_t answer = 0; for (size_t i = 0; i < len; i++) { @@ -14295,8 +13564,6 @@ inline size_t utf8_length_from_latin1(const char *buf, size_t len) { return answer + len; } -inline size_t utf16_length_from_latin1(size_t len) { return len; } - } // namespace latin1 } // unnamed namespace } // namespace scalar @@ -14304,7 +13571,507 @@ inline size_t utf16_length_from_latin1(size_t len) { return len; } #endif /* end file src/scalar/latin1.h */ +#endif // SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/scalar/base64.h */ +#ifndef SIMDUTF_BASE64_H +#define SIMDUTF_BASE64_H +#include +#include +#include +#include + +namespace simdutf { +namespace scalar { +namespace { +namespace base64 { + +// This function is not expected to be fast. Do not use in long loops. 
+template bool is_ascii_white_space(char_type c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; +} + +template bool is_ascii_white_space_or_padding(char_type c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || + c == '='; +} + +template bool is_eight_byte(char_type c) { + if (sizeof(char_type) == 1) { + return true; + } + return uint8_t(c) == c; +} + +// Returns true upon success. The destination buffer must be large enough. +// This functions assumes that the padding (=) has been removed. +template +full_result +base64_tail_decode(char *dst, const char_type *src, size_t length, + size_t padded_characters, // number of padding characters + // '=', typically 0, 1, 2. + base64_options options, + last_chunk_handling_options last_chunk_options) { + // This looks like 5 branches, but we expect the compiler to resolve this to a + // single branch: + const uint8_t *to_base64 = (options & base64_url) + ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + const uint32_t *d0 = (options & base64_url) + ? tables::base64::base64_url::d0 + : tables::base64::base64_default::d0; + const uint32_t *d1 = (options & base64_url) + ? tables::base64::base64_url::d1 + : tables::base64::base64_default::d1; + const uint32_t *d2 = (options & base64_url) + ? tables::base64::base64_url::d2 + : tables::base64::base64_default::d2; + const uint32_t *d3 = (options & base64_url) + ? 
tables::base64::base64_url::d3 + : tables::base64::base64_default::d3; + + const char_type *srcend = src + length; + const char_type *srcinit = src; + const char *dstinit = dst; + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage); + + uint32_t x; + size_t idx; + uint8_t buffer[4]; + while (true) { + while (src + 4 <= srcend && is_eight_byte(src[0]) && + is_eight_byte(src[1]) && is_eight_byte(src[2]) && + is_eight_byte(src[3]) && + (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | + d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { + if (match_system(endianness::BIG)) { + x = scalar::u32_swap_bytes(x); + } + std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes + dst += 3; + src += 4; + } + idx = 0; + // we need at least four characters. +#ifdef __clang__ + // If possible, we read four characters at a time. (It is an optimization.) + if (ignore_garbage && src + 4 <= srcend) { + char_type c0 = src[0]; + char_type c1 = src[1]; + char_type c2 = src[2]; + char_type c3 = src[3]; + uint8_t code0 = to_base64[uint8_t(c0)]; + uint8_t code1 = to_base64[uint8_t(c1)]; + uint8_t code2 = to_base64[uint8_t(c2)]; + uint8_t code3 = to_base64[uint8_t(c3)]; + buffer[idx] = code0; + idx += (is_eight_byte(c0) && code0 <= 63); + buffer[idx] = code1; + idx += (is_eight_byte(c1) && code1 <= 63); + buffer[idx] = code2; + idx += (is_eight_byte(c2) && code2 <= 63); + buffer[idx] = code3; + idx += (is_eight_byte(c3) && code3 <= 63); + src += 4; + } +#endif + while ((idx < 4) && (src < srcend)) { + char_type c = *src; + uint8_t code = to_base64[uint8_t(c)]; + buffer[idx] = uint8_t(code); + if (is_eight_byte(c) && code <= 63) { + idx++; + } else if (!ignore_garbage && + (code > 64 || !scalar::base64::is_eight_byte(c))) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else { + // We have a space or a newline or garbage. We ignore it. 
+ } + src++; + } + if (idx != 4) { + if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::strict && + (idx != 1) && ((idx + padded_characters) & 3) != 0) { + // The partial chunk was at src - idx + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else if (!ignore_garbage && + last_chunk_options == + last_chunk_handling_options::stop_before_partial && + (idx != 1) && ((idx + padded_characters) & 3) != 0) { + // Rewind src to before partial chunk + src -= idx; + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } else { + if (idx == 2) { + uint32_t triple = + (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6); + if (!ignore_garbage && + (last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xffff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (match_system(endianness::BIG)) { + triple <<= 8; + std::memcpy(dst, &triple, 1); + } else { + triple = scalar::u32_swap_bytes(triple); + triple >>= 8; + std::memcpy(dst, &triple, 1); + } + dst += 1; + } else if (idx == 3) { + uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + + (uint32_t(buffer[1]) << 2 * 6) + + (uint32_t(buffer[2]) << 1 * 6); + if (!ignore_garbage && + (last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (match_system(endianness::BIG)) { + triple <<= 8; + std::memcpy(dst, &triple, 2); + } else { + triple = scalar::u32_swap_bytes(triple); + triple >>= 8; + std::memcpy(dst, &triple, 2); + } + dst += 2; + } else if (!ignore_garbage && idx == 1) { + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } + } + + uint32_t triple = + (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + + (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) 
<< 0 * 6); + if (match_system(endianness::BIG)) { + triple <<= 8; + std::memcpy(dst, &triple, 3); + } else { + triple = scalar::u32_swap_bytes(triple); + triple >>= 8; + std::memcpy(dst, &triple, 3); + } + dst += 3; + } +} + +// like base64_tail_decode, but it will not write past the end of the output +// buffer. The outlen paramter is modified to reflect the number of bytes +// written. This functions assumes that the padding (=) has been removed. +template +result base64_tail_decode_safe( + char *dst, size_t &outlen, const char_type *&srcr, size_t length, + size_t padded_characters, // number of padding characters '=', typically 0, + // 1, 2. + base64_options options, last_chunk_handling_options last_chunk_options) { + const char_type *src = srcr; + if (length == 0) { + outlen = 0; + return {SUCCESS, 0}; + } + // This looks like 5 branches, but we expect the compiler to resolve this to a + // single branch: + const uint8_t *to_base64 = (options & base64_url) + ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + const uint32_t *d0 = (options & base64_url) + ? tables::base64::base64_url::d0 + : tables::base64::base64_default::d0; + const uint32_t *d1 = (options & base64_url) + ? tables::base64::base64_url::d1 + : tables::base64::base64_default::d1; + const uint32_t *d2 = (options & base64_url) + ? tables::base64::base64_url::d2 + : tables::base64::base64_default::d2; + const uint32_t *d3 = (options & base64_url) + ? 
tables::base64::base64_url::d3 + : tables::base64::base64_default::d3; + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage); + + const char_type *srcend = src + length; + const char_type *srcinit = src; + const char *dstinit = dst; + const char *dstend = dst + outlen; + + uint32_t x; + size_t idx; + uint8_t buffer[4]; + while (true) { + while (src + 4 <= srcend && is_eight_byte(src[0]) && + is_eight_byte(src[1]) && is_eight_byte(src[2]) && + is_eight_byte(src[3]) && + (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | + d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { + if (dstend - dst < 3) { + outlen = size_t(dst - dstinit); + srcr = src; + return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit)}; + } + if (match_system(endianness::BIG)) { + x = scalar::u32_swap_bytes(x); + } + std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes + dst += 3; + src += 4; + } + idx = 0; + const char_type *srccur = src; + // We need at least four characters. +#ifdef __clang__ + // If possible, we read four characters at a time. (It is an optimization.) 
+ if (ignore_garbage && src + 4 <= srcend) { + char_type c0 = src[0]; + char_type c1 = src[1]; + char_type c2 = src[2]; + char_type c3 = src[3]; + uint8_t code0 = to_base64[uint8_t(c0)]; + uint8_t code1 = to_base64[uint8_t(c1)]; + uint8_t code2 = to_base64[uint8_t(c2)]; + uint8_t code3 = to_base64[uint8_t(c3)]; + buffer[idx] = code0; + idx += (is_eight_byte(c0) && code0 <= 63); + buffer[idx] = code1; + idx += (is_eight_byte(c1) && code1 <= 63); + buffer[idx] = code2; + idx += (is_eight_byte(c2) && code2 <= 63); + buffer[idx] = code3; + idx += (is_eight_byte(c3) && code3 <= 63); + src += 4; + } +#endif + while (idx < 4 && src < srcend) { + char_type c = *src; + uint8_t code = to_base64[uint8_t(c)]; + + buffer[idx] = uint8_t(code); + if (is_eight_byte(c) && code <= 63) { + idx++; + } else if (!ignore_garbage && + (code > 64 || !scalar::base64::is_eight_byte(c))) { + outlen = size_t(dst - dstinit); + srcr = src; + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + } else { + // We have a space or a newline or garbage. We ignore it. 
+ } + src++; + } + if (idx != 4) { + if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::strict && + ((idx + padded_characters) & 3) != 0) { + outlen = size_t(dst - dstinit); + srcr = src; + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; + } else if (!ignore_garbage && + last_chunk_options == + last_chunk_handling_options::stop_before_partial && + ((idx + padded_characters) & 3) != 0) { + // Rewind src to before partial chunk + srcr = srccur; + outlen = size_t(dst - dstinit); + return {SUCCESS, size_t(dst - dstinit)}; + } else { // loose mode + if (idx == 0) { + // No data left; return success + outlen = size_t(dst - dstinit); + srcr = src; + return {SUCCESS, size_t(dst - dstinit)}; + } else if (!ignore_garbage && idx == 1) { + // Error: Incomplete chunk of length 1 is invalid in loose mode + outlen = size_t(dst - dstinit); + srcr = src; + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; + } else if (idx == 2 || idx == 3) { + // Check if there's enough space in the destination buffer + size_t required_space = (idx == 2) ? 
1 : 2; + if (size_t(dstend - dst) < required_space) { + outlen = size_t(dst - dstinit); + srcr = src; + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; + } + uint32_t triple = 0; + if (idx == 2) { + triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12); + if (!ignore_garbage && + (last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xffff)) { + srcr = src; + return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; + } + // Extract the first byte + triple >>= 16; + dst[0] = static_cast(triple & 0xFF); + dst += 1; + } else if (idx == 3) { + triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12) + + (uint32_t(buffer[2]) << 6); + if (!ignore_garbage && + (last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xff)) { + srcr = src; + return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; + } + // Extract the first two bytes + triple >>= 8; + dst[0] = static_cast((triple >> 8) & 0xFF); + dst[1] = static_cast(triple & 0xFF); + dst += 2; + } + outlen = size_t(dst - dstinit); + srcr = src; + return {SUCCESS, size_t(dst - dstinit)}; + } + } + } + + if (dstend - dst < 3) { + outlen = size_t(dst - dstinit); + srcr = src; + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; + } + uint32_t triple = (uint32_t(buffer[0]) << 18) + + (uint32_t(buffer[1]) << 12) + (uint32_t(buffer[2]) << 6) + + (uint32_t(buffer[3])); + if (match_system(endianness::BIG)) { + triple <<= 8; + std::memcpy(dst, &triple, 3); + } else { + triple = scalar::u32_swap_bytes(triple); + triple >>= 8; + std::memcpy(dst, &triple, 3); + } + dst += 3; + } +} + +// Returns the number of bytes written. The destination buffer must be large +// enough. It will add padding (=) if needed. +size_t tail_encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + // By default, we use padding if we are not using the URL variant. 
+ // This is check with ((options & base64_url) == 0) which returns true if we + // are not using the URL variant. However, we also allow 'inversion' of the + // convention with the base64_reverse_padding option. If the + // base64_reverse_padding option is set, we use padding if we are using the + // URL variant, and we omit it if we are not using the URL variant. This is + // checked with + // ((options & base64_reverse_padding) == base64_reverse_padding). + bool use_padding = + ((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding); + // This looks like 3 branches, but we expect the compiler to resolve this to + // a single branch: + const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 + : tables::base64::base64_default::e0; + const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 + : tables::base64::base64_default::e1; + const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2 + : tables::base64::base64_default::e2; + char *out = dst; + size_t i = 0; + uint8_t t1, t2, t3; + for (; i + 2 < srclen; i += 3) { + t1 = uint8_t(src[i]); + t2 = uint8_t(src[i + 1]); + t3 = uint8_t(src[i + 2]); + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + } + switch (srclen - i) { + case 0: + break; + case 1: + t1 = uint8_t(src[i]); + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + if (use_padding) { + *out++ = '='; + *out++ = '='; + } + break; + default: /* case 2 */ + t1 = uint8_t(src[i]); + t2 = uint8_t(src[i + 1]); + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + if (use_padding) { + *out++ = '='; + } + } + return (size_t)(out - dst); +} + +template +simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char_type *input, size_t length) noexcept { + // We follow 
https://infra.spec.whatwg.org/#forgiving-base64-decode + size_t padding = 0; + if (length > 0) { + if (input[length - 1] == '=') { + padding++; + if (length > 1 && input[length - 2] == '=') { + padding++; + } + } + } + size_t actual_length = length - padding; + if (actual_length % 4 <= 1) { + return actual_length / 4 * 3; + } + // if we have a valid input, then the remainder must be 2 or 3 adding one or + // two extra bytes. + return actual_length / 4 * 3 + (actual_length % 4) - 1; +} + +simdutf_warn_unused size_t +base64_length_from_binary(size_t length, base64_options options) noexcept { + // By default, we use padding if we are not using the URL variant. + // This is check with ((options & base64_url) == 0) which returns true if we + // are not using the URL variant. However, we also allow 'inversion' of the + // convention with the base64_reverse_padding option. If the + // base64_reverse_padding option is set, we use padding if we are using the + // URL variant, and we omit it if we are not using the URL variant. This is + // checked with + // ((options & base64_reverse_padding) == base64_reverse_padding). + bool use_padding = + ((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding); + if (!use_padding) { + return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0); + } + return (length + 2) / 3 * + 4; // We use padding to make the length a multiple of 4. 
+} + +} // namespace base64 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/base64.h */ +#endif // SIMDUTF_FEATURE_BASE64 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ #ifndef SIMDUTF_VALID_UTF32_TO_UTF8_H #define SIMDUTF_VALID_UTF32_TO_UTF8_H @@ -14498,7 +14265,9 @@ inline result convert_with_errors(const char32_t *buf, size_t len, #endif /* end file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ #ifndef SIMDUTF_VALID_UTF32_TO_UTF16_H #define SIMDUTF_VALID_UTF32_TO_UTF16_H @@ -14519,7 +14288,7 @@ inline size_t convert_valid(const char32_t *buf, size_t len, if ((word & 0xFFFF0000) == 0) { // will not generate a surrogate pair *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(uint16_t(word))) + ? char16_t(u16_swap_bytes(uint16_t(word))) : char16_t(word); pos++; } else { @@ -14528,8 +14297,8 @@ inline size_t convert_valid(const char32_t *buf, size_t len, uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); } *utf16_output++ = char16_t(high_surrogate); *utf16_output++ = char16_t(low_surrogate); @@ -14568,7 +14337,7 @@ inline size_t convert(const char32_t *buf, size_t len, char16_t *utf16_output) { } // will not generate a surrogate pair *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(uint16_t(word))) + ? 
char16_t(u16_swap_bytes(uint16_t(word))) : char16_t(word); } else { // will generate a surrogate pair @@ -14579,8 +14348,8 @@ inline size_t convert(const char32_t *buf, size_t len, char16_t *utf16_output) { uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); } *utf16_output++ = char16_t(high_surrogate); *utf16_output++ = char16_t(low_surrogate); @@ -14604,7 +14373,7 @@ inline result convert_with_errors(const char32_t *buf, size_t len, } // will not generate a surrogate pair *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(uint16_t(word))) + ? char16_t(u16_swap_bytes(uint16_t(word))) : char16_t(word); } else { // will generate a surrogate pair @@ -14615,8 +14384,8 @@ inline result convert_with_errors(const char32_t *buf, size_t len, uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); } *utf16_output++ = char16_t(high_surrogate); *utf16_output++ = char16_t(low_surrogate); @@ -14633,7 +14402,9 @@ inline result convert_with_errors(const char32_t *buf, size_t len, #endif /* end file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ #ifndef SIMDUTF_VALID_UTF16_TO_UTF8_H #define SIMDUTF_VALID_UTF16_TO_UTF8_H @@ -14662,7 +14433,7 @@ inline size_t convert_valid(const char16_t *buf, 
size_t len, size_t final_pos = pos + 4; while (pos < final_pos) { *utf8_output++ = !match_system(big_endian) - ? char(utf16::swap_bytes(buf[pos])) + ? char(u16_swap_bytes(buf[pos])) : char(buf[pos]); pos++; } @@ -14671,7 +14442,7 @@ inline size_t convert_valid(const char16_t *buf, size_t len, } uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; if ((word & 0xFF80) == 0) { // will generate one UTF-8 bytes *utf8_output++ = char(word); @@ -14696,7 +14467,7 @@ inline size_t convert_valid(const char16_t *buf, size_t len, return 0; } // minimal bound checking uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) + ? u16_swap_bytes(data[pos + 1]) : data[pos + 1]; uint16_t diff2 = uint16_t(next_word - 0xDC00); uint32_t value = (diff << 10) + diff2 + 0x10000; @@ -14746,7 +14517,7 @@ inline size_t convert(const char16_t *buf, size_t len, char *utf8_output) { size_t final_pos = pos + 4; while (pos < final_pos) { *utf8_output++ = !match_system(big_endian) - ? char(utf16::swap_bytes(buf[pos])) + ? char(u16_swap_bytes(buf[pos])) : char(buf[pos]); pos++; } @@ -14754,7 +14525,7 @@ inline size_t convert(const char16_t *buf, size_t len, char *utf8_output) { } } uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; if ((word & 0xFF80) == 0) { // will generate one UTF-8 bytes *utf8_output++ = char(word); @@ -14782,7 +14553,7 @@ inline size_t convert(const char16_t *buf, size_t len, char *utf8_output) { return 0; } uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) + ? 
u16_swap_bytes(data[pos + 1]) : data[pos + 1]; uint16_t diff2 = uint16_t(next_word - 0xDC00); if (diff2 > 0x3FF) { @@ -14819,7 +14590,7 @@ inline result convert_with_errors(const char16_t *buf, size_t len, size_t final_pos = pos + 4; while (pos < final_pos) { *utf8_output++ = !match_system(big_endian) - ? char(utf16::swap_bytes(buf[pos])) + ? char(u16_swap_bytes(buf[pos])) : char(buf[pos]); pos++; } @@ -14827,7 +14598,7 @@ inline result convert_with_errors(const char16_t *buf, size_t len, } } uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; if ((word & 0xFF80) == 0) { // will generate one UTF-8 bytes *utf8_output++ = char(word); @@ -14855,7 +14626,7 @@ inline result convert_with_errors(const char16_t *buf, size_t len, return result(error_code::SURROGATE, pos); } uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) + ? u16_swap_bytes(data[pos + 1]) : data[pos + 1]; uint16_t diff2 = uint16_t(next_word - 0xDC00); if (diff2 > 0x3FF) { @@ -14881,7 +14652,9 @@ inline result convert_with_errors(const char16_t *buf, size_t len, #endif /* end file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ #ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H #define SIMDUTF_VALID_UTF16_TO_UTF32_H @@ -14899,7 +14672,7 @@ inline size_t convert_valid(const char16_t *buf, size_t len, char32_t *start{utf32_output}; while (pos < len) { uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + !match_system(big_endian) ? 
u16_swap_bytes(data[pos]) : data[pos]; if ((word & 0xF800) != 0xD800) { // No surrogate pair, extend 16-bit word to 32-bit word *utf32_output++ = char32_t(word); @@ -14911,7 +14684,7 @@ inline size_t convert_valid(const char16_t *buf, size_t len, return 0; } // minimal bound checking uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) + ? u16_swap_bytes(data[pos + 1]) : data[pos + 1]; uint16_t diff2 = uint16_t(next_word - 0xDC00); uint32_t value = (diff << 10) + diff2 + 0x10000; @@ -14945,7 +14718,7 @@ inline size_t convert(const char16_t *buf, size_t len, char32_t *utf32_output) { char32_t *start{utf32_output}; while (pos < len) { uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; if ((word & 0xF800) != 0xD800) { // No surrogate pair, extend 16-bit word to 32-bit word *utf32_output++ = char32_t(word); @@ -14960,7 +14733,7 @@ inline size_t convert(const char16_t *buf, size_t len, char32_t *utf32_output) { return 0; } // minimal bound checking uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) + ? u16_swap_bytes(data[pos + 1]) : data[pos + 1]; uint16_t diff2 = uint16_t(next_word - 0xDC00); if (diff2 > 0x3FF) { @@ -14982,7 +14755,7 @@ inline result convert_with_errors(const char16_t *buf, size_t len, char32_t *start{utf32_output}; while (pos < len) { uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; if ((word & 0xF800) != 0xD800) { // No surrogate pair, extend 16-bit word to 32-bit word *utf32_output++ = char32_t(word); @@ -14997,7 +14770,7 @@ inline result convert_with_errors(const char16_t *buf, size_t len, return result(error_code::SURROGATE, pos); } // minimal bound checking uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) + ? 
u16_swap_bytes(data[pos + 1]) : data[pos + 1]; uint16_t diff2 = uint16_t(next_word - 0xDC00); if (diff2 > 0x3FF) { @@ -15018,7 +14791,10 @@ inline result convert_with_errors(const char16_t *buf, size_t len, #endif /* end file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && \ + (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) /* begin file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ #ifndef SIMDUTF_VALID_UTF8_TO_UTF16_H #define SIMDUTF_VALID_UTF8_TO_UTF16_H @@ -15044,7 +14820,7 @@ inline size_t convert_valid(const char *buf, size_t len, size_t final_pos = pos + 8; while (pos < final_pos) { *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(buf[pos])) + ? char16_t(u16_swap_bytes(buf[pos])) : char16_t(buf[pos]); pos++; } @@ -15055,7 +14831,7 @@ inline size_t convert_valid(const char *buf, size_t len, if (leading_byte < 0b10000000) { // converting one ASCII byte !!! *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(leading_byte)) + ? 
char16_t(u16_swap_bytes(leading_byte)) : char16_t(leading_byte); pos++; } else if ((leading_byte & 0b11100000) == 0b11000000) { @@ -15067,7 +14843,7 @@ inline size_t convert_valid(const char *buf, size_t len, uint16_t code_point = uint16_t(((leading_byte & 0b00011111) << 6) | (data[pos + 1] & 0b00111111)); if (!match_system(big_endian)) { - code_point = utf16::swap_bytes(uint16_t(code_point)); + code_point = u16_swap_bytes(uint16_t(code_point)); } *utf16_output++ = char16_t(code_point); pos += 2; @@ -15081,7 +14857,7 @@ inline size_t convert_valid(const char *buf, size_t len, ((data[pos + 1] & 0b00111111) << 6) | (data[pos + 2] & 0b00111111)); if (!match_system(big_endian)) { - code_point = utf16::swap_bytes(uint16_t(code_point)); + code_point = u16_swap_bytes(uint16_t(code_point)); } *utf16_output++ = char16_t(code_point); pos += 3; @@ -15098,8 +14874,8 @@ inline size_t convert_valid(const char *buf, size_t len, uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); } *utf16_output++ = char16_t(high_surrogate); *utf16_output++ = char16_t(low_surrogate); @@ -15146,7 +14922,7 @@ inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { size_t final_pos = pos + 16; while (pos < final_pos) { *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(buf[pos])) + ? char16_t(u16_swap_bytes(buf[pos])) : char16_t(buf[pos]); pos++; } @@ -15158,7 +14934,7 @@ inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { if (leading_byte < 0b10000000) { // converting one ASCII byte !!! *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(leading_byte)) + ? 
char16_t(u16_swap_bytes(leading_byte)) : char16_t(leading_byte); pos++; } else if ((leading_byte & 0b11100000) == 0b11000000) { @@ -15177,7 +14953,7 @@ inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { return 0; } if (!match_system(big_endian)) { - code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); } *utf16_output++ = char16_t(code_point); pos += 2; @@ -15203,7 +14979,7 @@ inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { return 0; } if (!match_system(big_endian)) { - code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); } *utf16_output++ = char16_t(code_point); pos += 3; @@ -15234,8 +15010,8 @@ inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); } *utf16_output++ = char16_t(high_surrogate); *utf16_output++ = char16_t(low_surrogate); @@ -15266,7 +15042,7 @@ inline result convert_with_errors(const char *buf, size_t len, size_t final_pos = pos + 16; while (pos < final_pos) { *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(buf[pos])) + ? char16_t(u16_swap_bytes(buf[pos])) : char16_t(buf[pos]); pos++; } @@ -15277,7 +15053,7 @@ inline result convert_with_errors(const char *buf, size_t len, if (leading_byte < 0b10000000) { // converting one ASCII byte !!! *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(leading_byte)) + ? 
char16_t(u16_swap_bytes(leading_byte)) : char16_t(leading_byte); pos++; } else if ((leading_byte & 0b11100000) == 0b11000000) { @@ -15296,7 +15072,7 @@ inline result convert_with_errors(const char *buf, size_t len, return result(error_code::OVERLONG, pos); } if (!match_system(big_endian)) { - code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); } *utf16_output++ = char16_t(code_point); pos += 2; @@ -15324,7 +15100,7 @@ inline result convert_with_errors(const char *buf, size_t len, return result(error_code::SURROGATE, pos); } if (!match_system(big_endian)) { - code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); } *utf16_output++ = char16_t(code_point); pos += 3; @@ -15358,8 +15134,8 @@ inline result convert_with_errors(const char *buf, size_t len, uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); } *utf16_output++ = char16_t(high_surrogate); *utf16_output++ = char16_t(low_surrogate); @@ -15447,7 +15223,10 @@ inline result rewind_and_convert_with_errors(size_t prior_bytes, #endif /* end file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 /* begin file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ #ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H #define SIMDUTF_VALID_UTF8_TO_UTF32_H @@ -15809,7 +15588,117 @@ inline result rewind_and_convert_with_errors(size_t prior_bytes, #endif /* end file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ +#endif // 
SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF8_H +#define SIMDUTF_LATIN1_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1_to_utf8 { + +inline size_t convert(const char *buf, size_t len, char *utf8_output) { + const unsigned char *data = reinterpret_cast(buf); + size_t pos = 0; + size_t utf8_pos = 0; + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. 
all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + utf8_output[utf8_pos++] = char(buf[pos]); + pos++; + } + continue; + } + } + + unsigned char byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } + } + return utf8_pos; +} + +inline size_t convert_safe(const char *buf, size_t len, char *utf8_output, + size_t utf8_len) { + const unsigned char *data = reinterpret_cast(buf); + size_t pos = 0; + size_t skip_pos = 0; + size_t utf8_pos = 0; + while (pos < len && utf8_pos < utf8_len) { + // try to convert the next block of 16 ASCII bytes + if (pos >= skip_pos && pos + 16 <= len && + utf8_pos + 16 <= utf8_len) { // if it is safe to read 16 more bytes, + // check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. 
all of them are zero, then + // everything is ASCII + ::memcpy(utf8_output + utf8_pos, buf + pos, 16); + utf8_pos += 16; + pos += 16; + } else { + // At least one of the next 16 bytes are not ASCII, we will process them + // one by one + skip_pos = pos + 16; + } + } else { + const auto byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else if (utf8_pos + 2 <= utf8_len) { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } else { + break; + } + } + } + return utf8_pos; +} + +} // namespace latin1_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ #ifndef SIMDUTF_LATIN1_TO_UTF16_H #define SIMDUTF_LATIN1_TO_UTF16_H @@ -15829,7 +15718,7 @@ inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point *utf16_output++ = - char16_t(match_system(big_endian) ? word : utf16::swap_bytes(word)); + char16_t(match_system(big_endian) ? word : u16_swap_bytes(word)); pos++; } @@ -15847,7 +15736,7 @@ inline result convert_with_errors(const char *buf, size_t len, uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point *utf16_output++ = - char16_t(match_system(big_endian) ? word : utf16::swap_bytes(word)); + char16_t(match_system(big_endian) ? 
word : u16_swap_bytes(word)); pos++; } @@ -15861,6 +15750,8 @@ inline result convert_with_errors(const char *buf, size_t len, #endif /* end file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ #ifndef SIMDUTF_LATIN1_TO_UTF32_H #define SIMDUTF_LATIN1_TO_UTF32_H @@ -15886,7 +15777,9 @@ inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) { #endif /* end file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ #ifndef SIMDUTF_UTF8_TO_LATIN1_H #define SIMDUTF_UTF8_TO_LATIN1_H @@ -16096,6 +15989,8 @@ inline result rewind_and_convert_with_errors(size_t prior_bytes, #endif /* end file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ #ifndef SIMDUTF_UTF16_TO_LATIN1_H #define SIMDUTF_UTF16_TO_LATIN1_H @@ -16119,7 +16014,7 @@ inline size_t convert(const char16_t *buf, size_t len, char *latin_output) { uint16_t too_large = 0; while (pos < len) { - word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; too_large |= word; *current_write++ = char(word & 0xFF); pos++; @@ -16168,14 +16063,14 @@ inline result convert_with_errors(const char16_t *buf, size_t len, size_t final_pos = pos + 16; while (pos < final_pos) { *latin_output++ = !match_system(big_endian) - ? char(utf16::swap_bytes(data[pos])) + ? char(u16_swap_bytes(data[pos])) : char(data[pos]); pos++; } continue; } } - word = !match_system(big_endian) ? 
utf16::swap_bytes(data[pos]) : data[pos]; + word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; if ((word & 0xFF00) == 0) { *latin_output++ = char(word & 0xFF); pos++; @@ -16193,6 +16088,8 @@ inline result convert_with_errors(const char16_t *buf, size_t len, #endif /* end file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ #ifndef SIMDUTF_UTF32_TO_LATIN1_H #define SIMDUTF_UTF32_TO_LATIN1_H @@ -16257,7 +16154,9 @@ inline result convert_with_errors(const char32_t *buf, size_t len, #endif /* end file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ #ifndef SIMDUTF_VALID_UTF8_TO_LATIN1_H #define SIMDUTF_VALID_UTF8_TO_LATIN1_H @@ -16338,6 +16237,8 @@ inline size_t convert_valid(const char *buf, size_t len, char *latin_output) { #endif /* end file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ #ifndef SIMDUTF_VALID_UTF16_TO_LATIN1_H #define SIMDUTF_VALID_UTF16_TO_LATIN1_H @@ -16356,7 +16257,7 @@ inline size_t convert_valid(const char16_t *buf, size_t len, uint16_t word = 0; while (pos < len) { - word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + word = !match_system(big_endian) ? 
u16_swap_bytes(data[pos]) : data[pos]; *latin_output++ = char(word); pos++; } @@ -16371,6 +16272,8 @@ inline size_t convert_valid(const char16_t *buf, size_t len, #endif /* end file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ #ifndef SIMDUTF_VALID_UTF32_TO_LATIN1_H #define SIMDUTF_VALID_UTF32_TO_LATIN1_H @@ -16422,6 +16325,2235 @@ inline size_t convert_valid(const char32_t *buf, size_t len, #endif /* end file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +/* begin file src/implementation.cpp */ +#include +#include +#include + +static_assert(sizeof(uint8_t) == sizeof(char), + "simdutf requires that uint8_t be a char"); +static_assert(sizeof(uint16_t) == sizeof(char16_t), + "simdutf requires that char16_t be 16 bits"); +static_assert(sizeof(uint32_t) == sizeof(char32_t), + "simdutf requires that char32_t be 32 bits"); +// next line is redundant, but it is kept to catch defective systems. +static_assert(CHAR_BIT == 8, "simdutf requires 8-bit bytes"); + +// Useful for debugging purposes +namespace simdutf { +namespace { + +template std::string toBinaryString(T b) { + std::string binary = ""; + T mask = T(1) << (sizeof(T) * CHAR_BIT - 1); + while (mask > 0) { + binary += ((b & mask) == 0) ? 
'0' : '1'; + mask >>= 1; + } + return binary; +} +} // namespace +} // namespace simdutf + +namespace simdutf { +bool implementation::supported_by_runtime_system() const { + uint32_t required_instruction_sets = this->required_instruction_sets(); + uint32_t supported_instruction_sets = + internal::detect_supported_architectures(); + return ((supported_instruction_sets & required_instruction_sets) == + required_instruction_sets); +} + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused encoding_type implementation::autodetect_encoding( + const char *input, size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + // UTF8 is common, it includes ASCII, and is commonly represented + // without a BOM, so if it fits, go with that. Note that it is still + // possible to get it wrong, we are only 'guessing'. If someone has UTF-16 + // data without a BOM, it could pass as UTF-8. + // + // An interesting twist might be to check for UTF-16 ASCII first (every + // other byte is zero). + if (validate_utf8(input, length)) { + return encoding_type::UTF8; + } + // The next most common encoding that might appear without BOM is probably + // UTF-16LE, so try that next. 
+ if ((length % 2) == 0) { + // important: we need to divide by two + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + return encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + return encoding_type::UTF32_LE; + } + } + return encoding_type::unspecified; +} + + #ifdef SIMDUTF_INTERNAL_TESTS +std::vector +implementation::internal_tests() const { + return {}; +} + #endif +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} + +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} + +simdutf_warn_unused size_t implementation::base64_length_from_binary( + size_t length, base64_options options) const noexcept { + return scalar::base64::base64_length_from_binary(length, options); +} +#endif // SIMDUTF_FEATURE_BASE64 + +namespace internal { +// When there is a single implementation, we should not pay a price +// for dispatching to the best implementation. We should just use the +// one we have. This is a compile-time check. +#define SIMDUTF_SINGLE_IMPLEMENTATION \ + (SIMDUTF_IMPLEMENTATION_ICELAKE + SIMDUTF_IMPLEMENTATION_HASWELL + \ + SIMDUTF_IMPLEMENTATION_WESTMERE + SIMDUTF_IMPLEMENTATION_ARM64 + \ + SIMDUTF_IMPLEMENTATION_PPC64 + SIMDUTF_IMPLEMENTATION_LSX + \ + SIMDUTF_IMPLEMENTATION_LASX + SIMDUTF_IMPLEMENTATION_FALLBACK == \ + 1) + +// Static array of known implementations. We are hoping these get baked into the +// executable without requiring a static initializer. 
+ +#if SIMDUTF_IMPLEMENTATION_ICELAKE +static const icelake::implementation *get_icelake_singleton() { + static const icelake::implementation icelake_singleton{}; + return &icelake_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_HASWELL +static const haswell::implementation *get_haswell_singleton() { + static const haswell::implementation haswell_singleton{}; + return &haswell_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_WESTMERE +static const westmere::implementation *get_westmere_singleton() { + static const westmere::implementation westmere_singleton{}; + return &westmere_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_ARM64 +static const arm64::implementation *get_arm64_singleton() { + static const arm64::implementation arm64_singleton{}; + return &arm64_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_PPC64 +static const ppc64::implementation *get_ppc64_singleton() { + static const ppc64::implementation ppc64_singleton{}; + return &ppc64_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_RVV +static const rvv::implementation *get_rvv_singleton() { + static const rvv::implementation rvv_singleton{}; + return &rvv_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_LSX +static const lsx::implementation *get_lsx_singleton() { + static const lsx::implementation lsx_singleton{}; + return &lsx_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_LASX +static const lasx::implementation *get_lasx_singleton() { + static const lasx::implementation lasx_singleton{}; + return &lasx_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_FALLBACK +static const fallback::implementation *get_fallback_singleton() { + static const fallback::implementation fallback_singleton{}; + return &fallback_singleton; +} +#endif + +#if SIMDUTF_SINGLE_IMPLEMENTATION +static const implementation *get_single_implementation() { + return + #if SIMDUTF_IMPLEMENTATION_ICELAKE + get_icelake_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_HASWELL + get_haswell_singleton(); + #endif + #if 
SIMDUTF_IMPLEMENTATION_WESTMERE + get_westmere_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_ARM64 + get_arm64_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_PPC64 + get_ppc64_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_LSX + get_lsx_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_LASX + get_lasx_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_FALLBACK + get_fallback_singleton(); + #endif +} +#endif + +/** + * @private Detects best supported implementation on first use, and sets it + */ +class detect_best_supported_implementation_on_first_use final + : public implementation { +public: + std::string name() const noexcept final { return set_best()->name(); } + std::string description() const noexcept final { + return set_best()->description(); + } + uint32_t required_instruction_sets() const noexcept final { + return set_best()->required_instruction_sets(); + } + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int + detect_encodings(const char *input, size_t length) const noexcept override { + return set_best()->detect_encodings(input, length); + } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool + validate_utf8(const char *buf, size_t len) const noexcept final override { + return set_best()->validate_utf8(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result validate_utf8_with_errors( + const char *buf, size_t len) const noexcept final override { + return set_best()->validate_utf8_with_errors(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_ascii(const char *buf, size_t len) const noexcept final override { + return set_best()->validate_ascii(buf, len); + } + + simdutf_warn_unused result validate_ascii_with_errors( + const char *buf, size_t len) const noexcept final override { + return 
set_best()->validate_ascii_with_errors(buf, len); + } +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool + validate_utf16le(const char16_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf16le(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool + validate_utf16be(const char16_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf16be(buf, len); + } + + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final override { + return set_best()->validate_utf16le_with_errors(buf, len); + } + + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final override { + return set_best()->validate_utf16be_with_errors(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool + validate_utf32(const char32_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf32(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final override { + return set_best()->validate_utf32_with_errors(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_latin1_to_utf8(const char *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_latin1_to_utf8(buf, len, utf8_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t 
len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_latin1_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_latin1_to_utf16be(buf, len, utf16_output); + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, + char32_t *latin1_output) const noexcept final override { + return set_best()->convert_latin1_to_utf32(buf, len, latin1_output); + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf8_to_latin1(const char *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf8_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf8_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_latin1(buf, len, latin1_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return 
set_best()->convert_utf8_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16le_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16be_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_utf16be(buf, len, utf16_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf8_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf8_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf8_to_utf32_with_errors(buf, len, + utf32_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, 
size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16le_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16be_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16le_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16be_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf16le_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf16be_to_latin1(buf, len, latin1_output); + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + convert_utf16le_to_utf8(const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused size_t + convert_utf16be_to_utf8(const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, + char 
*utf8_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf8_with_errors(buf, len, + utf8_output); + } + + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf8_with_errors(buf, len, + utf8_output); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_valid_utf16le_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_valid_utf16be_to_utf8(buf, len, utf8_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused result convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf32_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_latin1(buf, len, latin1_output); + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf32_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused result 
convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + } + + simdutf_warn_unused size_t + convert_valid_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_utf8(buf, len, utf8_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf16le( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_utf32_to_utf16be( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16le_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16be_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused size_t 
convert_utf16le_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused size_t convert_utf16be_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf32_with_errors(buf, len, + utf32_output); + } + + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf32_with_errors(buf, len, + utf32_output); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_valid_utf16le_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_valid_utf16be_to_utf32(buf, len, utf32_output); + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t len, + char16_t *output) const noexcept final override { + set_best()->change_endianness_utf16(buf, len, output); + } + + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t len) const noexcept final override { + return set_best()->count_utf16le(buf, len); + } + + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t len) const noexcept final override { + return set_best()->count_utf16be(buf, len); + } +#endif // 
SIMDUTF_FEATURE_UTF16
+
+  // Every override in this part of the class resolves the real implementation
+  // through set_best() and forwards its arguments to it unchanged.
+#if SIMDUTF_FEATURE_UTF8
+  simdutf_warn_unused size_t
+  count_utf8(const char *buf, size_t len) const noexcept final override {
+    return set_best()->count_utf8(buf, len);
+  }
+#endif // SIMDUTF_FEATURE_UTF8
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t
+  latin1_length_from_utf8(const char *buf, size_t len) const noexcept override {
+    return set_best()->latin1_length_from_utf8(buf, len);
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t
+  utf8_length_from_latin1(const char *buf, size_t len) const noexcept override {
+    return set_best()->utf8_length_from_latin1(buf, len);
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t utf8_length_from_utf16le(
+      const char16_t *buf, size_t len) const noexcept override {
+    return set_best()->utf8_length_from_utf16le(buf, len);
+  }
+
+  simdutf_warn_unused size_t utf8_length_from_utf16be(
+      const char16_t *buf, size_t len) const noexcept override {
+    return set_best()->utf8_length_from_utf16be(buf, len);
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t utf32_length_from_utf16le(
+      const char16_t *buf, size_t len) const noexcept override {
+    return set_best()->utf32_length_from_utf16le(buf, len);
+  }
+
+  simdutf_warn_unused size_t utf32_length_from_utf16be(
+      const char16_t *buf, size_t len) const noexcept override {
+    return set_best()->utf32_length_from_utf16be(buf, len);
+  }
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t
+  utf16_length_from_utf8(const char *buf, size_t len) const noexcept override {
+    return set_best()->utf16_length_from_utf8(buf, len);
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t utf8_length_from_utf32(
+      const char32_t *buf, size_t len) const noexcept override {
+    return set_best()->utf8_length_from_utf32(buf, len);
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t utf16_length_from_utf32(
+      const char32_t *buf, size_t len) const noexcept override {
+    return set_best()->utf16_length_from_utf32(buf, len);
+  }
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t
+  utf32_length_from_utf8(const char *buf, size_t len) const noexcept override {
+    return set_best()->utf32_length_from_utf8(buf, len);
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_BASE64
+  simdutf_warn_unused result base64_to_binary(
+      const char *input, size_t length, char *output, base64_options options,
+      last_chunk_handling_options last_chunk_handling_options =
+          last_chunk_handling_options::loose) const noexcept override {
+    return set_best()->base64_to_binary(input, length, output, options,
+                                        last_chunk_handling_options);
+  }
+
+  simdutf_warn_unused full_result base64_to_binary_details(
+      const char *input, size_t length, char *output, base64_options options,
+      last_chunk_handling_options last_chunk_handling_options =
+          last_chunk_handling_options::loose) const noexcept override {
+    return set_best()->base64_to_binary_details(input, length, output, options,
+                                                last_chunk_handling_options);
+  }
+
+  simdutf_warn_unused result base64_to_binary(
+      const char16_t *input, size_t length, char *output,
+      base64_options options,
+      last_chunk_handling_options last_chunk_handling_options =
+          last_chunk_handling_options::loose) const noexcept override {
+    return set_best()->base64_to_binary(input, length, output, options,
+                                        last_chunk_handling_options);
+  }
+
+  simdutf_warn_unused full_result base64_to_binary_details(
+      const char16_t *input, size_t length, char *output,
+      base64_options options,
+      last_chunk_handling_options last_chunk_handling_options =
+          last_chunk_handling_options::loose) const noexcept override {
+    return set_best()->base64_to_binary_details(input, length, output, options,
+                                                last_chunk_handling_options);
+  }
+
+  size_t binary_to_base64(const char *input, size_t length, char *output,
+                          base64_options options) const noexcept override {
+    return set_best()->binary_to_base64(input, length, output, options);
+  }
+#endif // SIMDUTF_FEATURE_BASE64
+
+  // Registers this placeholder under the name "best_supported_detector" with
+  // no required instruction sets (0).
+  simdutf_really_inline
+  detect_best_supported_implementation_on_first_use() noexcept
+      : implementation("best_supported_detector",
+                       "Detects the best supported implementation and sets it",
+                       0) {}
+
+private:
+  // Defined out of line; picks an implementation and stores it as the active
+  // one before returning it.
+  const implementation *set_best() const noexcept;
+};
+
+static_assert(std::is_trivially_destructible<
+                  detect_best_supported_implementation_on_first_use>::value,
+              "detect_best_supported_implementation_on_first_use should be "
+              "trivially destructible");
+
+// Returns the implementations compiled into this build. The entries are
+// listed in the priority order that detect_best_supported() walks, so the
+// preferred implementation must come first. The element type is
+// `const implementation *`, matching the `const implementation *const *`
+// iterators handed out by available_implementation_list::begin()/end().
+static const std::initializer_list<const implementation *> &
+get_available_implementation_pointers() {
+  static const std::initializer_list<const implementation *>
+      available_implementation_pointers{
+#if SIMDUTF_IMPLEMENTATION_ICELAKE
+          get_icelake_singleton(),
+#endif
+#if SIMDUTF_IMPLEMENTATION_HASWELL
+          get_haswell_singleton(),
+#endif
+#if SIMDUTF_IMPLEMENTATION_WESTMERE
+          get_westmere_singleton(),
+#endif
+#if SIMDUTF_IMPLEMENTATION_ARM64
+          get_arm64_singleton(),
+#endif
+#if SIMDUTF_IMPLEMENTATION_PPC64
+          get_ppc64_singleton(),
+#endif
+#if SIMDUTF_IMPLEMENTATION_RVV
+          get_rvv_singleton(),
+#endif
+#if SIMDUTF_IMPLEMENTATION_LSX
+          get_lsx_singleton(),
+#endif
+#if SIMDUTF_IMPLEMENTATION_LASX
+          get_lasx_singleton(),
+#endif
+#if SIMDUTF_IMPLEMENTATION_FALLBACK
+          get_fallback_singleton(),
+#endif
+      }; // available_implementation_pointers
+  return available_implementation_pointers;
+}
+
+// So we can return UNSUPPORTED_ARCHITECTURE from
the parser when there is no
+// support
+//
+// Every override below is a stub that ignores its arguments: validators
+// return false, conversion/count/length routines return 0, the *_with_errors
+// and base64 decoding routines report error_code::OTHER at position 0, and
+// change_endianness_utf16 does nothing.
+class unsupported_implementation final : public implementation {
+public:
+#if SIMDUTF_FEATURE_DETECT_ENCODING
+  simdutf_warn_unused int detect_encodings(const char *,
+                                           size_t) const noexcept override {
+    return encoding_type::unspecified;
+  }
+#endif // SIMDUTF_FEATURE_DETECT_ENCODING
+
+#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
+  simdutf_warn_unused bool validate_utf8(const char *,
+                                         size_t) const noexcept final override {
+    return false; // Just refuse to validate. Given that we have a fallback
+                  // implementation
+    // it seems unlikely that unsupported_implementation will ever be used. If
+    // it is used, then it will flag all strings as invalid. The alternative is
+    // to return an error_code from which the user has to figure out whether the
+    // string is valid UTF-8... which seems like a lot of work just to handle
+    // the very unlikely case that we have an unsupported implementation. And,
+    // when it does happen (that we have an unsupported implementation), what
+    // are the chances that the programmer has a fallback? Given that *we*
+    // provide the fallback, it implies that the programmer would need a
+    // fallback for our fallback.
+  }
+#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
+
+#if SIMDUTF_FEATURE_UTF8
+  simdutf_warn_unused result validate_utf8_with_errors(
+      const char *, size_t) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+#endif // SIMDUTF_FEATURE_UTF8
+
+#if SIMDUTF_FEATURE_ASCII
+  simdutf_warn_unused bool
+  validate_ascii(const char *, size_t) const noexcept final override {
+    return false;
+  }
+
+  simdutf_warn_unused result validate_ascii_with_errors(
+      const char *, size_t) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+#endif // SIMDUTF_FEATURE_ASCII
+
+#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
+  simdutf_warn_unused bool
+  validate_utf16le(const char16_t *, size_t) const noexcept final override {
+    return false;
+  }
+#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
+
+#if SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused bool
+  validate_utf16be(const char16_t *, size_t) const noexcept final override {
+    return false;
+  }
+
+  simdutf_warn_unused result validate_utf16le_with_errors(
+      const char16_t *, size_t) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused result validate_utf16be_with_errors(
+      const char16_t *, size_t) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+#endif // SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
+  simdutf_warn_unused bool
+  validate_utf32(const char32_t *, size_t) const noexcept final override {
+    return false;
+  }
+#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
+
+#if SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused result validate_utf32_with_errors(
+      const char32_t *, size_t) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+#endif // SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_latin1_to_utf8(
+      const char *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(
+      const char *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(
+      const char *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_latin1_to_utf32(
+      const char *, size_t, char32_t *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_utf8_to_latin1(
+      const char *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
+      const char *, size_t, char *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
+      const char *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t convert_utf8_to_utf16le(
+      const char *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_utf8_to_utf16be(
+      const char *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
+      const char *, size_t, char16_t *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
+      const char *, size_t, char16_t *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
+      const char *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
+      const char *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t convert_utf8_to_utf32(
+      const char *, size_t, char32_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
+      const char *, size_t, char32_t *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
+      const char *, size_t, char32_t *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_utf16le_to_latin1(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_utf16be_to_latin1(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t convert_utf16le_to_utf8(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_utf16be_to_utf8(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
+      const char16_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_utf32_to_latin1(
+      const char32_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf32_to_latin1_with_errors(
+      const char32_t *, size_t, char *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf32_to_latin1(
+      const char32_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t convert_utf32_to_utf8(
+      const char32_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
+      const char32_t *, size_t, char *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
+      const char32_t *, size_t, char *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t convert_utf32_to_utf16le(
+      const char32_t *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_utf32_to_utf16be(
+      const char32_t *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
+      const char32_t *, size_t, char16_t *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
+      const char32_t *, size_t, char16_t *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(
+      const char32_t *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(
+      const char32_t *, size_t, char16_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_utf16le_to_utf32(
+      const char16_t *, size_t, char32_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_utf16be_to_utf32(
+      const char16_t *, size_t, char32_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
+      const char16_t *, size_t, char32_t *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
+      const char16_t *, size_t, char32_t *) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(
+      const char16_t *, size_t, char32_t *) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(
+      const char16_t *, size_t, char32_t *) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF16
+  void change_endianness_utf16(const char16_t *, size_t,
+                               char16_t *) const noexcept final override {}
+
+  simdutf_warn_unused size_t
+  count_utf16le(const char16_t *, size_t) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t
+  count_utf16be(const char16_t *, size_t) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF8
+  simdutf_warn_unused size_t count_utf8(const char *,
+                                        size_t) const noexcept final override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t
+  latin1_length_from_utf8(const char *, size_t) const noexcept override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t
+  utf8_length_from_latin1(const char *, size_t) const noexcept override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t
+  utf8_length_from_utf16le(const char16_t *, size_t) const noexcept override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t
+  utf8_length_from_utf16be(const char16_t *, size_t) const noexcept override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t
+  utf32_length_from_utf16le(const char16_t *, size_t) const noexcept override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t
+  utf32_length_from_utf16be(const char16_t *, size_t) const noexcept override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t
+  utf16_length_from_utf8(const char *, size_t) const noexcept override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t
+  utf8_length_from_utf32(const char32_t *, size_t) const noexcept override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t
+  utf16_length_from_utf32(const char32_t *, size_t) const noexcept override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t
+  utf32_length_from_utf8(const char *, size_t) const noexcept override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_BASE64
+  simdutf_warn_unused result
+  base64_to_binary(const char *, size_t, char *, base64_options,
+                   last_chunk_handling_options) const noexcept override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused full_result base64_to_binary_details(
+      const char *, size_t, char *, base64_options,
+      last_chunk_handling_options) const noexcept override {
+    return full_result(error_code::OTHER, 0, 0);
+  }
+
+  simdutf_warn_unused result
+  base64_to_binary(const char16_t *, size_t, char *, base64_options,
+                   last_chunk_handling_options) const noexcept override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused full_result base64_to_binary_details(
+      const char16_t *, size_t, char *, base64_options,
+      last_chunk_handling_options) const noexcept override {
+    return full_result(error_code::OTHER, 0, 0);
+  }
+
+  size_t binary_to_base64(const char *, size_t, char *,
+                          base64_options) const noexcept override {
+    return 0;
+  }
+#endif // SIMDUTF_FEATURE_BASE64
+
+  // Registers the stub under the name "unsupported" with no required
+  // instruction sets (0).
+  unsupported_implementation()
+      : implementation("unsupported",
+                       "Unsupported CPU (no detected SIMD instructions)", 0) {}
+};
+
+// Returns the address of a function-local static stub instance.
+const unsupported_implementation *get_unsupported_singleton() {
+  static const unsupported_implementation unsupported_singleton{};
+  return &unsupported_singleton;
+}
+// The trait is applied to the stub's own type, the type of the singleton
+// defined just above.
+static_assert(std::is_trivially_destructible<unsupported_implementation>::value,
+              "unsupported_singleton should be trivially destructible");
+
+// size()/begin()/end() expose the list returned by
+// internal::get_available_implementation_pointers().
+size_t available_implementation_list::size() const noexcept {
+  return internal::get_available_implementation_pointers().size();
+}
+const implementation *const *
+available_implementation_list::begin() const noexcept {
+  return internal::get_available_implementation_pointers().begin();
+}
+const implementation *const *
+available_implementation_list::end() const noexcept {
+  return internal::get_available_implementation_pointers().end();
+}
+const implementation *
+available_implementation_list::detect_best_supported() const noexcept {
+  // They are prelisted in priority order, so we just go down the list
+  uint32_t supported_instruction_sets =
+      internal::detect_supported_architectures();
+  for (const implementation *impl :
+       internal::get_available_implementation_pointers()) {
+    uint32_t required_instruction_sets = impl->required_instruction_sets();
+    // Accept the first entry whose required bits are all present in the
+    // detected set.
+    if ((supported_instruction_sets & required_instruction_sets) ==
+        required_instruction_sets) {
+      return impl;
+    }
+  }
+  return get_unsupported_singleton(); // this should never happen?
+}
+
+// Chooses the implementation to use, stores it through
+// get_active_implementation(), and returns it. A value in the
+// SIMDUTF_FORCE_IMPLEMENTATION environment variable takes precedence over
+// detection; a name that is not in the available list selects the
+// unsupported stub.
+const implementation *
+detect_best_supported_implementation_on_first_use::set_best() const noexcept {
+  SIMDUTF_PUSH_DISABLE_WARNINGS
+  SIMDUTF_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC:
+                                     // manually verified this is safe
+      char *force_implementation_name = getenv("SIMDUTF_FORCE_IMPLEMENTATION");
+  SIMDUTF_POP_DISABLE_WARNINGS
+
+  if (force_implementation_name) {
+    auto force_implementation =
+        get_available_implementations()[force_implementation_name];
+    if (force_implementation) {
+      return get_active_implementation() = force_implementation;
+    } else {
+      // Note: abort() and stderr usage within the library is forbidden.
+      return get_active_implementation() = get_unsupported_singleton();
+    }
+  }
+  return get_active_implementation() =
+             get_available_implementations().detect_best_supported();
+}
+
+} // namespace internal
+
+/**
+ * The list of available implementations compiled into simdutf.
+ */
+SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list &
+get_available_implementations() {
+  static const internal::available_implementation_list
+      available_implementations{};
+  return available_implementations;
+}
+
+/**
+ * The active implementation.
+ */
+SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
+get_active_implementation() {
+#if SIMDUTF_SINGLE_IMPLEMENTATION
+  // skip runtime detection
+  static internal::atomic_ptr<const implementation> active_implementation{
+      internal::get_single_implementation()};
+  return active_implementation;
+#else
+  // Start out pointing at the detector object. Its set_best() assigns the
+  // chosen implementation to this same pointer, replacing the detector.
+  static const internal::detect_best_supported_implementation_on_first_use
+      detect_best_supported_implementation_on_first_use_singleton;
+  static internal::atomic_ptr<const implementation> active_implementation{
+      &detect_best_supported_implementation_on_first_use_singleton};
+  return active_implementation;
+#endif
+}
+
+// get_default_implementation() is what the free functions below dispatch
+// through: the single compiled-in implementation when
+// SIMDUTF_SINGLE_IMPLEMENTATION is set, otherwise the active-implementation
+// pointer.
+#if SIMDUTF_SINGLE_IMPLEMENTATION
+const implementation *get_default_implementation() {
+  return internal::get_single_implementation();
+}
+#else
+internal::atomic_ptr<const implementation> &get_default_implementation() {
+  return get_active_implementation();
+}
+#endif
+// NOTE(review): the macro name is spelled "IMPLEMENTION"; it is left as is
+// because code outside this view may test it - confirm before renaming.
+#define SIMDUTF_GET_CURRENT_IMPLEMENTION
+
+#if SIMDUTF_FEATURE_UTF8
+simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept {
+  return get_default_implementation()->validate_utf8(buf, len);
+}
+simdutf_warn_unused result validate_utf8_with_errors(const char *buf,
+                                                     size_t len) noexcept {
+  return get_default_implementation()->validate_utf8_with_errors(buf, len);
+}
+#endif // SIMDUTF_FEATURE_UTF8
+
+#if SIMDUTF_FEATURE_ASCII
+simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept {
+  return get_default_implementation()->validate_ascii(buf, len);
+}
+simdutf_warn_unused result validate_ascii_with_errors(const char *buf,
+                                                      size_t len) noexcept {
+  return get_default_implementation()->validate_ascii_with_errors(buf, len);
+}
+#endif // SIMDUTF_FEATURE_ASCII
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+// Endianness-neutral entry point: selects the "be" or "le" variant at
+// compile time from SIMDUTF_IS_BIG_ENDIAN.
+simdutf_warn_unused size_t convert_utf8_to_utf16(
+    const char *input, size_t length, char16_t *utf16_output) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf8_to_utf16be(input, length, utf16_output);
+  #else
+  return convert_utf8_to_utf16le(input, length, utf16_output);
+  #endif
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+// The free functions below come in three shapes:
+//  - most forward their arguments to the matching member of the object
+//    returned by get_default_implementation();
+//  - the endianness-neutral "utf16" variants pick the "be" or "le" sibling at
+//    compile time from SIMDUTF_IS_BIG_ENDIAN;
+//  - the latin1 <-> utf16/utf32 length helpers return their argument
+//    unchanged.
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+simdutf_warn_unused size_t convert_latin1_to_utf8(const char *buf, size_t len,
+                                                  char *utf8_output) noexcept {
+  return get_default_implementation()->convert_latin1_to_utf8(buf, len,
+                                                              utf8_output);
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+simdutf_warn_unused size_t convert_latin1_to_utf16le(
+    const char *buf, size_t len, char16_t *utf16_output) noexcept {
+  return get_default_implementation()->convert_latin1_to_utf16le(buf, len,
+                                                                 utf16_output);
+}
+simdutf_warn_unused size_t convert_latin1_to_utf16be(
+    const char *buf, size_t len, char16_t *utf16_output) noexcept {
+  return get_default_implementation()->convert_latin1_to_utf16be(buf, len,
+                                                                 utf16_output);
+}
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+// NOTE(review): despite its name, `latin1_output` is the char32_t destination
+// buffer here; the name looks copied from the opposite conversion.
+simdutf_warn_unused size_t convert_latin1_to_utf32(
+    const char *buf, size_t len, char32_t *latin1_output) noexcept {
+  return get_default_implementation()->convert_latin1_to_utf32(buf, len,
+                                                               latin1_output);
+}
+simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept {
+  return length;
+}
+simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept {
+  return length;
+}
+#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+simdutf_warn_unused size_t convert_utf8_to_latin1(
+    const char *buf, size_t len, char *latin1_output) noexcept {
+  return get_default_implementation()->convert_utf8_to_latin1(buf, len,
+                                                              latin1_output);
+}
+simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
+    const char *buf, size_t len, char *latin1_output) noexcept {
+  return get_default_implementation()->convert_utf8_to_latin1_with_errors(
+      buf, len, latin1_output);
+}
+simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
+    const char *buf, size_t len, char *latin1_output) noexcept {
+  return get_default_implementation()->convert_valid_utf8_to_latin1(
+      buf, len, latin1_output);
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+simdutf_warn_unused size_t convert_utf8_to_utf16le(
+    const char *input, size_t length, char16_t *utf16_output) noexcept {
+  return get_default_implementation()->convert_utf8_to_utf16le(input, length,
+                                                               utf16_output);
+}
+simdutf_warn_unused size_t convert_utf8_to_utf16be(
+    const char *input, size_t length, char16_t *utf16_output) noexcept {
+  return get_default_implementation()->convert_utf8_to_utf16be(input, length,
+                                                               utf16_output);
+}
+simdutf_warn_unused result convert_utf8_to_utf16_with_errors(
+    const char *input, size_t length, char16_t *utf16_output) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf8_to_utf16be_with_errors(input, length, utf16_output);
+  #else
+  return convert_utf8_to_utf16le_with_errors(input, length, utf16_output);
+  #endif
+}
+simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
+    const char *input, size_t length, char16_t *utf16_output) noexcept {
+  return get_default_implementation()->convert_utf8_to_utf16le_with_errors(
+      input, length, utf16_output);
+}
+simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
+    const char *input, size_t length, char16_t *utf16_output) noexcept {
+  return get_default_implementation()->convert_utf8_to_utf16be_with_errors(
+      input, length, utf16_output);
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+simdutf_warn_unused size_t convert_utf8_to_utf32(
+    const char *input, size_t length, char32_t *utf32_output) noexcept {
+  return get_default_implementation()->convert_utf8_to_utf32(input, length,
+                                                             utf32_output);
+}
+simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
+    const char *input, size_t length, char32_t *utf32_output) noexcept {
+  return get_default_implementation()->convert_utf8_to_utf32_with_errors(
+      input, length, utf32_output);
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF16
+simdutf_warn_unused bool validate_utf16(const char16_t *buf,
+                                        size_t len) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return validate_utf16be(buf, len);
+  #else
+  return validate_utf16le(buf, len);
+  #endif
+}
+#endif // SIMDUTF_FEATURE_UTF16
+
+// validate_utf16le is also compiled when only encoding detection is enabled,
+// hence its separate guard.
+#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
+simdutf_warn_unused bool validate_utf16le(const char16_t *buf,
+                                          size_t len) noexcept {
+  return get_default_implementation()->validate_utf16le(buf, len);
+}
+#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
+
+#if SIMDUTF_FEATURE_UTF16
+simdutf_warn_unused bool validate_utf16be(const char16_t *buf,
+                                          size_t len) noexcept {
+  return get_default_implementation()->validate_utf16be(buf, len);
+}
+simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf,
+                                                      size_t len) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return validate_utf16be_with_errors(buf, len);
+  #else
+  return validate_utf16le_with_errors(buf, len);
+  #endif
+}
+simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf,
+                                                        size_t len) noexcept {
+  return get_default_implementation()->validate_utf16le_with_errors(buf, len);
+}
+simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf,
+                                                        size_t len) noexcept {
+  return get_default_implementation()->validate_utf16be_with_errors(buf, len);
+}
+#endif // SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF32
+simdutf_warn_unused bool validate_utf32(const char32_t *buf,
+                                        size_t len) noexcept {
+  return get_default_implementation()->validate_utf32(buf, len);
+}
+simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf,
+                                                      size_t len) noexcept {
+  return get_default_implementation()->validate_utf32_with_errors(buf, len);
+}
+#endif // SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+simdutf_warn_unused size_t convert_valid_utf8_to_utf16(
+    const char *input, size_t length, char16_t *utf16_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_valid_utf8_to_utf16be(input, length, utf16_buffer);
+  #else
+  return convert_valid_utf8_to_utf16le(input, length, utf16_buffer);
+  #endif
+}
+simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
+    const char *input, size_t length, char16_t *utf16_buffer) noexcept {
+  return get_default_implementation()->convert_valid_utf8_to_utf16le(
+      input, length, utf16_buffer);
+}
+simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
+    const char *input, size_t length, char16_t *utf16_buffer) noexcept {
+  return get_default_implementation()->convert_valid_utf8_to_utf16be(
+      input, length, utf16_buffer);
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
+    const char *input, size_t length, char32_t *utf32_buffer) noexcept {
+  return get_default_implementation()->convert_valid_utf8_to_utf32(
+      input, length, utf32_buffer);
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *buf,
+                                                 size_t len,
+                                                 char *utf8_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf16be_to_utf8(buf, len, utf8_buffer);
+  #else
+  return convert_utf16le_to_utf8(buf, len, utf8_buffer);
+  #endif
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+simdutf_warn_unused size_t convert_utf16_to_latin1(
+    const char16_t *buf, size_t len, char *latin1_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf16be_to_latin1(buf, len, latin1_buffer);
+  #else
+  return convert_utf16le_to_latin1(buf, len, latin1_buffer);
+  #endif
+}
+simdutf_warn_unused size_t convert_latin1_to_utf16(
+    const char *buf, size_t len, char16_t *utf16_output) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_latin1_to_utf16be(buf, len, utf16_output);
+  #else
+  return convert_latin1_to_utf16le(buf, len, utf16_output);
+  #endif
+}
+simdutf_warn_unused size_t convert_utf16be_to_latin1(
+    const char16_t *buf, size_t len, char *latin1_buffer) noexcept {
+  return get_default_implementation()->convert_utf16be_to_latin1(buf, len,
+                                                                 latin1_buffer);
+}
+simdutf_warn_unused size_t convert_utf16le_to_latin1(
+    const char16_t *buf, size_t len, char *latin1_buffer) noexcept {
+  return get_default_implementation()->convert_utf16le_to_latin1(buf, len,
+                                                                 latin1_buffer);
+}
+simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(
+    const char16_t *buf, size_t len, char *latin1_buffer) noexcept {
+  return get_default_implementation()->convert_valid_utf16be_to_latin1(
+      buf, len, latin1_buffer);
+}
+simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(
+    const char16_t *buf, size_t len, char *latin1_buffer) noexcept {
+  return get_default_implementation()->convert_valid_utf16le_to_latin1(
+      buf, len, latin1_buffer);
+}
+simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
+    const char16_t *buf, size_t len, char *latin1_buffer) noexcept {
+  return get_default_implementation()->convert_utf16le_to_latin1_with_errors(
+      buf, len, latin1_buffer);
+}
+simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
+    const char16_t *buf, size_t len, char *latin1_buffer) noexcept {
+  return get_default_implementation()->convert_utf16be_to_latin1_with_errors(
+      buf, len, latin1_buffer);
+}
+simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept {
+  return length;
+}
+simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept {
+  return length;
+}
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *buf,
+                                                   size_t len,
+                                                   char *utf8_buffer) noexcept {
+  return get_default_implementation()->convert_utf16le_to_utf8(buf, len,
+                                                               utf8_buffer);
+}
+simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *buf,
+                                                   size_t len,
+                                                   char *utf8_buffer) noexcept {
+  return get_default_implementation()->convert_utf16be_to_utf8(buf, len,
+                                                               utf8_buffer);
+}
+simdutf_warn_unused result convert_utf16_to_utf8_with_errors(
+    const char16_t *buf, size_t len, char *utf8_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer);
+  #else
+  return convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer);
+  #endif
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+simdutf_warn_unused result convert_utf16_to_latin1_with_errors(
+    const char16_t *buf, size_t len, char *latin1_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer);
+  #else
+  return convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer);
+  #endif
+}
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
+    const char16_t *buf, size_t len, char *utf8_buffer) noexcept {
+  return get_default_implementation()->convert_utf16le_to_utf8_with_errors(
+      buf, len, utf8_buffer);
+}
+simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
+    const char16_t *buf, size_t len, char *utf8_buffer) noexcept {
+  return get_default_implementation()->convert_utf16be_to_utf8_with_errors(
+      buf, len, utf8_buffer);
+}
+simdutf_warn_unused size_t convert_valid_utf16_to_utf8(
+    const char16_t *buf, size_t len, char *utf8_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer);
+  #else
+  return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer);
+  #endif
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+simdutf_warn_unused size_t convert_valid_utf16_to_latin1(
+    const char16_t *buf, size_t len, char *latin1_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_valid_utf16be_to_latin1(buf, len, latin1_buffer);
+  #else
+  return convert_valid_utf16le_to_latin1(buf, len, latin1_buffer);
+  #endif
+}
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
+    const char16_t *buf, size_t len, char *utf8_buffer) noexcept {
+  return get_default_implementation()->convert_valid_utf16le_to_utf8(
+      buf, len, utf8_buffer);
+}
+simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
+    const char16_t *buf, size_t len, char *utf8_buffer) noexcept {
+  return get_default_implementation()->convert_valid_utf16be_to_utf8(
+      buf, len, utf8_buffer);
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *buf,
+                                                 size_t len,
+                                                 char *utf8_buffer) noexcept {
+  return get_default_implementation()->convert_utf32_to_utf8(buf, len,
+                                                             utf8_buffer);
+}
+simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
+    const char32_t *buf, size_t len, char *utf8_buffer) noexcept {
+  return get_default_implementation()->convert_utf32_to_utf8_with_errors(
+      buf, len, utf8_buffer);
+}
+simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
+    const char32_t *buf, size_t len, char *utf8_buffer) noexcept {
+  return get_default_implementation()->convert_valid_utf32_to_utf8(buf, len,
+                                                                   utf8_buffer);
+}
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+simdutf_warn_unused size_t convert_utf32_to_utf16(
+    const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf32_to_utf16be(buf, len, utf16_buffer);
+  #else
+  return convert_utf32_to_utf16le(buf, len, utf16_buffer);
+  #endif
+}
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+
+#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+simdutf_warn_unused size_t convert_utf32_to_latin1(
+    const char32_t *input, size_t length, char *latin1_output) noexcept {
+  return get_default_implementation()->convert_utf32_to_latin1(input, length,
+                                                               latin1_output);
+}
+#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+simdutf_warn_unused size_t convert_utf32_to_utf16le(
+    const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept {
+  return get_default_implementation()->convert_utf32_to_utf16le(buf, len,
+                                                                utf16_buffer);
+}
+simdutf_warn_unused size_t convert_utf32_to_utf16be(
+    const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept {
+  return get_default_implementation()->convert_utf32_to_utf16be(buf, len,
+                                                                utf16_buffer);
+}
+simdutf_warn_unused result convert_utf32_to_utf16_with_errors(
+    const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer);
+  #else
+  return convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer);
+  #endif
+}
+simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
+    const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept {
+  return get_default_implementation()->convert_utf32_to_utf16le_with_errors(
+      buf, len, utf16_buffer);
+}
+simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
+    const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept {
+  return get_default_implementation()->convert_utf32_to_utf16be_with_errors(
+      buf, len, utf16_buffer);
+}
+simdutf_warn_unused size_t convert_valid_utf32_to_utf16(
+    const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept
{ + #if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf32_to_utf16be(buf, len, utf16_buffer); + #else + return convert_valid_utf32_to_utf16le(buf, len, utf16_buffer); + #endif +} +simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf32_to_utf16le( + buf, len, utf16_buffer); +} +simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf32_to_utf16be( + buf, len, utf16_buffer); +} +simdutf_warn_unused size_t convert_utf16_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf32(buf, len, utf32_buffer); + #else + return convert_utf16le_to_utf32(buf, len, utf32_buffer); + #endif +} +simdutf_warn_unused size_t convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf32(buf, len, + utf32_buffer); +} +simdutf_warn_unused size_t convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf32(buf, len, + utf32_buffer); +} +simdutf_warn_unused result convert_utf16_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer); + #else + return convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer); + #endif +} +simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf32_with_errors( + buf, len, utf32_buffer); +} +simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( 
+ const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf32_with_errors( + buf, len, utf32_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf16be_to_utf32(buf, len, utf32_buffer); + #else + return convert_valid_utf16le_to_utf32(buf, len, utf32_buffer); + #endif +} +simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16le_to_utf32( + buf, len, utf32_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16be_to_utf32( + buf, len, utf32_buffer); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void change_endianness_utf16(const char16_t *input, size_t length, + char16_t *output) noexcept { + get_default_implementation()->change_endianness_utf16(input, length, output); +} +simdutf_warn_unused size_t count_utf16(const char16_t *input, + size_t length) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return count_utf16be(input, length); + #else + return count_utf16le(input, length); + #endif +} +simdutf_warn_unused size_t count_utf16le(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->count_utf16le(input, length); +} +simdutf_warn_unused size_t count_utf16be(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->count_utf16be(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t count_utf8(const char *input, + size_t length) noexcept { + return get_default_implementation()->count_utf8(input, length); 
+} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t latin1_length_from_utf8(const char *buf, + size_t len) noexcept { + return get_default_implementation()->latin1_length_from_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t utf8_length_from_latin1(const char *buf, + size_t len) noexcept { + return get_default_implementation()->utf8_length_from_latin1(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, + size_t length) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return utf8_length_from_utf16be(input, length); + #else + return utf8_length_from_utf16le(input, length); + #endif +} +simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf8_length_from_utf16le(input, length); +} +simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf8_length_from_utf16be(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, + size_t length) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return utf32_length_from_utf16be(input, length); + #else + return utf32_length_from_utf16le(input, length); + #endif +} +simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf32_length_from_utf16le(input, length); +} +simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, + size_t length) noexcept { + return 
get_default_implementation()->utf32_length_from_utf16be(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, + size_t length) noexcept { + return get_default_implementation()->utf16_length_from_utf8(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, + size_t length) noexcept { + return get_default_implementation()->utf8_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, + size_t length) noexcept { + return get_default_implementation()->utf16_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, + size_t length) noexcept { + return get_default_implementation()->utf32_length_from_utf8(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused size_t +maximal_binary_length_from_base64(const char *input, size_t length) noexcept { + return get_default_implementation()->maximal_binary_length_from_base64( + input, length); +} + +simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return get_default_implementation()->base64_to_binary( + input, length, output, options, last_chunk_handling_options); +} + +simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *input, size_t length) noexcept { + return 
get_default_implementation()->maximal_binary_length_from_base64( + input, length); +} + +simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return get_default_implementation()->base64_to_binary( + input, length, output, options, last_chunk_handling_options); +} + +template +simdutf_warn_unused result base64_to_binary_safe_impl( + const chartype *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + static_assert(std::is_same::value || + std::is_same::value, + "Only char and char16_t are supported."); + // The implementation could be nicer, but we expect that most times, the user + // will provide us with a buffer that is large enough. + size_t max_length = maximal_binary_length_from_base64(input, length); + if (outlen >= max_length) { + // fast path + full_result r = get_default_implementation()->base64_to_binary_details( + input, length, output, options, last_chunk_handling_options); + if (r.error != error_code::INVALID_BASE64_CHARACTER && + r.error != error_code::BASE64_EXTRA_BITS) { + outlen = r.output_count; + if (last_chunk_handling_options == stop_before_partial) { + if ((r.output_count % 3) != 0) { + bool empty_trail = true; + for (size_t i = r.input_count; i < length; i++) { + if (!scalar::base64::is_ascii_white_space_or_padding(input[i])) { + empty_trail = false; + break; + } + } + if (empty_trail) { + r.input_count = length; + } + } + return {r.error, r.input_count}; + } + return {r.error, length}; + } + return r; + } + // The output buffer is maybe too small. We will decode a truncated version of + // the input. 
+ size_t outlen3 = outlen / 3 * 3; // round down to multiple of 3 + size_t safe_input = base64_length_from_binary(outlen3, options); + full_result r = get_default_implementation()->base64_to_binary_details( + input, safe_input, output, options, loose); + if (r.error == error_code::INVALID_BASE64_CHARACTER) { + return r; + } + size_t offset = + (r.error == error_code::BASE64_INPUT_REMAINDER) + ? 1 + : ((r.output_count % 3) == 0 ? 0 : (r.output_count % 3) + 1); + size_t output_index = r.output_count - (r.output_count % 3); + size_t input_index = safe_input; + // offset is a value that is no larger than 3. We backtrack + // by up to offset characters + an undetermined number of + // white space characters. It is expected that the next loop + // runs at most 3 times + the number of white space characters + // in between them, so we are not worried about performance. + while (offset > 0 && input_index > 0) { + chartype c = input[--input_index]; + if (scalar::base64::is_ascii_white_space(c)) { + // skipping + } else { + offset--; + } + } + size_t remaining_out = outlen - output_index; + const chartype *tail_input = input + input_index; + size_t tail_length = length - input_index; + while (tail_length > 0 && + scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { + tail_length--; + } + size_t padding_characts = 0; + if (tail_length > 0 && tail_input[tail_length - 1] == '=') { + tail_length--; + padding_characts++; + while (tail_length > 0 && + scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { + tail_length--; + } + if (tail_length > 0 && tail_input[tail_length - 1] == '=') { + tail_length--; + padding_characts++; + } + } + // this will advance tail_input and tail_length + result rr = scalar::base64::base64_tail_decode_safe( + output + output_index, remaining_out, tail_input, tail_length, + padding_characts, options, last_chunk_handling_options); + outlen = output_index + remaining_out; + if (last_chunk_handling_options != 
stop_before_partial && + rr.error == error_code::SUCCESS && padding_characts > 0) { + // additional checks + if ((outlen % 3 == 0) || ((outlen % 3) + 1 + padding_characts != 4)) { + rr.error = error_code::INVALID_BASE64_CHARACTER; + } + } + if (rr.error == error_code::SUCCESS && + last_chunk_handling_options == stop_before_partial) { + if (tail_input > input + input_index) { + rr.count = tail_input - input; + } else if (r.input_count > 0) { + rr.count = r.input_count + rr.count; + } + return rr; + } + rr.count += input_index; + return rr; +} + + #if SIMDUTF_ATOMIC_REF +size_t atomic_binary_to_base64(const char *input, size_t length, char *output, + base64_options options) noexcept { + static_assert(std::atomic_ref::required_alignment == 1); + size_t retval = 0; + // Arbitrary block sizes: 3KB for input, 4KB for output. Total is 7KB. + constexpr size_t input_block_size = 1024 * 3; + constexpr size_t output_block_size = input_block_size * 4 / 3; + std::array inbuf; + std::array outbuf; + + // std::atomic_ref must not have a const T, see + // https://cplusplus.github.io/LWG/issue3508 + // we instead provide a mutable input, which is ok since we are only reading + // from it. + char *mutable_input = const_cast(input); + + for (size_t i = 0; i < length; i += input_block_size) { + const size_t current_block_size = std::min(input_block_size, length - i); + // This copy is inefficient. + // Under x64, we could use 16-byte aligned loads. + // Note that we warn users that the performance might be poor. + for (size_t j = 0; j < current_block_size; ++j) { + inbuf[j] = std::atomic_ref(mutable_input[i + j]) + .load(std::memory_order_relaxed); + } + const size_t written = binary_to_base64(inbuf.data(), current_block_size, + outbuf.data(), options); + // This copy is inefficient. + // Under x64, we could use 16-byte aligned stores. 
+ for (size_t j = 0; j < written; ++j) { + std::atomic_ref(output[retval + j]) + .store(outbuf[j], std::memory_order_relaxed); + } + retval += written; + } + return retval; +} + #endif // SIMDUTF_ATOMIC_REF + +#endif // SIMDUTF_FEATURE_BASE64 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t convert_latin1_to_utf8_safe( + const char *buf, size_t len, char *utf8_output, size_t utf8_len) noexcept { + const auto start{utf8_output}; + + while (true) { + // convert_latin1_to_utf8 will never write more than input length * 2 + auto read_len = std::min(len, utf8_len >> 1); + if (read_len <= 16) { + break; + } + + const auto write_len = + simdutf::convert_latin1_to_utf8(buf, read_len, utf8_output); + + utf8_output += write_len; + utf8_len -= write_len; + buf += read_len; + len -= read_len; + } + + utf8_output += + scalar::latin1_to_utf8::convert_safe(buf, len, utf8_output, utf8_len); + + return utf8_output - start; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused result base64_to_binary_safe( + const char *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return base64_to_binary_safe_impl(input, length, output, outlen, + options, last_chunk_handling_options); +} +simdutf_warn_unused result base64_to_binary_safe( + const char16_t *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return base64_to_binary_safe_impl( + input, length, output, outlen, options, last_chunk_handling_options); +} + +simdutf_warn_unused size_t +base64_length_from_binary(size_t length, base64_options options) noexcept { + return get_default_implementation()->base64_length_from_binary(length, + options); +} + +size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) 
noexcept { + return get_default_implementation()->binary_to_base64(input, length, output, + options); +} +#endif // SIMDUTF_FEATURE_BASE64 + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused simdutf::encoding_type +autodetect_encoding(const char *buf, size_t length) noexcept { + return get_default_implementation()->autodetect_encoding(buf, length); +} + +simdutf_warn_unused int detect_encodings(const char *buf, + size_t length) noexcept { + return get_default_implementation()->detect_encodings(buf, length); +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +const implementation *builtin_implementation() { + static const implementation *builtin_impl = + get_available_implementations()[SIMDUTF_STRINGIFY( + SIMDUTF_BUILTIN_IMPLEMENTATION)]; + return builtin_impl; +} + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length) { + return scalar::utf8::trim_partial_utf8(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, + size_t length) { + return scalar::utf16::trim_partial_utf16(input, length); +} + +simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, + size_t length) { + return scalar::utf16::trim_partial_utf16(input, length); +} + +simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, + size_t length) { + #if SIMDUTF_IS_BIG_ENDIAN + return trim_partial_utf16be(input, length); + #else + return trim_partial_utf16le(input, length); + #endif +} +#endif // SIMDUTF_FEATURE_UTF16 + +} // namespace simdutf +/* end file src/implementation.cpp */ SIMDUTF_PUSH_DISABLE_WARNINGS SIMDUTF_DISABLE_UNDESIRED_WARNINGS @@ -16440,26 +18572,16 @@ namespace { #endif using namespace simd; +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 simdutf_really_inline bool is_ascii(const simd8x64 &input) { simd8 bits = input.reduce_or(); return bits.max_val() < 0b10000000u; 
} +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 -simdutf_unused simdutf_really_inline simd8 -must_be_continuation(const simd8 prev1, const simd8 prev2, - const simd8 prev3) { - simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller - // is using ^ as well. This will work fine because we only have to report - // errors for cases with 0-1 lead bytes. Multiple lead bytes implies 2 - // overlapping multibyte characters, and if that happens, there is guaranteed - // to be at least *one* lead byte that is part of only 1 other multibyte - // character. The error will be detected there. - return is_second_byte ^ is_third_byte ^ is_fourth_byte; -} - +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { @@ -16467,17 +18589,19 @@ must_be_2_3_continuation(const simd8 prev2, simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); return is_third_byte ^ is_fourth_byte; } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) // common functions for utf8 conversions simdutf_really_inline uint16x4_t convert_utf8_3_byte_to_utf16(uint8x16_t in) { // Low half contains 10cccccc|1110aaaa // High half contains 10bbbbbb|10bbbbbb -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO const uint8x16_t sh = simdutf_make_uint8x16_t(0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10); -#else + #else const uint8x16_t sh = {0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10}; -#endif + #endif uint8x16_t perm = vqtbl1q_u8(in, sh); // Split into half vectors. 
// 10cccccc|1110aaaa @@ -16544,7 +18668,10 @@ convert_utf8_1_to_2_byte_to_utf16(uint8x16_t in, size_t shufutf8_idx) { uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2); return composed; } +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32) +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/arm64/arm_validate_utf16.cpp */ template const char16_t *arm_validate_utf16(const char16_t *input, size_t size) { @@ -16690,6 +18817,8 @@ const result arm_validate_utf16_with_errors(const char16_t *input, return result(error_code::SUCCESS, input - start); } /* end file src/arm64/arm_validate_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/arm64/arm_validate_utf32le.cpp */ const char32_t *arm_validate_utf32le(const char32_t *input, size_t size) { @@ -16757,7 +18886,9 @@ const result arm_validate_utf32le_with_errors(const char32_t *input, return result(error_code::SUCCESS, input - start); } /* end file src/arm64/arm_validate_utf32le.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/arm64/arm_convert_latin1_to_utf16.cpp */ template std::pair @@ -16784,6 +18915,8 @@ arm_convert_latin1_to_utf16(const char *buf, size_t len, return std::make_pair(buf, utf16_output); } /* end file src/arm64/arm_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/arm64/arm_convert_latin1_to_utf32.cpp */ std::pair arm_convert_latin1_to_utf32(const char *buf, size_t len, @@ -16810,6 +18943,8 @@ arm_convert_latin1_to_utf32(const char *buf, size_t len, return std::make_pair(buf, utf32_output); } /* end file src/arm64/arm_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/arm64/arm_convert_latin1_to_utf8.cpp */ /* Returns a pair: the first unprocessed byte from buf and utf8_output @@ -16882,7 +19017,9 @@ arm_convert_latin1_to_utf8(const char *latin1_input, size_t len, return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); } /* end file src/arm64/arm_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/arm64/arm_convert_utf8_to_latin1.cpp */ // Convert up to 16 bytes from utf8 to utf16 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask @@ -16954,6 +19091,8 @@ size_t convert_masked_utf8_to_latin1(const char *input, return consumed; } /* end file src/arm64/arm_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file src/arm64/arm_convert_utf8_to_utf16.cpp */ // Convert up to 16 bytes from utf8 to utf16 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask @@ -17269,6 +19408,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, } } /* end file src/arm64/arm_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/arm64/arm_convert_utf8_to_utf32.cpp */ // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the // end of the code points. 
Only the least significant 12 bits of the mask @@ -17450,7 +19591,9 @@ size_t convert_masked_utf8_to_utf32(const char *input, } } /* end file src/arm64/arm_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/arm64/arm_convert_utf16_to_latin1.cpp */ template @@ -17500,9 +19643,8 @@ arm_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, } else { // Let us do a scalar fallback. for (int k = 0; k < 8; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if (word <= 0xff) { *latin1_output++ = char(word); } else { @@ -17516,6 +19658,8 @@ arm_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, latin1_output); } /* end file src/arm64/arm_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/arm64/arm_convert_utf16_to_utf32.cpp */ /* The vectorized algorithm works on single SSE register i.e., it @@ -17607,16 +19751,15 @@ arm_convert_utf16_to_utf32(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? 
scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -17680,16 +19823,15 @@ arm_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -17709,6 +19851,8 @@ arm_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, reinterpret_cast(utf32_output)); } /* end file src/arm64/arm_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 /* begin file src/arm64/arm_convert_utf16_to_utf8.cpp */ /* The vectorized algorithm works on single SSE register i.e., it @@ -17986,9 +20130,8 @@ arm_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -18002,7 +20145,7 @@ arm_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? 
scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -18258,9 +20401,8 @@ arm_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -18274,7 +20416,7 @@ arm_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -18298,7 +20440,9 @@ arm_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, reinterpret_cast(utf8_output)); } /* end file src/arm64/arm_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_BASE64 /* begin file src/arm64/arm_base64.cpp */ /** * References and further reading: @@ -18627,6 +20771,81 @@ void base64_decode_block(char *out, const char *src) { vst3q_u8((uint8_t *)out, outvec); } +static size_t compress_block_single(block64 *b, uint64_t mask, char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + + // Predefine the index vector +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t v1 = simdutf_make_uint8x16_t(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15); +#else // SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t v1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; +#endif // SIMDUTF_REGULAR_VISUAL_STUDIO + + switch (pos64 >> 4) { + case 0b00: { + const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); + const uint8x16_t v2 = + 
vcgtq_s8(vreinterpretq_s8_u8(v1), + vreinterpretq_s8_u8(v0)); // Compare greater than + const uint8x16_t sh = vsubq_u8(v1, v2); // Subtract + const uint8x16_t compressed = + vqtbl1q_u8(b->chunks[0], sh); // Table lookup (shuffle) + + vst1q_u8((uint8_t *)(output + 0 * 16), compressed); + vst1q_u8((uint8_t *)(output + 1 * 16 - 1), b->chunks[1]); + vst1q_u8((uint8_t *)(output + 2 * 16 - 1), b->chunks[2]); + vst1q_u8((uint8_t *)(output + 3 * 16 - 1), b->chunks[3]); + } break; + + case 0b01: { + vst1q_u8((uint8_t *)(output + 0 * 16), b->chunks[0]); + + const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); + const uint8x16_t v2 = + vcgtq_s8(vreinterpretq_s8_u8(v1), vreinterpretq_s8_u8(v0)); + const uint8x16_t sh = vsubq_u8(v1, v2); + const uint8x16_t compressed = vqtbl1q_u8(b->chunks[1], sh); + + vst1q_u8((uint8_t *)(output + 1 * 16), compressed); + vst1q_u8((uint8_t *)(output + 2 * 16 - 1), b->chunks[2]); + vst1q_u8((uint8_t *)(output + 3 * 16 - 1), b->chunks[3]); + } break; + + case 0b10: { + vst1q_u8((uint8_t *)(output + 0 * 16), b->chunks[0]); + vst1q_u8((uint8_t *)(output + 1 * 16), b->chunks[1]); + + const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); + const uint8x16_t v2 = + vcgtq_s8(vreinterpretq_s8_u8(v1), vreinterpretq_s8_u8(v0)); + const uint8x16_t sh = vsubq_u8(v1, v2); + const uint8x16_t compressed = vqtbl1q_u8(b->chunks[2], sh); + + vst1q_u8((uint8_t *)(output + 2 * 16), compressed); + vst1q_u8((uint8_t *)(output + 3 * 16 - 1), b->chunks[3]); + } break; + + case 0b11: { + vst1q_u8((uint8_t *)(output + 0 * 16), b->chunks[0]); + vst1q_u8((uint8_t *)(output + 1 * 16), b->chunks[1]); + vst1q_u8((uint8_t *)(output + 2 * 16), b->chunks[2]); + + const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); + const uint8x16_t v2 = + vcgtq_s8(vreinterpretq_s8_u8(v1), vreinterpretq_s8_u8(v0)); + const uint8x16_t sh = vsubq_u8(v1, v2); + const uint8x16_t compressed = vqtbl1q_u8(b->chunks[3], sh); + + vst1q_u8((uint8_t *)(output + 3 * 16), compressed); + } break; + } + 
return 63; +} + +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + template full_result compress_decode_base64(char *dst, const char_type *src, size_t srclen, @@ -18703,7 +20922,11 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, // optimization opportunity: check for simple masks like those made of // continuous 1s followed by continuous 0s. And masks containing a // single bad character. - bufferptr += compress_block(&b, badcharmask, bufferptr); + if (is_power_of_two(badcharmask)) { + bufferptr += compress_block_single(&b, badcharmask, bufferptr); + } else { + bufferptr += compress_block(&b, badcharmask, bufferptr); + } } else { // optimization opportunity: if bufferptr == buffer and mask == 0, we // can avoid the call to compress_block and decode directly. @@ -18750,7 +20973,7 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - triple = scalar::utf32::swap_bytes(triple); + triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 4); dst += 3; @@ -18762,7 +20985,7 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - triple = scalar::utf32::swap_bytes(triple); + triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 3); dst += 3; @@ -18815,6 +21038,8 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, return {SUCCESS, srclen, size_t(dst - dstinit)}; } /* end file src/arm64/arm_base64.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/arm64/arm_convert_utf32_to_latin1.cpp */ std::pair arm_convert_utf32_to_latin1(const char32_t *buf, size_t len, @@ -18877,6 +21102,8 @@ arm_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, latin1_output); } /* end file 
src/arm64/arm_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 /* begin file src/arm64/arm_convert_utf32_to_utf16.cpp */ template std::pair @@ -19030,6 +21257,8 @@ arm_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, reinterpret_cast(utf16_output)); } /* end file src/arm64/arm_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF8 /* begin file src/arm64/arm_convert_utf32_to_utf8.cpp */ std::pair arm_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { @@ -19537,10 +21766,12 @@ arm_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, reinterpret_cast(utf8_output)); } /* end file src/arm64/arm_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF8 } // unnamed namespace } // namespace arm64 } // namespace simdutf + /* begin file src/generic/buf_block_reader.h */ namespace simdutf { namespace arm64 { @@ -19652,6 +21883,7 @@ simdutf_really_inline void buf_block_reader::advance() { } // namespace arm64 } // namespace simdutf /* end file src/generic/buf_block_reader.h */ +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ namespace simdutf { namespace arm64 { @@ -19958,9 +22190,22 @@ result generic_validate_utf8_with_errors(const char *input, size_t length) { reinterpret_cast(input), length); } -template -bool generic_validate_ascii(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +} // namespace utf8_validation +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf 
{ +namespace arm64 { +namespace { +namespace ascii_validation { + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); uint8_t blocks[64]{}; simd::simd8x64 running_or(blocks); while (reader.has_full_block()) { @@ -19975,14 +22220,8 @@ bool generic_validate_ascii(const uint8_t *input, size_t length) { return running_or.is_ascii(); } -bool generic_validate_ascii(const char *input, size_t length) { - return generic_validate_ascii( - reinterpret_cast(input), length); -} - -template -result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); size_t count{0}; while (reader.has_full_block()) { simd::simd8x64 in(reader.full_block()); @@ -20007,19 +22246,15 @@ result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { } } -result generic_validate_ascii_with_errors(const char *input, size_t length) { - return generic_validate_ascii_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation +} // namespace ascii_validation } // unnamed namespace } // namespace arm64 } // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -// transcoding from UTF-8 to UTF-16 +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ - namespace simdutf { namespace arm64 { namespace { @@ -20354,7 +22589,6 @@ struct validating_transcoder { } // namespace simdutf /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ - namespace simdutf { namespace arm64 { namespace { @@ -20430,9 +22664,10 @@ 
simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace arm64 } // namespace simdutf /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -// transcoding from UTF-8 to UTF-32 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + // transcoding from UTF-8 to UTF-32 /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ - namespace simdutf { namespace arm64 { namespace { @@ -20753,7 +22988,6 @@ struct validating_transcoder { } // namespace simdutf /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ - namespace simdutf { namespace arm64 { namespace { @@ -20797,7 +23031,9 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace arm64 } // namespace simdutf /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 // other functions +#if SIMDUTF_FEATURE_UTF16 /* begin file src/generic/utf16.h */ namespace simdutf { namespace arm64 { @@ -20847,6 +23083,89 @@ simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t size) { + size_t pos = 0; + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input = input.swap_bytes(); + } + + // 0xd800 .. 
0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. + + Thus, we always have correct count for the current char: + from 1, 2 or 3 bytes. + + A trickier part is how we count surrogate pairs. Whether + we encounter a surrogate (low or high), we count it as + 3 chars and then minus 1 (`is_surrogate` is -1 or 0). + Each surrogate char yields 2. A surrogate pair, that + is a low surrogate followed by a high one, yields + the expected 4 bytes. + + It also correctly handles cases when low surrogate is + processed by the this loop, but high surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach, thanks to that for valid UTF-16 + strings it always count correctly. 
+ */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + template simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, size_t size) { @@ -20873,8 +23192,9 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { } // namespace arm64 } // namespace simdutf /* end file src/generic/utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 /* begin file src/generic/utf8.h */ - namespace simdutf { namespace arm64 { namespace { @@ -20893,6 +23213,59 @@ simdutf_really_inline size_t count_code_points(const char *in, size_t size) { return count + scalar::utf8::count_code_points(in + pos, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= 
vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif + simdutf_really_inline size_t utf16_length_from_utf8(const char *in, size_t size) { size_t pos = 0; @@ -20914,9 +23287,10 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, } // namespace arm64 } // namespace simdutf /* end file src/generic/utf8.h */ -// transcoding from UTF-8 to Latin 1 +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + // transcoding from UTF-8 to Latin 1 /* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ - namespace simdutf { namespace arm64 { namespace { @@ -21235,7 +23609,6 @@ struct validating_transcoder { } // namespace simdutf /* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ /* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ - namespace simdutf { namespace arm64 { namespace { @@ -21315,8 +23688,7 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, } // namespace simdutf // namespace simdutf /* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ - -// placeholder scalars +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 // // Implementation-specific overrides @@ -21324,6 +23696,7 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, namespace simdutf { namespace arm64 { +#if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { @@ -21350,27 +23723,35 @@ implementation::detect_encodings(const char *input, } return out; } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || 
SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { return arm64::utf8_validation::generic_validate_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { return arm64::utf8_validation::generic_validate_utf8_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return arm64::utf8_validation::generic_validate_ascii(buf, len); + return arm64::ascii_validation::generic_validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return arm64::utf8_validation::generic_validate_ascii_with_errors(buf, len); + return arm64::ascii_validation::generic_validate_ascii_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept { @@ -21386,7 +23767,9 @@ implementation::validate_utf16le(const char16_t *buf, return false; } } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept { @@ -21431,7 +23814,9 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( return res; } } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -21445,7 +23830,9 @@ implementation::validate_utf32(const char32_t *buf, size_t len) 
const noexcept { return false; } } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -21460,7 +23847,9 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( return res; } } +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -21474,7 +23863,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = @@ -21502,7 +23893,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ -21515,7 +23908,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { utf8_to_latin1::validating_transcoder converter; @@ -21532,13 +23927,17 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { return 
arm64::utf8_to_latin1::convert_valid(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { utf8_to_utf16::validating_transcoder converter; return converter.convert(buf, len, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( const char *buf, size_t len, char16_t *utf16_output) const noexcept { utf8_to_utf16::validating_transcoder converter; @@ -21569,7 +23968,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( return utf8_to_utf16::convert_valid(input, size, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { utf8_to_utf32::validating_transcoder converter; @@ -21581,12 +23982,16 @@ simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( utf8_to_utf32::validating_transcoder converter; return converter.convert_with_errors(buf, len, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( const char *input, size_t size, char32_t *utf32_output) const noexcept { return utf8_to_utf32::convert_valid(input, size, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -21694,7 +24099,9 @@ 
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( // optimization opportunity: implement a custom function. return convert_utf16le_to_latin1(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -21800,7 +24207,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { return convert_utf16be_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -21847,7 +24256,9 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( utf8_output; // Set count to the number of 8-bit code units written return ret.first; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ -21943,7 +24354,9 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( utf32_output; // Set count to the number of 8-bit code units written return ret.first; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -22004,13 +24417,17 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( } return saved_bytes; 
} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { // optimization opportunity: implement a custom function. return convert_utf32_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = @@ -22118,7 +24535,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { return convert_utf16be_to_utf32(buf, len, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 void implementation::change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept { @@ -22134,27 +24553,23 @@ simdutf_warn_unused size_t implementation::count_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { return utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::latin1_length_from_utf8( const char *buf, size_t len) const noexcept { return count_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return scalar::utf16::latin1_length_from_utf16(length); -} - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t 
length) const noexcept { - return scalar::utf32::latin1_length_from_utf32(length); -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::utf8_length_from_latin1( const char *input, size_t length) const noexcept { // See @@ -22177,7 +24592,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_latin1( return result + (length / lanes) * lanes + scalar::latin1::utf8_length_from_latin1((const char *)simd_end, rem); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return utf16::utf8_length_from_utf16(input, length); @@ -22187,17 +24604,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::utf8_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); -} - -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); -} - +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); @@ -22207,12 +24616,16 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf16_length_from_utf8( const char *input, size_t length) 
const noexcept { return utf8::utf16_length_from_utf8(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const noexcept { const uint32x4_t v_7f = vmovq_n_u32((uint32_t)0x7f); @@ -22254,7 +24667,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf32( return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff); @@ -22275,17 +24690,16 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32( return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - +#if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -22330,11 +24744,6 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const 
noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -22379,16 +24788,12 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); -} - size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { return encode_base64(output, input, length, options); } +#endif // SIMDUTF_FEATURE_BASE64 } // namespace arm64 } // namespace simdutf @@ -22404,19 +24809,10 @@ size_t implementation::binary_to_base64(const char *input, size_t length, // #define SIMDUTF_IMPLEMENTATION fallback /* end file src/simdutf/fallback/begin.h */ - - - - - - - -#include -#include - namespace simdutf { namespace fallback { +#if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { @@ -22425,8 +24821,8 @@ implementation::detect_encodings(const char *input, if (bom_encoding != encoding_type::unspecified) { return bom_encoding; } - // todo: reimplement as a one-pass algorithm. int out = 0; + // todo: reimplement as a one-pass algorithm. 
if (validate_utf8(input, length)) { out |= encoding_type::UTF8; } @@ -22443,17 +24839,23 @@ implementation::detect_encodings(const char *input, } return out; } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { return scalar::utf8::validate(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { return scalar::utf8::validate_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { return scalar::ascii::validate(buf, len); @@ -22463,13 +24865,17 @@ simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { return scalar::ascii::validate_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept { return scalar::utf16::validate(buf, len); } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept { @@ -22485,22 +24891,30 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( const char16_t *buf, size_t len) const noexcept { return scalar::utf16::validate_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { return scalar::utf32::validate(buf, len); } +#endif // 
SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { return scalar::utf32::validate_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { return scalar::latin1_to_utf8::convert(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { return scalar::latin1_to_utf16::convert(buf, len, @@ -22512,12 +24926,16 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( return scalar::latin1_to_utf16::convert(buf, len, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { return scalar::latin1_to_utf32::convert(buf, len, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { return scalar::utf8_to_latin1::convert(buf, len, latin1_output); @@ -22532,7 +24950,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { return scalar::utf8_to_latin1::convert_valid(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused 
size_t implementation::convert_utf8_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { return scalar::utf8_to_utf16::convert(buf, len, @@ -22568,7 +24988,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( return scalar::utf8_to_utf16::convert_valid(buf, len, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { return scalar::utf8_to_utf32::convert(buf, len, utf32_output); @@ -22583,7 +25005,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( const char *input, size_t size, char32_t *utf32_output) const noexcept { return scalar::utf8_to_utf32::convert_valid(input, size, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { return scalar::utf16_to_latin1::convert(buf, len, @@ -22621,7 +25045,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( return scalar::utf16_to_latin1::convert_valid(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { return scalar::utf16_to_utf8::convert(buf, len, @@ -22656,7 +25082,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( return scalar::utf16_to_utf8::convert_valid(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t 
implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { return scalar::utf32_to_latin1::convert(buf, len, latin1_output); @@ -22671,7 +25099,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { return scalar::utf32_to_latin1::convert_valid(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { return scalar::utf32_to_utf8::convert(buf, len, utf8_output); @@ -22686,7 +25116,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { return scalar::utf32_to_utf16::convert(buf, len, @@ -22758,7 +25190,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( return scalar::utf16_to_utf32::convert_valid(buf, len, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 void implementation::change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept { @@ -22774,27 +25208,23 @@ simdutf_warn_unused size_t implementation::count_utf16be( const char16_t *input, size_t length) const noexcept { return scalar::utf16::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { 
return scalar::utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::latin1_length_from_utf8( const char *buf, size_t len) const noexcept { return scalar::utf8::count_code_points(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return scalar::utf16::latin1_length_from_utf16(length); -} - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t length) const noexcept { - return length; -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::utf8_length_from_latin1( const char *input, size_t length) const noexcept { size_t answer = length; @@ -22825,7 +25255,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_latin1( } return answer; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return scalar::utf16::utf8_length_from_utf16(input, @@ -22836,7 +25268,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return scalar::utf16::utf8_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return scalar::utf16::utf32_length_from_utf16(input, @@ -22847,42 +25281,37 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return scalar::utf16::utf32_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && 
SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf16_length_from_utf8( const char *input, size_t length) const noexcept { return scalar::utf8::utf16_length_from_utf8(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const noexcept { return scalar::utf32::utf8_length_from_utf32(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { return scalar::utf32::utf16_length_from_utf32(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf8( const char *input, size_t length) const noexcept { return scalar::utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - +#if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -22986,11 +25415,6 @@ simdutf_warn_unused 
full_result implementation::base64_to_binary_details( return r; } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -23094,16 +25518,13 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( return r; } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); -} - size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { return scalar::base64::tail_encode_base64(output, input, length, options); } +#endif // SIMDUTF_FEATURE_BASE64 + } // namespace fallback } // namespace simdutf @@ -23116,7 +25537,6 @@ size_t implementation::binary_to_base64(const char *input, size_t length, #include #include - /* begin file src/simdutf/icelake/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "icelake" // #define SIMDUTF_IMPLEMENTATION icelake @@ -23140,6 +25560,187 @@ namespace { #ifndef SIMDUTF_ICELAKE_H #error "icelake.h must be included" #endif +using namespace simd; + +/* begin file src/icelake/icelake_macros.inl.cpp */ + +/* + This upcoming macro (SIMDUTF_ICELAKE_TRANSCODE16) takes 16 + 4 bytes (of a + UTF-8 string) and loads all possible 4-byte substring into an AVX512 + register. + + For example if we have bytes abcdefgh... we create following 32-bit lanes + + [abcd|bcde|cdef|defg|efgh|...] 
+ ^ ^ + byte 0 of reg byte 63 of reg +*/ +/** pshufb + # lane{0,1,2} have got bytes: [ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, + 11, 12, 13, 14, 15] # lane3 has got bytes: [ 16, 17, 18, 19, 4, 5, + 6, 8, 9, 10, 11, 12, 13, 14, 15] + + expand_ver2 = [ + # lane 0: + 0, 1, 2, 3, + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + + # lane 1: + 4, 5, 6, 7, + 5, 6, 7, 8, + 6, 7, 8, 9, + 7, 8, 9, 10, + + # lane 2: + 8, 9, 10, 11, + 9, 10, 11, 12, + 10, 11, 12, 13, + 11, 12, 13, 14, + + # lane 3 order: 13, 14, 15, 16 14, 15, 16, 17, 15, 16, 17, 18, 16, + 17, 18, 19 12, 13, 14, 15, 13, 14, 15, 0, 14, 15, 0, 1, 15, 0, 1, 2, + ] +*/ + +#define SIMDUTF_ICELAKE_TRANSCODE16(LANE0, LANE1, MASKED) \ + { \ + const __m512i merged = _mm512_mask_mov_epi32(LANE0, 0x1000, LANE1); \ + const __m512i expand_ver2 = _mm512_setr_epi64( \ + 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, \ + 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, \ + 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); \ + const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); \ + \ + __mmask16 leading_bytes; \ + const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); \ + const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); \ + const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); \ + leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); \ + \ + __m512i char_class; \ + char_class = _mm512_srli_epi32(input, 4); \ + /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ \ + const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); \ + const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); \ + char_class = \ + _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); \ + \ + const int valid_count = static_cast(count_ones(leading_bytes)); \ + const __m512i utf32 = expanded_utf8_to_utf32(char_class, input); \ + \ + const __m512i out = _mm512_mask_compress_epi32(_mm512_setzero_si512(), \ + leading_bytes, utf32); \ + \ + if (UTF32) { \ + if (MASKED) { \ + const __mmask16 valid = uint16_t((1 << 
valid_count) - 1); \ + _mm512_mask_storeu_epi32((__m512i *)output, valid, out); \ + } else { \ + _mm512_storeu_si512((__m512i *)output, out); \ + } \ + output += valid_count; \ + } else { \ + if (MASKED) { \ + output += utf32_to_utf16_masked( \ + byteflip, out, valid_count, reinterpret_cast(output)); \ + } else { \ + output += utf32_to_utf16( \ + byteflip, out, valid_count, reinterpret_cast(output)); \ + } \ + } \ + } + +#define SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(INPUT, VALID_COUNT, MASKED) \ + { \ + if (UTF32) { \ + if (MASKED) { \ + const __mmask16 valid_mask = uint16_t((1 << VALID_COUNT) - 1); \ + _mm512_mask_storeu_epi32((__m512i *)output, valid_mask, INPUT); \ + } else { \ + _mm512_storeu_si512((__m512i *)output, INPUT); \ + } \ + output += VALID_COUNT; \ + } else { \ + if (MASKED) { \ + output += utf32_to_utf16_masked( \ + byteflip, INPUT, VALID_COUNT, \ + reinterpret_cast(output)); \ + } else { \ + output += \ + utf32_to_utf16(byteflip, INPUT, VALID_COUNT, \ + reinterpret_cast(output)); \ + } \ + } \ + } + +#define SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) \ + if (UTF32) { \ + const __m128i t0 = _mm512_castsi512_si128(utf8); \ + const __m128i t1 = _mm512_extracti32x4_epi32(utf8, 1); \ + const __m128i t2 = _mm512_extracti32x4_epi32(utf8, 2); \ + const __m128i t3 = _mm512_extracti32x4_epi32(utf8, 3); \ + _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ + _mm512_cvtepu8_epi32(t0)); \ + _mm512_storeu_si512((__m512i *)(output + 1 * 16), \ + _mm512_cvtepu8_epi32(t1)); \ + _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ + _mm512_cvtepu8_epi32(t2)); \ + _mm512_storeu_si512((__m512i *)(output + 3 * 16), \ + _mm512_cvtepu8_epi32(t3)); \ + } else { \ + const __m256i h0 = _mm512_castsi512_si256(utf8); \ + const __m256i h1 = _mm512_extracti64x4_epi64(utf8, 1); \ + if (big_endian) { \ + _mm512_storeu_si512( \ + (__m512i *)(output + 0 * 16), \ + _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h0), byteflip)); \ + _mm512_storeu_si512( \ + (__m512i *)(output + 2 * 
16), \ + _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h1), byteflip)); \ + } else { \ + _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ + _mm512_cvtepu8_epi16(h0)); \ + _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ + _mm512_cvtepu8_epi16(h1)); \ + } \ + } +/* end file src/icelake/icelake_macros.inl.cpp */ +/* begin file src/icelake/icelake_common.inl.cpp */ +// file included directly +/** + * Store the last N bytes of previous followed by 512-N bytes from input. + */ +template __m512i prev(__m512i input, __m512i previous) { + static_assert(N <= 32, "N must be no larger than 32"); + const __m512i movemask = + _mm512_setr_epi32(28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous); +#if SIMDUTF_GCC8 || SIMDUTF_GCC9 + constexpr int shift = 16 - N; // workaround for GCC8,9 + return _mm512_alignr_epi8(input, rotated, shift); +#else + return _mm512_alignr_epi8(input, rotated, 16 - N); +#endif // SIMDUTF_GCC8 || SIMDUTF_GCC9 +} + +template +__m512i shuffle_epi128(__m512i v) { + static_assert((idx0 >= 0 && idx0 <= 3), "idx0 must be in range 0..3"); + static_assert((idx1 >= 0 && idx1 <= 3), "idx1 must be in range 0..3"); + static_assert((idx2 >= 0 && idx2 <= 3), "idx2 must be in range 0..3"); + static_assert((idx3 >= 0 && idx3 <= 3), "idx3 must be in range 0..3"); + + constexpr unsigned shuffle = idx0 | (idx1 << 2) | (idx2 << 4) | (idx3 << 6); + return _mm512_shuffle_i32x4(v, v, shuffle); +} + +template constexpr __m512i broadcast_epi128(__m512i v) { + return shuffle_epi128(v); +} +/* end file src/icelake/icelake_common.inl.cpp */ +#if SIMDUTF_FEATURE_UTF8 /* begin file src/icelake/icelake_utf8_common.inl.cpp */ // Common procedures for both validating and non-validating conversions from // UTF-8. 
@@ -23655,11 +26256,10 @@ simdutf_really_inline size_t utf32_to_utf16_masked(const __m512i byteflip, t5 = _mm512_shuffle_epi8(t5, byteflip); } // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability - // (zen4) + // (AMD Zen4 has terrible performance with it, it is effectively broken) __m512i compressed = _mm512_maskz_compress_epi16(nonzero_masked, t5); _mm512_mask_storeu_epi16( - output, - (1 << (count + static_cast(count_ones(sp_mask)))) - 1, + output, _bzhi_u32(0xFFFFFFFF, count + _mm_popcnt_u32(sp_mask)), compressed); //_mm512_mask_compressstoreu_epi16(output, nonzero_masked, t5); } @@ -23751,48 +26351,6 @@ simdutf_really_inline size_t utf32_to_utf16(const __m512i byteflip, return count + static_cast(count_ones(sp_mask)); } -/** - * Store the last N bytes of previous followed by 512-N bytes from input. - */ -template __m512i prev(__m512i input, __m512i previous) { - static_assert(N <= 32, "N must be no larger than 32"); - const __m512i movemask = - _mm512_setr_epi32(28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); - const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous); -#if SIMDUTF_GCC8 || SIMDUTF_GCC9 - constexpr int shift = 16 - N; // workaround for GCC8,9 - return _mm512_alignr_epi8(input, rotated, shift); -#else - return _mm512_alignr_epi8(input, rotated, 16 - N); -#endif // SIMDUTF_GCC8 || SIMDUTF_GCC9 -} - -template -__m512i shuffle_epi128(__m512i v) { - static_assert((idx0 >= 0 && idx0 <= 3), "idx0 must be in range 0..3"); - static_assert((idx1 >= 0 && idx1 <= 3), "idx1 must be in range 0..3"); - static_assert((idx2 >= 0 && idx2 <= 3), "idx2 must be in range 0..3"); - static_assert((idx3 >= 0 && idx3 <= 3), "idx3 must be in range 0..3"); - - constexpr unsigned shuffle = idx0 | (idx1 << 2) | (idx2 << 4) | (idx3 << 6); - return _mm512_shuffle_i32x4(v, v, shuffle); -} - -template constexpr __m512i broadcast_epi128(__m512i v) { - return shuffle_epi128(v); -} - -/** - * Current unused. 
- */ -template __m512i rotate_by_N_epi8(const __m512i input) { - - // lanes order: 1, 2, 3, 0 => 0b00_11_10_01 - const __m512i permuted = _mm512_shuffle_i32x4(input, input, 0x39); - - return _mm512_alignr_epi8(permuted, input, N); -} - /* expanded_utf8_to_utf32 converts expanded UTF-8 characters (`utf8`) stored at separate 32-bit lanes. @@ -23938,151 +26496,131 @@ simdutf_really_inline __m512i expand_utf8_to_utf32(__m512i input) { return expanded_utf8_to_utf32(char_class, input); } /* end file src/icelake/icelake_utf8_common.inl.cpp */ -/* begin file src/icelake/icelake_macros.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 -/* - This upcoming macro (SIMDUTF_ICELAKE_TRANSCODE16) takes 16 + 4 bytes (of a - UTF-8 string) and loads all possible 4-byte substring into an AVX512 - register. +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/icelake/icelake_utf8_validation.inl.cpp */ +// file included directly - For example if we have bytes abcdefgh... we create following 32-bit lanes +simdutf_really_inline __m512i check_special_cases(__m512i input, + const __m512i prev1) { + __m512i mask1 = _mm512_setr_epi64(0x0202020202020202, 0x4915012180808080, + 0x0202020202020202, 0x4915012180808080, + 0x0202020202020202, 0x4915012180808080, + 0x0202020202020202, 0x4915012180808080); + const __m512i v_0f = _mm512_set1_epi8(0x0f); + __m512i index1 = _mm512_and_si512(_mm512_srli_epi16(prev1, 4), v_0f); - [abcd|bcde|cdef|defg|efgh|...] 
- ^ ^ - byte 0 of reg byte 63 of reg -*/ -/** pshufb - # lane{0,1,2} have got bytes: [ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, - 11, 12, 13, 14, 15] # lane3 has got bytes: [ 16, 17, 18, 19, 4, 5, - 6, 8, 9, 10, 11, 12, 13, 14, 15] + __m512i byte_1_high = _mm512_shuffle_epi8(mask1, index1); + __m512i mask2 = _mm512_setr_epi64(0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, + 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, + 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, + 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb); + __m512i index2 = _mm512_and_si512(prev1, v_0f); - expand_ver2 = [ - # lane 0: - 0, 1, 2, 3, - 1, 2, 3, 4, - 2, 3, 4, 5, - 3, 4, 5, 6, + __m512i byte_1_low = _mm512_shuffle_epi8(mask2, index2); + __m512i mask3 = + _mm512_setr_epi64(0x101010101010101, 0x1010101babaaee6, 0x101010101010101, + 0x1010101babaaee6, 0x101010101010101, 0x1010101babaaee6, + 0x101010101010101, 0x1010101babaaee6); + __m512i index3 = _mm512_and_si512(_mm512_srli_epi16(input, 4), v_0f); + __m512i byte_2_high = _mm512_shuffle_epi8(mask3, index3); + return _mm512_ternarylogic_epi64(byte_1_high, byte_1_low, byte_2_high, 128); +} - # lane 1: - 4, 5, 6, 7, - 5, 6, 7, 8, - 6, 7, 8, 9, - 7, 8, 9, 10, +simdutf_really_inline __m512i check_multibyte_lengths(const __m512i input, + const __m512i prev_input, + const __m512i sc) { + __m512i prev2 = prev<2>(input, prev_input); + __m512i prev3 = prev<3>(input, prev_input); + __m512i is_third_byte = _mm512_subs_epu8( + prev2, _mm512_set1_epi8(0b11100000u - 1)); // Only 111_____ will be > 0 + __m512i is_fourth_byte = _mm512_subs_epu8( + prev3, _mm512_set1_epi8(0b11110000u - 1)); // Only 1111____ will be > 0 + __m512i is_third_or_fourth_byte = + _mm512_or_si512(is_third_byte, is_fourth_byte); + const __m512i v_7f = _mm512_set1_epi8(char(0x7f)); + is_third_or_fourth_byte = _mm512_adds_epu8(v_7f, is_third_or_fourth_byte); + // We want to compute (is_third_or_fourth_byte AND v80) XOR sc. 
+ const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + return _mm512_ternarylogic_epi32(is_third_or_fourth_byte, v_80, sc, + 0b1101010); + //__m512i is_third_or_fourth_byte_mask = + //_mm512_and_si512(is_third_or_fourth_byte, v_80); return + // _mm512_xor_si512(is_third_or_fourth_byte_mask, sc); +} +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline __m512i is_incomplete(const __m512i input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + __m512i max_value = _mm512_setr_epi64(0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xbfdfefffffffffff); + return _mm512_subs_epu8(input, max_value); +} - # lane 2: - 8, 9, 10, 11, - 9, 10, 11, 12, - 10, 11, 12, 13, - 11, 12, 13, 14, +struct avx512_utf8_checker { + // If this is nonzero, there has been a UTF-8 error. 
+ __m512i error{}; - # lane 3 order: 13, 14, 15, 16 14, 15, 16, 17, 15, 16, 17, 18, 16, - 17, 18, 19 12, 13, 14, 15, 13, 14, 15, 0, 14, 15, 0, 1, 15, 0, 1, 2, - ] -*/ + // The last input we received + __m512i prev_input_block{}; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + __m512i prev_incomplete{}; -#define SIMDUTF_ICELAKE_TRANSCODE16(LANE0, LANE1, MASKED) \ - { \ - const __m512i merged = _mm512_mask_mov_epi32(LANE0, 0x1000, LANE1); \ - const __m512i expand_ver2 = _mm512_setr_epi64( \ - 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, \ - 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, \ - 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); \ - const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); \ - \ - __mmask16 leading_bytes; \ - const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); \ - const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); \ - const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); \ - leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); \ - \ - __m512i char_class; \ - char_class = _mm512_srli_epi32(input, 4); \ - /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ \ - const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); \ - const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); \ - char_class = \ - _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); \ - \ - const int valid_count = static_cast(count_ones(leading_bytes)); \ - const __m512i utf32 = expanded_utf8_to_utf32(char_class, input); \ - \ - const __m512i out = _mm512_mask_compress_epi32(_mm512_setzero_si512(), \ - leading_bytes, utf32); \ - \ - if (UTF32) { \ - if (MASKED) { \ - const __mmask16 valid = uint16_t((1 << valid_count) - 1); \ - _mm512_mask_storeu_epi32((__m512i *)output, valid, out); \ - } else { \ - _mm512_storeu_si512((__m512i *)output, out); \ - } \ - output += valid_count; \ - } else { \ - if (MASKED) { \ - output += utf32_to_utf16_masked( \ - byteflip, out, valid_count, 
reinterpret_cast(output)); \ - } else { \ - output += utf32_to_utf16( \ - byteflip, out, valid_count, reinterpret_cast(output)); \ - } \ - } \ + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const __m512i input, + const __m512i prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + __m512i prev1 = prev<1>(input, prev_input); + __m512i sc = check_special_cases(input, prev1); + this->error = _mm512_or_si512( + check_multibyte_lengths(input, prev_input, sc), this->error); } -#define SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(INPUT, VALID_COUNT, MASKED) \ - { \ - if (UTF32) { \ - if (MASKED) { \ - const __mmask16 valid_mask = uint16_t((1 << VALID_COUNT) - 1); \ - _mm512_mask_storeu_epi32((__m512i *)output, valid_mask, INPUT); \ - } else { \ - _mm512_storeu_si512((__m512i *)output, INPUT); \ - } \ - output += VALID_COUNT; \ - } else { \ - if (MASKED) { \ - output += utf32_to_utf16_masked( \ - byteflip, INPUT, VALID_COUNT, \ - reinterpret_cast(output)); \ - } else { \ - output += \ - utf32_to_utf16(byteflip, INPUT, VALID_COUNT, \ - reinterpret_cast(output)); \ - } \ - } \ + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. 
+ this->error = _mm512_or_si512(this->error, this->prev_incomplete); } -#define SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) \ - if (UTF32) { \ - const __m128i t0 = _mm512_castsi512_si128(utf8); \ - const __m128i t1 = _mm512_extracti32x4_epi32(utf8, 1); \ - const __m128i t2 = _mm512_extracti32x4_epi32(utf8, 2); \ - const __m128i t3 = _mm512_extracti32x4_epi32(utf8, 3); \ - _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ - _mm512_cvtepu8_epi32(t0)); \ - _mm512_storeu_si512((__m512i *)(output + 1 * 16), \ - _mm512_cvtepu8_epi32(t1)); \ - _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ - _mm512_cvtepu8_epi32(t2)); \ - _mm512_storeu_si512((__m512i *)(output + 3 * 16), \ - _mm512_cvtepu8_epi32(t3)); \ - } else { \ - const __m256i h0 = _mm512_castsi512_si256(utf8); \ - const __m256i h1 = _mm512_extracti64x4_epi64(utf8, 1); \ - if (big_endian) { \ - _mm512_storeu_si512( \ - (__m512i *)(output + 0 * 16), \ - _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h0), byteflip)); \ - _mm512_storeu_si512( \ - (__m512i *)(output + 2 * 16), \ - _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h1), byteflip)); \ - } else { \ - _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ - _mm512_cvtepu8_epi16(h0)); \ - _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ - _mm512_cvtepu8_epi16(h1)); \ - } \ + // returns true if ASCII. + simdutf_really_inline bool check_next_input(const __m512i input) { + const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + const __mmask64 ascii = _mm512_test_epi8_mask(input, v_80); + if (ascii == 0) { + this->error = _mm512_or_si512(this->error, this->prev_incomplete); + return true; + } else { + this->check_utf8_bytes(input, this->prev_input_block); + this->prev_incomplete = is_incomplete(input); + this->prev_input_block = input; + return false; + } } -/* end file src/icelake/icelake_macros.inl.cpp */ + // do not forget to call check_eof! 
+ simdutf_really_inline bool errors() const { + return _mm512_test_epi8_mask(this->error, this->error) != 0; + } +}; // struct avx512_utf8_checker +/* end file src/icelake/icelake_utf8_validation.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && \ + (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) /* begin file src/icelake/icelake_from_valid_utf8.inl.cpp */ // file included directly @@ -24221,201 +26759,11 @@ valid_utf8_to_fixed_length(const char *str, size_t len, OUTPUT *dwords) { using utf8_to_utf16_result = std::pair; /* end file src/icelake/icelake_from_valid_utf8.inl.cpp */ -/* begin file src/icelake/icelake_utf8_validation.inl.cpp */ -// file included directly - -simdutf_really_inline __m512i check_special_cases(__m512i input, - const __m512i prev1) { - __m512i mask1 = _mm512_setr_epi64(0x0202020202020202, 0x4915012180808080, - 0x0202020202020202, 0x4915012180808080, - 0x0202020202020202, 0x4915012180808080, - 0x0202020202020202, 0x4915012180808080); - const __m512i v_0f = _mm512_set1_epi8(0x0f); - __m512i index1 = _mm512_and_si512(_mm512_srli_epi16(prev1, 4), v_0f); - - __m512i byte_1_high = _mm512_shuffle_epi8(mask1, index1); - __m512i mask2 = _mm512_setr_epi64(0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, - 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, - 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, - 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb); - __m512i index2 = _mm512_and_si512(prev1, v_0f); - - __m512i byte_1_low = _mm512_shuffle_epi8(mask2, index2); - __m512i mask3 = - _mm512_setr_epi64(0x101010101010101, 0x1010101babaaee6, 0x101010101010101, - 0x1010101babaaee6, 0x101010101010101, 0x1010101babaaee6, - 0x101010101010101, 0x1010101babaaee6); - __m512i index3 = _mm512_and_si512(_mm512_srli_epi16(input, 4), v_0f); - __m512i byte_2_high = _mm512_shuffle_epi8(mask3, index3); - return _mm512_ternarylogic_epi64(byte_1_high, byte_1_low, byte_2_high, 128); -} - -simdutf_really_inline __m512i 
check_multibyte_lengths(const __m512i input, - const __m512i prev_input, - const __m512i sc) { - __m512i prev2 = prev<2>(input, prev_input); - __m512i prev3 = prev<3>(input, prev_input); - __m512i is_third_byte = _mm512_subs_epu8( - prev2, _mm512_set1_epi8(0b11100000u - 1)); // Only 111_____ will be > 0 - __m512i is_fourth_byte = _mm512_subs_epu8( - prev3, _mm512_set1_epi8(0b11110000u - 1)); // Only 1111____ will be > 0 - __m512i is_third_or_fourth_byte = - _mm512_or_si512(is_third_byte, is_fourth_byte); - const __m512i v_7f = _mm512_set1_epi8(char(0x7f)); - is_third_or_fourth_byte = _mm512_adds_epu8(v_7f, is_third_or_fourth_byte); - // We want to compute (is_third_or_fourth_byte AND v80) XOR sc. - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - return _mm512_ternarylogic_epi32(is_third_or_fourth_byte, v_80, sc, - 0b1101010); - //__m512i is_third_or_fourth_byte_mask = - //_mm512_and_si512(is_third_or_fourth_byte, v_80); return - // _mm512_xor_si512(is_third_or_fourth_byte_mask, sc); -} -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline __m512i is_incomplete(const __m512i input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - __m512i max_value = _mm512_setr_epi64(0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xbfdfefffffffffff); - return _mm512_subs_epu8(input, max_value); -} - -struct avx512_utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - __m512i error{}; - - // The last input we received - __m512i prev_input_block{}; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - __m512i prev_incomplete{}; - - // - // Check whether the current bytes are valid UTF-8. 
- // - simdutf_really_inline void check_utf8_bytes(const __m512i input, - const __m512i prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - __m512i prev1 = prev<1>(input, prev_input); - __m512i sc = check_special_cases(input, prev1); - this->error = _mm512_or_si512( - check_multibyte_lengths(input, prev_input, sc), this->error); - } - - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error = _mm512_or_si512(this->error, this->prev_incomplete); - } - - // returns true if ASCII. - simdutf_really_inline bool check_next_input(const __m512i input) { - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - const __mmask64 ascii = _mm512_test_epi8_mask(input, v_80); - if (ascii == 0) { - this->error = _mm512_or_si512(this->error, this->prev_incomplete); - return true; - } else { - this->check_utf8_bytes(input, this->prev_input_block); - this->prev_incomplete = is_incomplete(input); - this->prev_input_block = input; - return false; - } - } - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return _mm512_test_epi8_mask(this->error, this->error) != 0; - } -}; // struct avx512_utf8_checker -/* end file src/icelake/icelake_utf8_validation.inl.cpp */ /* begin file src/icelake/icelake_from_utf8.inl.cpp */ // file included directly // File contains conversion procedure from possibly invalid UTF-8 strings. -/** - * Attempts to convert up to len 1-byte code units from in (in UTF-8 format) to - * out. 
- * Returns the position of the input and output after the processing is - * completed. Upon error, the output is set to null. - */ - -template -utf8_to_utf16_result -fast_avx512_convert_utf8_to_utf16(const char *in, size_t len, char16_t *out) { - const char *const final_in = in + len; - bool result = true; - while (result) { - if (final_in - in >= 64) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else if (in < final_in) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else { - break; - } - } - if (!result) { - out = nullptr; - } - return std::make_pair(in, out); -} - -template -simdutf::result fast_avx512_convert_utf8_to_utf16_with_errors(const char *in, - size_t len, - char16_t *out) { - const char *const init_in = in; - const char16_t *const init_out = out; - const char *const final_in = in + len; - bool result = true; - while (result) { - if (final_in - in >= 64) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else if (in < final_in) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else { - break; - } - } - if (!result) { - size_t pos = size_t(in - init_in); - if (pos < len && (init_in[pos] & 0xc0) == 0x80 && pos >= 64) { - // We must check whether we are the fourth continuation byte - bool c1 = (init_in[pos - 1] & 0xc0) == 0x80; - bool c2 = (init_in[pos - 2] & 0xc0) == 0x80; - bool c3 = (init_in[pos - 3] & 0xc0) == 0x80; - if (c1 && c2 && c3) { - return {simdutf::TOO_LONG, pos}; - } - } - // rewind_and_convert_with_errors will seek a potential error from in - // onward, with the ability to go back up to in - init_in bytes, and read - // final_in - in bytes forward. 
- simdutf::result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - in - init_in, in, final_in - in, out); - res.count += (in - init_in); - return res; - } else { - return simdutf::result(error_code::SUCCESS, out - init_out); - } -} - template // todo: replace with the utf-8 to utf-16 routine adapted to utf-32. This code // is legacy. @@ -24679,6 +27027,10 @@ validating_utf8_to_fixed_length_with_constant_checks(const char *str, return {ptr, output, true}; } /* end file src/icelake/icelake_from_utf8.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */ // file included directly @@ -24856,6 +27208,9 @@ size_t valid_utf8_to_latin1_avx512(const char *buf, size_t len, return (size_t)(latin_output - start); } /* end file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 /* begin file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ // file included directly template @@ -24923,7 +27278,7 @@ icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, } if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { uint16_t word; - while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf)) + while ((word = (big_endian ? scalar::u16_swap_bytes(uint16_t(*buf)) : uint16_t(*buf))) <= 0xff) { *latin1_output++ = uint8_t(word); buf++; @@ -24946,7 +27301,7 @@ icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { uint16_t word; - while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf)) + while ((word = (big_endian ? 
scalar::u16_swap_bytes(uint16_t(*buf)) : uint16_t(*buf))) <= 0xff) { *latin1_output++ = uint8_t(word); buf++; @@ -24961,6 +27316,9 @@ icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, len), latin1_output); } /* end file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ // file included directly @@ -25169,6 +27527,86 @@ tail: return ((inbuf - inbuf_orig) + adjust); } /* end file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ +/* begin file src/icelake/icelake_convert_utf8_to_utf16.inl.cpp */ +// file included directly + +// File contains conversion procedure from possibly invalid UTF-8 strings. + +/** + * Attempts to convert up to len 1-byte code units from in (in UTF-8 format) to + * out. + * Returns the position of the input and output after the processing is + * completed. Upon error, the output is set to null. 
+ */ + +template +utf8_to_utf16_result +fast_avx512_convert_utf8_to_utf16(const char *in, size_t len, char16_t *out) { + const char *const final_in = in + len; + bool result = true; + while (result) { + if (final_in - in >= 64) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else if (in < final_in) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else { + break; + } + } + if (!result) { + out = nullptr; + } + return std::make_pair(in, out); +} + +template +simdutf::result fast_avx512_convert_utf8_to_utf16_with_errors(const char *in, + size_t len, + char16_t *out) { + const char *const init_in = in; + const char16_t *const init_out = out; + const char *const final_in = in + len; + bool result = true; + while (result) { + if (final_in - in >= 64) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else if (in < final_in) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else { + break; + } + } + if (!result) { + size_t pos = size_t(in - init_in); + if (pos < len && (init_in[pos] & 0xc0) == 0x80 && pos >= 64) { + // We must check whether we are the fourth continuation byte + bool c1 = (init_in[pos - 1] & 0xc0) == 0x80; + bool c2 = (init_in[pos - 2] & 0xc0) == 0x80; + bool c3 = (init_in[pos - 3] & 0xc0) == 0x80; + if (c1 && c2 && c3) { + return {simdutf::TOO_LONG, pos}; + } + } + // rewind_and_convert_with_errors will seek a potential error from in + // onward, with the ability to go back up to in - init_in bytes, and read + // final_in - in bytes forward. 
+ simdutf::result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + in - init_in, in, final_in - in, out); + res.count += (in - init_in); + return res; + } else { + return simdutf::result(error_code::SUCCESS, out - init_out); + } +} +/* end file src/icelake/icelake_convert_utf8_to_utf16.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ // file included directly @@ -25307,6 +27745,9 @@ convert_utf16_to_utf32(const char16_t *buf, size_t len, return std::make_tuple(buf + carry, utf32_output, true); } /* end file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 /* begin file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ // file included directly size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len, @@ -25383,6 +27824,9 @@ icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, len), latin1_output); } /* end file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ // file included directly @@ -25959,186 +28403,352 @@ avx512_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); } /* end file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ // file included directly -// Todo: currently, this is just the haswell code, optimize for icelake kernel. 
template std::pair avx512_convert_utf32_to_utf16(const char32_t *buf, size_t len, char16_t *utf16_output) { const char32_t *end = buf + len; + __mmask32 forbidden_bytemask = 0; + const __m512i v_00000000 = _mm512_setzero_si512(); + const __m512i v_ffff0000 = _mm512_set1_epi32((int32_t)0xffff0000); + const __m512i v_f800 = _mm512_set1_epi32((uint32_t)0xf800); + const __m512i v_d800 = _mm512_set1_epi32((uint32_t)0xd800); + const __m512i v_10ffff = _mm512_set1_epi32(0x10FFFF); + const __m512i v_10000 = _mm512_set1_epi32(0x10000); + const __m512i v_3ff0000 = _mm512_set1_epi32(0x3FF0000); + const __m512i v_3ff = _mm512_set1_epi32(0x3FF); + const __m512i v_dc00d800 = _mm512_set1_epi32((int32_t)0xDC00D800); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - __m256i forbidden_bytemask = _mm256_setzero_si256(); - - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); + while (end - buf >= std::ptrdiff_t(16)) { + __m512i in = _mm512_loadu_si512(buf); // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __m256i saturation_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); + const __mmask16 saturation_bitmask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000); - if (saturation_bitmask == 0xffffffff) { - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); - forbidden_bytemask = _mm256_or_si256( - forbidden_bytemask, - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800)); + if (saturation_bitmask == 0xffff) { + forbidden_bytemask |= + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, 
v_f800), v_d800); - __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), - _mm256_extractf128_si256(in, 1)); + __m256i utf16_packed = _mm512_cvtepi32_epi16(in); if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, + 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); } - _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; + _mm256_storeu_si256((__m256i *)utf16_output, utf16_packed); + utf16_output += 16; + buf += 16; } else { - size_t forward = 7; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); + // saturation_bitmask == 1 words will generate 1 utf16 char, + // and saturation_bitmask == 0 words will generate 2 utf16 chars assuming + // no errors. Thus we need a output_mask which has the structure b_2i = 1, + // b_2i+1 = !saturation_bitmask_i + const __mmask32 output_mask = ~_pdep_u32(saturation_bitmask, 0xAAAAAAAA); + const __mmask16 surrogate_bitmask = __mmask16(~saturation_bitmask); + __mmask32 error = _mm512_mask_cmpeq_epi32_mask( + saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); + error |= _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); + if (simdutf_unlikely(error)) { + return std::make_pair(nullptr, utf16_output); } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, utf16_output); - } - *utf16_output++ = - big_endian - ? 
char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(nullptr, utf16_output); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (big_endian) { - high_surrogate = - uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); - low_surrogate = - uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } + __m512i v1, v2, v; + // for the bits saturation_bitmask == 0, we need to unpack the 32-bit word + // into two 16 bit words corresponding to high_surrogate and + // low_surrogate. Once the bits are unpacked and merged, the output will + // be compressed as per output_mask. + in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); + v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); + v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); + v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); + v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); + v = _mm512_or_si512(v1, v2); + in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); + if (big_endian) { + const __m512i swap_512 = _mm512_set_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, + 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, + 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, + 2, 3, 0, 1); + in = _mm512_shuffle_epi8(in, swap_512); } - buf += k; + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); + auto written_out = _mm_popcnt_u32(output_mask); + _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, 
written_out), + compressed); + //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); + utf16_output += written_out; + buf += 16; + } + } + + size_t remaining_len = size_t(end - buf); + if (remaining_len) { + __mmask16 input_mask = __mmask16((1 << remaining_len) - 1); + __m512i in = _mm512_maskz_loadu_epi32(input_mask, buf); + const __mmask16 saturation_bitmask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000) & + input_mask; + if (saturation_bitmask == input_mask) { + forbidden_bytemask |= + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); + + __m256i utf16_packed = _mm512_cvtepi32_epi16(in); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, + 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); + } + _mm256_mask_storeu_epi16(utf16_output, input_mask, utf16_packed); + utf16_output += remaining_len; + buf += remaining_len; + } else { + const __mmask32 output_max_mask = (1 << (remaining_len * 2)) - 1; + const __mmask32 output_mask = + (~_pdep_u32(saturation_bitmask, 0xAAAAAAAA)) & output_max_mask; + const __mmask16 surrogate_bitmask = + __mmask16(~saturation_bitmask) & input_mask; + __mmask32 error = _mm512_mask_cmpeq_epi32_mask( + saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); + error |= _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); + if (simdutf_unlikely(error)) { + return std::make_pair(nullptr, utf16_output); + } + __m512i v1, v2, v; + in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); + v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); + v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); + v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); + v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); + v = _mm512_or_si512(v1, v2); + in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); + if 
(big_endian) { + const __m512i swap_512 = _mm512_set_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, + 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, + 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, + 2, 3, 0, 1); + in = _mm512_shuffle_epi8(in, swap_512); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); + auto written_out = _mm_popcnt_u32(output_mask); + _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), + compressed); + //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); + utf16_output += written_out; + buf += remaining_len; } } // check for invalid input - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + if (forbidden_bytemask != 0) { return std::make_pair(nullptr, utf16_output); } return std::make_pair(buf, utf16_output); } -// Todo: currently, this is just the haswell code, optimize for icelake kernel. 
template std::pair avx512_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, char16_t *utf16_output) { const char32_t *start = buf; const char32_t *end = buf + len; + const __m512i v_00000000 = _mm512_setzero_si512(); + const __m512i v_ffff0000 = _mm512_set1_epi32((int32_t)0xffff0000); + const __m512i v_f800 = _mm512_set1_epi32((uint32_t)0xf800); + const __m512i v_d800 = _mm512_set1_epi32((uint32_t)0xd800); + const __m512i v_10ffff = _mm512_set1_epi32(0x10FFFF); + const __m512i v_10000 = _mm512_set1_epi32(0x10000); + const __m512i v_3ff0000 = _mm512_set1_epi32(0x3FF0000); + const __m512i v_3ff = _mm512_set1_epi32(0x3FF); + const __m512i v_dc00d800 = _mm512_set1_epi32((int32_t)0xDC00D800); + int error_idx = 0; + error_code code = error_code::SUCCESS; + bool err = false; - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); + while (end - buf >= std::ptrdiff_t(16)) { + __m512i in = _mm512_loadu_si512(buf); // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __m256i saturation_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); + const __mmask16 saturation_bitmask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000); - if (saturation_bitmask == 0xffffffff) { - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); - const __m256i forbidden_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800); - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != - 0x0) { - return 
std::make_pair(result(error_code::SURROGATE, buf - start), + if (saturation_bitmask == 0xffff) { + __mmask32 forbidden_bytemask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); + + __m256i utf16_packed = _mm512_cvtepi32_epi16(in); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, + 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); + } + if (simdutf_unlikely(forbidden_bytemask)) { + int idx = _tzcnt_u32(forbidden_bytemask); + _mm256_mask_storeu_epi16( + utf16_output, __mmask16(_blsmsk_u32(forbidden_bytemask) >> 1), + utf16_packed); + return std::make_pair(result(error_code::SURROGATE, buf - start + idx), + utf16_output + idx); + } + _mm256_storeu_si256((__m256i *)utf16_output, utf16_packed); + utf16_output += 16; + } else { + __mmask32 output_mask = ~_pdep_u32(saturation_bitmask, 0xAAAAAAAA); + const __mmask16 surrogate_bitmask = __mmask16(~saturation_bitmask); + __mmask32 error_surrogate = _mm512_mask_cmpeq_epi32_mask( + saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); + __mmask32 error_too_large = + _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); + if (simdutf_unlikely(error_surrogate || error_too_large)) { + // Need to find the lowest set bit between the two error masks + // Need to also write the partial chunk until the error index to output. 
+ int large_idx = _tzcnt_u32(error_too_large); + int surrogate_idx = _tzcnt_u32(error_surrogate); + err = true; + if (large_idx < surrogate_idx) { + code = error_code::TOO_LARGE; + error_idx = large_idx; + } else { + code = error_code::SURROGATE; + error_idx = surrogate_idx; + } + output_mask &= ((1 << (2 * error_idx)) - 1); + } + __m512i v1, v2, v; + in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); + v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); + v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); + v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); + v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); + v = _mm512_or_si512(v1, v2); + in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); + if (big_endian) { + const __m512i swap_512 = _mm512_set_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, + 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, + 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, + 2, 3, 0, 1); + in = _mm512_shuffle_epi8(in, swap_512); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); + auto written_out = _mm_popcnt_u32(output_mask); + _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), + compressed); + //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); + utf16_output += written_out; + if (simdutf_unlikely(err)) { + return std::make_pair(result(code, buf - start + error_idx), utf16_output); } - - __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), - _mm256_extractf128_si256(in, 1)); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); - } - _mm_storeu_si128((__m128i 
*)utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; - } else { - size_t forward = 7; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf16_output); - } - *utf16_output++ = - big_endian - ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf16_output); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (big_endian) { - high_surrogate = - uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); - low_surrogate = - uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; } + buf += 16; + } + + size_t remaining_len = size_t(end - buf); + if (remaining_len) { + __mmask16 input_mask = __mmask16((1 << remaining_len) - 1); + __m512i in = _mm512_maskz_loadu_epi32(input_mask, buf); + const __mmask16 saturation_bitmask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000) & + input_mask; + if (saturation_bitmask == input_mask) { + __mmask32 forbidden_bytemask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); + __m256i utf16_packed = _mm512_cvtepi32_epi16(in); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, + 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); + } + if (simdutf_unlikely(forbidden_bytemask)) { + int idx = 
_tzcnt_u32(forbidden_bytemask); + _mm256_mask_storeu_epi16( + utf16_output, __mmask16(_blsmsk_u32(forbidden_bytemask) >> 1), + utf16_packed); + return std::make_pair(result(error_code::SURROGATE, buf - start + idx), + utf16_output + idx); + } + _mm256_mask_storeu_epi16(utf16_output, input_mask, utf16_packed); + utf16_output += remaining_len; + } else { + const __mmask32 output_max_mask = (1 << (remaining_len * 2)) - 1; + __mmask32 output_mask = + (~_pdep_u32(saturation_bitmask, 0xAAAAAAAA)) & output_max_mask; + const __mmask16 surrogate_bitmask = + __mmask16(~saturation_bitmask) & input_mask; + __mmask32 error_surrogate = _mm512_mask_cmpeq_epi32_mask( + saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); + __mmask32 error_too_large = + _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); + if (simdutf_unlikely(error_surrogate || error_too_large)) { + int large_idx = _tzcnt_u32(error_too_large); + int surrogate_idx = _tzcnt_u32(error_surrogate); + err = true; + if (large_idx < surrogate_idx) { + code = error_code::TOO_LARGE; + error_idx = large_idx; + } else { + code = error_code::SURROGATE; + error_idx = surrogate_idx; + } + output_mask &= ((1 << (2 * error_idx)) - 1); + } + __m512i v1, v2, v; + in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); + v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); + v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); + v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); + v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); + v = _mm512_or_si512(v1, v2); + in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); + if (big_endian) { + const __m512i swap_512 = _mm512_set_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, + 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, + 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, + 2, 3, 0, 1); + in = _mm512_shuffle_epi8(in, swap_512); + } + // we 
deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); + auto written_out = _mm_popcnt_u32(output_mask); + _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), + compressed); + //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); + utf16_output += written_out; + if (simdutf_unlikely(err)) { + return std::make_pair(result(code, buf - start + error_idx), + utf16_output); + } + } + buf += remaining_len; } return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); } /* end file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_ASCII /* begin file src/icelake/icelake_ascii_validation.inl.cpp */ // file included directly @@ -26160,11 +28770,13 @@ bool validate_ascii(const char *buf, size_t len) { return (_mm512_test_epi8_mask(running_or, running_or) == 0); } /* end file src/icelake/icelake_ascii_validation.inl.cpp */ +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/icelake/icelake_utf32_validation.inl.cpp */ // file included directly bool validate_utf32(const char32_t *buf, size_t len) { - if (len == 0) { + if (simdutf_unlikely(len == 0)) { return true; } const char32_t *end = buf + len; @@ -26189,20 +28801,22 @@ bool validate_utf32(const char32_t *buf, size_t len) { const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); - __m512i is_zero = - _mm512_xor_si512(_mm512_max_epu32(currentmax, standardmax), standardmax); - if (_mm512_test_epi8_mask(is_zero, is_zero) != 0) { + const auto outside_range = _mm512_cmpgt_epu32_mask(currentmax, standardmax); + if (outside_range != 0) { return false; } - is_zero = _mm512_xor_si512( - 
_mm512_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); - if (_mm512_test_epi8_mask(is_zero, is_zero) != 0) { + + const auto surrogate = + _mm512_cmpgt_epu32_mask(currentoffsetmax, standardoffsetmax); + if (surrogate != 0) { return false; } return true; } /* end file src/icelake/icelake_utf32_validation.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 /* begin file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ // file included directly @@ -26312,6 +28926,8 @@ size_t latin1_to_utf8_avx512_start(const char *buf, size_t len, return (size_t)(utf8_output - start); } /* end file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF16 /* begin file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ // file included directly template @@ -26350,28 +28966,35 @@ size_t icelake_convert_latin1_to_utf16(const char *latin1_input, size_t len, return len; } /* end file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 /* begin file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ -std::pair -avx512_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - - for (size_t i = 0; i < rounded_len; i += 16) { +void avx512_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + while (len >= 16) { // Load 16 Latin1 characters into a 128-bit register - __m128i in = _mm_loadu_si128((__m128i *)&buf[i]); + __m128i in = _mm_loadu_si128((__m128i *)buf); // Zero extend each set of 8 Latin1 characters to 16 32-bit integers using // vpmovzxbd __m512i out = _mm512_cvtepu8_epi32(in); // Store the results back to memory - _mm512_storeu_si512((__m512i *)&utf32_output[i], out); + _mm512_storeu_si512((__m512i *)utf32_output, out); + + len -= 16; + buf += 16; + 
utf32_output += 16; } - // Return pointers pointing to where we left off - return std::make_pair(buf + rounded_len, utf32_output + rounded_len); + __mmask16 mask = __mmask16((1 << len) - 1); + __m128i in = _mm_maskz_loadu_epi8(mask, buf); + __m512i out = _mm512_cvtepu8_epi32(in); + _mm512_mask_storeu_epi32((__m512i *)utf32_output, mask, out); } /* end file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_BASE64 /* begin file src/icelake/icelake_base64.inl.cpp */ // file included directly /** @@ -26807,6 +29430,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, return {SUCCESS, srclen, size_t(dst - dstinit)}; } /* end file src/icelake/icelake_base64.inl.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 #include @@ -26814,9 +29438,121 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } // namespace icelake } // namespace simdutf +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf32.h */ +#include + +namespace simdutf { +namespace icelake { +namespace { +namespace utf32 { + +template T min(T a, T b) { return a <= b ? a : b; } + +size_t utf8_length_from_utf32(const char32_t *input, size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + + const auto one = vector_u32::splat(1); + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + + size_t counter = 0; + + // 1. 
vectorized loop unrolled 4 times + { + // we use uint32 counters, this is + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); + + input += 4 * N; + } + + counter += acc.sum(); + } + } + + // 2. vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end. 
+ counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); +} + +} // namespace utf32 +} // unnamed namespace +} // namespace icelake +} // namespace simdutf +/* end file src/generic/utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + namespace simdutf { namespace icelake { +#if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { @@ -26895,7 +29631,9 @@ implementation::detect_encodings(const char *input, } return out; } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -26916,7 +29654,9 @@ implementation::validate_utf8(const char *buf, size_t len) const noexcept { checker.check_eof(); return !checker.errors(); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -26959,7 +29699,9 @@ simdutf_warn_unused result implementation::validate_utf8_with_errors( } return result(error_code::SUCCESS, len); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { return icelake::validate_ascii(buf, len); @@ -26989,7 +29731,9 @@ simdutf_warn_unused result implementation::validate_ascii_with_errors( } return result(error_code::SUCCESS, len); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept { @@ -27037,7 +29781,9 @@ implementation::validate_utf16le(const char16_t *buf, } return true; } +#endif // 
SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept { @@ -27210,12 +29956,16 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( } return result(error_code::SUCCESS, len); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { return icelake::validate_utf32(buf, len); } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { const char32_t *buf_orig = buf; @@ -27268,12 +30018,16 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( return result(error_code::SUCCESS, len); } +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { return icelake::latin1_to_utf8_avx512_start(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { return icelake_convert_latin1_to_utf16(buf, len, @@ -27285,26 +30039,17 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( return icelake_convert_latin1_to_utf16(buf, len, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - 
avx512_convert_latin1_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t converted_chars = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } - converted_chars += scalar_converted_chars; - } - return converted_chars; + avx512_convert_latin1_to_utf32(buf, len, utf32_output); + return len; } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { return icelake::utf8_to_latin1_avx512(buf, len, latin1_output); @@ -27332,7 +30077,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { return icelake::valid_utf8_to_latin1_avx512(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { utf8_to_utf16_result ret = @@ -27431,7 +30178,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( return saved_bytes; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( const char *buf, size_t len, char32_t *utf32_out) const noexcept { uint32_t *utf32_output = reinterpret_cast(utf32_out); @@ -27563,7 +30312,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( return saved_bytes; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused 
size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { return icelake_convert_utf16_to_latin1(buf, len, @@ -27603,7 +30354,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( // optimization opportunity: implement custom function return convert_utf16le_to_latin1(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { size_t outlen; @@ -27663,7 +30416,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { return convert_utf16be_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { return icelake_convert_utf32_to_latin1(buf, len, latin1_output); @@ -27679,7 +30434,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { return icelake_convert_utf32_to_latin1(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -27725,7 +30482,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { return convert_utf32_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && 
SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = @@ -27734,15 +30493,6 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( return 0; } size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } return saved_bytes; } @@ -27754,15 +30504,6 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( return 0; } size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } return saved_bytes; } @@ -27773,16 +30514,8 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( std::pair ret = avx512_convert_utf32_to_utf16_with_errors( buf, len, utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } + if (ret.first.error) { + return ret.first; } ret.first.count = ret.second - @@ -27797,16 +30530,8 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( std::pair ret = avx512_convert_utf32_to_utf16_with_errors(buf, len, utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += 
ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } + if (ret.first.error) { + return ret.first; } ret.first.count = ret.second - @@ -27960,7 +30685,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( } return saved_bytes; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 void implementation::change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept { @@ -28037,7 +30764,9 @@ simdutf_warn_unused size_t implementation::count_utf16be( return count + scalar::utf16::count_code_points( ptr, length - (ptr - input)); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { const uint8_t *str = reinterpret_cast(input); @@ -28099,22 +30828,16 @@ implementation::count_utf8(const char *input, size_t length) const noexcept { return answer + scalar::utf8::count_code_points( reinterpret_cast(str + i), length - i); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::latin1_length_from_utf8( const char *buf, size_t len) const noexcept { return count_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return scalar::utf16::latin1_length_from_utf16(length); -} - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t length) const noexcept { - return scalar::utf32::latin1_length_from_utf32(length); -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( const char16_t *input, size_t length) const noexcept { const char16_t *ptr = input; @@ -28195,7 +30918,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( return count + 
scalar::utf16::utf8_length_from_utf16( ptr, length - (ptr - input)); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return implementation::count_utf16le(input, length); @@ -28205,17 +30930,9 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return implementation::count_utf16be(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); -} - -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::utf8_length_from_latin1( const char *input, size_t length) const noexcept { const uint8_t *str = reinterpret_cast(input); @@ -28291,7 +31008,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_latin1( return answer + scalar::latin1::utf8_length_from_latin1( reinterpret_cast(str + i), length - i); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf16_length_from_utf8( const char *input, size_t length) const noexcept { size_t pos = 0; @@ -28311,43 +31030,16 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf8( return count + scalar::utf8::utf16_length_from_utf8(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const 
noexcept { - const char32_t *ptr = input; - size_t count{0}; - - if (length >= 16) { - const char32_t *end = input + length - 16; - - const __m512i v_0000_007f = _mm512_set1_epi32((uint32_t)0x7f); - const __m512i v_0000_07ff = _mm512_set1_epi32((uint32_t)0x7ff); - const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff); - - while (ptr <= end) { - __m512i utf32 = _mm512_loadu_si512((const __m512i *)ptr); - ptr += 16; - __mmask16 ascii_bitmask = _mm512_cmple_epu32_mask(utf32, v_0000_007f); - __mmask16 two_bytes_bitmask = _mm512_mask_cmple_epu32_mask( - _knot_mask16(ascii_bitmask), utf32, v_0000_07ff); - __mmask16 three_bytes_bitmask = _mm512_mask_cmple_epu32_mask( - _knot_mask16(_mm512_kor(ascii_bitmask, two_bytes_bitmask)), utf32, - v_0000_ffff); - - size_t ascii_count = count_ones(ascii_bitmask); - size_t two_bytes_count = count_ones(two_bytes_bitmask); - size_t three_bytes_count = count_ones(three_bytes_bitmask); - size_t four_bytes_count = - 16 - ascii_count - two_bytes_count - three_bytes_count; - count += ascii_count + 2 * two_bytes_count + 3 * three_bytes_count + - 4 * four_bytes_count; - } - } - - return count + - scalar::utf32::utf8_length_from_utf32(ptr, length - (ptr - input)); + return utf32::utf8_length_from_utf32(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { const char32_t *ptr = input; @@ -28371,17 +31063,16 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32( return count + scalar::utf32::utf16_length_from_utf32(ptr, length - (ptr - input)); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf8( const char *input, size_t length) const noexcept { return implementation::count_utf8(input, length); } 
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - +#if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -28426,11 +31117,6 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -28475,11 +31161,6 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); -} - size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { @@ -28489,6 +31170,7 @@ size_t implementation::binary_to_base64(const char *input, size_t length, return encode_base64(output, input, length, options); } } +#endif // SIMDUTF_FEATURE_BASE64 } // namespace icelake } // namespace simdutf @@ -28510,10 +31192,10 @@ SIMDUTF_POP_DISABLE_WARNINGS #endif #if SIMDUTF_IMPLEMENTATION_HASWELL /* begin file src/haswell/implementation.cpp */ - /* begin file src/simdutf/haswell/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "haswell" // #define SIMDUTF_IMPLEMENTATION haswell +#define 
SIMDUTF_SIMD_HAS_BYTEMASK #if SIMDUTF_CAN_ALWAYS_RUN_HASWELL // nothing needed. @@ -28528,6 +31210,7 @@ SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) // clang-format on #endif // end of workaround /* end file src/simdutf/haswell/begin.h */ + namespace simdutf { namespace haswell { namespace { @@ -28536,25 +31219,15 @@ namespace { #endif using namespace simd; +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 simdutf_really_inline bool is_ascii(const simd8x64 &input) { return input.reduce_or().is_ascii(); } +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 -simdutf_unused simdutf_really_inline simd8 -must_be_continuation(const simd8 prev1, const simd8 prev2, - const simd8 prev3) { - simd8 is_second_byte = - prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 - simd8 is_third_byte = - prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = - prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction - // will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > - int8_t(0); -} - +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { @@ -28564,288 +31237,33 @@ must_be_2_3_continuation(const simd8 prev2, prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be > 0x80 return simd8(is_third_byte | is_fourth_byte); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +namespace utf16 { /* begin file src/haswell/avx2_validate_utf16.cpp */ -/* - In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. 
- - In a vectorized algorithm we want to examine the most significant - nibble in order to select a fast path. If none of highest nibbles - are 0xD (13), than we are sure that UTF-16 chunk in a vector - register is valid. - - Let us analyze what we need to check if the nibble is 0xD. The - value of the preceding nibble determines what we have: - - 0xd000 .. 0xd7ff - a valid word - 0xd800 .. 0xdbff - low surrogate - 0xdc00 .. 0xdfff - high surrogate - - Other constraints we have to consider: - - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) - - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - - there must not be sole low surrogate nor high surrogate - - We're going to build three bitmasks based on the 3rd nibble: - - V = valid word, - - L = low surrogate (0xd800 .. 0xdbff) - - H = high surrogate (0xdc00 .. 0xdfff) - - 0 1 2 3 4 5 6 7 <--- word index - [ V | L | H | L | H | V | V | L ] - 1 0 0 0 0 1 1 0 - V = valid masks - 0 1 0 1 0 0 0 1 - L = low surrogate - 0 0 1 0 1 0 0 0 - H high surrogate - - - 1 0 0 0 0 1 1 0 V = valid masks - 0 1 0 1 0 0 0 0 a = L & (H >> 1) - 0 0 1 0 1 0 0 0 b = a << 1 - 1 1 1 1 1 1 1 0 c = V | a | b - ^ - the last bit can be zero, we just consume 7 - code units and recheck this word in the next iteration -*/ - -/* Returns: - - pointer to the last unprocessed character (a scalar fallback should check - the rest); - - nullptr if an error was detected. -*/ template -const char16_t *avx2_validate_utf16(const char16_t *input, size_t size) { - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::ELEMENTS * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. 
- auto in0 = simd16(input); - auto in1 = simd16(input + simd16::ELEMENTS); - - if (big_endian) { - in0 = in0.swap_bytes(); - in1 = in1.swap_bytes(); - } +simd8 utf16_gather_high_bytes(const simd16 &in0, + const simd16 &in1) { + if (big_endian) { + // we want lower bytes + const auto mask = simd16(0x00ff); + const auto t0 = in0 & mask; + const auto t1 = in1 & mask; + return simd16::pack(t0, t1); + } else { const auto t0 = in0.shr<8>(); const auto t1 = in1.shr<8>(); - const auto in = simd16::pack(t0, t1); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); - if (surrogates_bitmask == 0x0) { - input += simd16::ELEMENTS * 2; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint32_t V = ~surrogates_bitmask; - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint32_t H = vH.to_bitmask(); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint32_t L = ~H & surrogates_bitmask; - - const uint32_t a = - L & (H >> 1); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint32_t b = - a << 1; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint32_t c = V | a | b; // Combine all the masks into the final one. - - if (c == 0xffffffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. 
- input += simd16::ELEMENTS * 2; - } else if (c == 0x7fffffff) { - // The 31 lower code units of the input register contains valid UTF-16. - // The 31 word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += simd16::ELEMENTS * 2 - 1; - } else { - return nullptr; - } - } + return simd16::pack(t0, t1); } - - return input; -} - -template -const result avx2_validate_utf16_with_errors(const char16_t *input, - size_t size) { - if (simdutf_unlikely(size == 0)) { - return result(error_code::SUCCESS, 0); - } - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::ELEMENTS * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = simd16(input + simd16::ELEMENTS); - - if (big_endian) { - in0 = in0.swap_bytes(); - in1 = in1.swap_bytes(); - } - - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - - const auto in = simd16::pack(t0, t1); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); - if (surrogates_bitmask == 0x0) { - input += simd16::ELEMENTS * 2; - } else { - // 2. 
We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint32_t V = ~surrogates_bitmask; - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint32_t H = vH.to_bitmask(); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint32_t L = ~H & surrogates_bitmask; - - const uint32_t a = - L & (H >> 1); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint32_t b = - a << 1; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint32_t c = V | a | b; // Combine all the masks into the final one. - - if (c == 0xffffffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += simd16::ELEMENTS * 2; - } else if (c == 0x7fffffff) { - // The 31 lower code units of the input register contains valid UTF-16. - // The 31 word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += simd16::ELEMENTS * 2 - 1; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - - return result(error_code::SUCCESS, input - start); } /* end file src/haswell/avx2_validate_utf16.cpp */ -/* begin file src/haswell/avx2_validate_utf32le.cpp */ -/* Returns: - - pointer to the last unprocessed character (a scalar fallback should check - the rest); - - nullptr if an error was detected. 
-*/ -const char32_t *avx2_validate_utf32le(const char32_t *input, size_t size) { - const char32_t *end = input + size; - - const __m256i standardmax = _mm256_set1_epi32(0x10ffff); - const __m256i offset = _mm256_set1_epi32(0xffff2000); - const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff); - __m256i currentmax = _mm256_setzero_si256(); - __m256i currentoffsetmax = _mm256_setzero_si256(); - - while (input + 8 < end) { - const __m256i in = _mm256_loadu_si256((__m256i *)input); - currentmax = _mm256_max_epu32(in, currentmax); - currentoffsetmax = - _mm256_max_epu32(_mm256_add_epi32(in, offset), currentoffsetmax); - input += 8; - } - __m256i is_zero = - _mm256_xor_si256(_mm256_max_epu32(currentmax, standardmax), standardmax); - if (_mm256_testz_si256(is_zero, is_zero) == 0) { - return nullptr; - } - - is_zero = _mm256_xor_si256( - _mm256_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); - if (_mm256_testz_si256(is_zero, is_zero) == 0) { - return nullptr; - } - - return input; } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -const result avx2_validate_utf32le_with_errors(const char32_t *input, - size_t size) { - const char32_t *start = input; - const char32_t *end = input + size; - - const __m256i standardmax = _mm256_set1_epi32(0x10ffff); - const __m256i offset = _mm256_set1_epi32(0xffff2000); - const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff); - __m256i currentmax = _mm256_setzero_si256(); - __m256i currentoffsetmax = _mm256_setzero_si256(); - - while (input + 8 < end) { - const __m256i in = _mm256_loadu_si256((__m256i *)input); - currentmax = _mm256_max_epu32(in, currentmax); - currentoffsetmax = - _mm256_max_epu32(_mm256_add_epi32(in, offset), currentoffsetmax); - - __m256i is_zero = _mm256_xor_si256( - _mm256_max_epu32(currentmax, standardmax), standardmax); - if (_mm256_testz_si256(is_zero, is_zero) == 0) { - return result(error_code::TOO_LARGE, input - start); - } - - is_zero = - 
_mm256_xor_si256(_mm256_max_epu32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (_mm256_testz_si256(is_zero, is_zero) == 0) { - return result(error_code::SURROGATE, input - start); - } - input += 8; - } - - return result(error_code::SUCCESS, input - start); -} -/* end file src/haswell/avx2_validate_utf32le.cpp */ - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/haswell/avx2_convert_latin1_to_utf8.cpp */ std::pair avx2_convert_latin1_to_utf8(const char *latin1_input, size_t len, @@ -28931,45 +31349,42 @@ avx2_convert_latin1_to_utf8(const char *latin1_input, size_t len, return std::make_pair(latin1_input, utf8_output); } /* end file src/haswell/avx2_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/haswell/avx2_convert_latin1_to_utf16.cpp */ template std::pair avx2_convert_latin1_to_utf16(const char *latin1_input, size_t len, char16_t *utf16_output) { - size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 32 + size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 size_t i = 0; for (; i < rounded_len; i += 16) { // Load 16 bytes from the address (input + i) into a xmm register - __m128i xmm0 = + const __m128i latin1 = _mm_loadu_si128(reinterpret_cast(latin1_input + i)); - // Zero extend each byte in xmm0 to word and put it in another xmm register - __m128i xmm1 = _mm_cvtepu8_epi16(xmm0); - - // Shift xmm0 to the right by 8 bytes - xmm0 = _mm_srli_si128(xmm0, 8); - - // Zero extend each byte in the shifted xmm0 to word in xmm0 - xmm0 = _mm_cvtepu8_epi16(xmm0); + // Zero extend each byte in `in` to word + __m256i utf16 = _mm256_cvtepu8_epi16(latin1); if (big_endian) { - const __m128i swap = + const __m128i swap128 = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - xmm0 = _mm_shuffle_epi8(xmm0, swap); - xmm1 = _mm_shuffle_epi8(xmm1, swap); + const __m256i swap = 
_mm256_set_m128i(swap128, swap128); + utf16 = _mm256_shuffle_epi8(utf16, swap); } // Store the contents of xmm1 into the address pointed by (output + i) - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + i), xmm1); - - // Store the contents of xmm0 into the address pointed by (output + i + 8) - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + i + 8), xmm0); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf16_output + i), utf16); } return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len); } /* end file src/haswell/avx2_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/haswell/avx2_convert_latin1_to_utf32.cpp */ std::pair avx2_convert_latin1_to_utf32(const char *buf, size_t len, @@ -28992,7 +31407,9 @@ avx2_convert_latin1_to_utf32(const char *buf, size_t len, return std::make_pair(buf + rounded_len, utf32_output + rounded_len); } /* end file src/haswell/avx2_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file src/haswell/avx2_convert_utf8_to_utf16.cpp */ // depends on "tables/utf8_to_utf16_tables.h" @@ -29190,6 +31607,9 @@ size_t convert_masked_utf8_to_utf16(const char *input, return consumed; } /* end file src/haswell/avx2_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/haswell/avx2_convert_utf8_to_utf32.cpp */ // depends on "tables/utf8_to_utf16_tables.h" @@ -29327,38 +31747,40 @@ size_t convert_masked_utf8_to_utf32(const char *input, return consumed; } /* end file src/haswell/avx2_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/haswell/avx2_convert_utf16_to_latin1.cpp */ template std::pair 
avx2_convert_utf16_to_latin1(const char16_t *buf, size_t len, char *latin1_output) { const char16_t *end = buf + len; - while (end - buf >= 16) { + while (end - buf >= 32) { // Load 16 UTF-16 characters into 256-bit AVX2 register - __m256i in = _mm256_loadu_si256(reinterpret_cast(buf)); + __m256i in0 = _mm256_loadu_si256(reinterpret_cast(buf)); + __m256i in1 = + _mm256_loadu_si256(reinterpret_cast(buf + 16)); if (!match_system(big_endian)) { const __m256i swap = _mm256_setr_epi8( 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); + in0 = _mm256_shuffle_epi8(in0, swap); + in1 = _mm256_shuffle_epi8(in1, swap); } __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00); - if (_mm256_testz_si256(in, high_byte_mask)) { + if (_mm256_testz_si256(_mm256_or_si256(in0, in1), high_byte_mask)) { // Pack 16-bit characters into 8-bit and store in latin1_output - __m128i lo = _mm256_extractf128_si256(in, 0); - __m128i hi = _mm256_extractf128_si256(in, 1); - __m128i latin1_packed_lo = _mm_packus_epi16(lo, lo); - __m128i latin1_packed_hi = _mm_packus_epi16(hi, hi); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), - latin1_packed_lo); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output + 8), - latin1_packed_hi); - // Adjust pointers for next iteration - buf += 16; - latin1_output += 16; + const __m256i packed = _mm256_packus_epi16(in0, in1); + + const __m256i result = _mm256_permute4x64_epi64(packed, 0b11011000); + + _mm256_storeu_si256(reinterpret_cast<__m256i *>(latin1_output), result); + // Adjust pointers for the next iteration + buf += 32; + latin1_output += 32; } else { return std::make_pair(nullptr, reinterpret_cast(latin1_output)); } @@ -29397,9 +31819,8 @@ avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, } else { // Fallback to scalar code for handling errors for (int k = 0; k < 16; k++) { - uint16_t word = 
!match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if (word <= 0xff) { *latin1_output++ = char(word); } else { @@ -29415,6 +31836,9 @@ avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, latin1_output); } /* end file src/haswell/avx2_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file src/haswell/avx2_convert_utf16_to_utf8.cpp */ /* The vectorized algorithm works on single SSE register i.e., it @@ -29705,7 +32129,7 @@ avx2_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -29719,7 +32143,7 @@ avx2_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { @@ -29984,7 +32408,7 @@ avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = big_endian ? 
scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -29998,7 +32422,7 @@ avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { @@ -30019,6 +32443,9 @@ avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); } /* end file src/haswell/avx2_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/haswell/avx2_convert_utf16_to_utf32.cpp */ /* The vectorized algorithm works on single SSE register i.e., it @@ -30123,7 +32550,7 @@ avx2_convert_utf16_to_utf32(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { // No surrogate pair *utf32_output++ = char32_t(word); @@ -30131,7 +32558,7 @@ avx2_convert_utf16_to_utf32(const char16_t *buf, size_t len, // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { @@ -30205,7 +32632,7 @@ avx2_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? 
scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { // No surrogate pair *utf32_output++ = char32_t(word); @@ -30213,7 +32640,7 @@ avx2_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { @@ -30231,7 +32658,9 @@ avx2_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output); } /* end file src/haswell/avx2_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/haswell/avx2_convert_utf32_to_latin1.cpp */ std::pair avx2_convert_utf32_to_latin1(const char32_t *buf, size_t len, @@ -30239,63 +32668,81 @@ avx2_convert_utf32_to_latin1(const char32_t *buf, size_t len, const size_t rounded_len = len & ~0x1F; // Round down to nearest multiple of 32 - __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); + const __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); - __m256i shufmask = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); + for (size_t i = 0; i < rounded_len; i += 4 * 8) { + __m256i a = _mm256_loadu_si256((__m256i *)(buf + 0 * 8)); + __m256i b = _mm256_loadu_si256((__m256i *)(buf + 1 * 8)); + __m256i c = _mm256_loadu_si256((__m256i *)(buf + 2 * 8)); + __m256i d = _mm256_loadu_si256((__m256i *)(buf + 3 * 8)); - for (size_t i = 0; i < rounded_len; i += 16) { - __m256i in1 = _mm256_loadu_si256((__m256i *)buf); - __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8)); - - 
__m256i check_combined = _mm256_or_si256(in1, in2); + const __m256i check_combined = + _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { return std::make_pair(nullptr, latin1_output); } - // Turn UTF32 bytes into latin 1 bytes - __m256i shuffled1 = _mm256_shuffle_epi8(in1, shufmask); - __m256i shuffled2 = _mm256_shuffle_epi8(in2, shufmask); + b = _mm256_slli_epi32(b, 1 * 8); + c = _mm256_slli_epi32(c, 2 * 8); + d = _mm256_slli_epi32(d, 3 * 8); - // move Latin1 bytes to their correct spot - __m256i idx1 = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, 4, 0); - __m256i idx2 = _mm256_set_epi32(-1, -1, -1, -1, 4, 0, -1, -1); - __m256i reshuffled1 = _mm256_permutevar8x32_epi32(shuffled1, idx1); - __m256i reshuffled2 = _mm256_permutevar8x32_epi32(shuffled2, idx2); + // clang-format off - __m256i result = _mm256_or_si256(reshuffled1, reshuffled2); - _mm_storeu_si128((__m128i *)latin1_output, _mm256_castsi256_si128(result)); + // a = [.. .. .. a7|.. .. .. a6|.. .. .. a5|.. .. .. a4||.. .. .. a3|.. .. .. a2|.. .. .. a1|.. .. .. a0] + // b = [.. .. b7 ..|.. .. b6 ..|.. .. b5 ..|.. .. b4 ..||.. .. b3 ..|.. .. b2 ..|.. .. b1 ..|.. .. b0 ..] + // c = [.. c7 .. ..|.. c6 .. ..|.. c5 .. ..|.. c4 .. ..||.. c3 .. ..|.. c2 .. ..|.. c1 .. ..|.. c0 .. ..] + // d = [d7 .. .. ..|d6 .. .. ..|d5 .. .. ..|d4 .. .. ..||d3 .. .. ..|d2 .. .. ..|d1 .. .. ..|d0 .. .. ..] 
- latin1_output += 16; - buf += 16; + // t0 = [d7 c7 b7 a7|d6 c6 b6 a6|d5 c5 b5 a5|d4 c4 b4 a4||d3 c3 b3 a3|d2 c2 b2 a2|d1 c1 b1 a1|d0 c0 b0 a0] + const __m256i t0 = + _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); + + // shuffle bytes within 128-bit lanes + // t1 = [d7 d6 d5 d4|c7 c6 c5 c4|b7 b6 b5 b4|a7 a6 a5 a4||d3 d2 d1 d0|c3 c2 c1 c0|b3 b2 b1 b0|a3 a2 a1 a0] + const __m256i shuffle_bytes = + _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); + + const __m256i t1 = _mm256_shuffle_epi8(t0, shuffle_bytes); + + // reshuffle dwords + // t2 = [d7 d6 d5 d4|d3 d2 d1 d0|c7 c6 c5 c4|c3 c2 c1 c0||b7 b6 b5 b4|b3 b2 b1 b0|a7 a6 a5 a4|a3 a2 a1 a0] + const __m256i shuffle_dwords = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); + const __m256i t2 = _mm256_permutevar8x32_epi32(t1, shuffle_dwords); +// clang format on + + _mm256_storeu_si256((__m256i *)latin1_output, t2); + + latin1_output += 32; + buf += 32; } return std::make_pair(buf, latin1_output); } + std::pair avx2_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, char *latin1_output) { const size_t rounded_len = len & ~0x1F; // Round down to nearest multiple of 32 - __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); - __m256i shufmask = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); - const char32_t *start = buf; - for (size_t i = 0; i < rounded_len; i += 16) { - __m256i in1 = _mm256_loadu_si256((__m256i *)buf); - __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8)); + const __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); - __m256i check_combined = _mm256_or_si256(in1, in2); + for (size_t i = 0; i < rounded_len; i += 4 * 8) { + __m256i a = _mm256_loadu_si256((__m256i *)(buf + 0 * 8)); + __m256i b = _mm256_loadu_si256((__m256i *)(buf + 1 * 8)); + __m256i c = _mm256_loadu_si256((__m256i *)(buf + 2 * 8)); 
+ __m256i d = _mm256_loadu_si256((__m256i *)(buf + 3 * 8)); + + const __m256i check_combined = + _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { // Fallback to scalar code for handling errors - for (int k = 0; k < 8; k++) { + for (int k = 0; k < 4 * 8; k++) { char32_t codepoint = buf[k]; if (codepoint <= 0xFF) { *latin1_output++ = static_cast(codepoint); @@ -30304,29 +32751,37 @@ avx2_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, latin1_output); } } - buf += 8; - } else { - __m256i shuffled1 = _mm256_shuffle_epi8(in1, shufmask); - __m256i shuffled2 = _mm256_shuffle_epi8(in2, shufmask); - - __m256i idx1 = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, 4, 0); - __m256i idx2 = _mm256_set_epi32(-1, -1, -1, -1, 4, 0, -1, -1); - __m256i reshuffled1 = _mm256_permutevar8x32_epi32(shuffled1, idx1); - __m256i reshuffled2 = _mm256_permutevar8x32_epi32(shuffled2, idx2); - - __m256i result = _mm256_or_si256(reshuffled1, reshuffled2); - _mm_storeu_si128((__m128i *)latin1_output, - _mm256_castsi256_si128(result)); - - latin1_output += 16; - buf += 16; } + + b = _mm256_slli_epi32(b, 1 * 8); + c = _mm256_slli_epi32(c, 2 * 8); + d = _mm256_slli_epi32(d, 3 * 8); + + const __m256i t0 = + _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); + + const __m256i shuffle_bytes = + _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); + + const __m256i t1 = _mm256_shuffle_epi8(t0, shuffle_bytes); + + const __m256i shuffle_dwords = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); + const __m256i t2 = _mm256_permutevar8x32_epi32(t1, shuffle_dwords); + + _mm256_storeu_si256((__m256i *)latin1_output, t2); + + latin1_output += 32; + buf += 32; } return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output); } /* end file src/haswell/avx2_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && 
SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/haswell/avx2_convert_utf32_to_utf8.cpp */ std::pair avx2_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { @@ -30898,6 +33353,9 @@ avx2_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); } /* end file src/haswell/avx2_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/haswell/avx2_convert_utf32_to_utf16.cpp */ template std::pair @@ -30910,21 +33368,16 @@ avx2_convert_utf32_to_utf16(const char32_t *buf, size_t len, // https://github.com/simdutf/simdutf/issues/92 __m256i forbidden_bytemask = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); + const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); + const __m256i in = _mm256_loadu_si256((__m256i *)buf); - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); - - // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __m256i saturation_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - - if (saturation_bitmask == 0xffffffff) { - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + if (simdutf_likely(_mm256_testz_si256(in, v_ffff0000))) { + // no bits set above 16th bit <=> can pack to UTF16 + // without surrogate pairs forbidden_bytemask = _mm256_or_si256( forbidden_bytemask, 
_mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800)); @@ -30997,21 +33450,16 @@ avx2_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 + const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); + const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); + const __m256i in = _mm256_loadu_si256((__m256i *)buf); - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); - - // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __m256i saturation_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - - if (saturation_bitmask == 0xffffffff) { - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + if (simdutf_likely(_mm256_testz_si256(in, v_ffff0000))) { + // no bits set above 16th bit <=> can pack to UTF16 without surrogate + // pairs const __m256i forbidden_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800); if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != @@ -31074,7 +33522,9 @@ avx2_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); } /* end file src/haswell/avx2_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/haswell/avx2_convert_utf8_to_latin1.cpp */ // depends on "tables/utf8_to_utf16_tables.h" @@ -31137,7 +33587,9 @@ size_t convert_masked_utf8_to_latin1(const char 
*input, return consumed; } /* end file src/haswell/avx2_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 /* begin file src/haswell/avx2_base64.cpp */ /** * References and further reading: @@ -31340,162 +33792,21 @@ static inline void compress(__m128i data, uint16_t mask, char *output) { _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); } -static inline void compress(__m256i data, uint32_t mask, char *output) { +// --- decoding ----------------------------------------------- + +template +simdutf_really_inline void compress(__m256i data, uint32_t mask, char *output) { if (mask == 0) { _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), data); return; } compress(_mm256_castsi256_si128(data), uint16_t(mask), output); compress(_mm256_extracti128_si256(data, 1), uint16_t(mask >> 16), - output + _mm_popcnt_u32(~mask & 0xFFFF)); + output + count_ones(~mask & 0xFFFF)); } -struct block64 { - __m256i chunks[2]; -}; - -template -static inline uint32_t to_base64_mask(__m256i *src, uint32_t *error) { - const __m256i ascii_space_tbl = - _mm256_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, - 0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0); - // credit: aqrit - __m256i delta_asso; - if (base64_url) { - delta_asso = - _mm256_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0, - 0x0, 0x0, 0xF, 0x0, 0xF, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF); - } else { - delta_asso = _mm256_setr_epi8( - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x0F, 0x00, 0x0F, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); - } - - __m256i delta_values; - if (base64_url) { - delta_values = _mm256_setr_epi8( - 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9), - uint8_t(0xB9), 0x0, 0x11, 
uint8_t(0xC3), uint8_t(0xBF), uint8_t(0xE0), - uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), - uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), - uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); - } else { - delta_values = _mm256_setr_epi8( - int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), int8_t(0x04), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), int8_t(0x00), - int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), - int8_t(0x00), int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), - int8_t(0xB9), int8_t(0xB9)); - } - __m256i check_asso; - - if (base64_url) { - check_asso = - _mm256_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x3, - 0x7, 0xB, 0xE, 0xB, 0x6, 0xD, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x1, 0x1, 0x1, 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6); - } else { - - check_asso = _mm256_setr_epi8( - 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, - 0x0B, 0x0B, 0x0B, 0x0F, 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); - } - __m256i check_values; - if (base64_url) { - check_values = _mm256_setr_epi8( - uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), - uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), uint8_t(0xA6), - uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, uint8_t(0x80), - 0x0, uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), - uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), - uint8_t(0xA6), uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, - uint8_t(0x80), 0x0, uint8_t(0x80)); - } else { - check_values = _mm256_setr_epi8( - int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0xCF), - int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), int8_t(0x86), - int8_t(0xD1), int8_t(0x80), 
int8_t(0xB1), int8_t(0x80), int8_t(0x91), - int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), - int8_t(0x86), int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), - int8_t(0x91), int8_t(0x80)); - } - const __m256i shifted = _mm256_srli_epi32(*src, 3); - const __m256i delta_hash = - _mm256_avg_epu8(_mm256_shuffle_epi8(delta_asso, *src), shifted); - const __m256i check_hash = - _mm256_avg_epu8(_mm256_shuffle_epi8(check_asso, *src), shifted); - const __m256i out = - _mm256_adds_epi8(_mm256_shuffle_epi8(delta_values, delta_hash), *src); - const __m256i chk = - _mm256_adds_epi8(_mm256_shuffle_epi8(check_values, check_hash), *src); - const int mask = _mm256_movemask_epi8(chk); - if (!ignore_garbage && mask) { - __m256i ascii_space = - _mm256_cmpeq_epi8(_mm256_shuffle_epi8(ascii_space_tbl, *src), *src); - *error = (mask ^ _mm256_movemask_epi8(ascii_space)); - } - *src = out; - return (uint32_t)mask; -} - -template -static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { - uint32_t err0 = 0; - uint32_t err1 = 0; - uint64_t m0 = - to_base64_mask(&b->chunks[0], &err0); - uint64_t m1 = - to_base64_mask(&b->chunks[1], &err1); - if (!ignore_garbage) { - *error = err0 | ((uint64_t)err1 << 32); - } - return m0 | (m1 << 32); -} - -static inline void copy_block(block64 *b, char *output) { - _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), b->chunks[0]); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + 32), b->chunks[1]); -} - -static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { - uint64_t nmask = ~mask; - compress(b->chunks[0], uint32_t(mask), output); - compress(b->chunks[1], uint32_t(mask >> 32), - output + _mm_popcnt_u64(nmask & 0xFFFFFFFF)); - return _mm_popcnt_u64(nmask); -} - -// The caller of this function is responsible to ensure that there are 64 bytes -// available from reading at src. 
The data is read into a block64 structure. -static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = _mm256_loadu_si256(reinterpret_cast(src)); - b->chunks[1] = - _mm256_loadu_si256(reinterpret_cast(src + 32)); -} - -// The caller of this function is responsible to ensure that there are 128 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char16_t *src) { - __m256i m1 = _mm256_loadu_si256(reinterpret_cast(src)); - __m256i m2 = _mm256_loadu_si256(reinterpret_cast(src + 16)); - __m256i m3 = _mm256_loadu_si256(reinterpret_cast(src + 32)); - __m256i m4 = _mm256_loadu_si256(reinterpret_cast(src + 48)); - __m256i m1p = _mm256_permute2x128_si256(m1, m2, 0x20); - __m256i m2p = _mm256_permute2x128_si256(m1, m2, 0x31); - __m256i m3p = _mm256_permute2x128_si256(m3, m4, 0x20); - __m256i m4p = _mm256_permute2x128_si256(m3, m4, 0x31); - b->chunks[0] = _mm256_packus_epi16(m1p, m2p); - b->chunks[1] = _mm256_packus_epi16(m3p, m4p); -} - -static inline void base64_decode(char *out, __m256i str) { +template +simdutf_really_inline void base64_decode(char *out, __m256i str) { // credit: aqrit const __m256i pack_shuffle = _mm256_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, @@ -31508,14 +33819,18 @@ static inline void base64_decode(char *out, __m256i str) { _mm_storeu_si128((__m128i *)out, _mm256_castsi256_si128(t2)); _mm_storeu_si128((__m128i *)(out + 12), _mm256_extracti128_si256(t2, 1)); } -// decode 64 bytes and output 48 bytes -static inline void base64_decode_block(char *out, const char *src) { + +template +simdutf_really_inline void base64_decode_block(char *out, const char *src) { base64_decode(out, _mm256_loadu_si256(reinterpret_cast(src))); base64_decode(out + 24, _mm256_loadu_si256( reinterpret_cast(src + 32))); } -static inline void base64_decode_block_safe(char *out, const char *src) { + +template +simdutf_really_inline void 
base64_decode_block_safe(char *out, + const char *src) { base64_decode(out, _mm256_loadu_si256(reinterpret_cast(src))); char buffer[32]; // We enforce safety with a buffer. @@ -31523,219 +33838,246 @@ static inline void base64_decode_block_safe(char *out, const char *src) { buffer, _mm256_loadu_si256(reinterpret_cast(src + 32))); std::memcpy(out + 24, buffer, 24); } -static inline void base64_decode_block(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); - base64_decode(out + 24, b->chunks[1]); -} -static inline void base64_decode_block_safe(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); - char buffer[32]; // We enforce safety with a buffer. - base64_decode(buffer, b->chunks[1]); - std::memcpy(out + 24, buffer, 24); -} -template -full_result -compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - // skip trailing spaces - while (!ignore_garbage && srclen > 0 && - scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equalsigns = 0; - if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - 
return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - char *end_of_safe_64byte_zone = - (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; +// --- decoding - base64 class -------------------------------- - const chartype *const srcinit = src; - const char *const dstinit = dst; - const chartype *const srcend = src + srclen; +class block64 { + __m256i chunks[2]; - constexpr size_t block_size = 6; - static_assert(block_size >= 2, "block_size must be at least two"); - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const chartype *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b; - load_block(&b, src); - src += 64; - uint64_t error = 0; - uint64_t badcharmask = - to_base64_mask(&b, &error); - if (!ignore_garbage && error) { - src -= 64; - size_t error_offset = _tzcnt_u64(error); - return {error_code::INVALID_BASE64_CHARACTER, - size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; - } - if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. 
- bufferptr += compress_block(&b, badcharmask, bufferptr); - } else if (bufferptr != buffer) { - copy_block(&b, bufferptr); - bufferptr += 64; - } else { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, &b); - } else { - base64_decode_block(dst, &b); - } - dst += 48; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 2); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); - } else { - base64_decode_block(dst, buffer + (block_size - 2) * 64); - } - dst += 48; - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } +public: + // The caller of this function is responsible to ensure that there are 64 + // bytes available from reading at src. + simdutf_really_inline block64(const char *src) { + chunks[0] = _mm256_loadu_si256(reinterpret_cast(src)); + chunks[1] = _mm256_loadu_si256(reinterpret_cast(src + 32)); } - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { + // The caller of this function is responsible to ensure that there are 128 + // bytes available from reading at src. 
+ simdutf_really_inline block64(const char16_t *src) { + const auto m1 = _mm256_loadu_si256(reinterpret_cast(src)); + const auto m2 = + _mm256_loadu_si256(reinterpret_cast(src + 16)); + const auto m3 = + _mm256_loadu_si256(reinterpret_cast(src + 32)); + const auto m4 = + _mm256_loadu_si256(reinterpret_cast(src + 48)); - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if (!ignore_garbage && - (!scalar::base64::is_eight_byte(*src) || val > 64)) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - bufferptr += (val <= 63); - src++; - } + const auto m1p = _mm256_permute2x128_si256(m1, m2, 0x20); + const auto m2p = _mm256_permute2x128_si256(m1, m2, 0x31); + const auto m3p = _mm256_permute2x128_si256(m3, m4, 0x20); + const auto m4p = _mm256_permute2x128_si256(m3, m4, 0x31); + + chunks[0] = _mm256_packus_epi16(m1p, m2p); + chunks[1] = _mm256_packus_epi16(m3p, m4p); } - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer_start); + simdutf_really_inline void copy_block(char *output) { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), chunks[0]); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + 32), chunks[1]); + } + + // decode 64 bytes and output 48 bytes + simdutf_really_inline void base64_decode_block(char *out) { + base64_decode(out, chunks[0]); + base64_decode(out + 24, chunks[1]); + } + + simdutf_really_inline void base64_decode_block_safe(char *out) { + base64_decode(out, chunks[0]); + char buffer[32]; // We enforce safety with a buffer. 
+ base64_decode(buffer, chunks[1]); + std::memcpy(out + 24, buffer, 24); + } + + template + simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) { + uint32_t err0 = 0; + uint32_t err1 = 0; + uint64_t m0 = to_base64_mask(&chunks[0], &err0); + uint64_t m1 = to_base64_mask(&chunks[1], &err1); + if (!ignore_garbage) { + *error = err0 | ((uint64_t)err1 << 32); + } + return m0 | (m1 << 32); + } + + template + simdutf_really_inline uint32_t to_base64_mask(__m256i *src, uint32_t *error) { + const __m256i ascii_space_tbl = + _mm256_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, + 0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0); + // credit: aqrit + __m256i delta_asso; + if (base64_url) { + delta_asso = _mm256_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, + 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF, 0x1, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xF, 0x0, 0xF); } else { - base64_decode_block(dst, buffer_start); + delta_asso = _mm256_setr_epi8( + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0F, 0x00, 0x0F, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); } - dst += 48; - } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::utf32::swap_bytes(triple); - std::memcpy(dst, &triple, 4); - dst += 3; - buffer_start += 4; - } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = 
scalar::utf32::swap_bytes(triple); - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - if (!ignore_garbage) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - } else { - while (to_base64[uint8_t(*(src - 1))] >= 64) { - src--; - } - } - src--; - leftover--; - } - } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - r.input_count += size_t(src - srcinit); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - return r; + __m256i delta_values; + if (base64_url) { + delta_values = _mm256_setr_epi8( + 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9), + uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), uint8_t(0xE0), + uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), + uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), + uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); } else { - r.output_count += size_t(dst - dstinit); + delta_values = _mm256_setr_epi8( + int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), int8_t(0x04), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), int8_t(0x00), + int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), + int8_t(0x00), int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), + int8_t(0xB9), int8_t(0xB9)); } - if (!ignore_garbage && last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - 
r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } + + __m256i check_asso; + if (base64_url) { + check_asso = _mm256_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6, 0xD, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x3, + 0x7, 0xB, 0xE, 0xB, 0x6); + } else { + check_asso = _mm256_setr_epi8( + 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x03, + 0x07, 0x0B, 0x0B, 0x0B, 0x0F, 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); } - return r; + __m256i check_values; + if (base64_url) { + check_values = _mm256_setr_epi8( + uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), uint8_t(0xA6), + uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, uint8_t(0x80), + 0x0, uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), + uint8_t(0xA6), uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, + uint8_t(0x80), 0x0, uint8_t(0x80)); + } else { + check_values = _mm256_setr_epi8( + int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0xCF), + int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), int8_t(0x86), + int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), int8_t(0x91), + int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), + int8_t(0x86), int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), + int8_t(0x91), int8_t(0x80)); + } + const __m256i shifted = _mm256_srli_epi32(*src, 3); + const __m256i delta_hash = + _mm256_avg_epu8(_mm256_shuffle_epi8(delta_asso, *src), shifted); + const __m256i check_hash = + _mm256_avg_epu8(_mm256_shuffle_epi8(check_asso, *src), shifted); + const __m256i out = + _mm256_adds_epi8(_mm256_shuffle_epi8(delta_values, delta_hash), *src); + const __m256i chk = + 
_mm256_adds_epi8(_mm256_shuffle_epi8(check_values, check_hash), *src); + const int mask = _mm256_movemask_epi8(chk); + if (!ignore_garbage && mask) { + __m256i ascii_space = + _mm256_cmpeq_epi8(_mm256_shuffle_epi8(ascii_space_tbl, *src), *src); + *error = (mask ^ _mm256_movemask_epi8(ascii_space)); + } + *src = out; + return (uint32_t)mask; } - if (!ignore_garbage && equalsigns > 0) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + + simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) { + if (is_power_of_two(mask)) { + return compress_block_single(mask, output); } + + uint64_t nmask = ~mask; + compress(chunks[0], uint32_t(mask), output); + compress(chunks[1], uint32_t(mask >> 32), + output + count_ones(nmask & 0xFFFFFFFF)); + return count_ones(nmask); } - return {SUCCESS, srclen, size_t(dst - dstinit)}; -} + + simdutf_really_inline size_t compress_block_single(uint64_t mask, + char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + switch (pos64 >> 4) { + case 0b00: { + const __m128i lane0 = _mm256_extracti128_si256(chunks[0], 0); + const __m128i lane1 = _mm256_extracti128_si256(chunks[0], 1); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(lane0, sh); + + _mm_storeu_si128((__m128i *)(output + 0 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 1 * 16 - 1), lane1); + _mm256_storeu_si256((__m256i *)(output + 2 * 16 - 1), chunks[1]); + } break; + case 0b01: { + const __m128i lane0 = _mm256_extracti128_si256(chunks[0], 0); + const __m128i lane1 = _mm256_extracti128_si256(chunks[0], 1); + _mm_storeu_si128((__m128i *)(output + 0 * 16), lane0); 
+ + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(lane1, sh); + + _mm_storeu_si128((__m128i *)(output + 1 * 16), compressed); + _mm256_storeu_si256((__m256i *)(output + 2 * 16 - 1), chunks[1]); + } break; + case 0b10: { + const __m128i lane2 = _mm256_extracti128_si256(chunks[1], 0); + const __m128i lane3 = _mm256_extracti128_si256(chunks[1], 1); + + _mm256_storeu_si256((__m256i *)(output + 0 * 16), chunks[0]); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(lane2, sh); + + _mm_storeu_si128((__m128i *)(output + 2 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), lane3); + } break; + case 0b11: { + const __m128i lane2 = _mm256_extracti128_si256(chunks[1], 0); + const __m128i lane3 = _mm256_extracti128_si256(chunks[1], 1); + + _mm256_storeu_si256((__m256i *)(output + 0 * 16), chunks[0]); + _mm_storeu_si128((__m128i *)(output + 2 * 16), lane2); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(lane3, sh); + + _mm_storeu_si128((__m128i *)(output + 3 * 16), compressed); + } break; + } + + return 63; + } +}; /* end file src/haswell/avx2_base64.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 } // unnamed namespace } // namespace haswell @@ -31852,6 +34194,7 @@ simdutf_really_inline void buf_block_reader::advance() { } // namespace haswell } // namespace simdutf /* end file 
src/generic/buf_block_reader.h */ +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ namespace simdutf { namespace haswell { @@ -32158,9 +34501,22 @@ result generic_validate_utf8_with_errors(const char *input, size_t length) { reinterpret_cast(input), length); } -template -bool generic_validate_ascii(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +} // namespace utf8_validation +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace ascii_validation { + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); uint8_t blocks[64]{}; simd::simd8x64 running_or(blocks); while (reader.has_full_block()) { @@ -32175,14 +34531,8 @@ bool generic_validate_ascii(const uint8_t *input, size_t length) { return running_or.is_ascii(); } -bool generic_validate_ascii(const char *input, size_t length) { - return generic_validate_ascii( - reinterpret_cast(input), length); -} - -template -result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); size_t count{0}; while (reader.has_full_block()) { simd::simd8x64 in(reader.full_block()); @@ -32207,19 +34557,16 @@ result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { } } -result generic_validate_ascii_with_errors(const char *input, size_t length) { - return generic_validate_ascii_with_errors( - reinterpret_cast(input), length); -} - -} 
// namespace utf8_validation +} // namespace ascii_validation } // unnamed namespace } // namespace haswell } // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -// transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ namespace simdutf { namespace haswell { namespace { @@ -32296,7 +34643,6 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace simdutf /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ - namespace simdutf { namespace haswell { namespace { @@ -32630,9 +34976,11 @@ struct validating_transcoder { } // namespace haswell } // namespace simdutf /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -// transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + // transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ namespace simdutf { namespace haswell { namespace { @@ -32677,7 +35025,6 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace simdutf /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ - namespace simdutf { namespace haswell { namespace { @@ -32997,9 +35344,119 @@ struct validating_transcoder { } // namespace haswell } // namespace simdutf /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -// other functions -/* begin file src/generic/utf8.h */ +/* begin file src/generic/utf32.h */ +#include +namespace simdutf { +namespace 
haswell { +namespace { +namespace utf32 { + +template T min(T a, T b) { return a <= b ? a : b; } + +size_t utf8_length_from_utf32(const char32_t *input, size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + + const auto one = vector_u32::splat(1); + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + + size_t counter = 0; + + // 1. vectorized loop unrolled 4 times + { + // we use uint32 counters, this is + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); + + input += 4 * N; + } + + counter += acc.sum(); + } + } + + // 2. 
vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end. + counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); +} + +} // namespace utf32 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +// other functions +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/utf8.h */ namespace simdutf { namespace haswell { namespace { @@ -33018,6 +35475,59 @@ simdutf_really_inline size_t count_code_points(const char *in, size_t size) { return count + scalar::utf8::count_code_points(in + pos, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + 
simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif + simdutf_really_inline size_t utf16_length_from_utf8(const char *in, size_t size) { size_t pos = 0; @@ -33039,6 +35549,9 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, } // namespace haswell } // namespace simdutf /* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 /* begin file src/generic/utf16.h */ namespace simdutf { namespace haswell { @@ -33088,6 +35601,89 @@ simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t size) { + size_t pos = 0; + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input = input.swap_bytes(); + 
} + + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. + + Thus, we always have correct count for the current char: + from 1, 2 or 3 bytes. + + A trickier part is how we count surrogate pairs. Whether + we encounter a surrogate (low or high), we count it as + 3 chars and then minus 1 (`is_surrogate` is -1 or 0). + Each surrogate char yields 2. A surrogate pair, that + is a low surrogate followed by a high one, yields + the expected 4 bytes. + + It also correctly handles cases when low surrogate is + processed by the this loop, but high surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach, thanks to that for valid UTF-16 + strings it always count correctly. 
+ */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + template simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, size_t size) { @@ -33114,10 +35710,146 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { } // namespace haswell } // namespace simdutf /* end file src/generic/utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- -// transcoding from UTF-8 to Latin 1 + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 
0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. + const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. 
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + // transcoding from UTF-8 to Latin 1 /* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ - namespace simdutf { namespace haswell { namespace { @@ -33436,7 +36168,6 @@ struct validating_transcoder { } // namespace simdutf /* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ /* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ - namespace simdutf { namespace haswell { namespace { @@ -33516,10 +36247,366 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, } // namespace simdutf // namespace simdutf /* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf32.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf32 { + +simdutf_really_inline bool validate(const char32_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return true; + } + + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + auto currentmax = vector_u32::zero(); + auto currentoffsetmax = vector_u32::zero(); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + currentmax = max(currentmax, in); + currentoffsetmax = max(currentoffsetmax, in + offset); + input += N; + } + + const auto too_large = currentmax > standardmax; + if (too_large.any()) { + return false; + } + + const auto surrogate = currentoffsetmax > standardoffsetmax; + if (surrogate.any()) { + return false; + } + + return scalar::utf32::validate(input, end - input); +} + +simdutf_really_inline result validate_with_errors(const char32_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return result(error_code::SUCCESS, 0); + } + + const char32_t *start = input; + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + const auto too_large = in > standardmax; + const auto surrogate = (in + offset) > standardoffsetmax; + + const auto combined = too_large | surrogate; + if (simdutf_unlikely(combined.any())) { + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; + } + + input += N; + } + + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; +} + +} // namespace utf32 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/validate_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/generic/base64.h */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. 
Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ +namespace simdutf { +namespace haswell { +namespace { +namespace base64 { + +/* + The following template function implements API for Base64 decoding. + + An implementation is responsible for providing the `block64` type and + associated methods that perform actual conversion. Please refer + to any vectorized implementation to learn the API of these procedures. +*/ +template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = base64_url ? 
tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + size_t equallocation = + srclen; // location of the first padding character if any + // skip trailing spaces + while (!ignore_garbage && srclen > 0 && + scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + size_t equalsigns = 0; + if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 1; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 2; + } + } + if (srclen == 0) { + if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + char *end_of_safe_64byte_zone = + (srclen + 3) / 4 * 3 >= 63 ? 
dst + (srclen + 3) / 4 * 3 - 63 : dst; + + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; + + constexpr size_t block_size = 6; + static_assert(block_size >= 2, "block_size must be at least two"); + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b(src); + src += 64; + uint64_t error = 0; + const uint64_t badcharmask = + b.to_base64_mask(&error); + if (!ignore_garbage && error) { + src -= 64; + const size_t error_offset = trailing_zeroes(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; + } + if (badcharmask != 0) { + bufferptr += b.compress_block(badcharmask, bufferptr); + } else if (bufferptr != buffer) { + b.copy_block(bufferptr); + bufferptr += 64; + } else { + if (dst >= end_of_safe_64byte_zone) { + b.base64_decode_block_safe(dst); + } else { + b.base64_decode_block(dst); + } + dst += 48; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 2); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); + } else { + base64_decode_block(dst, buffer + (block_size - 2) * 64); + } + dst += 48; + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. 
+ int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if (!ignore_garbage && + (!scalar::base64::is_eight_byte(*src) || val > 64)) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer_start); + } else { + base64_decode_block(dst, buffer_start); + } + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + if (!ignore_garbage) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + } else { + while (to_base64[uint8_t(*(src - 1))] >= 64) { + src--; + } + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = 
scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r.input_count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + return r; + } else { + r.output_count += size_t(dst - dstinit); + } + if (!ignore_garbage && last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + r.error = error_code::INVALID_BASE64_CHARACTER; + r.input_count = equallocation; + } + } + return r; + } + if (!ignore_garbage && equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} + +} // namespace base64 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/base64.h */ +#endif // SIMDUTF_FEATURE_BASE64 namespace simdutf { namespace haswell { +#if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { @@ -33628,27 +36715,36 @@ implementation::detect_encodings(const char *input, } return out; } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { return haswell::utf8_validation::generic_validate_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { return haswell::utf8_validation::generic_validate_utf8_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 +#if 
SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return haswell::utf8_validation::generic_validate_ascii(buf, len); + return haswell::ascii_validation::generic_validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return haswell::utf8_validation::generic_validate_ascii_with_errors(buf, len); + return haswell::ascii_validation::generic_validate_ascii_with_errors(buf, + len); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept { @@ -33657,15 +36753,22 @@ implementation::validate_utf16le(const char16_t *buf, // handling nullptr return true; } - const char16_t *tail = avx2_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, - len - (tail - buf)); - } else { + const auto res = + haswell::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { return false; } -} + if (res.count == len) { + return true; + } + + return scalar::utf16::validate(buf + res.count, + len - res.count); +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept { @@ -33674,20 +36777,29 @@ implementation::validate_utf16be(const char16_t *buf, // handling nullptr return true; } - const char16_t *tail = avx2_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, len - (tail - buf)); - } else { + const auto res = + haswell::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { return false; } + + if (res.count == len) { + return true; + } + + return scalar::utf16::validate(buf + res.count, + len - res.count); } simdutf_warn_unused result 
implementation::validate_utf16le_with_errors( const char16_t *buf, size_t len) const noexcept { - result res = avx2_validate_utf16_with_errors(buf, len); + + const result res = + haswell::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); + const result scalar_res = + scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); return result(scalar_res.error, res.count + scalar_res.count); } else { return res; @@ -33696,48 +36808,34 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors( simdutf_warn_unused result implementation::validate_utf16be_with_errors( const char16_t *buf, size_t len) const noexcept { - result res = avx2_validate_utf16_with_errors(buf, len); + const result res = + haswell::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); + const result scalar_res = + scalar::utf16::validate_with_errors(buf + res.count, + len - res.count); return result(scalar_res.error, res.count + scalar_res.count); } else { return res; } } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-32. 
protect the implementation from - // handling nullptr - return true; - } - const char32_t *tail = avx2_validate_utf32le(buf, len); - if (tail) { - return scalar::utf32::validate(tail, len - (tail - buf)); - } else { - return false; - } + return utf32::validate(buf, len); } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr - return result(error_code::SUCCESS, 0); - } - result res = avx2_validate_utf32le_with_errors(buf, len); - if (res.count != len) { - result scalar_res = - scalar::utf32::validate_with_errors(buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } + return utf32::validate_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -33752,7 +36850,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = @@ -33792,7 +36892,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ 
-33811,7 +36913,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { utf8_to_latin1::validating_transcoder converter; @@ -33828,7 +36932,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( const char *input, size_t size, char *latin1_output) const noexcept { return utf8_to_latin1::convert_valid(input, size, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { utf8_to_utf16::validating_transcoder converter; @@ -33865,7 +36971,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( return utf8_to_utf16::convert_valid(input, size, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { utf8_to_utf32::validating_transcoder converter; @@ -33882,7 +36990,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( const char *input, size_t size, char32_t *utf32_output) const noexcept { return utf8_to_utf32::convert_valid(input, size, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -33990,7 +37100,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( 
// optimization opportunity: implement a custom function return convert_utf16le_to_latin1(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -34098,7 +37210,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { return convert_utf16be_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -34117,7 +37231,9 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( } return saved_bytes; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -34163,7 +37279,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { return convert_utf32_to_latin1(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( const char32_t *buf, size_t len, char *utf8_output) const noexcept { // ret.first.count is always the position in the buffer, not the number of @@ -34185,7 +37303,9 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( utf8_output; // Set count to the number of 8-bit code units written return ret.first; } 
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ -34283,12 +37403,16 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( utf32_output; // Set count to the number of 8-bit code units written return ret.first; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { return convert_utf32_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = @@ -34396,7 +37520,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { return convert_utf16be_to_utf32(buf, len, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 void implementation::change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept { @@ -34412,37 +37538,36 @@ simdutf_warn_unused size_t implementation::count_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); +implementation::count_utf8(const char *in, size_t size) const noexcept { + return utf8::count_code_points_bytemask(in, 
size); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::latin1_length_from_utf8( const char *buf, size_t len) const noexcept { return count_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return scalar::utf16::latin1_length_from_utf16(length); -} - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t length) const noexcept { - return scalar::utf32::latin1_length_from_utf32(length); -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16(input, length); + return utf16::utf8_length_from_utf16_bytemask(input, + length); } simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16(input, length); + return utf16::utf8_length_from_utf16_bytemask(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); @@ -34452,22 +37577,16 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t 
implementation::utf16_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::utf16_length_from_utf8(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::utf8_length_from_latin1( const char *input, size_t len) const noexcept { const uint8_t *data = reinterpret_cast(input); @@ -34523,43 +37642,16 @@ simdutf_warn_unused size_t implementation::utf8_length_from_latin1( return answer + scalar::latin1::utf8_length_from_latin1( reinterpret_cast(data + i), len - i); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const noexcept { - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffffff80 = _mm256_set1_epi32((uint32_t)0xffffff80); - const __m256i v_fffff800 = _mm256_set1_epi32((uint32_t)0xfffff800); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - size_t pos = 0; - size_t count = 0; - for (; pos + 8 <= length; pos += 8) { - __m256i in = _mm256_loadu_si256((__m256i *)(input + pos)); - const __m256i ascii_bytes_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffffff80), v_00000000); - const __m256i one_two_bytes_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_fffff800), v_00000000); - const __m256i two_bytes_bytemask = - _mm256_xor_si256(one_two_bytes_bytemask, ascii_bytes_bytemask); - const __m256i one_two_three_bytes_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const __m256i three_bytes_bytemask = - _mm256_xor_si256(one_two_three_bytes_bytemask, one_two_bytes_bytemask); - const uint32_t ascii_bytes_bitmask = - 
static_cast(_mm256_movemask_epi8(ascii_bytes_bytemask)); - const uint32_t two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(two_bytes_bytemask)); - const uint32_t three_bytes_bitmask = - static_cast(_mm256_movemask_epi8(three_bytes_bytemask)); - - size_t ascii_count = count_ones(ascii_bytes_bitmask) / 4; - size_t two_bytes_count = count_ones(two_bytes_bitmask) / 4; - size_t three_bytes_count = count_ones(three_bytes_bitmask) / 4; - count += 32 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; - } - return count + - scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); + return utf32::utf8_length_from_utf32(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { const __m256i v_00000000 = _mm256_setzero_si256(); @@ -34578,35 +37670,34 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32( return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - +#if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { if (options & base64_url) { if (options == base64_options::base64_url_accept_garbage) { - return 
compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } else { if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } } @@ -34616,46 +37707,41 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( last_chunk_handling_options last_chunk_options) const noexcept { if (options & base64_url) { if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } else { if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const 
char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { if (options & base64_url) { if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } else { if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } } @@ -34665,28 +37751,23 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( last_chunk_handling_options last_chunk_options) const noexcept { if (options & base64_url) { if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } else { if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, 
length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); -} - size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { @@ -34696,6 +37777,7 @@ size_t implementation::binary_to_base64(const char *input, size_t length, return encode_base64(output, input, length, options); } } +#endif // SIMDUTF_FEATURE_BASE64 } // namespace haswell } // namespace simdutf @@ -34706,6 +37788,7 @@ size_t implementation::binary_to_base64(const char *input, size_t length, SIMDUTF_UNTARGET_REGION #endif +#undef SIMDUTF_SIMD_HAS_BYTEMASK #if SIMDUTF_GCC11ORMORE // workaround for // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 @@ -34716,15 +37799,1025 @@ SIMDUTF_POP_DISABLE_WARNINGS #endif #if SIMDUTF_IMPLEMENTATION_PPC64 /* begin file src/ppc64/implementation.cpp */ - - - - - /* begin file src/simdutf/ppc64/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "ppc64" // #define SIMDUTF_IMPLEMENTATION ppc64 /* end file src/simdutf/ppc64/begin.h */ + +/* begin file src/ppc64/ppc64_utf16_to_utf8_tables.h */ +// Code generated automatically; DO NOT EDIT +// file generated by scripts/ppc64_convert_utf16_to_utf8.py +#ifndef PPC64_SIMDUTF_UTF16_TO_UTF8_TABLES_H +#define PPC64_SIMDUTF_UTF16_TO_UTF8_TABLES_H + +namespace simdutf { +namespace { +namespace tables { +namespace ppc64_utf16_to_utf8 { + +#if SIMDUTF_IS_BIG_ENDIAN +// 1 byte for length, 16 bytes for mask +const uint8_t pack_1_2_3_utf8_bytes[256][17] = { + {12, 1, 0, 16, 3, 2, 18, 5, 4, 20, 7, 6, 
22, 0x80, 0x80, 0x80, 0x80}, + {9, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 16, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 17, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 1, 0, 16, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {11, 1, 0, 16, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 16, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 17, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 1, 0, 16, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 0, 16, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 1, 0, 16, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 2, 18, 7, 6, 22, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {11, 1, 0, 16, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 16, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 17, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 1, 0, 16, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 0, 16, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 1, 0, 16, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 1, 0, 16, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 1, 0, 16, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 5, 4, 20, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 1, 0, 16, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 1, 0, 16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {2, 0, 16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 17, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {5, 1, 0, 16, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 0, 16, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 1, 0, 16, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 0, 16, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 17, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 1, 0, 16, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 0, 16, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 1, 0, 16, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 1, 0, 16, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 1, 0, 16, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + 
{4, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 1, 0, 16, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 0, 16, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 17, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {6, 1, 0, 16, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 0, 16, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 0, 16, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {11, 1, 0, 16, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 16, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 17, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, + 0x80}, + {6, 17, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 1, 0, 16, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 0, 16, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 1, 0, 16, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 0, 16, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 1, 0, 16, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 1, 0, 16, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 19, 6, 22, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {10, 1, 0, 16, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 1, 0, 16, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, + 0x80}, + {6, 1, 0, 16, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 1, 0, 16, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 1, 0, 16, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 
2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 1, 0, 16, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 0, 16, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 17, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {6, 1, 0, 16, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 0, 16, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 0, 16, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {9, 1, 0, 16, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 1, 0, 16, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 0, 16, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 1, 0, 16, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 0, 16, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 1, 0, 16, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 1, 0, 16, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, +}; +#else +// 1 byte for length, 16 bytes for mask +const uint8_t pack_1_2_3_utf8_bytes[256][17] = { + {12, 0, 1, 17, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80}, + {9, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 17, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 16, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 1, 17, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {11, 0, 1, 17, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 1, 17, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 16, 3, 19, 4, 5, 21, 
6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 1, 17, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 1, 17, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 1, 17, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {11, 0, 1, 17, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 1, 17, 2, 3, 19, 
5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 16, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 0, 1, 17, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 1, 17, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 1, 17, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 1, 17, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 1, 17, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, 
+ {6, 0, 1, 17, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 0, 1, 17, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {2, 1, 17, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {5, 0, 1, 17, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 1, 17, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 0, 1, 17, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 1, 17, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 16, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 1, 17, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 
0x80}, + {2, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 1, 17, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 0, 1, 17, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 0, 1, 17, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 0, 1, 17, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 1, 17, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 1, 17, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 16, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {6, 0, 1, 17, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 
1, 17, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 1, 17, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 1, 17, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {11, 0, 1, 17, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 1, 17, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 16, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 0, 1, 17, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 1, 17, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 
0x80}, + {5, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 1, 17, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 1, 17, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 0, 1, 17, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 0, 1, 17, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {10, 0, 1, 17, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 1, 17, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 
0x80}, + {5, 16, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 1, 17, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 18, 20, 7, 23, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 0, 1, 17, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 1, 17, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, + 0x80, 0x80}, + {4, 0, 1, 17, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 1, 17, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 16, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {6, 0, 1, 17, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 1, 17, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 1, 17, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {9, 0, 1, 17, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 1, 17, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 
19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 1, 17, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 1, 17, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 1, 17, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 0, 1, 17, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 0, 1, 17, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, +}; +#endif // SIMDUTF_IS_BIG_ENDIAN +} // namespace ppc64_utf16_to_utf8 +} // namespace tables +} // unnamed namespace +} // namespace simdutf + +#endif // PPC64_SIMDUTF_UTF16_TO_UTF8_TABLES_H +/* end file src/ppc64/ppc64_utf16_to_utf8_tables.h */ + namespace simdutf { namespace ppc64 { namespace { @@ -34738,21 +38831,6 @@ simdutf_really_inline bool is_ascii(const simd8x64 &input) { return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere(); } -simdutf_unused simdutf_really_inline simd8 -must_be_continuation(const simd8 prev1, const simd8 prev2, - const simd8 prev3) { - simd8 is_second_byte = - prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 - simd8 is_third_byte = - prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = - prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction - // will be <= 64, so signed comparison is fine. 
- return simd8(is_second_byte | is_third_byte | is_fourth_byte) > - int8_t(0); -} - simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { @@ -34765,10 +38843,2023 @@ must_be_2_3_continuation(const simd8 prev2, return simd8(is_third_byte | is_fourth_byte); } +/// ErrorReporting describes behaviour of a vectorized procedure regarding error +/// checking +enum class ErrorReporting { + precise, // the procedure will report *approximate* or *precise* error + // position + at_the_end, // the procedure will only inform about an error after scanning + // the whole input (or its significant portion) + none, // no error checking is done, we assume valid inputs +}; + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/ppc64/ppc64_validate_utf16.cpp */ +template +simd8 utf16_gather_high_bytes(const simd16 in0, + const simd16 in1) { + if (big_endian) { + const vec_u8_t pack_high = { + 0, 2, 4, 6, 8, 10, 12, 14, // in0 + 16, 18, 20, 22, 24, 26, 28, 30 // in1 + }; + + return vec_perm(vec_u8_t(in0.value), vec_u8_t(in1.value), pack_high); + } else { + const vec_u8_t pack_high = { + 1, 3, 5, 7, 9, 11, 13, 15, // in0 + 17, 19, 21, 23, 25, 27, 29, 31 // in1 + }; + + return vec_perm(vec_u8_t(in0.value), vec_u8_t(in1.value), pack_high); + } +} +/* end file src/ppc64/ppc64_validate_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF8 +/* begin file src/ppc64/ppc64_convert_latin1_to_utf8.cpp */ +/* + * reads a vector of uint16 values + * bits after 11th are ignored + * first 11 bits are encoded into utf8 + * !important! 
utf8_output must have at least 16 writable bytes + */ +simdutf_really_inline void +write_v_u16_11bits_to_utf8(const vector_u16 v_u16, char *&utf8_output, + const vector_u8 one_byte_bytemask, + const uint16_t one_byte_bitmask) { + + // 0b1100_0000_1000_0000 + const auto v_c080 = vector_u16(0xc080); + // 0b0011_1111_0000_0000 + const auto v_1f00 = vector_u16(0x1f00); + // 0b0000_0000_0011_1111 + const auto v_003f = vector_u16(0x003f); + + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + + // t0 = [0000|0000|00bb|bbbb] + const auto t0 = v_u16 & v_003f; + // t1 = [000a|aaaa|bbbb|bb00] + const auto t1 = v_u16.shl<2>(); + // t2 = [000a|aaaa|00bb|bbbb] + const auto t2 = select(v_1f00, t1, t0); + // t3 = [110a|aaaa|10bb|bbbb] + const auto t3 = t2 | v_c080; + + // 2. merge ASCII and 2-byte codewords + const auto utf8_unpacked1 = + select(one_byte_bytemask, as_vector_u8(v_u16), as_vector_u8(t3)); + +#if SIMDUTF_IS_BIG_ENDIAN + const auto tmp = as_vector_u16(utf8_unpacked1).swap_bytes(); +#else + const auto tmp = as_vector_u16(utf8_unpacked1); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto utf8_unpacked = as_vector_u8(tmp); + + // 3. prepare bitmask for 8-bit lookup + // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a + // - LSB) + const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a + const uint16_t m1 = static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 + const uint8_t m2 = static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const auto shuffle = vector_u8::load(row + 1); + const auto utf8_packed = shuffle.lookup_16(utf8_unpacked); + + // 5. store bytes + utf8_packed.store(utf8_output); + + // 6. 
adjust pointers + utf8_output += row[0]; +} + +inline void write_v_u16_11bits_to_utf8(const vector_u16 v_u16, + char *&utf8_output, + const vector_u16 v_0000, + const vector_u16 v_ff80) { + // no bits set above 7th bit + const auto one_byte_bytemask = (v_u16 & v_ff80) == v_0000; + const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask(); + + write_v_u16_11bits_to_utf8(v_u16, utf8_output, + as_vector_u8(one_byte_bytemask), one_byte_bitmask); +} + +std::pair +ppc64_convert_latin1_to_utf8(const char *latin_input, + const size_t latin_input_length, + char *utf8_output) { + const char *end = latin_input + latin_input_length; + + const auto v_0000 = vector_u16::zero(); + const auto v_00 = vector_u8::zero(); + + // 0b1111_1111_1000_0000 + const auto v_ff80 = vector_u16(0xff80); + +#if SIMDUTF_IS_BIG_ENDIAN + const auto latin_1_half_into_u16_byte_mask = + vector_u8(16, 0, 16, 1, 16, 2, 16, 3, 16, 4, 16, 5, 16, 6, 16, 7); + const auto latin_2_half_into_u16_byte_mask = + vector_u8(16, 8, 16, 9, 16, 10, 16, 11, 16, 12, 16, 13, 16, 14, 16, 15); +#else + const auto latin_1_half_into_u16_byte_mask = + vector_u8(0, 16, 1, 16, 2, 16, 3, 16, 4, 16, 5, 16, 6, 16, 7, 16); + const auto latin_2_half_into_u16_byte_mask = + vector_u8(8, 16, 9, 16, 10, 16, 11, 16, 12, 16, 13, 16, 14, 16, 15, 16); +#endif // SIMDUTF_IS_BIG_ENDIAN + + // each latin1 takes 1-2 utf8 bytes + // slow path writes useful 8-15 bytes twice (eagerly writes 16 bytes and then + // adjust the pointer) so the last write can exceed the utf8_output size by + // 8-1 bytes by reserving 8 extra input bytes, we expect the output to have + // 8-16 bytes free + while (end - latin_input >= 16 + 8) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + const auto v_latin = vector_u8::load(latin_input); + + if (v_latin.is_ascii()) { // ASCII fast path!!!! 
+ v_latin.store(utf8_output); + latin_input += 16; + utf8_output += 16; + continue; + } + + // assuming a/b are bytes and A/B are uint16 of the same value + // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA + const vector_u16 v_u16_latin_1_half = + as_vector_u16(latin_1_half_into_u16_byte_mask.lookup_32(v_latin, v_00)); + + // aaaa_aaaa_bbbb_bbbb -> BBBB_BBBB + const vector_u16 v_u16_latin_2_half = + as_vector_u16(latin_2_half_into_u16_byte_mask.lookup_32(v_latin, v_00)); + + write_v_u16_11bits_to_utf8(v_u16_latin_1_half, utf8_output, v_0000, v_ff80); + write_v_u16_11bits_to_utf8(v_u16_latin_2_half, utf8_output, v_0000, v_ff80); + latin_input += 16; + } + + if (end - latin_input >= 16) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + const auto v_latin = vector_u8::load(latin_input); + + if (v_latin.is_ascii()) { // ASCII fast path!!!! + v_latin.store(utf8_output); + latin_input += 16; + utf8_output += 16; + } else { + // assuming a/b are bytes and A/B are uint16 of the same value + // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA + const auto v_u16_latin_1_half = as_vector_u16( + latin_1_half_into_u16_byte_mask.lookup_32(v_latin, v_00)); + + write_v_u16_11bits_to_utf8(v_u16_latin_1_half, utf8_output, v_0000, + v_ff80); + latin_input += 8; + } + } + + return std::make_pair(latin_input, utf8_output); +} +/* end file src/ppc64/ppc64_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF16 +/* begin file src/ppc64/ppc64_convert_latin1_to_utf16.cpp */ +template +size_t ppc64_convert_latin1_to_utf16(const char *latin1_input, size_t len, + char16_t *utf16_output) { + const size_t rounded_len = align_down(len); + + for (size_t i = 0; i < rounded_len; i += vector_u8::ELEMENTS) { + const auto in = vector_u8::load(&latin1_input[i]); + in.store_bytes_as_utf16(&utf16_output[i]); + } + + return rounded_len; +} +/* end file src/ppc64/ppc64_convert_latin1_to_utf16.cpp */ +#endif // 
SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF32 +/* begin file src/ppc64/ppc64_convert_latin1_to_utf32.cpp */ +std::pair +ppc64_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + const size_t rounded_len = align_down(len); + + for (size_t i = 0; i < rounded_len; i += vector_u8::ELEMENTS) { + const auto in = vector_u8::load(&buf[i]); + in.store_bytes_as_utf32(&utf32_output[i]); + } + + return std::make_pair(buf + rounded_len, utf32_output + rounded_len); +} +/* end file src/ppc64/ppc64_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/ppc64/ppc64_convert_utf8_to_latin1.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + const auto in = vector_u8::load(input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & + 0xfff; // we are only processing 12 bytes in case it is not all ASCII + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. 
+ in.store(latin1_output); + latin1_output += 12; // We wrote 12 characters. + return 12; // We consumed 12 bytes. + } + /// We do not have a fast path available, so we fallback. + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small lookup + // table. + + const auto reshuffle = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); + const auto perm8 = reshuffle.lookup_32(in, vector_u8::zero()); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm16 = as_vector_u16(perm8).swap_bytes(); +#else + const auto perm16 = as_vector_u16(perm8); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm16 & uint16_t(0x7f); + const auto highbyte = perm16 & uint16_t(0x1f00); + const auto composed = ascii | highbyte.shr<2>(); + + const auto latin1_packed = vector_u16::pack(composed, composed); +#if defined(__clang__) + __attribute__((aligned(16))) char buf[16]; + latin1_packed.store(buf); + memcpy(latin1_output, buf, 6); +#else + // writing 8 bytes even though we only care about the first 6 bytes. + const auto tmp = vec_u64_t(latin1_packed.value); + memcpy(latin1_output, &tmp[0], 8); +#endif + latin1_output += 6; // We wrote 6 bytes. 
+ return consumed; +} +/* end file src/ppc64/ppc64_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/ppc64/ppc64_convert_utf8_to_utf16.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const auto in = vector_u8::load(input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + // Note: using 16 bytes is unsafe, see issue_ossfuzz_71218 + in.store_bytes_as_utf16(utf16_output); + utf16_output += 12; // We wrote 12 16-bit characters. + return 12; // We consumed 12 bytes. + } + if (((utf8_end_of_code_point_mask & 0xFFFF) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte + // UTF-16 code units. 
+#if SIMDUTF_IS_BIG_ENDIAN + const auto in16 = as_vector_u16(in); +#else + const auto in16 = as_vector_u16(in).swap_bytes(); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto lo = in16 & uint16_t(0x007f); + const auto hi = in16.shr<2>(); + + auto composed = select(uint16_t(0x1f00 >> 2), hi, lo); + if (!match_system(big_endian)) { + composed = composed.swap_bytes(); + } + + composed.store(utf16_output); + utf16_output += 8; // We wrote 16 bytes, 8 code points. + return 16; + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. There is probably a more efficient sequence, but the + // following might do. + + // AltiVec: it might be done better, for now SSE translation + + const auto sh = + vector_u8(2, 1, 0, 16, 5, 4, 3, 16, 8, 7, 6, 16, 11, 10, 9, 16); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto b0 = perm & uint32_t(0x0000007f); + const auto b1 = select(uint32_t(0x00003f00 >> 2), perm.shr<2>(), b0); + const auto b2 = select(uint32_t(0x000f0000 >> 4), perm.shr<4>(), b1); + const auto composed = b2; + auto packed = vector_u32::pack(composed, composed); + + if (!match_system(big_endian)) { + packed = packed.swap_bytes(); + } + + packed.store(utf16_output); + utf16_output += 4; + return 12; + } + /// We do not have a fast path available, so we fallback. + + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. 
The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u16(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u16(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto b0 = perm & uint16_t(0x007f); + const auto b1 = perm & uint16_t(0x1f00); + + auto composed = b0 | b1.shr<2>(); + + if (!match_system(big_endian)) { + composed = composed.swap_bytes(); + } + + composed.store(utf16_output); + utf16_output += 6; // We wrote 12 bytes, 6 code points. + } else if (idx < 145) { + // FOUR (4) input code-code units + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto b0 = perm & uint32_t(0x0000007f); + const auto b1 = perm & uint32_t(0x00003f00); + const auto b2 = perm & uint32_t(0x000f0000); + + const auto composed = b0 | b1.shr<2>() | b2.shr<4>(); + + auto packed = vector_u32::pack(composed, composed); + + if (!match_system(big_endian)) { + packed = packed.swap_bytes(); + } + + packed.store(utf16_output); + utf16_output += 4; + } else if (idx < 209) { + // TWO (2) input code-code units + ////////////// + // There might be garbage inputs where a leading byte mascarades as a + // four-byte leading byte (by being followed by 3 continuation byte), but is + // not greater than 0xf0. This could trigger a buffer overflow if we only + // counted leading bytes of the form 0xf0 as generating surrogate pairs, + // without further UTF-8 validation. 
Thus we must be careful to ensure that + // only leading bytes at least as large as 0xf0 generate surrogate pairs. We + // do as at the cost of an extra mask. + ///////////// + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm & uint32_t(0x00000007f); + const auto middlebyte = perm & uint32_t(0x00003f00); + const auto middlebyte_shifted = middlebyte.shr<2>(); + + auto middlehighbyte = perm & uint32_t(0x003f0000); + // correct for spurious high bit + + const auto correct = (perm & uint32_t(0x00400000)).shr<1>(); + middlehighbyte = correct ^ middlehighbyte; + const auto middlehighbyte_shifted = middlehighbyte.shr<4>(); + // We deliberately carry the leading four bits in highbyte if they are + // present, we remove them later when computing hightenbits. + const auto highbyte = perm & uint32_t(0xff000000); + const auto highbyte_shifted = highbyte.shr<6>(); + // When we need to generate a surrogate pair (leading byte > 0xF0), then + // the corresponding 32-bit value in 'composed' will be greater than + // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the + // location of the surrogate pairs. 
+ const auto composed = + ascii | middlebyte_shifted | highbyte_shifted | middlehighbyte_shifted; + + const auto composedminus = composed - uint32_t(0x10000); + const auto lowtenbits = composedminus & uint32_t(0x3ff); + // Notice the 0x3ff mask: + const auto hightenbits = composedminus.shr<10>() & uint32_t(0x3ff); + const auto lowtenbitsadd = lowtenbits + uint32_t(0xDC00); + const auto hightenbitsadd = hightenbits + uint32_t(0xD800); + const auto lowtenbitsaddshifted = lowtenbitsadd.shl<16>(); + auto surrogates = hightenbitsadd | lowtenbitsaddshifted; + + uint32_t basic_buffer[4]; + composed.store(basic_buffer); + uint32_t surrogate_buffer[4]; + surrogates.swap_bytes().store(surrogate_buffer); + + for (size_t i = 0; i < 3; i++) { + if (basic_buffer[i] > 0x3c00000) { + const auto ch0 = uint16_t(surrogate_buffer[i] & 0xffff); + const auto ch1 = uint16_t(surrogate_buffer[i] >> 16); + if (match_system(big_endian)) { + utf16_output[1] = scalar::u16_swap_bytes(ch0); + utf16_output[0] = scalar::u16_swap_bytes(ch1); + } else { + utf16_output[1] = ch0; + utf16_output[0] = ch1; + } + utf16_output += 2; + } else { + const auto chr = uint16_t(basic_buffer[i]); + if (match_system(big_endian)) { + utf16_output[0] = chr; + } else { + utf16_output[0] = scalar::u16_swap_bytes(chr); + } + + utf16_output++; + } + } + } else { + // here we know that there is an error but we do not handle errors + } + return consumed; +} +/* end file src/ppc64/ppc64_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/ppc64/ppc64_convert_utf8_to_utf32.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). 
+size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const auto in = vector_u8::load(input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + in.store_bytes_as_utf32(utf32_output); + utf32_output += 12; // We wrote 12 32-bit characters. + return 12; // We consumed 12 bytes. + } + if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte + // UTF-32 code units. +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = as_vector_u16(in); +#else + const auto perm = as_vector_u16(in).swap_bytes(); +#endif // SIMDUTF_IS_BIG_ENDIAN + // in = [110aaaaa|10bbbbbb] + // t0 = [00000000|00bbbbbb] + const auto t0 = perm & uint16_t(0x007f); + + // t1 = [00110aaa|aabbbbbb] + const auto t1 = perm.shr<2>(); + const auto composed = select(uint16_t(0x1f00 >> 2), t1, t0); + + const auto composed8 = as_vector_u8(composed); + composed8.store_words_as_utf32(utf32_output); + + utf32_output += 8; // We wrote 32 bytes, 8 code points. + return 16; + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. 
+#if SIMDUTF_IS_BIG_ENDIAN + const auto sh = + vector_u8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11); +#else + const auto sh = + vector_u8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); + + // in = [1110aaaa|10bbbbbb|10cccccc] + + // t0 = [00000000|00000000|00cccccc] + const auto t0 = perm & uint32_t(0x0000007f); + + // t2 = [00000000|0000bbbb|bbcccccc] + const auto t1 = perm.shr<2>(); + const auto t2 = select(uint32_t(0x00003f00 >> 2), t1, t0); + + // t4 = [00000000|aaaabbbb|bbcccccc] + const auto t3 = perm.shr<4>(); + const auto t4 = select(uint32_t(0x0f0000 >> 4), t3, t2); + + t4.store(utf32_output); + utf32_output += 4; + return 12; + } + /// We do not have a fast path available, so we fallback. + + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u16(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u16(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm & uint16_t(0x7f); + const auto highbyte = perm & uint16_t(0x1f00); + const auto composed = ascii | highbyte.shr<2>(); + + as_vector_u8(composed).store_words_as_utf32(utf32_output); + utf32_output += 6; // We wrote 12 bytes, 6 code points. 
+ } else if (idx < 145) { + // FOUR (4) input code-code units + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm & uint32_t(0x7f); + const auto middlebyte = perm & uint32_t(0x3f00); + const auto middlebyte_shifted = middlebyte.shr<2>(); + const auto highbyte = perm & uint32_t(0x0f0000); + const auto highbyte_shifted = highbyte.shr<4>(); + const auto composed = ascii | middlebyte_shifted | highbyte_shifted; + + composed.store(utf32_output); + utf32_output += 4; + } else if (idx < 209) { + // TWO (2) input code-code units + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm & uint32_t(0x0000007f); + const auto middlebyte = perm & uint32_t(0x3f00); + const auto middlebyte_shifted = middlebyte.shr<2>(); + auto middlehighbyte = perm & uint32_t(0x003f0000); + // correct for spurious high bit + const auto correct0 = perm & uint32_t(0x00400000); + const auto correct = correct0.shr<1>(); + middlehighbyte = correct ^ middlehighbyte; + const auto middlehighbyte_shifted = middlehighbyte.shr<4>(); + const auto highbyte = perm & uint32_t(0x07000000); + const auto highbyte_shifted = highbyte.shr<6>(); + const auto composed = + ascii | middlebyte_shifted | highbyte_shifted | middlehighbyte_shifted; + composed.store(utf32_output); + utf32_output += 3; + } else { + // here we know that there is an error but we do not handle errors + } + return consumed; +} +/* end file src/ppc64/ppc64_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 
// && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
/* begin file src/ppc64/ppc64_convert_utf16_to_latin1.cpp */
// Result of ppc64_convert_utf16_to_latin1: an error code plus the input and
// output positions at which the vectorized loop stopped.
struct utf16_to_latin1_t {
  error_code err;
  const char16_t *input;
  char *output;
};

// Converts UTF-16 to Latin-1, eight code units per iteration.
//
// On success, returns error_code::SUCCESS together with the positions reached;
// fewer than 8 code units may remain unprocessed at `input`. If a code unit
// with a non-zero upper byte is found, returns error_code::TOO_LARGE with
// `input` pointing at the offending code unit and `output` at the start of the
// current 8-byte output chunk.
//
// NOTE(review): the template parameter list was missing in this text and is
// reconstructed from the body's use of `big_endian` with match_system();
// confirm the parameter type against the callers.
template <endianness big_endian>
utf16_to_latin1_t ppc64_convert_utf16_to_latin1(const char16_t *buf, size_t len,
                                                char *latin1_output) {
  const char16_t *end = buf + len;
  while (end - buf >= 8) {
    // Load 8 x UTF-16 characters
    auto in = vector_u8::load(buf);

    // Move low bytes of UTF-16 chars to lower half of `in`
    // and upper bytes to upper half of `in`.
    const auto perm =
        match_system(big_endian)
            ? vector_u8(1, 3, 5, 7, 9, 11, 13, 15, 0, 2, 4, 6, 8, 10, 12, 14)
            : vector_u8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
    in = perm.lookup_16(in);

    // AltiVec-specific: view the permuted register as two 64-bit halves.
    // The clang build goes through a temporary array; other compilers read
    // the halves from the vector value.
#if defined(__clang__)
    __attribute__((aligned(16))) uint64_t tmp[8];
    in.store(tmp);
#else
    const auto tmp = vec_u64_t(in.value);
#endif // defined(__clang__)

    // One half holds the eight Latin-1 bytes, the other the eight upper bytes
    // that must all be zero; which index is which depends on the endianness
    // of the system.
#if SIMDUTF_IS_BIG_ENDIAN
    memcpy(latin1_output, &tmp[0], 8);
    const uint64_t upper = tmp[1];
#else
    memcpy(latin1_output, &tmp[1], 8);
    const uint64_t upper = tmp[0];
#endif // SIMDUTF_IS_BIG_ENDIAN
    // AltiVec

    if (simdutf_unlikely(upper)) {
      // At least one code unit does not fit in Latin-1: locate the first one.
      uint8_t bytes[8];
      memcpy(bytes, &upper, 8);
      for (size_t k = 0; k < 8; k++) {
        if (bytes[k] != 0) {
          return utf16_to_latin1_t{error_code::TOO_LARGE, buf + k,
                                   latin1_output};
        }
      }
    } else {
      // Adjust pointers for next iteration
      buf += 8;
      latin1_output += 8;
    }
  } // while

  return utf16_to_latin1_t{error_code::SUCCESS, buf, latin1_output};
}
/* end file src/ppc64/ppc64_convert_utf16_to_latin1.cpp */
#endif // SIMDUTF_FEATURE_UTF16 &&
SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 +/* begin file src/ppc64/ppc64_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on a single SSE register, i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and all values + are in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. + + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) a single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. + + For this case we only do some shuffling to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need a 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains the lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved, forming an eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need a 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ + +// Auxiliary procedure used by the UTF-16 and UTF-32 to UTF-8 conversions. +// Note that the pointer is passed by reference; it is updated by the procedure. 
+template +simdutf_really_inline void ppc64_convert_utf16_to_1_2_3_bytes_of_utf8( + const vector_u16 in, uint16_t one_byte_bitmask, + const T one_or_two_bytes_bytemask, uint16_t one_or_two_bytes_bitmask, + char *&utf8_output) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes +#if SIMDUTF_IS_BIG_ENDIAN + const auto dup_lsb = + vector_u8(1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); +#else + const auto dup_lsb = + vector_u8(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); +#endif // SIMDUTF_IS_BIG_ENDIAN + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). 
+ */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const auto t0 = as_vector_u16(dup_lsb.lookup_16(as_vector_u8(in))); + + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const auto t1 = t0 & uint16_t(0b0011111101111111); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const auto t2 = t1 | uint16_t(0b1000000000000000); + + // in = [aaaa|bbbb|bbcc|cccc] + // a0 = [0000|0000|0000|aaaa] + const auto a0 = in.shr<12>(); + // b0 = [aabb|bbbb|cccc|cc00] + const auto b0 = in.shl<2>(); + // s0 = [00bb|bbbb|00cc|cccc] + const auto s0 = select(uint16_t(0x3f00), b0, a0); + + // s3 = [11bb|bbbb|1110|aaaa] + const auto s3 = s0 | uint16_t(0b1100000011100000); + + const auto m0 = + ~as_vector_u16(one_or_two_bytes_bytemask) & uint16_t(0b0100000000000000); + const auto s4 = s3 ^ m0; + + // 4. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { + // We only have three-byte code units. Use fast path. 
+#if SIMDUTF_IS_BIG_ENDIAN + // Lookups produced by scripts/ppc64_convert_utf16_to_utf8.py + const auto shuffle0 = + vector_u8(1, 0, 16, 3, 2, 18, 5, 4, 20, 7, 6, 22, 9, 8, 24, 11); + const auto shuffle1 = vector_u8(10, 26, 13, 12, 28, 15, 14, 30, -1, -1, -1, + -1, -1, -1, -1, -1); +#else + const auto shuffle0 = + vector_u8(0, 1, 17, 2, 3, 19, 4, 5, 21, 6, 7, 23, 8, 9, 25, 10); + const auto shuffle1 = vector_u8(11, 27, 12, 13, 29, 14, 15, 31, -1, -1, -1, + -1, -1, -1, -1, -1); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto utf8_0 = shuffle0.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); + const auto utf8_1 = shuffle1.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); + + utf8_0.store(utf8_output); + utf8_output += 16; + utf8_1.store(utf8_output); + utf8_output += 8; + return; + } + + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::ppc64_utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const auto shuffle0 = vector_u8::load(row0 + 1); + + const auto utf8_0 = shuffle0.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); + const uint8_t mask1 = static_cast(mask >> 8); + + const uint8_t *row1 = + &simdutf::tables::ppc64_utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const auto shuffle1 = vector_u8::load(row1 + 1) + uint8_t(8); + const auto utf8_1 = shuffle1.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); + + utf8_0.store(utf8_output); + utf8_output += row0[0]; + utf8_1.store(utf8_output); + utf8_output += row1[0]; +} + +struct utf16_to_utf8_t { + error_code err; + const char16_t *input; + char *output; +}; + +/* + Returns utf16_to_utf8_t value + A scalar routine should carry on the conversion of the tail, + iff there was no error. 
+*/ +template +utf16_to_utf8_t ppc64_convert_utf16_to_utf8(const char16_t *buf, size_t len, + char *utf8_output) { + + const char16_t *end = buf + len; + + const auto v_f800 = vector_u16(0xf800); + const auto v_d800 = vector_u16(0xd800); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + auto in = vector_u16::load(buf); + if (not match_system(big_endian)) { + in = in.swap_bytes(); + } + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + if (in.is_ascii()) { + auto nextin = vector_u16::load(buf + vector_u16::ELEMENTS); + if (not match_system(big_endian)) { + nextin = nextin.swap_bytes(); + } + + if (nextin.is_ascii()) { + // 1. pack the bytes + const auto utf8_packed = vector_u16::pack(in, nextin); + // 2. store (16 bytes) + utf8_packed.store(utf8_output); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + + // next block is not ASCII + const auto utf8_packed = vector_u16::pack(in, in); + // 2. store (16 bytes) + utf8_packed.store(utf8_output); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + // fallback + } + + // no bits set above 7th bit + const auto one_byte_bytemask = in < uint16_t(1 << 7); + const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask(); + + // no bits set above 11th bit + const auto one_or_two_bytes_bytemask = in < uint16_t(1 << 11); + const uint16_t one_or_two_bytes_bitmask = + one_or_two_bytes_bytemask.to_bitmask(); + + if (one_or_two_bytes_bitmask == 0xffff) { + write_v_u16_11bits_to_utf8( + in, utf8_output, as_vector_u8(one_byte_bytemask), one_byte_bitmask); + buf += 8; + continue; + } + + // 1. Check if there are any surrogate word in the input chunk. + // We have also to deal with situation when there is a surrogate word + // at the end of a chunk. 
+ const auto surrogates_bytemask = (in & v_f800) == v_d800; + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint16_t surrogates_bitmask = surrogates_bytemask.to_bitmask(); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x0000) { + ppc64_convert_utf16_to_1_2_3_bytes_of_utf8( + in, one_byte_bitmask, one_or_two_bytes_bytemask, + one_or_two_bytes_bitmask, utf8_output); + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = not match_system(big_endian) + ? scalar::u16_swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xFF80) == 0) { + *utf8_output++ = uint8_t(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = uint8_t((word >> 6) | 0b11000000); + *utf8_output++ = uint8_t((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = uint8_t((word >> 12) | 0b11100000); + *utf8_output++ = uint8_t(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = uint8_t((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = not match_system(big_endian) + ? 
scalar::u16_swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return utf16_to_utf8_t{error_code::SURROGATE, buf + k - 1, + utf8_output}; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = uint8_t((value >> 18) | 0b11110000); + *utf8_output++ = uint8_t(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = uint8_t(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = uint8_t((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return utf16_to_utf8_t{error_code::SUCCESS, buf, utf8_output}; +} +/* end file src/ppc64/ppc64_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/ppc64/ppc64_convert_utf16_to_utf32.cpp */ +struct utf16_to_utf32_t { + error_code err; // error code + const char16_t *input; // last position in input buffer + char32_t *output; // last position in output buffer +}; + +template +utf16_to_utf32_t ppc64_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *end = buf + len; + + const auto v_f800 = vector_u16::splat(0xf800); + const auto v_d800 = vector_u16::splat(0xd800); + const auto zero = vector_u8::zero(); + + while (end - buf >= vector_u16::ELEMENTS) { + auto in = vector_u16::load(buf); + if (not match_system(big_endian)) { + in = in.swap_bytes(); + } + + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const auto surrogates_bytemask = (in & v_f800) == v_d800; + + // bitmask = 0x0000 if there are no surrogates + const uint16_t surrogates_bitmask = surrogates_bytemask.to_bitmask(); + + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. 
+ if (surrogates_bitmask == 0x0000) { + // case: no surrogate pairs, extend 16-bit code units to 32-bit code units +#if SIMDUTF_IS_BIG_ENDIAN + const auto lo = + vector_u8(16, 16, 0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7); + const auto hi = vector_u8(16, 16, 8 + 0, 8 + 1, 16, 16, 8 + 2, 8 + 3, 16, + 16, 8 + 4, 8 + 5, 16, 16, 8 + 6, 8 + 7); +#else + const auto lo = + vector_u8(0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7, 16, 16); + const auto hi = vector_u8(8 + 0, 8 + 1, 16, 16, 8 + 2, 8 + 3, 16, 16, + 8 + 4, 8 + 5, 16, 16, 8 + 6, 8 + 7, 16, 16); +#endif // SIMDUTF_IS_BIG_ENDIAN + + const auto utf32_0 = lo.lookup_32(as_vector_u8(in), zero); + const auto utf32_1 = hi.lookup_32(as_vector_u8(in), zero); + + utf32_0.store(utf32_output); + utf32_1.store(utf32_output + 4); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + const uint16_t word = not match_system(big_endian) + ? scalar::u16_swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = not match_system(big_endian) + ? 
scalar::u16_swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return utf16_to_utf32_t{error_code::SURROGATE, buf + k - 1, + utf32_output}; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + + return utf16_to_utf32_t{error_code::SUCCESS, buf, utf32_output}; +} +/* end file src/ppc64/ppc64_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/ppc64/ppc64_convert_utf32_to_latin1.cpp */ +enum class ErrorChecking { disabled, enabled }; + +struct utf32_to_latin1_t { + error_code err; + const char32_t *input; + char *output; +}; + +template +utf32_to_latin1_t simdutf_really_inline ppc64_convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) { + constexpr size_t N = vector_u32::ELEMENTS; + const size_t rounded_len = align_down<4 * N>(len); + + const auto high_bytes_mask = vector_u32::splat(0xFFFFFF00); + + for (size_t i = 0; i < rounded_len; i += 4 * N) { + const auto in1 = vector_u32::load(buf + 0 * N); + const auto in2 = vector_u32::load(buf + 1 * N); + const auto in3 = vector_u32::load(buf + 2 * N); + const auto in4 = vector_u32::load(buf + 3 * N); + + if (ec == ErrorChecking::enabled) { + const auto combined = in1 | in2 | in3 | in4; + const auto too_big = (combined & high_bytes_mask) != uint32_t(0); + + if (simdutf_unlikely(too_big.any())) { + // Scalar code will carry on from the beginning of the current block + // and report the exact error position. 
+ return utf32_to_latin1_t{error_code::OTHER, buf, latin1_output}; + } + } + + // Note: element #1 contains 0, and is used to mask-out elements +#if SIMDUTF_IS_BIG_ENDIAN + const auto shlo = vector_u8(0 + 3, 4 + 3, 8 + 3, 12 + 3, 16 + 3, 20 + 3, + 24 + 3, 28 + 3, 1, 1, 1, 1, 1, 1, 1, 1); + const auto shhi = vector_u8(1, 1, 1, 1, 1, 1, 1, 1, 0 + 3, 4 + 3, 8 + 3, + 12 + 3, 16 + 3, 20 + 3, 24 + 3, 28 + 3); +#else + const auto shlo = + vector_u8(0, 4, 8, 12, 16, 20, 24, 28, 1, 1, 1, 1, 1, 1, 1, 1); + const auto shhi = + vector_u8(1, 1, 1, 1, 1, 1, 1, 1, 0, 4, 8, 12, 16, 20, 24, 28); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto lo = shlo.lookup_32(as_vector_u8(in1), as_vector_u8(in2)); + const auto hi = shhi.lookup_32(as_vector_u8(in3), as_vector_u8(in4)); + + const auto merged = lo | hi; + + merged.store(latin1_output); + latin1_output += 4 * N; + buf += 4 * N; + } + + return utf32_to_latin1_t{error_code::SUCCESS, buf, latin1_output}; +} +/* end file src/ppc64/ppc64_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 +/* begin file src/ppc64/ppc64_convert_utf32_to_utf16.cpp */ +struct utf32_to_utf16_t { + error_code err; + const char32_t *input; + char16_t *output; +}; + +template +utf32_to_utf16_t ppc64_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_output) { + + const char32_t *end = buf + len; + + const auto zero = vector_u32::zero(); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + + auto forbidden_global = simd16(); + + while (end - buf >= 8) { + const auto in0 = vector_u32::load(buf); + const auto in1 = vector_u32::load(buf + vector_u32::ELEMENTS); + + const auto any_surrogate = ((in0 | in1) & v_ffff0000) != zero; + + // Check if no bits set above 15th + if (any_surrogate.is_zero()) { + // Pack UTF-32 to UTF-16 +#if SIMDUTF_IS_BIG_ENDIAN + const auto sh = big_endian ? 
vector_u8(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, + 22, 23, 26, 27, 30, 31) + : vector_u8(3, 2, 7, 6, 11, 10, 15, 14, 19, 18, + 23, 22, 27, 26, 31, 30); +#else + const auto sh = big_endian ? vector_u8(1, 0, 5, 4, 9, 8, 13, 12, 17, 16, + 21, 20, 25, 24, 29, 28) + : vector_u8(0, 1, 4, 5, 8, 9, 12, 13, 16, 17, + 20, 21, 24, 25, 28, 29); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto packed0 = sh.lookup_32(as_vector_u8(in0), as_vector_u8(in1)); + const auto packed = as_vector_u16(packed0); + +#if SIMDUTF_IS_BIG_ENDIAN + const auto v_f800 = + big_endian ? vector_u16::splat(0xf800) : vector_u16::splat(0x00f8); + const auto v_d800 = + big_endian ? vector_u16::splat(0xd800) : vector_u16::splat(0x00d8); +#else + const auto v_f800 = + big_endian ? vector_u16::splat(0x00f8) : vector_u16::splat(0xf800); + const auto v_d800 = + big_endian ? vector_u16::splat(0x00d8) : vector_u16::splat(0xd800); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto forbidden = (packed & v_f800) == v_d800; + + switch (er) { + case ErrorReporting::precise: + if (not forbidden.is_zero()) { + // scalar procedure will rescan the portion of buffer we've just + // analysed + return utf32_to_utf16_t{error_code::OTHER, buf, utf16_output}; + } + break; + case ErrorReporting::at_the_end: + forbidden_global |= forbidden; + break; + case ErrorReporting::none: + break; + } + + packed.store(utf16_output); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 7; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return utf32_to_utf16_t{error_code::SURROGATE, buf + k, + utf16_output}; + } + *utf16_output++ = not match_system(big_endian) + ? 
scalar::u16_swap_bytes(uint16_t(word)) + : uint16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return utf32_to_utf16_t{error_code::TOO_LARGE, buf + k, + utf16_output}; + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (not match_system(big_endian)) { + high_surrogate = scalar::u16_swap_bytes(high_surrogate); + low_surrogate = scalar::u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + if (er == ErrorReporting::at_the_end) { + // check for invalid input + if (not forbidden_global.is_zero()) { + return utf32_to_utf16_t{error_code::SURROGATE, buf, utf16_output}; + } + } + + return utf32_to_utf16_t{error_code::SUCCESS, buf, utf16_output}; +} +/* end file src/ppc64/ppc64_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF32 +/* begin file src/ppc64/ppc64_convert_utf32_to_utf8.cpp */ +struct utf32_to_utf8_t { + error_code err; + const char32_t *input; + char *output; +}; + +template +utf32_to_utf8_t ppc64_convert_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) { + const char32_t *end = buf + len; + + const auto v_f800 = vector_u16::splat(0xf800); + const auto v_d800 = vector_u16::splat(0xd800); + + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + const auto v_00000000 = vector_u32::zero(); + auto forbidden_bytemask = simd16(); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= + std::ptrdiff_t( + 16 + safety_margin)) { // buf is a char32_t pointer, each char32_t + // has 4 bytes or 32 bits, thus buf + 16 * + // char_32t = 512 bits = 64 bytes + // We load two 16 bytes registers for a total of 32 bytes or 16 characters. 
+ // These two values can hold only 8 UTF32 chars + auto in0 = vector_u32::load(buf); + auto in1 = vector_u32::load(buf + vector_u32::ELEMENTS); + + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + auto in = vector_u32::pack(in0, in1); + + // Try to apply UTF-16 => UTF-8 from ./ppc64_convert_utf16_to_utf8.cpp + + // Check for ASCII fast path + + // ASCII fast path!!!! + // We eagerly load another 32 bytes, hoping that they will be ASCII too. + // The intuition is that we try to collect 16 ASCII characters which + // requires a total of 64 bytes of input. If we fail, we just pass thirdin + // and fourthin as our new inputs. + if (in.is_ascii()) { // if the first two blocks are ASCII + const auto in2 = vector_u32::load(buf + 2 * vector_u32::ELEMENTS); + const auto in3 = vector_u32::load(buf + 3 * vector_u32::ELEMENTS); + + const auto next = vector_u32::pack(in2, in3); + if (next.is_ascii()) { + // 1. pack the bytes + const auto utf8_packed = vector_u16::pack(in, next); + // 2. store (16 bytes) + utf8_packed.store(utf8_output); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + + // `next` is not ASCII, write `in` and carry on with next + + // 1. pack the bytes + const auto utf8_packed = vector_u16::pack(in, in); + utf8_packed.store(utf8_output); + // 3. 
adjust pointers + buf += 8; + utf8_output += 8; + + // Proceed with next input + in = next; + in0 = in2; + in1 = in3; + } + + // no bits set above 7th bit + const auto one_byte_bytemask = in < uint16_t(1 << 7); + const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask(); + + // no bits set above 11th bit + const auto one_or_two_bytes_bytemask = in < uint16_t(1 << 11); + const uint16_t one_or_two_bytes_bitmask = + one_or_two_bytes_bytemask.to_bitmask(); + + if (one_or_two_bytes_bitmask == 0xffff) { + write_v_u16_11bits_to_utf8( + in, utf8_output, as_vector_u8(one_byte_bytemask), one_byte_bitmask); + buf += 8; + continue; + } + + // Check for overflow in packing + const auto saturation_bytemask = ((in0 | in1) & v_ffff0000) == v_00000000; + const uint16_t saturation_bitmask = saturation_bytemask.to_bitmask(); + if (saturation_bitmask == 0xffff) { + switch (er) { + case ErrorReporting::precise: { + const auto forbidden = (in & v_f800) == v_d800; + if (forbidden.any()) { + // We return no error code, instead we force the scalar procedure + // to rescan the portion of input where we've just found an error. + return utf32_to_utf8_t{error_code::SUCCESS, buf, utf8_output}; + } + } break; + case ErrorReporting::at_the_end: + forbidden_bytemask |= (in & v_f800) == v_d800; + break; + case ErrorReporting::none: + break; + } + + ppc64_convert_utf16_to_1_2_3_bytes_of_utf8( + in, one_byte_bitmask, one_or_two_bytes_bytemask, + one_or_two_bytes_bitmask, utf8_output); + buf += 8; + } else { + // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD in the + // presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (er != ErrorReporting::none and + (word >= 0xD800 && word <= 0xDFFF)) { + return utf32_to_utf8_t{error_code::SURROGATE, buf + k, utf8_output}; + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (er != ErrorReporting::none and (word > 0x10FFFF)) { + return utf32_to_utf8_t{error_code::TOO_LARGE, buf + k, utf8_output}; + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + if (er == ErrorReporting::at_the_end) { + if (forbidden_bytemask.any()) { + return utf32_to_utf8_t{error_code::SURROGATE, buf, utf8_output}; + } + } + + return utf32_to_utf8_t{ + error_code::SUCCESS, + buf, + utf8_output, + }; +} +/* end file src/ppc64/ppc64_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/ppc64/ppc64_utf8_length_from_latin1.cpp */ +template T min(T a, T b) { return a <= b ? 
a : b; } + +std::pair ppc64_utf8_length_from_latin1(const char *input, + size_t length) { + constexpr size_t N = vector_u8::ELEMENTS; + length = (length / N); + + size_t count = length * N; + while (length != 0) { + vector_u32 partial = vector_u32::zero(); + + // partial accumulator has 32 bits => this yields (2^31 / 16) + size_t chunk = min(length, size_t(0xffffffff / N)); + length -= chunk; + while (chunk != 0) { + auto local = vector_u8::zero(); + // local accumulator has 8 bits => this yields 255 max (we increment by 1 + // in each iteration) + const size_t n = min(chunk, size_t(255)); + chunk -= n; + for (size_t i = 0; i < n; i++) { + const auto in = vector_i8::load(input); + input += N; + + local -= as_vector_u8(in < vector_i8::splat(0)); + } + + partial = sum4bytes(local, partial); + } + + for (int i = 0; i < vector_u32::ELEMENTS; i++) { + count += size_t(partial.value[i]); + } + } + + return std::make_pair(input, count); +} +/* end file src/ppc64/ppc64_utf8_length_from_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/ppc64/ppc64_base64.cpp */ +/* + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. 
https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + * + * AMD XOP specific: http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html + * Altivec has capabilities of AMD XOP (or vice versa): shuffle using 2 vectors + * and variable shifts, thus this implementation shares some code solutions + * (modulo intrinsic function names). + */ + +constexpr bool with_base64_std = false; +constexpr bool with_base64_url = true; +constexpr bool with_ignore_errors = true; +constexpr bool with_ignore_garbage = true; +constexpr bool with_strict_checking = false; + +// --- encoding ----------------------------------------------- + +/* + Procedure translates vector of bytes having 6-bit values + into ASCII counterparts. +*/ +template +vector_u8 encoding_translate_6bit_values(const vector_u8 input) { + // credit: Wojciech Muła + // reduce 0..51 -> 0 + // 52..61 -> 1 .. 10 + // 62 -> 11 + // 63 -> 12 + auto result = input.saturating_sub(vector_u8::splat(51)); + + // distinguish between ranges 0..25 and 26..51: + // 0 .. 25 -> becomes 13 + // 26 .. 51 -> remains 0 + const auto lt = input < vector_u8::splat(26); + result = select(as_vector_u8(lt), vector_u8::splat(13), result); + + const auto shift_LUT = + base64_url ?
vector_u8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0) + : vector_u8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); + // read shift + result = result.lookup_16(shift_LUT); + + return input + result; +} + +/* + Procedure expands 12 bytes (4*3 bytes) into 16 bytes, + each byte stores 6 bits of data +*/ +template +simdutf_really_inline vector_u8 encoding_expand_6bit_fields(vector_u8 input) { +#if SIMDUTF_IS_BIG_ENDIAN + #define indices4(dx) (dx + 0), (dx + 1), (dx + 1), (dx + 2) + const auto expand_3_to_4 = vector_u8(indices4(0 * 3), indices4(1 * 3), + indices4(2 * 3), indices4(3 * 3)); + #undef indices4 + + // input = [........|ccdddddd|bbbbcccc|aaaaaabb] as uint8_t + // 3 2 1 0 + // + // in' = [aaaaaabb|bbbbcccc|bbbbcccc|ccdddddd] as uint32_t + // 0 1 1 2 + const auto in = as_vector_u32(expand_3_to_4.lookup_16(input)); + + // t0 = [00000000|00000000|00000000|00dddddd] + const auto t0 = in & uint32_t(0x0000003f); + + // t1 = [00000000|00000000|00cccccc|00dddddd] + const auto t1 = select(uint32_t(0x00003f00), in.shl<2>(), t0); + + // t2 = [00000000|00bbbbbb|00cccccc|00dddddd] + const auto t2 = select(uint32_t(0x003f0000), in.shr<4>(), t1); + + // t3 = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] + const auto t3 = select(uint32_t(0x3f000000), in.shr<2>(), t2); + + return as_vector_u8(t3); +#else + #define indices4(dx) (dx + 1), (dx + 0), (dx + 2), (dx + 1) + const auto expand_3_to_4 = vector_u8(indices4(0 * 3), indices4(1 * 3), + indices4(2 * 3), indices4(3 * 3)); + #undef indices4 + + // input = [........|ccdddddd|bbbbcccc|aaaaaabb] as uint8_t + // 3 2 1 0 + // + // in' = [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] as uint32_t + // 1 2 0 1 + const auto in = as_vector_u32(expand_3_to_4.lookup_16(input)); + + // t0 = [00dddddd|00000000|00000000|00000000] + const auto t0 = in.shl<8>() & 
uint32_t(0x3f000000); + + // t1 = [00dddddd|00cccccc|00000000|00000000] + const auto t1 = select(uint32_t(0x003f0000), in.shr<6>(), t0); + + // t2 = [00dddddd|00cccccc|00bbbbbb|00000000] + const auto t2 = select(uint32_t(0x00003f00), in.shl<4>(), t1); + + // t3 = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] + const auto t3 = select(uint32_t(0x0000003f), in.shr<10>(), t2); + + return as_vector_u8(t3); +#endif // SIMDUTF_IS_BIG_ENDIAN +} + +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + + const uint8_t *input = (const uint8_t *)src; + + uint8_t *out = (uint8_t *)dst; + + size_t i = 0; + for (; i + 52 <= srclen; i += 48) { + const auto in0 = vector_u8::load(input + i + 12 * 0); + const auto in1 = vector_u8::load(input + i + 12 * 1); + const auto in2 = vector_u8::load(input + i + 12 * 2); + const auto in3 = vector_u8::load(input + i + 12 * 3); + + const auto expanded0 = encoding_expand_6bit_fields(in0); + const auto expanded1 = encoding_expand_6bit_fields(in1); + const auto expanded2 = encoding_expand_6bit_fields(in2); + const auto expanded3 = encoding_expand_6bit_fields(in3); + + const auto base64_0 = + encoding_translate_6bit_values(expanded0); + const auto base64_1 = + encoding_translate_6bit_values(expanded1); + const auto base64_2 = + encoding_translate_6bit_values(expanded2); + const auto base64_3 = + encoding_translate_6bit_values(expanded3); + + base64_0.store(out); + out += 16; + + base64_1.store(out); + out += 16; + + base64_2.store(out); + out += 16; + + base64_3.store(out); + out += 16; + } + for (; i + 16 <= srclen; i += 12) { + const auto in = vector_u8::load(input + i); + const auto expanded = encoding_expand_6bit_fields(in); + const auto base64 = encoding_translate_6bit_values(expanded); + + base64.store(out); + out += 16; + } + + return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, + srclen - i, options); +} + +// --- decoding ----------------------------------------------- + 
+static simdutf_really_inline void compress(const vector_u8 data, uint16_t mask, + char *output) { + if (mask == 0) { + data.store(output); + return; + } + + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + +#if SIMDUTF_IS_BIG_ENDIAN + vec_u64_t tmp = { + tables::base64::thintable_epi8[mask2], + tables::base64::thintable_epi8[mask1], + }; + + auto shufmask = vector_u8(vec_reve(vec_u8_t(tmp))); + + // we increment by 0x08 the second half of the mask + shufmask = + shufmask + vector_u8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); +#else + vec_u64_t tmp = { + tables::base64::thintable_epi8[mask1], + tables::base64::thintable_epi8[mask2], + }; + + auto shufmask = vector_u8(vec_u8_t(tmp)); + + // we increment by 0x08 the second half of the mask + shufmask = + shufmask + vector_u8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); +#endif // SIMDUTF_IS_BIG_ENDIAN + + // this is the version "nearly pruned" + const auto pruned = shufmask.lookup_16(data); + // we still need to put the two halves together. + // we compute the popcount of the first half: + const int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ const auto compactmask = + vector_u8::load(tables::base64::pshufb_combine_table + pop1 * 8); + + const auto answer = compactmask.lookup_16(pruned); + + answer.store(output); +} + +static simdutf_really_inline vector_u8 decoding_pack(vector_u8 input) { +#if SIMDUTF_IS_BIG_ENDIAN + // in = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] + // want = [00000000|aaaaaabb|bbbbcccc|ccdddddd] + + auto in = as_vector_u16(input); + // t1 = [00??aaaa|aabbbbbb|00??cccc|ccdddddd] + const auto t0 = in.shr<2>(); + const auto t1 = select(uint16_t(0x0fc0), t0, in); + + // t4 = [00??????|aaaaaabb|bbbbcccc|ccdddddd] + const auto t2 = as_vector_u32(t1); + const auto t3 = t2.shr<4>(); + const auto t4 = select(uint32_t(0x00fff000), t3, t2); + + const auto tmp = as_vector_u8(t4); + + const auto shuffle = + vector_u8(1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 0, 0, 0, 0); + + const auto t = shuffle.lookup_16(tmp); + + return t; +#else + // in = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] + // want = [00000000|aaaaaabb|bbbbcccc|ccdddddd] + + auto u = as_vector_u32(input).swap_bytes(); + + auto in = vector_u16((vec_u16_t)u.value); + // t1 = [00??aaaa|aabbbbbb|00??cccc|ccdddddd] + const auto t0 = in.shr<2>(); + const auto t1 = select(uint16_t(0x0fc0), t0, in); + + // t4 = [00??????|aaaaaabb|bbbbcccc|ccdddddd] + const auto t2 = as_vector_u32(t1); + const auto t3 = t2.shr<4>(); + const auto t4 = select(uint32_t(0x00fff000), t3, t2); + + const auto tmp = as_vector_u8(t4); + + const auto shuffle = + vector_u8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 0, 0, 0, 0); + + const auto t = shuffle.lookup_16(tmp); + + return t; +#endif // SIMDUTF_IS_BIG_ENDIAN +} +static simdutf_really_inline void base64_decode(char *out, vector_u8 input) { + const auto expanded = decoding_pack(input); + expanded.store(out); +} + +static simdutf_really_inline void base64_decode_block(char *out, + const char *src) { + base64_decode(out + 12 * 0, vector_u8::load(src + 0 * 16)); + base64_decode(out + 12 * 1, vector_u8::load(src + 1 * 16)); +
base64_decode(out + 12 * 2, vector_u8::load(src + 2 * 16)); + base64_decode(out + 12 * 3, vector_u8::load(src + 3 * 16)); +} + +static simdutf_really_inline void base64_decode_block_safe(char *out, + const char *src) { + base64_decode(out + 12 * 0, vector_u8::load(src + 0 * 16)); + base64_decode(out + 12 * 1, vector_u8::load(src + 1 * 16)); + base64_decode(out + 12 * 2, vector_u8::load(src + 2 * 16)); + + char buffer[16]; + base64_decode(buffer, vector_u8::load(src + 3 * 16)); + std::memcpy(out + 36, buffer, 12); +} + +// ---base64 decoding::block64 class -------------------------- + +class block64 { + simd8x64 b; + +public: + simdutf_really_inline block64(const char *src) : b(load_block(src)) {} + simdutf_really_inline block64(const char16_t *src) : b(load_block(src)) {} + +private: + // The caller of this function is responsible to ensure that there are 64 + // bytes available from reading at src. The data is read into a block64 + // structure. + static simdutf_really_inline simd8x64 load_block(const char *src) { + const auto v0 = vector_u8::load(src + 16 * 0); + const auto v1 = vector_u8::load(src + 16 * 1); + const auto v2 = vector_u8::load(src + 16 * 2); + const auto v3 = vector_u8::load(src + 16 * 3); + + return simd8x64(v0, v1, v2, v3); + } + + // The caller of this function is responsible to ensure that there are 128 + // bytes available from reading at src. The data is read into a block64 + // structure. 
+ static simdutf_really_inline simd8x64 + load_block(const char16_t *src) { + const auto m1 = vector_u16::load(src + 8 * 0); + const auto m2 = vector_u16::load(src + 8 * 1); + const auto m3 = vector_u16::load(src + 8 * 2); + const auto m4 = vector_u16::load(src + 8 * 3); + const auto m5 = vector_u16::load(src + 8 * 4); + const auto m6 = vector_u16::load(src + 8 * 5); + const auto m7 = vector_u16::load(src + 8 * 6); + const auto m8 = vector_u16::load(src + 8 * 7); + + return simd8x64(vector_u16::pack(m1, m2), vector_u16::pack(m3, m4), + vector_u16::pack(m5, m6), + vector_u16::pack(m7, m8)); + } + +public: + template + static inline uint16_t to_base64_mask(vector_u8 &src, uint16_t &error) { + const auto ascii_space_tbl = + vector_u8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, + 0xc, 0xd, 0x0, 0x0); + + // credit: aqrit + const auto delta_asso = + base64_url ? vector_u8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xF, 0x0, 0xF) + : vector_u8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); + + const auto delta_values = + base64_url ? vector_u8(0x0, 0x0, 0x0, 0x13, 0x4, 0xBF, 0xBF, 0xB9, 0xB9, + 0x0, 0x11, 0xC3, 0xBF, 0xE0, 0xB9, 0xB9) + : vector_u8(0x00, 0x00, 0x00, 0x13, 0x04, 0xBF, 0xBF, 0xB9, + 0xB9, 0x00, 0x10, 0xC3, 0xBF, 0xBF, 0xB9, 0xB9); + + const auto check_asso = + base64_url ? vector_u8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x3, 0x7, 0xB, 0xE, 0xB, 0x6) + : vector_u8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); + + const auto check_values = + base64_url ? 
vector_u8(0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xB6, 0xA6, + 0xB5, 0xA1, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80) + : vector_u8(0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6, + 0xB5, 0x86, 0xD1, 0x80, 0xB1, 0x80, 0x91, 0x80); + + const auto shifted = src.shr<3>(); + + const auto delta_hash = avg(src.lookup_16(delta_asso), shifted); + const auto check_hash = avg(src.lookup_16(check_asso), shifted); + + const auto out = as_vector_i8(delta_hash.lookup_16(delta_values)) + .saturating_add(as_vector_i8(src)); + const auto chk = as_vector_i8(check_hash.lookup_16(check_values)) + .saturating_add(as_vector_i8(src)); + + const uint16_t mask = chk.to_bitmask(); + if (!ignore_garbage && mask) { + const auto ascii = src.lookup_16(ascii_space_tbl); + const auto ascii_space = (ascii == src); + error = (mask ^ ascii_space.to_bitmask()); + } + src = out; + + return mask; + } + + template + simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) { + uint16_t err0 = 0; + uint16_t err1 = 0; + uint16_t err2 = 0; + uint16_t err3 = 0; + uint64_t m0 = to_base64_mask(b.chunks[0], err0); + uint64_t m1 = to_base64_mask(b.chunks[1], err1); + uint64_t m2 = to_base64_mask(b.chunks[2], err2); + uint64_t m3 = to_base64_mask(b.chunks[3], err3); + + if (!ignore_garbage) { + *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | + ((uint64_t)err3 << 48); + } + return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); + } + + simdutf_really_inline void copy_block(char *output) { + b.store(reinterpret_cast(output)); + } + + simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) { + uint64_t nmask = ~mask; + compress(b.chunks[0], uint16_t(mask), output); + compress(b.chunks[1], uint16_t(mask >> 16), + output + count_ones(nmask & 0xFFFF)); + compress(b.chunks[2], uint16_t(mask >> 32), + output + count_ones(nmask & 0xFFFFFFFF)); + compress(b.chunks[3], uint16_t(mask >> 48), + output + count_ones(nmask & 0xFFFFFFFFFFFFULL)); + return count_ones(nmask); + } + + simdutf_really_inline 
void base64_decode_block(char *out) { + base64_decode(out + 12 * 0, b.chunks[0]); + base64_decode(out + 12 * 1, b.chunks[1]); + base64_decode(out + 12 * 2, b.chunks[2]); + base64_decode(out + 12 * 3, b.chunks[3]); + } + + simdutf_really_inline void base64_decode_block_safe(char *out) { + base64_decode(out + 12 * 0, b.chunks[0]); + base64_decode(out + 12 * 1, b.chunks[1]); + base64_decode(out + 12 * 2, b.chunks[2]); + char buffer[16]; + base64_decode(buffer, b.chunks[3]); + std::memcpy(out + 12 * 3, buffer, 12); + } +}; +/* end file src/ppc64/ppc64_base64.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 + } // unnamed namespace } // namespace ppc64 } // namespace simdutf +#if SIMDUTF_FEATURE_UTF8 /* begin file src/generic/buf_block_reader.h */ namespace simdutf { namespace ppc64 { @@ -35186,68 +41277,15 @@ result generic_validate_utf8_with_errors(const char *input, size_t length) { reinterpret_cast(input), length); } -template -bool generic_validate_ascii(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); - uint8_t blocks[64]{}; - simd::simd8x64 running_or(blocks); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - running_or |= in; - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - running_or |= in; - return running_or.is_ascii(); -} - -bool generic_validate_ascii(const char *input, size_t length) { - return generic_validate_ascii( - reinterpret_cast(input), length); -} - -template -result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } - reader.advance(); - - count += 64; - } - uint8_t block[64]{}; - 
reader.get_remainder(block); - simd::simd8x64 in(block); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } else { - return result(error_code::SUCCESS, length); - } -} - -result generic_validate_ascii_with_errors(const char *input, size_t length) { - return generic_validate_ascii_with_errors( - reinterpret_cast(input), length); -} - } // namespace utf8_validation } // unnamed namespace } // namespace ppc64 } // namespace simdutf /* end file src/generic/utf8_validation/utf8_validator.h */ -// transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ namespace simdutf { namespace ppc64 { namespace { @@ -35582,7 +41620,6 @@ struct validating_transcoder { } // namespace simdutf /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ - namespace simdutf { namespace ppc64 { namespace { @@ -35658,9 +41695,10 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace ppc64 } // namespace simdutf /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -// transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ namespace simdutf { namespace ppc64 { namespace { @@ -35981,7 +42019,6 @@ struct validating_transcoder { } // namespace simdutf /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ - namespace simdutf { namespace ppc64 { namespace { @@ -36025,7 +42062,105 @@ simdutf_warn_unused size_t 
convert_valid(const char *input, size_t size, } // namespace ppc64 } // namespace simdutf /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -// other functions +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/utf8.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); + } + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} + +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if 
(iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif + +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). + count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} +} // namespace utf8 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 /* begin file src/generic/utf16.h */ namespace simdutf { namespace ppc64 { @@ -36075,6 +42210,89 @@ simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t size) { + size_t pos = 0; + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input = input.swap_bytes(); + } + + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 
0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation of how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. + + Thus, we always have the correct count for the current char: + 1, 2 or 3 bytes. + + A trickier part is how we count surrogate pairs. Whenever + we encounter a surrogate (low or high), we count it as + 3 bytes and then subtract 1 (`is_surrogate` is -1 or 0). + Each surrogate char yields 2. A surrogate pair, that + is a low surrogate followed by a high one, yields + the expected 4 bytes. + + It also correctly handles cases when a low surrogate is + processed by this loop, but the high surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach, thanks to that for valid UTF-16 + strings it always counts correctly.
+ */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + template simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, size_t size) { @@ -36101,54 +42319,1166 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { } // namespace ppc64 } // namespace simdutf /* end file src/generic/utf16.h */ -/* begin file src/generic/utf8.h */ +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- + + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of the highest nibbles + are 0xD (13), then we are sure that the UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 ..
0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. + const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the register's last word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opposite fact holds as well, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contain valid UTF-16. + // The last word (index 15) may be either a low or high surrogate. In the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate.
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf32.h */ +#include namespace simdutf { namespace ppc64 { namespace { -namespace utf8 { +namespace utf32 { -using namespace simd; +template T min(T a, T b) { return a <= b ? a : b; } -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); +size_t utf8_length_from_utf32(const char32_t *input, size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + + const auto one = vector_u32::splat(1); + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + + size_t counter = 0; + + // 1. 
vectorized loop unrolled 4 times + { + // we use uint32 counters; this caps the inner-loop iterations (each adds at most max_increment * 4 per lane) so that the counters cannot overflow + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); + + input += 4 * N; + } + + counter += acc.sum(); + } } - return count + scalar::utf8::count_code_points(in + pos, size - pos); + + // 2. vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end.
+ counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); } -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} -} // namespace utf8 +} // namespace utf32 } // unnamed namespace } // namespace ppc64 } // namespace simdutf -/* end file src/generic/utf8.h */ +/* end file src/generic/utf32.h */ +/* begin file src/generic/validate_utf32.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf32 { +simdutf_really_inline bool validate(const char32_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return true; + } + + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + auto currentmax = vector_u32::zero(); + auto currentoffsetmax = vector_u32::zero(); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + currentmax = max(currentmax, in); + currentoffsetmax = max(currentoffsetmax, in + offset); + input += N; + } + + const auto too_large = currentmax > standardmax; + if (too_large.any()) { + return false; + } + + const auto surrogate = currentoffsetmax > standardoffsetmax; + if (surrogate.any()) { + return false; + } + + return scalar::utf32::validate(input, end - input); +} + +simdutf_really_inline result validate_with_errors(const char32_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return result(error_code::SUCCESS, 0); + } + + const char32_t *start = input; + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + const auto too_large = in > standardmax; + const auto surrogate = (in + offset) > standardoffsetmax; + + const auto combined = too_large | surrogate; + if (simdutf_unlikely(combined.any())) { + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; + } + + input += N; + } + + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; +} + +} // namespace utf32 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/validate_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace ascii_validation { + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + uint8_t blocks[64]{}; + simd::simd8x64 running_or(blocks); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + running_or |= in; + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + running_or |= in; + return running_or.is_ascii(); +} + +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), 
length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); + + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } +} + +} // namespace ascii_validation +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. 
+ // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, + + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. 
+ if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; + } + return latin1_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of words written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct validating_transcoder +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin.
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; +} + +} // namespace utf8_to_latin1 +} // namespace +} // namespace ppc64 +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/generic/base64.h */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. 
Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace base64 { + +/* + The following template function implements API for Base64 decoding. + + An implementation is responsible for providing the `block64` type and + associated methods that perform actual conversion. Please refer + to any vectorized implementation to learn the API of these procedures. +*/ +template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + size_t equallocation = + srclen; // location of the first padding character if any + // skip trailing spaces + while (!ignore_garbage && srclen > 0 && + scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + size_t equalsigns = 0; + if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 1; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 2; + } + } + if (srclen == 0) { + if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + char *end_of_safe_64byte_zone = + (srclen + 3) / 4 * 3 >= 63 ? 
dst + (srclen + 3) / 4 * 3 - 63 : dst; + + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; + + constexpr size_t block_size = 6; + static_assert(block_size >= 2, "block_size must be at least two"); + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b(src); + src += 64; + uint64_t error = 0; + const uint64_t badcharmask = + b.to_base64_mask(&error); + if (!ignore_garbage && error) { + src -= 64; + const size_t error_offset = trailing_zeroes(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; + } + if (badcharmask != 0) { + bufferptr += b.compress_block(badcharmask, bufferptr); + } else if (bufferptr != buffer) { + b.copy_block(bufferptr); + bufferptr += 64; + } else { + if (dst >= end_of_safe_64byte_zone) { + b.base64_decode_block_safe(dst); + } else { + b.base64_decode_block(dst); + } + dst += 48; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 2); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); + } else { + base64_decode_block(dst, buffer + (block_size - 2) * 64); + } + dst += 48; + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. 
+ int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if (!ignore_garbage && + (!scalar::base64::is_eight_byte(*src) || val > 64)) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer_start); + } else { + base64_decode_block(dst, buffer_start); + } + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + if (!ignore_garbage) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + } else { + while (to_base64[uint8_t(*(src - 1))] >= 64) { + src--; + } + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = 
scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r.input_count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + return r; + } else { + r.output_count += size_t(dst - dstinit); + } + if (!ignore_garbage && last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + r.error = error_code::INVALID_BASE64_CHARACTER; + r.input_count = equallocation; + } + } + return r; + } + if (!ignore_garbage && equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} + +} // namespace base64 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/base64.h */ +#endif // SIMDUTF_FEATURE_BASE64 + +/* begin file src/ppc64/templates.cpp */ +/* + Template `convert_impl` implements generic conversion routine between + different encodings. Procedure returns the number of written elements, + or zero in the case of error. 
+ + Parameters: + * VectorizedConvert - vectorized procedure that returns structure having + three fields: error_code (err), const Source* (input), Destination* + (output) + * ScalarConvert - scalar procedure that carries on conversion of tail + * Source - type of input char (like char16_t, char) + * Destination - type of input char +*/ +template +size_t convert_impl(VectorizedConvert vectorized_convert, + ScalarConvert scalar_convert, const Source *buf, size_t len, + Destination *output) { + const auto vr = vectorized_convert(buf, len, output); + const size_t consumed = vr.input - buf; + const size_t written = vr.output - output; + if (vr.err != simdutf::error_code::SUCCESS) { + if (vr.err == simdutf::error_code::OTHER) { + // Vectorized procedure detected an error, but does not know + // exact position. The scalar procedure rescan the portion of + // input and figure out where the error is located. + return scalar_convert(vr.input, len - consumed, vr.output); + } + return 0; + } + + if (consumed == len) { + return written; + } + + const auto ret = scalar_convert(vr.input, len - consumed, vr.output); + if (ret == 0) { + return 0; + } + + return written + ret; +} + +/* + Template `convert_with_errors_impl` implements generic conversion routine + between different encodings. Procedure returns a `result` instance --- + please refer to its documentation for details. 
+ + Parameters: + * VectorizedConvert - vectorized procedure that returns structure having + three fields: error_code (err), const Source* (input), Destination* + (output) + * ScalarConvert - scalar procedure that carries on conversion of tail + * Source - type of input char (like char16_t, char) + * Destination - type of input char +*/ +template +simdutf::result convert_with_errors_impl(VectorizedConvert vectorized_convert, + ScalarConvert scalar_convert, + const Source *buf, size_t len, + Destination *output) { + + const auto vr = vectorized_convert(buf, len, output); + const size_t consumed = vr.input - buf; + const size_t written = vr.output - output; + if (vr.err != simdutf::error_code::SUCCESS) { + if (vr.err == simdutf::error_code::OTHER) { + // Vectorized procedure detected an error, but does not know + // exact position. The scalar procedure rescan the portion of + // input and figure out where the error is located. + auto sr = scalar_convert(vr.input, len - consumed, vr.output); + sr.count += consumed; + return sr; + } + return simdutf::result(vr.err, consumed); + } + + if (consumed == len) { + return simdutf::result(simdutf::error_code::SUCCESS, written); + } + + simdutf::result sr = scalar_convert(vr.input, len - consumed, vr.output); + if (sr.is_ok()) { + sr.count += written; + } else { + sr.count += consumed; + } + + return sr; +} +/* end file src/ppc64/templates.cpp */ + +#ifdef SIMDUTF_INTERNAL_TESTS + #if SIMDUTF_FEATURE_BASE64 + #include "ppc64_base64_internal_tests.cpp" + #endif // SIMDUTF_FEATURE_BASE64 +#endif // SIMDUTF_INTERNAL_TESTS // // Implementation-specific overrides // namespace simdutf { namespace ppc64 { +#if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { @@ -36157,13 +43487,14 @@ implementation::detect_encodings(const char *input, if (bom_encoding != encoding_type::unspecified) { return bom_encoding; } - // todo: reimplement as a one-pass 
algorithm. int out = 0; + // todo: reimplement as a one-pass algorithm. if (validate_utf8(input, length)) { out |= encoding_type::UTF8; } if ((length % 2) == 0) { - if (validate_utf16(reinterpret_cast(input), length / 2)) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { out |= encoding_type::UTF16_LE; } } @@ -36172,301 +43503,571 @@ implementation::detect_encodings(const char *input, out |= encoding_type::UTF32_LE; } } - return out; } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { return ppc64::utf8_validation::generic_validate_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { return ppc64::utf8_validation::generic_validate_utf8_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return ppc64::utf8_validation::generic_validate_ascii(buf, len); + return ppc64::ascii_validation::generic_validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return ppc64::utf8_validation::generic_validate_ascii_with_errors(buf, len); + return ppc64::ascii_validation::generic_validate_ascii_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate(buf, len); -} + const auto res = + ppc64::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { + return false; + } + if (res.count != len) { + return 
scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate(buf, len); + return validate_utf16be_with_errors(buf, len).is_ok(); } simdutf_warn_unused result implementation::validate_utf16le_with_errors( const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate_with_errors(buf, len); + const auto res = + ppc64::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + auto scalar = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + scalar.count += res.count; + return scalar; + } + + return res; } simdutf_warn_unused result implementation::validate_utf16be_with_errors( const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate_with_errors(buf, len); -} + const auto res = + ppc64::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + auto scalar = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + scalar.count += res.count; + return scalar; + } + return res; +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + return utf32::validate(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { - return scalar::utf32::validate_with_errors(buf, len); + return utf32::validate_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const 
char *buf, size_t len, char *utf8_output) const noexcept { + const auto ret = ppc64_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; + + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + size_t n = + ppc64_convert_latin1_to_utf16(buf, len, utf16_output); + if (n < len) { + n += scalar::latin1_to_utf16::convert(buf + n, len - n, + utf16_output + n); + } + + return n; } -simdutf_warn_unused bool -implementation::validate_utf32(const char16_t *buf, size_t len) const noexcept { - return scalar::utf32::validate(buf, len); +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + size_t n = + ppc64_convert_latin1_to_utf16(buf, len, utf16_output); + if (n < len) { + n += scalar::latin1_to_utf16::convert(buf + n, len - n, + utf16_output + n); + } + + return n; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + const auto ret = ppc64_convert_latin1_to_utf32(buf, len, utf32_output); + if (ret.first != buf + len) { + const size_t processed = ret.first - buf; + scalar::latin1_to_utf32::convert(ret.first, len - processed, ret.second); + } + + return len; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t 
implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert(buf, len, latin1_output); } +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return ppc64::utf8_to_latin1::convert_valid(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return 0; // stub + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); } simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return 0; // stub + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); } simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return result(error_code::OTHER, 0); // stub + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); } simdutf_warn_unused result 
implementation::convert_utf8_to_utf16be_with_errors( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return result(error_code::OTHER, 0); // stub + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); } simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return 0; // stub + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(buf, len, + utf16_output); } simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return 0; // stub + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(buf, len, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char * /*buf*/, size_t /*len*/, - char32_t * /*utf16_output*/) const noexcept { - return 0; // stub + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert(buf, len, utf32_output); } simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char * /*buf*/, size_t /*len*/, - char32_t * /*utf16_output*/) const noexcept { - return result(error_code::OTHER, 0); // stub + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); } simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char * /*buf*/, size_t /*len*/, - char32_t * 
/*utf16_output*/) const noexcept { - return 0; // stub + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + + return convert_impl(ppc64_convert_utf16_to_latin1, + scalar::utf16_to_latin1::convert, buf, + len, latin1_output); } +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + + return convert_impl(ppc64_convert_utf16_to_latin1, + scalar::utf16_to_latin1::convert, buf, + len, latin1_output); +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + + return convert_with_errors_impl( + ppc64_convert_utf16_to_latin1, + scalar::utf16_to_latin1::convert_with_errors, buf, + len, latin1_output); +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + + return convert_with_errors_impl( + ppc64_convert_utf16_to_latin1, + scalar::utf16_to_latin1::convert_with_errors, buf, len, + latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: we could provide an optimized function. + return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: we could provide an optimized function. 
+ return convert_utf16le_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert(buf, len, - utf8_output); + + return convert_impl(ppc64_convert_utf16_to_utf8, + scalar::utf16_to_utf8::convert, buf, + len, utf8_output); } simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert(buf, len, utf8_output); + + return convert_impl(ppc64_convert_utf16_to_utf8, + scalar::utf16_to_utf8::convert, buf, len, + utf8_output); } simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_with_errors( - buf, len, utf8_output); + + return convert_with_errors_impl( + ppc64_convert_utf16_to_utf8, + scalar::utf16_to_utf8::convert_with_errors, buf, len, + utf8_output); } simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_with_errors( - buf, len, utf8_output); + + return convert_with_errors_impl( + ppc64_convert_utf16_to_utf8, + scalar::utf16_to_utf8::convert_with_errors, buf, len, + utf8_output); } simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_valid(buf, len, - utf8_output); + return convert_utf16le_to_utf8(buf, len, utf8_output); } simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return 
scalar::utf16_to_utf8::convert_valid(buf, len, - utf8_output); + return convert_utf16be_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return convert_impl(ppc64_convert_utf32_to_latin1, + scalar::utf32_to_latin1::convert, buf, len, + latin1_output); } +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return convert_with_errors_impl( + ppc64_convert_utf32_to_latin1, + scalar::utf32_to_latin1::convert_with_errors, buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return convert_impl(ppc64_convert_utf32_to_latin1, + scalar::utf32_to_latin1::convert, buf, len, + latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert(buf, len, utf8_output); + return convert_impl(ppc64_convert_utf32_to_utf8, + scalar::utf32_to_utf8::convert, buf, len, utf8_output); } simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output); + return convert_with_errors_impl( + ppc64_convert_utf32_to_utf8, + scalar::utf32_to_utf8::convert_with_errors, buf, len, utf8_output); } simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return 
scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output); + return convert_impl(ppc64_convert_utf32_to_utf8, + scalar::utf32_to_utf8::convert, buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert(buf, len, - utf16_output); + + return convert_impl(ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert, buf, + len, utf16_output); } simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert(buf, len, - utf16_output); + + return convert_impl( + ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert, buf, len, utf16_output); } simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_with_errors( - buf, len, utf16_output); + + return convert_with_errors_impl( + ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert_with_errors, buf, len, + utf16_output); } simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_with_errors( - buf, len, utf16_output); + + return convert_with_errors_impl( + ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert_with_errors, buf, len, + utf16_output); } simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_valid( - buf, len, utf16_output); + + return convert_impl( + ppc64_convert_utf32_to_utf16, + 
scalar::utf32_to_utf16::convert, buf, len, + utf16_output); } simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_valid(buf, len, - utf16_output); + + return convert_impl( + ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert, buf, len, utf16_output); } simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert(buf, len, - utf32_output); + return convert_impl(ppc64_convert_utf16_to_utf32, + scalar::utf16_to_utf32::convert, buf, + len, utf32_output); } simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert(buf, len, - utf32_output); + return convert_impl(ppc64_convert_utf16_to_utf32, + scalar::utf16_to_utf32::convert, buf, + len, utf32_output); } simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_with_errors( - buf, len, utf32_output); + return convert_with_errors_impl( + ppc64_convert_utf16_to_utf32, + scalar::utf16_to_utf32::convert_with_errors, buf, len, + utf32_output); } simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_with_errors( - buf, len, utf32_output); + return convert_with_errors_impl( + ppc64_convert_utf16_to_utf32, + scalar::utf16_to_utf32::convert_with_errors, buf, len, + utf32_output); } simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return 
scalar::utf16_to_utf32::convert_valid( - buf, len, utf32_output); + return convert_utf16le_to_utf32(buf, len, utf32_output); } simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_valid(buf, len, - utf32_output); + return convert_utf16be_to_utf32(buf, len, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 void implementation::change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept { - scalar::utf16::change_endianness_utf16(input, length, output); + utf16::change_endianness_utf16(input, length, output); } simdutf_warn_unused size_t implementation::count_utf16le( const char16_t *input, size_t length) const noexcept { - return scalar::utf16::count_code_points(input, length); + return utf16::count_code_points(input, length); } simdutf_warn_unused size_t implementation::count_utf16be( const char16_t *input, size_t length) const noexcept { - return scalar::utf16::count_code_points(input, length); + return utf16::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { return utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + const auto ret = ppc64_utf8_length_from_latin1(input, length); + const size_t consumed = ret.first - input; + + if (consumed == 
length) { + return ret.second; + } + + const auto scalar = + scalar::latin1::utf8_length_from_latin1(ret.first, length - consumed); + return scalar + ret.second; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf8_length_from_utf16(input, - length); + return utf16::utf8_length_from_utf16(input, length); } simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf8_length_from_utf16(input, length); + return utf16::utf8_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf32_length_from_utf16(input, - length); + return utf16::utf32_length_from_utf16(input, length); } simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf32_length_from_utf16(input, length); + return utf16::utf32_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf16_length_from_utf8( const char *input, size_t length) const noexcept { - return scalar::utf8::utf16_length_from_utf8(input, length); + return utf8::utf16_length_from_utf8(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const noexcept { - return scalar::utf32::utf8_length_from_utf32(input, 
length); + return utf32::utf8_length_from_utf32(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { return scalar::utf32::utf16_length_from_utf32(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf8( const char *input, size_t length) const noexcept { - return scalar::utf8::count_code_points(input, length); + return utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( const char *input, size_t length) const noexcept { return scalar::base64::maximal_binary_length_from_base64(input, length); @@ -36475,122 +44076,120 @@ simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - // skip trailing spaces - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; + if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return 
base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - } - return r; } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return 
base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } } simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - // skip trailing spaces - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; + if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - 
return {BASE64_INPUT_REMAINDER, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - } - return r; } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } } size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { - return scalar::base64::binary_to_base64(input, length, output, options); + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, 
options); + } } +#endif // SIMDUTF_FEATURE_BASE64 + +#ifdef SIMDUTF_INTERNAL_TESTS +std::vector +implementation::internal_tests() const { + #define entry(proc) \ + TestProcedure { #proc, proc } + return {entry(base64_encoding_translate_6bit_values), + entry(base64_encoding_expand_6bit_fields), + entry(base64_decoding_valid), + entry(base64_decoding_invalid_ignore_errors), + entry(base64url_decoding_invalid_ignore_errors), + entry(base64_decoding_invalid_strict_errors), + entry(base64url_decoding_invalid_strict_errors), + entry(base64_decoding_pack), + entry(base64_compress)}; + #undef entry +} +#endif + } // namespace ppc64 } // namespace simdutf @@ -36600,11 +44199,6 @@ size_t implementation::binary_to_base64(const char *input, size_t length, #endif #if SIMDUTF_IMPLEMENTATION_RVV /* begin file src/rvv/implementation.cpp */ - - - - - /* begin file src/simdutf/rvv/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "rvv" // #define SIMDUTF_IMPLEMENTATION rvv @@ -36658,7 +44252,7 @@ rvv_utf32_store_utf16_m4(uint16_t *dst, vuint32m4_t utf32, size_t vl, /* end file src/rvv/rvv_helpers.inl.cpp */ /* begin file src/rvv/rvv_length_from.inl.cpp */ - +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::count_utf16le(const char16_t *src, size_t len) const noexcept { return utf32_length_from_utf16le(src, len); @@ -36668,37 +44262,23 @@ simdutf_warn_unused size_t implementation::count_utf16be(const char16_t *src, size_t len) const noexcept { return utf32_length_from_utf16be(src, len); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *src, size_t len) const noexcept { return utf32_length_from_utf8(src, len); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::latin1_length_from_utf8( const char *src, size_t len) const noexcept { return utf32_length_from_utf8(src, len); } +#endif // SIMDUTF_FEATURE_UTF8 && 
SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t len) const noexcept { - return len; -} - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t len) const noexcept { - return len; -} - -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t len) const noexcept { - return len; -} - -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t len) const noexcept { - return len; -} - +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf8( const char *src, size_t len) const noexcept { size_t count = 0; @@ -36710,7 +44290,9 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8( } return count; } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 template simdutf_really_inline static size_t rvv_utf32_length_from_utf16(const char16_t *src, size_t len) { @@ -36739,7 +44321,9 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( else return rvv_utf32_length_from_utf16(src, len); } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::utf8_length_from_latin1( const char *src, size_t len) const noexcept { size_t count = len; @@ -36750,7 +44334,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_latin1( } return count; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 template simdutf_really_inline static size_t rvv_utf8_length_from_utf16(const char16_t *src, size_t len) { @@ -36782,7 +44368,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( else return rvv_utf8_length_from_utf16(src, len); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t 
implementation::utf8_length_from_utf32( const char32_t *src, size_t len) const noexcept { size_t count = 0; @@ -36797,7 +44385,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf32( } return count; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf16_length_from_utf8( const char *src, size_t len) const noexcept { size_t count = 0; @@ -36811,7 +44401,9 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf8( } return count; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *src, size_t len) const noexcept { size_t count = 0; @@ -36823,10 +44415,10 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32( } return count; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* end file src/rvv/rvv_length_from.inl.cpp */ /* begin file src/rvv/rvv_validate.inl.cpp */ - - +#if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *src, size_t len) const noexcept { size_t vlmax = __riscv_vsetvlmax_e8m8(); @@ -36852,7 +44444,9 @@ simdutf_warn_unused result implementation::validate_ascii_with_errors( } return result(error_code::SUCCESS, src - beg); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /* Returns a close estimation of the number of valid UTF-8 bytes up to the * first invalid one, but never overestimating. 
*/ simdutf_really_inline static size_t rvv_count_valid_utf8(const char *src, @@ -36941,26 +44535,18 @@ implementation::validate_utf8(const char *src, size_t len) const noexcept { size_t count = rvv_count_valid_utf8(src, len); return scalar::utf8::validate(src + count, len - count); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *src, size_t len) const noexcept { size_t count = rvv_count_valid_utf8(src, len); result res = scalar::utf8::validate_with_errors(src + count, len - count); return result(res.error, count + res.count); } +#endif // SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *src, - size_t len) const noexcept { - return validate_utf16le_with_errors(src, len).error == error_code::SUCCESS; -} - -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *src, - size_t len) const noexcept { - return validate_utf16be_with_errors(src, len).error == error_code::SUCCESS; -} - +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING template simdutf_really_inline static result rvv_validate_utf16_with_errors(const char16_t *src, size_t len) { @@ -36993,7 +44579,26 @@ rvv_validate_utf16_with_errors(const char16_t *src, size_t len) { return result(error_code::SUCCESS, src - beg); } } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *src, + size_t len) const noexcept { + return rvv_validate_utf16_with_errors(src, len) + .error == error_code::SUCCESS; +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *src, + size_t len) const noexcept { + return validate_utf16be_with_errors(src, len).error == 
error_code::SUCCESS; +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused result implementation::validate_utf16le_with_errors( const char16_t *src, size_t len) const noexcept { return rvv_validate_utf16_with_errors(src, len); @@ -37006,7 +44611,9 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( else return rvv_validate_utf16_with_errors(src, len); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf32(const char32_t *src, size_t len) const noexcept { size_t vlmax = __riscv_vsetvlmax_e32m8(); @@ -37025,7 +44632,9 @@ implementation::validate_utf32(const char32_t *src, size_t len) const noexcept { __riscv_vmsne_vx_u32m8_b4(maxOff, 0xFFFFF7FF, vlmax), vlmax), vlmax) < 0; } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *src, size_t len) const noexcept { const char32_t *beg = src; @@ -37053,10 +44662,11 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( } return result(error_code::SUCCESS, src - beg); } +#endif // SIMDUTF_FEATURE_UTF32 /* end file src/rvv/rvv_validate.inl.cpp */ /* begin file src/rvv/rvv_latin1_to.inl.cpp */ - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *src, size_t len, char *dst) const noexcept { char *beg = dst; @@ -37087,7 +44697,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( } return dst - beg; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *src, size_t len, char16_t *dst) const noexcept { char16_t *beg = dst; @@ -37111,7 +44723,9 @@ simdutf_warn_unused size_t 
implementation::convert_latin1_to_utf16be( } return dst - beg; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *src, size_t len, char32_t *dst) const noexcept { char32_t *beg = dst; @@ -37122,10 +44736,10 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( } return dst - beg; } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* end file src/rvv/rvv_latin1_to.inl.cpp */ /* begin file src/rvv/rvv_utf16_to.inl.cpp */ -#include - +#if SIMDUTF_FEATURE_UTF16 template simdutf_really_inline static result rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) { @@ -37141,7 +44755,9 @@ rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) { } return result(error_code::SUCCESS, src - beg); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *src, size_t len, char *dst) const noexcept { result res = convert_utf16le_to_latin1_with_errors(src, len, dst); @@ -37191,7 +44807,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( } return src - beg; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 template simdutf_really_inline static result rvv_utf16_to_utf8_with_errors(const char16_t *src, size_t len, char *dst) { @@ -37369,7 +44987,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( const char16_t *src, size_t len, char *dst) const noexcept { return convert_utf16be_to_utf8(src, len, dst); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 template simdutf_really_inline static result rvv_utf16_to_utf32_with_errors(const char16_t *src, size_t len, char32_t *dst) { @@ -37517,9 
+45137,11 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( const char16_t *src, size_t len, char32_t *dst) const noexcept { return convert_utf16be_to_utf32(src, len, dst); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* end file src/rvv/rvv_utf16_to.inl.cpp */ -/* begin file src/rvv/rvv_utf32_to.inl.cpp */ +/* begin file src/rvv/rvv_utf32_to.inl.cpp */ +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *src, size_t len, char *dst) const noexcept { result res = convert_utf32_to_latin1_with_errors(src, len, dst); @@ -37549,7 +45171,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( const char32_t *src, size_t len, char *dst) const noexcept { return convert_utf32_to_latin1(src, len, dst); } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( const char32_t *src, size_t len, char *dst) const noexcept { size_t n = len; @@ -37701,7 +45325,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *src, size_t len, char *dst) const noexcept { return convert_utf32_to_utf8(src, len, dst); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 template simdutf_really_inline static result rvv_convert_utf32_to_utf16_with_errors(const char32_t *src, size_t len, @@ -37808,8 +45434,10 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( else return rvv_convert_valid_utf32_to_utf16(src, len, dst); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* end file src/rvv/rvv_utf32_to.inl.cpp */ /* begin file src/rvv/rvv_utf8_to.inl.cpp */ +#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) template simdutf_really_inline static size_t 
rvv_utf8_to_common(char const *src, size_t len, Tdst *dst) { @@ -37852,6 +45480,7 @@ simdutf_really_inline static size_t rvv_utf8_to_common(char const *src, const vuint8m1_t err3tbl = __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err3m, 2)); + size_t vl8m1 = __riscv_vsetvlmax_e8m1(); size_t vl8m2 = __riscv_vsetvlmax_e8m2(); vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); @@ -38000,51 +45629,51 @@ simdutf_really_inline static size_t rvv_utf8_to_common(char const *src, * vssubu.vx v, 10, (max(x-10, 0)) almost gives us what we want, we * just need to manually detect and handle the one special case: */ -#define SIMDUTF_RVV_UTF8_TO_COMMON_M1(idx) \ - vuint8m1_t c1 = __riscv_vget_v_u8m2_u8m1(b1, idx); \ - vuint8m1_t c2 = __riscv_vget_v_u8m2_u8m1(b2, idx); \ - vuint8m1_t c3 = __riscv_vget_v_u8m2_u8m1(b3, idx); \ - vuint8m1_t c4 = __riscv_vget_v_u8m2_u8m1(b4, idx); \ - /* remove prefix from trailing bytes */ \ - c2 = __riscv_vand_vx_u8m1(c2, 0b00111111, vlOut); \ - c3 = __riscv_vand_vx_u8m1(c3, 0b00111111, vlOut); \ - c4 = __riscv_vand_vx_u8m1(c4, 0b00111111, vlOut); \ - vuint8m1_t shift = __riscv_vsrl_vx_u8m1(c1, 4, vlOut); \ - shift = __riscv_vmerge_vxm_u8m1(__riscv_vssubu_vx_u8m1(shift, 10, vlOut), 3, \ - __riscv_vmseq_vx_u8m1_b8(shift, 12, vlOut), \ - vlOut); \ - c1 = __riscv_vsll_vv_u8m1(c1, shift, vlOut); \ - c1 = __riscv_vsrl_vv_u8m1(c1, shift, vlOut); \ - /* unconditionally widen and combine to c1234 */ \ - vuint16m2_t c34 = __riscv_vwaddu_wv_u16m2( \ - __riscv_vwmulu_vx_u16m2(c3, 1 << 6, vlOut), c4, vlOut); \ - vuint16m2_t c12 = __riscv_vwaddu_wv_u16m2( \ - __riscv_vwmulu_vx_u16m2(c1, 1 << 6, vlOut), c2, vlOut); \ - vuint32m4_t c1234 = __riscv_vwaddu_wv_u32m4( \ - __riscv_vwmulu_vx_u32m4(c12, 1 << 12, vlOut), c34, vlOut); \ - /* derive required right-shift amount from `shift` to reduce \ - * c1234 to the required number of bytes */ \ - c1234 = __riscv_vsrl_vv_u32m4( \ - c1234, \ - 
__riscv_vzext_vf4_u32m4( \ - __riscv_vmul_vx_u8m1( \ - __riscv_vrsub_vx_u8m1(__riscv_vssubu_vx_u8m1(shift, 2, vlOut), \ - 3, vlOut), \ - 6, vlOut), \ - vlOut), \ - vlOut); \ - /* store result in desired format */ \ - if (is16) \ - vlDst = rvv_utf32_store_utf16_m4((uint16_t *)dst, c1234, vlOut, \ - m4even); \ - else \ - vlDst = vlOut, __riscv_vse32_v_u32m4((uint32_t *)dst, c1234, vlOut); + #define SIMDUTF_RVV_UTF8_TO_COMMON_M1(idx) \ + vuint8m1_t c1 = __riscv_vget_v_u8m2_u8m1(b1, idx); \ + vuint8m1_t c2 = __riscv_vget_v_u8m2_u8m1(b2, idx); \ + vuint8m1_t c3 = __riscv_vget_v_u8m2_u8m1(b3, idx); \ + vuint8m1_t c4 = __riscv_vget_v_u8m2_u8m1(b4, idx); \ + /* remove prefix from trailing bytes */ \ + c2 = __riscv_vand_vx_u8m1(c2, 0b00111111, vlOut); \ + c3 = __riscv_vand_vx_u8m1(c3, 0b00111111, vlOut); \ + c4 = __riscv_vand_vx_u8m1(c4, 0b00111111, vlOut); \ + vuint8m1_t shift = __riscv_vsrl_vx_u8m1(c1, 4, vlOut); \ + shift = __riscv_vmerge_vxm_u8m1( \ + __riscv_vssubu_vx_u8m1(shift, 10, vlOut), 3, \ + __riscv_vmseq_vx_u8m1_b8(shift, 12, vlOut), vlOut); \ + c1 = __riscv_vsll_vv_u8m1(c1, shift, vlOut); \ + c1 = __riscv_vsrl_vv_u8m1(c1, shift, vlOut); \ + /* unconditionally widen and combine to c1234 */ \ + vuint16m2_t c34 = __riscv_vwaddu_wv_u16m2( \ + __riscv_vwmulu_vx_u16m2(c3, 1 << 6, vlOut), c4, vlOut); \ + vuint16m2_t c12 = __riscv_vwaddu_wv_u16m2( \ + __riscv_vwmulu_vx_u16m2(c1, 1 << 6, vlOut), c2, vlOut); \ + vuint32m4_t c1234 = __riscv_vwaddu_wv_u32m4( \ + __riscv_vwmulu_vx_u32m4(c12, 1 << 12, vlOut), c34, vlOut); \ + /* derive required right-shift amount from `shift` to reduce \ + * c1234 to the required number of bytes */ \ + c1234 = __riscv_vsrl_vv_u32m4( \ + c1234, \ + __riscv_vzext_vf4_u32m4( \ + __riscv_vmul_vx_u8m1( \ + __riscv_vrsub_vx_u8m1(__riscv_vssubu_vx_u8m1(shift, 2, vlOut), \ + 3, vlOut), \ + 6, vlOut), \ + vlOut), \ + vlOut); \ + /* store result in desired format */ \ + if (is16) \ + vlDst = rvv_utf32_store_utf16_m4((uint16_t *)dst, c1234, vlOut, \ + 
m4even); \ + else \ + vlDst = vlOut, __riscv_vse32_v_u32m4((uint32_t *)dst, c1234, vlOut); /* Unrolling this manually reduces register pressure and allows * us to terminate early. */ { size_t vlOutm2 = vlOut, vlDst; - vlOut = __riscv_vsetvl_e8m1(vlOut); + vlOut = __riscv_vsetvl_e8m1(vlOut < vl8m1 ? vlOut : vl8m1); SIMDUTF_RVV_UTF8_TO_COMMON_M1(0) if (vlOutm2 == vlOut) { vlOut = vlDst; @@ -38060,7 +45689,7 @@ simdutf_really_inline static size_t rvv_utf8_to_common(char const *src, vlOut = vlDst; } -#undef SIMDUTF_RVV_UTF8_TO_COMMON_M1 + #undef SIMDUTF_RVV_UTF8_TO_COMMON_M1 } /* validate the last character and reparse it + tail */ @@ -38081,7 +45710,10 @@ simdutf_really_inline static size_t rvv_utf8_to_common(char const *src, return 0; return (size_t)(dst - beg) + ret; } +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32) +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *src, size_t len, char *dst) const noexcept { const char *beg = dst; @@ -38170,7 +45802,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( } return dst - beg; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( const char *src, size_t len, char16_t *dst) const noexcept { return rvv_utf8_to_common(src, len, @@ -38220,7 +45854,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( return rvv_utf8_to_common( src, len, (uint16_t *)dst); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( const char *src, size_t len, char32_t *dst) const noexcept { return rvv_utf8_to_common(src, len, @@ -38240,8 +45876,10 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( return 
rvv_utf8_to_common( src, len, (uint32_t *)dst); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* end file src/rvv/rvv_utf8_to.inl.cpp */ +#if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { @@ -38254,7 +45892,7 @@ implementation::detect_encodings(const char *input, if (validate_utf8(input, length)) out |= encoding_type::UTF8; if (length % 2 == 0) { - if (validate_utf16(reinterpret_cast(input), length / 2)) + if (validate_utf16le(reinterpret_cast(input), length / 2)) out |= encoding_type::UTF16_LE; } if (length % 4 == 0) { @@ -38264,7 +45902,9 @@ implementation::detect_encodings(const char *input, return out; } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 template simdutf_really_inline static void rvv_change_endianness_utf16(const char16_t *src, size_t len, char16_t *dst) { @@ -38282,12 +45922,9 @@ void implementation::change_endianness_utf16(const char16_t *src, size_t len, else return rvv_change_endianness_utf16(src, len, dst); } +#endif // SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - +#if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -38391,11 +46028,6 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( return r; } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, 
base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -38499,16 +46131,13 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( return r; } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); -} - size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { return scalar::base64::tail_encode_base64(output, input, length, options); } +#endif // SIMDUTF_FEATURE_BASE64 + } // namespace rvv } // namespace simdutf @@ -38527,6 +46156,7 @@ SIMDUTF_UNTARGET_REGION /* begin file src/simdutf/westmere/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "westmere" // #define SIMDUTF_IMPLEMENTATION westmere +#define SIMDUTF_SIMD_HAS_BYTEMASK #if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE // nothing needed. @@ -38534,6 +46164,7 @@ SIMDUTF_UNTARGET_REGION SIMDUTF_TARGET_WESTMERE #endif /* end file src/simdutf/westmere/begin.h */ + namespace simdutf { namespace westmere { namespace { @@ -38542,25 +46173,15 @@ namespace { #endif using namespace simd; +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 simdutf_really_inline bool is_ascii(const simd8x64 &input) { return input.reduce_or().is_ascii(); } +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 -simdutf_unused simdutf_really_inline simd8 -must_be_continuation(const simd8 prev1, const simd8 prev2, - const simd8 prev3) { - simd8 is_second_byte = - prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 - simd8 is_third_byte = - prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = - prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). 
All values resulting from the subtraction - // will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > - int8_t(0); -} - +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { @@ -38570,7 +46191,9 @@ must_be_2_3_continuation(const simd8 prev2, prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80 return simd8(is_third_byte | is_fourth_byte); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 /* begin file src/westmere/internal/loader.cpp */ namespace internal { namespace westmere { @@ -38647,292 +46270,31 @@ inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, } // namespace westmere } // namespace internal /* end file src/westmere/internal/loader.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/westmere/sse_validate_utf16.cpp */ -/* - In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. - - In a vectorized algorithm we want to examine the most significant - nibble in order to select a fast path. If none of highest nibbles - are 0xD (13), than we are sure that UTF-16 chunk in a vector - register is valid. - - Let us analyze what we need to check if the nibble is 0xD. The - value of the preceding nibble determines what we have: - - 0xd000 .. 0xd7ff - a valid word - 0xd800 .. 0xdbff - low surrogate - 0xdc00 .. 0xdfff - high surrogate - - Other constraints we have to consider: - - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) - - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - - there must not be sole low surrogate nor high surrogate - - We are going to build three bitmasks based on the 3rd nibble: - - V = valid word, - - L = low surrogate (0xd800 .. 0xdbff) - - H = high surrogate (0xdc00 .. 
0xdfff) - - 0 1 2 3 4 5 6 7 <--- word index - [ V | L | H | L | H | V | V | L ] - 1 0 0 0 0 1 1 0 - V = valid masks - 0 1 0 1 0 0 0 1 - L = low surrogate - 0 0 1 0 1 0 0 0 - H high surrogate - - - 1 0 0 0 0 1 1 0 V = valid masks - 0 1 0 1 0 0 0 0 a = L & (H >> 1) - 0 0 1 0 1 0 0 0 b = a << 1 - 1 1 1 1 1 1 1 0 c = V | a | b - ^ - the last bit can be zero, we just consume 7 - code units and recheck this word in the next iteration -*/ - -/* Returns: - - pointer to the last unprocessed character (a scalar fallback should check - the rest); - - nullptr if an error was detected. -*/ template -const char16_t *sse_validate_utf16(const char16_t *input, size_t size) { - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::SIZE * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - if (big_endian) { - in0 = in0.swap_bytes(); - in1 = in1.swap_bytes(); - } +simd8 utf16_gather_high_bytes(const simd16 in0, + const simd16 in1) { + if (big_endian) { + // we want lower bytes + const auto mask = simd16(0x00ff); + const auto t0 = in0 & mask; + const auto t1 = in1 & mask; + return simd16::pack(t0, t1); + } else { const auto t0 = in0.shr<8>(); const auto t1 = in1.shr<8>(); - const auto in = simd16::pack(t0, t1); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; - } else { - // 2. 
We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); - - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. - - if (c == 0xffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. 
- input += 15; - } else { - return nullptr; - } - } + return simd16::pack(t0, t1); } - - return input; -} - -template -const result sse_validate_utf16_with_errors(const char16_t *input, - size_t size) { - if (simdutf_unlikely(size == 0)) { - return result(error_code::SUCCESS, 0); - } - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::SIZE * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - - if (big_endian) { - in0 = in0.swap_bytes(); - in1 = in1.swap_bytes(); - } - - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - - const auto in = simd16::pack(t0, t1); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; - } else { - // 2. 
We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); - - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. - - if (c == 0xffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - - return result(error_code::SUCCESS, input - start); } /* end file src/westmere/sse_validate_utf16.cpp */ -/* begin file src/westmere/sse_validate_utf32le.cpp */ -/* Returns: - - pointer to the last unprocessed character (a scalar fallback should check - the rest); - - nullptr if an error was detected. 
-*/ -const char32_t *sse_validate_utf32le(const char32_t *input, size_t size) { - const char32_t *end = input + size; - - const __m128i standardmax = _mm_set1_epi32(0x10ffff); - const __m128i offset = _mm_set1_epi32(0xffff2000); - const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff); - __m128i currentmax = _mm_setzero_si128(); - __m128i currentoffsetmax = _mm_setzero_si128(); - - while (input + 4 < end) { - const __m128i in = _mm_loadu_si128((__m128i *)input); - currentmax = _mm_max_epu32(in, currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in, offset), currentoffsetmax); - input += 4; - } - __m128i is_zero = - _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax); - if (_mm_test_all_zeros(is_zero, is_zero) == 0) { - return nullptr; - } - - is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (_mm_test_all_zeros(is_zero, is_zero) == 0) { - return nullptr; - } - - return input; -} - -const result sse_validate_utf32le_with_errors(const char32_t *input, - size_t size) { - const char32_t *start = input; - const char32_t *end = input + size; - - const __m128i standardmax = _mm_set1_epi32(0x10ffff); - const __m128i offset = _mm_set1_epi32(0xffff2000); - const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff); - __m128i currentmax = _mm_setzero_si128(); - __m128i currentoffsetmax = _mm_setzero_si128(); - - while (input + 4 < end) { - const __m128i in = _mm_loadu_si128((__m128i *)input); - currentmax = _mm_max_epu32(in, currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in, offset), currentoffsetmax); - - __m128i is_zero = - _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax); - if (_mm_test_all_zeros(is_zero, is_zero) == 0) { - return result(error_code::TOO_LARGE, input - start); - } - - is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (_mm_test_all_zeros(is_zero, is_zero) == 0) { - return 
result(error_code::SURROGATE, input - start); - } - input += 4; - } - - return result(error_code::SUCCESS, input - start); -} -/* end file src/westmere/sse_validate_utf32le.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/westmere/sse_convert_latin1_to_utf8.cpp */ std::pair sse_convert_latin1_to_utf8(const char *latin_input, @@ -39006,6 +46368,9 @@ sse_convert_latin1_to_utf8(const char *latin_input, return std::make_pair(latin_input, utf8_output); } /* end file src/westmere/sse_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/westmere/sse_convert_latin1_to_utf16.cpp */ template std::pair @@ -39029,6 +46394,9 @@ sse_convert_latin1_to_utf16(const char *latin1_input, size_t len, return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len); } /* end file src/westmere/sse_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/westmere/sse_convert_latin1_to_utf32.cpp */ std::pair sse_convert_latin1_to_utf32(const char *buf, size_t len, @@ -39062,7 +46430,9 @@ sse_convert_latin1_to_utf32(const char *buf, size_t len, return std::make_pair(buf, utf32_output); } /* end file src/westmere/sse_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file src/westmere/sse_convert_utf8_to_utf16.cpp */ // depends on "tables/utf8_to_utf16_tables.h" @@ -39262,6 +46632,9 @@ size_t convert_masked_utf8_to_utf16(const char *input, return consumed; } /* end file src/westmere/sse_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file 
src/westmere/sse_convert_utf8_to_utf32.cpp */ // depends on "tables/utf8_to_utf16_tables.h" @@ -39405,6 +46778,9 @@ size_t convert_masked_utf8_to_utf32(const char *input, return consumed; } /* end file src/westmere/sse_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/westmere/sse_convert_utf8_to_latin1.cpp */ // depends on "tables/utf8_to_utf16_tables.h" @@ -39465,7 +46841,9 @@ size_t convert_masked_utf8_to_latin1(const char *input, return consumed; } /* end file src/westmere/sse_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/westmere/sse_convert_utf16_to_latin1.cpp */ template std::pair @@ -39523,9 +46901,8 @@ sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, } else { // Fallback to scalar code for handling errors for (int k = 0; k < 8; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if (word <= 0xff) { *latin1_output++ = char(word); } else { @@ -39540,6 +46917,9 @@ sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, latin1_output); } /* end file src/westmere/sse_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file src/westmere/sse_convert_utf16_to_utf8.cpp */ /* The vectorized algorithm works on single SSE register i.e., it @@ -39781,7 +47161,7 @@ sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = big_endian ? 
scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -39795,7 +47175,7 @@ sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { @@ -40010,7 +47390,7 @@ sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -40024,7 +47404,7 @@ sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + big_endian ? 
scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { @@ -40046,6 +47426,9 @@ sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); } /* end file src/westmere/sse_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/westmere/sse_convert_utf16_to_utf32.cpp */ /* The vectorized algorithm works on single SSE register i.e., it @@ -40098,7 +47481,7 @@ sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, /* Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. + A scalar routine should carry on the conversion of the tail. */ template std::pair @@ -40149,14 +47532,14 @@ sse_convert_utf16_to_utf32(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { @@ -40229,14 +47612,14 @@ sse_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = big_endian ? 
scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { @@ -40254,7 +47637,9 @@ sse_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output); } /* end file src/westmere/sse_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/westmere/sse_convert_utf32_to_latin1.cpp */ std::pair sse_convert_utf32_to_latin1(const char32_t *buf, size_t len, @@ -40339,6 +47724,9 @@ sse_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, latin1_output); } /* end file src/westmere/sse_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/westmere/sse_convert_utf32_to_utf8.cpp */ std::pair sse_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { @@ -40931,6 +48319,9 @@ sse_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); } /* end file src/westmere/sse_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/westmere/sse_convert_utf32_to_utf16.cpp */ template std::pair @@ -40939,21 +48330,16 @@ sse_convert_utf32_to_utf16(const char32_t *buf, size_t len, const char32_t *end = buf + len; - const __m128i v_0000 = _mm_setzero_si128(); const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); __m128i 
forbidden_bytemask = _mm_setzero_si128(); while (end - buf >= 8) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - const __m128i saturation_bytemask = _mm_cmpeq_epi32( - _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm_movemask_epi8(saturation_bytemask)); + const __m128i in = _mm_loadu_si128((__m128i *)buf); + const __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - // Check if no bits set above 16th - if (saturation_bitmask == 0xffff) { - // Pack UTF-32 to UTF-16 + const __m128i combined = _mm_or_si128(in, nextin); + if (simdutf_likely(_mm_testz_si128(combined, v_ffff0000))) { + // No bits set above 16th, directly pack UTF-32 to UTF-16 __m128i utf16_packed = _mm_packus_epi32(in, nextin); const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); @@ -41025,20 +48411,15 @@ sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, const char32_t *start = buf; const char32_t *end = buf + len; - const __m128i v_0000 = _mm_setzero_si128(); const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); while (end - buf >= 8) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - const __m128i saturation_bytemask = _mm_cmpeq_epi32( - _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm_movemask_epi8(saturation_bytemask)); + const __m128i in = _mm_loadu_si128((__m128i *)buf); + const __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - // Check if no bits set above 16th - if (saturation_bitmask == 0xffff) { - // Pack UTF-32 to UTF-16 + const __m128i combined = _mm_or_si128(in, nextin); + if (simdutf_likely(_mm_testz_si128(combined, v_ffff0000))) { + // No bits set above 16th, directly pack UTF-32 to UTF-16 __m128i utf16_packed = _mm_packus_epi32(in, nextin); const __m128i v_f800 = 
_mm_set1_epi16((uint16_t)0xf800); @@ -41103,6 +48484,9 @@ sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); } /* end file src/westmere/sse_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 /* begin file src/westmere/sse_base64.cpp */ /** * References and further reading: @@ -41131,6 +48515,9 @@ sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, * Nick Kopp. 2013. Base64 Encoding on a GPU. * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). */ + +// --- encoding ---------------------------------------------------- + template __m128i lookup_pshufb_improved(const __m128i input) { // credit: Wojciech Muła // reduce 0..51 -> 0 @@ -41274,7 +48661,11 @@ size_t encode_base64(char *dst, const char *src, size_t srclen, return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, srclen - i, options); } -static inline void compress(__m128i data, uint16_t mask, char *output) { + +// --- decoding ----------------------------------------------- + +static simdutf_really_inline void compress(__m128i data, uint16_t mask, + char *output) { if (mask == 0) { _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data); return; @@ -41308,166 +48699,7 @@ static inline void compress(__m128i data, uint16_t mask, char *output) { _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); } -struct block64 { - __m128i chunks[4]; -}; - -template -static inline uint16_t to_base64_mask(__m128i *src, uint32_t *error) { - const __m128i ascii_space_tbl = - _mm_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, - 0xc, 0xd, 0x0, 0x0); - // credit: aqrit - __m128i delta_asso; - if (base64_url) { - delta_asso = _mm_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, - 0x0, 0x0, 0x0, 0xF, 0x0, 0xF); - } else { - - delta_asso = 
_mm_setr_epi8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); - } - __m128i delta_values; - if (base64_url) { - delta_values = _mm_setr_epi8(0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), - uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), - 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), - uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); - } else { - - delta_values = - _mm_setr_epi8(int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)); - } - __m128i check_asso; - if (base64_url) { - check_asso = _mm_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x3, 0x7, 0xB, 0xE, 0xB, 0x6); - } else { - - check_asso = _mm_setr_epi8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); - } - __m128i check_values; - if (base64_url) { - check_values = _mm_setr_epi8(uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), - uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), - uint8_t(0xB6), uint8_t(0xA6), uint8_t(0xB5), - uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, - uint8_t(0x80), 0x0, uint8_t(0x80)); - } else { - - check_values = - _mm_setr_epi8(int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), - int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)); - } - const __m128i shifted = _mm_srli_epi32(*src, 3); - - const __m128i delta_hash = - _mm_avg_epu8(_mm_shuffle_epi8(delta_asso, *src), shifted); - const __m128i check_hash = - _mm_avg_epu8(_mm_shuffle_epi8(check_asso, *src), shifted); - - const __m128i out = - _mm_adds_epi8(_mm_shuffle_epi8(delta_values, delta_hash), *src); - const __m128i chk = - _mm_adds_epi8(_mm_shuffle_epi8(check_values, check_hash), *src); - const int mask = _mm_movemask_epi8(chk); - if 
(!ignore_garbage && mask) { - __m128i ascii_space = - _mm_cmpeq_epi8(_mm_shuffle_epi8(ascii_space_tbl, *src), *src); - *error = (mask ^ _mm_movemask_epi8(ascii_space)); - } - *src = out; - return (uint16_t)mask; -} - -template -static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { - uint32_t err0 = 0; - uint32_t err1 = 0; - uint32_t err2 = 0; - uint32_t err3 = 0; - uint64_t m0 = - to_base64_mask(&b->chunks[0], &err0); - uint64_t m1 = - to_base64_mask(&b->chunks[1], &err1); - uint64_t m2 = - to_base64_mask(&b->chunks[2], &err2); - uint64_t m3 = - to_base64_mask(&b->chunks[3], &err3); - if (!ignore_garbage) { - *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | - ((uint64_t)err3 << 48); - } - return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); -} - -#if defined(_MSC_VER) && !defined(__clang__) -static inline size_t simdutf_tzcnt_u64(uint64_t num) { - unsigned long ret; - if (num == 0) { - return 64; - } - _BitScanForward64(&ret, num); - return ret; -} -#else // GCC or Clang -static inline size_t simdutf_tzcnt_u64(uint64_t num) { - return num ? 
__builtin_ctzll(num) : 64; -} -#endif - -static inline void copy_block(block64 *b, char *output) { - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), b->chunks[0]); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 16), b->chunks[1]); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 32), b->chunks[2]); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 48), b->chunks[3]); -} - -static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { - uint64_t nmask = ~mask; - compress(b->chunks[0], uint16_t(mask), output); - compress(b->chunks[1], uint16_t(mask >> 16), - output + _mm_popcnt_u64(nmask & 0xFFFF)); - compress(b->chunks[2], uint16_t(mask >> 32), - output + _mm_popcnt_u64(nmask & 0xFFFFFFFF)); - compress(b->chunks[3], uint16_t(mask >> 48), - output + _mm_popcnt_u64(nmask & 0xFFFFFFFFFFFFULL)); - return _mm_popcnt_u64(nmask); -} - -// The caller of this function is responsible to ensure that there are 64 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = _mm_loadu_si128(reinterpret_cast(src)); - b->chunks[1] = _mm_loadu_si128(reinterpret_cast(src + 16)); - b->chunks[2] = _mm_loadu_si128(reinterpret_cast(src + 32)); - b->chunks[3] = _mm_loadu_si128(reinterpret_cast(src + 48)); -} - -// The caller of this function is responsible to ensure that there are 128 bytes -// available from reading at src. The data is read into a block64 structure. 
-static inline void load_block(block64 *b, const char16_t *src) { - __m128i m1 = _mm_loadu_si128(reinterpret_cast(src)); - __m128i m2 = _mm_loadu_si128(reinterpret_cast(src + 8)); - __m128i m3 = _mm_loadu_si128(reinterpret_cast(src + 16)); - __m128i m4 = _mm_loadu_si128(reinterpret_cast(src + 24)); - __m128i m5 = _mm_loadu_si128(reinterpret_cast(src + 32)); - __m128i m6 = _mm_loadu_si128(reinterpret_cast(src + 40)); - __m128i m7 = _mm_loadu_si128(reinterpret_cast(src + 48)); - __m128i m8 = _mm_loadu_si128(reinterpret_cast(src + 56)); - b->chunks[0] = _mm_packus_epi16(m1, m2); - b->chunks[1] = _mm_packus_epi16(m3, m4); - b->chunks[2] = _mm_packus_epi16(m5, m6); - b->chunks[3] = _mm_packus_epi16(m7, m8); -} - -static inline void base64_decode(char *out, __m128i str) { +static simdutf_really_inline void base64_decode(char *out, __m128i str) { // credit: aqrit const __m128i pack_shuffle = @@ -41480,6 +48712,7 @@ static inline void base64_decode(char *out, __m128i str) { // this writes 16 bytes, but we only need 12. 
_mm_storeu_si128((__m128i *)out, t2); } + // decode 64 bytes and output 48 bytes static inline void base64_decode_block(char *out, const char *src) { base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); @@ -41490,6 +48723,7 @@ static inline void base64_decode_block(char *out, const char *src) { base64_decode(out + 36, _mm_loadu_si128(reinterpret_cast(src + 48))); } + static inline void base64_decode_block_safe(char *out, const char *src) { base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); base64_decode(out + 12, @@ -41501,222 +48735,250 @@ static inline void base64_decode_block_safe(char *out, const char *src) { _mm_loadu_si128(reinterpret_cast(src + 48))); std::memcpy(out + 36, buffer, 12); } -static inline void base64_decode_block(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); - base64_decode(out + 12, b->chunks[1]); - base64_decode(out + 24, b->chunks[2]); - base64_decode(out + 36, b->chunks[3]); -} -static inline void base64_decode_block_safe(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); - base64_decode(out + 12, b->chunks[1]); - base64_decode(out + 24, b->chunks[2]); - char buffer[16]; - base64_decode(buffer, b->chunks[3]); - std::memcpy(out + 36, buffer, 12); -} -template -full_result -compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? 
tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - // skip trailing spaces - while (!ignore_garbage && srclen > 0 && - scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equalsigns = 0; - if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - char *end_of_safe_64byte_zone = - (srclen + 3) / 4 * 3 >= 63 ? 
dst + (srclen + 3) / 4 * 3 - 63 : dst; +// --- decoding - base64 class -------------------------------- - const chartype *const srcinit = src; - const char *const dstinit = dst; - const chartype *const srcend = src + srclen; +class block64 { + __m128i chunks[4]; - constexpr size_t block_size = 6; - static_assert(block_size >= 2, "block should of size 2 or more"); - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const chartype *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b; - load_block(&b, src); - src += 64; - uint64_t error = 0; - uint64_t badcharmask = - to_base64_mask(&b, &error); - if (error && !ignore_garbage) { - src -= 64; - size_t error_offset = simdutf_tzcnt_u64(error); - return {error_code::INVALID_BASE64_CHARACTER, - size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; - } - if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. - bufferptr += compress_block(&b, badcharmask, bufferptr); - } else if (bufferptr != buffer) { - copy_block(&b, bufferptr); - bufferptr += 64; - } else { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, &b); - } else { - base64_decode_block(dst, &b); - } - dst += 48; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 2); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); - } else { - base64_decode_block(dst, buffer + (block_size - 2) * 64); - } - dst += 48; - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } +public: + // The caller of this function is responsible to ensure that there are 64 + // bytes available from reading at src. 
+ simdutf_really_inline block64(const char *src) { + chunks[0] = _mm_loadu_si128(reinterpret_cast(src)); + chunks[1] = _mm_loadu_si128(reinterpret_cast(src + 16)); + chunks[2] = _mm_loadu_si128(reinterpret_cast(src + 32)); + chunks[3] = _mm_loadu_si128(reinterpret_cast(src + 48)); } - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if ((!scalar::base64::is_eight_byte(*src) || val > 64) && - !ignore_garbage) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - bufferptr += (val <= 63); - src++; - } +public: + // The caller of this function is responsible to ensure that there are 128 + // bytes available from reading at src. The data is read into a block64 + // structure. 
+ simdutf_really_inline block64(const char16_t *src) { + const auto m1 = _mm_loadu_si128(reinterpret_cast(src)); + const auto m2 = _mm_loadu_si128(reinterpret_cast(src + 8)); + const auto m3 = + _mm_loadu_si128(reinterpret_cast(src + 16)); + const auto m4 = + _mm_loadu_si128(reinterpret_cast(src + 24)); + const auto m5 = + _mm_loadu_si128(reinterpret_cast(src + 32)); + const auto m6 = + _mm_loadu_si128(reinterpret_cast(src + 40)); + const auto m7 = + _mm_loadu_si128(reinterpret_cast(src + 48)); + const auto m8 = + _mm_loadu_si128(reinterpret_cast(src + 56)); + chunks[0] = _mm_packus_epi16(m1, m2); + chunks[1] = _mm_packus_epi16(m3, m4); + chunks[2] = _mm_packus_epi16(m5, m6); + chunks[3] = _mm_packus_epi16(m7, m8); } - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer_start); +public: + simdutf_really_inline void copy_block(char *output) { + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), chunks[0]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 16), chunks[1]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 32), chunks[2]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 48), chunks[3]); + } + +public: + simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) { + if (is_power_of_two(mask)) { + return compress_block_single(mask, output); + } + + uint64_t nmask = ~mask; + compress(chunks[0], uint16_t(mask), output); + compress(chunks[1], uint16_t(mask >> 16), + output + count_ones(nmask & 0xFFFF)); + compress(chunks[2], uint16_t(mask >> 32), + output + count_ones(nmask & 0xFFFFFFFF)); + compress(chunks[3], uint16_t(mask >> 48), + output + count_ones(nmask & 0xFFFFFFFFFFFFULL)); + return count_ones(nmask); + } + +private: + simdutf_really_inline size_t compress_block_single(uint64_t mask, + char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + switch (pos64 >> 4) { + case 0b00: { + 
const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(chunks[0], sh); + + _mm_storeu_si128((__m128i *)(output + 0 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 1 * 16 - 1), chunks[1]); + _mm_storeu_si128((__m128i *)(output + 2 * 16 - 1), chunks[2]); + _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), chunks[3]); + } break; + case 0b01: { + _mm_storeu_si128((__m128i *)(output + 0 * 16), chunks[0]); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(chunks[1], sh); + + _mm_storeu_si128((__m128i *)(output + 1 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 2 * 16 - 1), chunks[2]); + _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), chunks[3]); + } break; + case 0b10: { + _mm_storeu_si128((__m128i *)(output + 0 * 16), chunks[0]); + _mm_storeu_si128((__m128i *)(output + 1 * 16), chunks[1]); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(chunks[2], sh); + + _mm_storeu_si128((__m128i *)(output + 2 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), chunks[3]); + } break; + case 0b11: { + _mm_storeu_si128((__m128i *)(output + 0 * 16), chunks[0]); + _mm_storeu_si128((__m128i *)(output + 1 * 16), chunks[1]); + _mm_storeu_si128((__m128i *)(output + 2 * 16), chunks[2]); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(chunks[3], sh); + + _mm_storeu_si128((__m128i *)(output + 3 * 16), compressed); + } break; + } + + return 63; + } + +public: + template + simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) { + uint32_t err0 = 0; + uint32_t err1 = 0; + uint32_t err2 = 0; + uint32_t err3 = 0; + uint64_t m0 = to_base64_mask(&chunks[0], &err0); + uint64_t m1 = to_base64_mask(&chunks[1], &err1); + uint64_t m2 = to_base64_mask(&chunks[2], &err2); + uint64_t m3 = to_base64_mask(&chunks[3], &err3); + if (!ignore_garbage) { + *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | + ((uint64_t)err3 << 48); + } + return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); + } + +private: + template + simdutf_really_inline uint16_t to_base64_mask(__m128i *src, uint32_t *error) { + const __m128i ascii_space_tbl = + _mm_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, + 0x0, 0xc, 0xd, 0x0, 0x0); + // credit: aqrit + __m128i delta_asso; + if (base64_url) { + delta_asso = _mm_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, + 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF); } else { - base64_decode_block(dst, buffer_start); - } - dst += 48; - } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::utf32::swap_bytes(triple); - std::memcpy(dst, &triple, 4); - dst += 3; - buffer_start += 4; + delta_asso = + _mm_setr_epi8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 
6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::utf32::swap_bytes(triple); - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - if (!ignore_garbage) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - } else { - while (to_base64[uint8_t(*(src - 1))] >= 64) { - src--; - } - } - src--; - leftover--; - } - } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - r.input_count += size_t(src - srcinit); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - return r; + __m128i delta_values; + if (base64_url) { + delta_values = _mm_setr_epi8(0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), + uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), + 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), + uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); } else { - r.output_count += size_t(dst - dstinit); + delta_values = + _mm_setr_epi8(int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)); } - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } + __m128i check_asso; + if (base64_url) { + check_asso = _mm_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6); + } 
else { + check_asso = + _mm_setr_epi8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); } - return r; + __m128i check_values; + if (base64_url) { + check_values = _mm_setr_epi8(uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), + uint8_t(0xB6), uint8_t(0xA6), uint8_t(0xB5), + uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, + uint8_t(0x80), 0x0, uint8_t(0x80)); + } else { + check_values = + _mm_setr_epi8(int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), + int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)); + } + const __m128i shifted = _mm_srli_epi32(*src, 3); + + const __m128i delta_hash = + _mm_avg_epu8(_mm_shuffle_epi8(delta_asso, *src), shifted); + const __m128i check_hash = + _mm_avg_epu8(_mm_shuffle_epi8(check_asso, *src), shifted); + + const __m128i out = + _mm_adds_epi8(_mm_shuffle_epi8(delta_values, delta_hash), *src); + const __m128i chk = + _mm_adds_epi8(_mm_shuffle_epi8(check_values, check_hash), *src); + const int mask = _mm_movemask_epi8(chk); + if (!ignore_garbage && mask) { + __m128i ascii_space = + _mm_cmpeq_epi8(_mm_shuffle_epi8(ascii_space_tbl, *src), *src); + *error = (mask ^ _mm_movemask_epi8(ascii_space)); + } + *src = out; + return (uint16_t)mask; } - if (equalsigns > 0 && !ignore_garbage) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } + +public: + simdutf_really_inline void base64_decode_block(char *out) { + base64_decode(out, chunks[0]); + base64_decode(out + 12, chunks[1]); + base64_decode(out + 24, chunks[2]); + base64_decode(out + 36, chunks[3]); } - return {SUCCESS, srclen, size_t(dst - dstinit)}; -} + +public: + simdutf_really_inline void base64_decode_block_safe(char *out) { + base64_decode(out, 
chunks[0]); + base64_decode(out + 12, chunks[1]); + base64_decode(out + 24, chunks[2]); + char buffer[16]; + base64_decode(buffer, chunks[3]); + std::memcpy(out + 36, buffer, 12); + } +}; /* end file src/westmere/sse_base64.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 } // unnamed namespace } // namespace westmere @@ -41833,6 +49095,7 @@ simdutf_really_inline void buf_block_reader::advance() { } // namespace westmere } // namespace simdutf /* end file src/generic/buf_block_reader.h */ +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ namespace simdutf { namespace westmere { @@ -42139,9 +49402,21 @@ result generic_validate_utf8_with_errors(const char *input, size_t length) { reinterpret_cast(input), length); } -template -bool generic_validate_ascii(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +} // namespace utf8_validation +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace ascii_validation { + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); uint8_t blocks[64]{}; simd::simd8x64 running_or(blocks); while (reader.has_full_block()) { @@ -42156,14 +49431,8 @@ bool generic_validate_ascii(const uint8_t *input, size_t length) { return running_or.is_ascii(); } -bool generic_validate_ascii(const char *input, size_t length) { - return generic_validate_ascii( - reinterpret_cast(input), length); -} - -template -result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +result generic_validate_ascii_with_errors(const char *input, 
size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); size_t count{0}; while (reader.has_full_block()) { simd::simd8x64 in(reader.full_block()); @@ -42188,19 +49457,16 @@ result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { } } -result generic_validate_ascii_with_errors(const char *input, size_t length) { - return generic_validate_ascii_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation +} // namespace ascii_validation } // unnamed namespace } // namespace westmere } // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -// transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ namespace simdutf { namespace westmere { namespace { @@ -42277,7 +49543,6 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace simdutf /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ - namespace simdutf { namespace westmere { namespace { @@ -42611,9 +49876,10 @@ struct validating_transcoder { } // namespace westmere } // namespace simdutf /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -// transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ namespace simdutf { namespace westmere { namespace { @@ -42658,7 +49924,6 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace simdutf /* end file 
src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ - namespace simdutf { namespace westmere { namespace { @@ -42978,9 +50243,118 @@ struct validating_transcoder { } // namespace westmere } // namespace simdutf /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -// other functions -/* begin file src/generic/utf8.h */ +/* begin file src/generic/utf32.h */ +#include +namespace simdutf { +namespace westmere { +namespace { +namespace utf32 { + +template T min(T a, T b) { return a <= b ? a : b; } + +size_t utf8_length_from_utf32(const char32_t *input, size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + + const auto one = vector_u32::splat(1); + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + + size_t counter = 0; + + // 1. 
vectorized loop unrolled 4 times + { + // we use uint32 counters, this is + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); + + input += 4 * N; + } + + counter += acc.sum(); + } + } + + // 2. vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end. 
+ counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); +} + +} // namespace utf32 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/utf8.h */ namespace simdutf { namespace westmere { namespace { @@ -42999,6 +50373,59 @@ simdutf_really_inline size_t count_code_points(const char *in, size_t size) { return count + scalar::utf8::count_code_points(in + pos, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif + simdutf_really_inline 
size_t utf16_length_from_utf8(const char *in, size_t size) { size_t pos = 0; @@ -43020,6 +50447,8 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, } // namespace westmere } // namespace simdutf /* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF16 /* begin file src/generic/utf16.h */ namespace simdutf { namespace westmere { @@ -43069,6 +50498,89 @@ simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t size) { + size_t pos = 0; + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input = input.swap_bytes(); + } + + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. + + Thus, we always have correct count for the current char: + from 1, 2 or 3 bytes. 
+ + A trickier part is how we count surrogate pairs. Whether + we encounter a surrogate (low or high), we count it as + 3 chars and then minus 1 (`is_surrogate` is -1 or 0). + Each surrogate char yields 2. A surrogate pair, that + is a low surrogate followed by a high one, yields + the expected 4 bytes. + + It also correctly handles cases when low surrogate is + processed by the this loop, but high surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach, thanks to that for valid UTF-16 + strings it always count correctly. + */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + template simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, size_t size) { @@ -43095,9 +50607,144 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { } // namespace westmere } // namespace simdutf /* end file src/generic/utf16.h */ -// transcoding from UTF-8 to Latin 1 -/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. 
+ + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. 
+ const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. 
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ namespace simdutf { namespace westmere { namespace { @@ -43416,7 +51063,6 @@ struct validating_transcoder { } // namespace simdutf /* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ /* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ - namespace simdutf { namespace westmere { namespace { @@ -43496,6 +51142,361 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, } // namespace simdutf // namespace simdutf /* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf32.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf32 { + +simdutf_really_inline bool validate(const char32_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return true; + } + + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + auto currentmax = vector_u32::zero(); + auto currentoffsetmax = vector_u32::zero(); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + currentmax = max(currentmax, in); + currentoffsetmax = max(currentoffsetmax, in + offset); + input += N; + } + + const auto too_large = currentmax > standardmax; + if (too_large.any()) { + return false; + } + + const auto surrogate = currentoffsetmax > standardoffsetmax; + if (surrogate.any()) { + return false; + } + + return scalar::utf32::validate(input, end - input); +} + +simdutf_really_inline result validate_with_errors(const char32_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return result(error_code::SUCCESS, 0); + } + + const char32_t *start = input; + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + const auto too_large = in > standardmax; + const auto surrogate = (in + offset) > standardoffsetmax; + + const auto combined = too_large | surrogate; + if (simdutf_unlikely(combined.any())) { + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; + } + + input += N; + } + + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; +} + +} // namespace utf32 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/validate_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/generic/base64.h */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. 
Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ +namespace simdutf { +namespace westmere { +namespace { +namespace base64 { + +/* + The following template function implements API for Base64 decoding. + + An implementation is responsible for providing the `block64` type and + associated methods that perform actual conversion. Please refer + to any vectorized implementation to learn the API of these procedures. +*/ +template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = base64_url ? 
tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + size_t equallocation = + srclen; // location of the first padding character if any + // skip trailing spaces + while (!ignore_garbage && srclen > 0 && + scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + size_t equalsigns = 0; + if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 1; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 2; + } + } + if (srclen == 0) { + if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + char *end_of_safe_64byte_zone = + (srclen + 3) / 4 * 3 >= 63 ? 
dst + (srclen + 3) / 4 * 3 - 63 : dst; + + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; + + constexpr size_t block_size = 6; + static_assert(block_size >= 2, "block_size must be at least two"); + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b(src); + src += 64; + uint64_t error = 0; + const uint64_t badcharmask = + b.to_base64_mask(&error); + if (!ignore_garbage && error) { + src -= 64; + const size_t error_offset = trailing_zeroes(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; + } + if (badcharmask != 0) { + bufferptr += b.compress_block(badcharmask, bufferptr); + } else if (bufferptr != buffer) { + b.copy_block(bufferptr); + bufferptr += 64; + } else { + if (dst >= end_of_safe_64byte_zone) { + b.base64_decode_block_safe(dst); + } else { + b.base64_decode_block(dst); + } + dst += 48; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 2); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); + } else { + base64_decode_block(dst, buffer + (block_size - 2) * 64); + } + dst += 48; + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. 
+ int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if (!ignore_garbage && + (!scalar::base64::is_eight_byte(*src) || val > 64)) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer_start); + } else { + base64_decode_block(dst, buffer_start); + } + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + if (!ignore_garbage) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + } else { + while (to_base64[uint8_t(*(src - 1))] >= 64) { + src--; + } + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = 
scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r.input_count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + return r; + } else { + r.output_count += size_t(dst - dstinit); + } + if (!ignore_garbage && last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + r.error = error_code::INVALID_BASE64_CHARACTER; + r.input_count = equallocation; + } + } + return r; + } + if (!ignore_garbage && equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} + +} // namespace base64 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/base64.h */ +#endif // SIMDUTF_FEATURE_BASE64 // // Implementation-specific overrides @@ -43504,6 +51505,7 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, namespace simdutf { namespace westmere { +#if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { @@ -43644,28 +51646,38 @@ implementation::detect_encodings(const char *input, } return out; } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { return westmere::utf8_validation::generic_validate_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const 
noexcept { return westmere::utf8_validation::generic_validate_utf8_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return westmere::utf8_validation::generic_validate_ascii(buf, len); + return westmere::ascii_validation::generic_validate_ascii(buf, len); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_ASCII simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return westmere::utf8_validation::generic_validate_ascii_with_errors(buf, - len); + return westmere::ascii_validation::generic_validate_ascii_with_errors(buf, + len); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept { @@ -43674,15 +51686,21 @@ implementation::validate_utf16le(const char16_t *buf, // handling nullptr return true; } - const char16_t *tail = sse_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, - len - (tail - buf)); - } else { + const auto res = + westmere::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { return false; } -} + if (res.count == len) + return true; + + return scalar::utf16::validate(buf + res.count, + len - res.count); +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept { @@ -43691,20 +51709,27 @@ implementation::validate_utf16be(const char16_t *buf, // handling nullptr return true; } - const char16_t *tail = sse_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, len - (tail - buf)); - } else { + const auto res = + westmere::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { return 
false; } + + if (res.count == len) + return true; + + return scalar::utf16::validate(buf + res.count, + len - res.count); } simdutf_warn_unused result implementation::validate_utf16le_with_errors( const char16_t *buf, size_t len) const noexcept { - result res = sse_validate_utf16_with_errors(buf, len); + const result res = + westmere::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); + const result scalar_res = + scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); return result(scalar_res.error, res.count + scalar_res.count); } else { return res; @@ -43713,7 +51738,8 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors( simdutf_warn_unused result implementation::validate_utf16be_with_errors( const char16_t *buf, size_t len) const noexcept { - result res = sse_validate_utf16_with_errors(buf, len); + const result res = + westmere::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { result scalar_res = scalar::utf16::validate_with_errors( buf + res.count, len - res.count); @@ -43722,39 +51748,23 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( return res; } } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-32. 
protect the implementation from - // handling nullptr - return true; - } - const char32_t *tail = sse_validate_utf32le(buf, len); - if (tail) { - return scalar::utf32::validate(tail, len - (tail - buf)); - } else { - return false; - } + return utf32::validate(buf, len); } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { - if (len == 0) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr - return result(error_code::SUCCESS, 0); - } - result res = sse_validate_utf32le_with_errors(buf, len); - if (res.count != len) { - result scalar_res = - scalar::utf32::validate_with_errors(buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } + return utf32::validate_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { @@ -43770,7 +51780,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = @@ -43810,7 +51822,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ -43829,7 +51843,9 @@ simdutf_warn_unused 
size_t implementation::convert_latin1_to_utf32( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { utf8_to_latin1::validating_transcoder converter; @@ -43846,7 +51862,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { return westmere::utf8_to_latin1::convert_valid(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { utf8_to_utf16::validating_transcoder converter; @@ -43883,7 +51901,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( return utf8_to_utf16::convert_valid(input, size, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { utf8_to_utf32::validating_transcoder converter; @@ -43900,7 +51920,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( const char *input, size_t size, char32_t *utf32_output) const noexcept { return utf8_to_utf32::convert_valid(input, size, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -44008,7 +52030,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( // optimization opportunity: we could 
provide an optimized function. return convert_utf16le_to_latin1(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -44114,7 +52138,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { return convert_utf16be_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -44163,7 +52189,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( // optimization opportunity: we could provide an optimized function. 
return convert_utf32_to_latin1(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -44204,7 +52232,9 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( utf8_output; // Set count to the number of 8-bit code units written return ret.first; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ -44300,12 +52330,16 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( utf32_output; // Set count to the number of 8-bit code units written return ret.first; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { return convert_utf32_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = @@ -44413,7 +52447,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { return convert_utf16be_to_utf32(buf, len, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 void implementation::change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept { @@ -44429,47 
+52465,36 @@ simdutf_warn_unused size_t implementation::count_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); + return utf8::count_code_points_bytemask(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::latin1_length_from_utf8( const char *buf, size_t len) const noexcept { return count_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return scalar::utf16::latin1_length_from_utf16(length); -} - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t length) const noexcept { - return scalar::utf32::latin1_length_from_utf32(length); -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16(input, length); + return utf16::utf8_length_from_utf16_bytemask(input, + length); } simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16(input, length); -} - -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); -} - -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); + return utf16::utf8_length_from_utf16_bytemask(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::utf8_length_from_latin1( const char *input, size_t len) const noexcept { const uint8_t *str = reinterpret_cast(input); @@ -44529,7 +52554,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_latin1( return answer + scalar::latin1::utf8_length_from_latin1( reinterpret_cast(str + i), len - i); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); @@ -44539,48 +52566,23 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf16_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::utf16_length_from_utf8(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const noexcept { - const __m128i v_00000000 = _mm_setzero_si128(); - const __m128i v_ffffff80 = _mm_set1_epi32((uint32_t)0xffffff80); - const __m128i v_fffff800 = _mm_set1_epi32((uint32_t)0xfffff800); - const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); - size_t pos = 0; - size_t count = 0; - for (; pos + 4 <= length; pos += 4) { - __m128i in = _mm_loadu_si128((__m128i *)(input + pos)); - const __m128i ascii_bytes_bytemask = - _mm_cmpeq_epi32(_mm_and_si128(in, v_ffffff80), v_00000000); - const __m128i one_two_bytes_bytemask = - _mm_cmpeq_epi32(_mm_and_si128(in, v_fffff800), v_00000000); 
- const __m128i two_bytes_bytemask = - _mm_xor_si128(one_two_bytes_bytemask, ascii_bytes_bytemask); - const __m128i one_two_three_bytes_bytemask = - _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000); - const __m128i three_bytes_bytemask = - _mm_xor_si128(one_two_three_bytes_bytemask, one_two_bytes_bytemask); - const uint16_t ascii_bytes_bitmask = - static_cast(_mm_movemask_epi8(ascii_bytes_bytemask)); - const uint16_t two_bytes_bitmask = - static_cast(_mm_movemask_epi8(two_bytes_bytemask)); - const uint16_t three_bytes_bitmask = - static_cast(_mm_movemask_epi8(three_bytes_bytemask)); - - size_t ascii_count = count_ones(ascii_bytes_bitmask) / 4; - size_t two_bytes_count = count_ones(two_bytes_bitmask) / 4; - size_t three_bytes_count = count_ones(three_bytes_bitmask) / 4; - count += 16 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; - } - return count + - scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); + return utf32::utf8_length_from_utf32(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { const __m128i v_00000000 = _mm_setzero_si128(); @@ -44599,35 +52601,34 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32( return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, 
length); -} - +#if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { if (options & base64_url) { if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } else { if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } } @@ -44637,46 +52638,41 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( last_chunk_handling_options last_chunk_options) const noexcept { if (options & base64_url) { if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } else { if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, 
input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { if (options & base64_url) { if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } else { if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } } @@ -44686,28 +52682,23 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( last_chunk_handling_options last_chunk_options) const noexcept { if (options & base64_url) { if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, 
last_chunk_options); } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } else { if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); } } } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); -} - size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { @@ -44717,6 +52708,8 @@ size_t implementation::binary_to_base64(const char *input, size_t length, return encode_base64(output, input, length, options); } } +#endif // SIMDUTF_FEATURE_BASE64 + } // namespace westmere } // namespace simdutf @@ -44727,6 +52720,7 @@ size_t implementation::binary_to_base64(const char *input, size_t length, SIMDUTF_UNTARGET_REGION #endif +#undef SIMDUTF_SIMD_HAS_BYTEMASK /* end file src/simdutf/westmere/end.h */ /* end file src/westmere/implementation.cpp */ #endif @@ -44744,6 +52738,7 @@ namespace { #endif using namespace simd; +#if SIMDUTF_FEATURE_UTF8 // convert vmskltz/vmskgez/vmsknz to // simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index const uint8_t lsx_1_2_utf8_bytes_mask[] = { @@ -44765,33 +52760,26 @@ const uint8_t lsx_1_2_utf8_bytes_mask[] = { 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, 255}; +#endif // SIMDUTF_FEATURE_UTF8 +#if 
SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { // const v16u8 shuf = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; // return __lsx_vshuf_b(__lsx_vldi(0), vec, shuf); return __lsx_vshuf4i_b(vec, 0b10110001); // return __lsx_vor_v(__lsx_vslli_h(vec, 8), __lsx_vsrli_h(vec, 8)); } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 simdutf_really_inline bool is_ascii(const simd8x64 &input) { return input.is_ascii(); } +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 -simdutf_unused simdutf_really_inline simd8 -must_be_continuation(const simd8 prev1, const simd8 prev2, - const simd8 prev3) { - simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller - // is using ^ as well. This will work fine because we only have to report - // errors for cases with 0-1 lead bytes. Multiple lead bytes implies 2 - // overlapping multibyte characters, and if that happens, there is guaranteed - // to be at least *one* lead byte that is part of only 1 other multibyte - // character. The error will be detected there. 
- return is_second_byte ^ is_third_byte ^ is_fourth_byte; -} - +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { @@ -44799,7 +52787,9 @@ must_be_2_3_continuation(const simd8 prev2, simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); return is_third_byte ^ is_fourth_byte; } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) // common functions for utf8 conversions simdutf_really_inline __m128i convert_utf8_3_byte_to_utf16(__m128i in) { // Low half contains 10bbbbbb|10cccccc @@ -44857,210 +52847,27 @@ convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { composed = __lsx_vadd_h(ascii, composed); return composed; } +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32) +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/lsx/lsx_validate_utf16.cpp */ -/* - In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. - - In a vectorized algorithm we want to examine the most significant - nibble in order to select a fast path. If none of highest nibbles - are 0xD (13), than we are sure that UTF-16 chunk in a vector - register is valid. - - Let us analyze what we need to check if the nibble is 0xD. The - value of the preceding nibble determines what we have: - - 0xd000 .. 0xd7ff - a valid word - 0xd800 .. 0xdbff - low surrogate - 0xdc00 .. 0xdfff - high surrogate - - Other constraints we have to consider: - - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) - - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - - there must not be sole low surrogate nor high surrogate - - We're going to build three bitmasks based on the 3rd nibble: - - V = valid word, - - L = low surrogate (0xd800 .. 0xdbff) - - H = high surrogate (0xdc00 .. 
0xdfff) - - 0 1 2 3 4 5 6 7 <--- word index - [ V | L | H | L | H | V | V | L ] - 1 0 0 0 0 1 1 0 - V = valid masks - 0 1 0 1 0 0 0 1 - L = low surrogate - 0 0 1 0 1 0 0 0 - H high surrogate - - - 1 0 0 0 0 1 1 0 V = valid masks - 0 1 0 1 0 0 0 0 a = L & (H >> 1) - 0 0 1 0 1 0 0 0 b = a << 1 - 1 1 1 1 1 1 1 0 c = V | a | b - ^ - the last bit can be zero, we just consume 7 - code units and recheck this word in the next iteration -*/ - -/* Returns: - - pointer to the last unprocessed character (a scalar fallback should check - the rest); - - nullptr if an error was detected. -*/ template -const char16_t *lsx_validate_utf16(const char16_t *input, size_t size) { - const char16_t *end = input + size; +simd8 utf16_gather_high_bytes(const simd16 in0, + const simd16 in1) { + if (big_endian) { + const auto mask = simd16(0x00ff); + const auto t0 = in0 & mask; + const auto t1 = in1 & mask; - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::SIZE * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - if (big_endian) { - in0 = in0.swap_bytes(); - in1 = in1.swap_bytes(); - } - const auto in = simd8(__lsx_vssrlni_bu_h(in1.value, in0.value, 8)); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; - } else { - // 2. 
We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); - - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. - - if (c == 0xffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. 
- input += 15; - } else { - return nullptr; - } - } + return simd16::pack(t0, t1); + } else { + return simd8(__lsx_vssrlni_bu_h(in1.value, in0.value, 8)); } - - return input; -} - -template -const result lsx_validate_utf16_with_errors(const char16_t *input, - size_t size) { - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::SIZE * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - - if (big_endian) { - in0 = in0.swap_bytes(); - in1 = in1.swap_bytes(); - } - - const auto in = simd8(__lsx_vssrlni_bu_h(in1.value, in0.value, 8)); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; - } else { - // 2. 
We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); - - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. - - if (c == 0xffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. 
- input += 15; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - - return result(error_code::SUCCESS, input - start); } /* end file src/lsx/lsx_validate_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/lsx/lsx_validate_utf32le.cpp */ const char32_t *lsx_validate_utf32le(const char32_t *input, size_t size) { @@ -45132,7 +52939,9 @@ const result lsx_validate_utf32le_with_errors(const char32_t *input, return result(error_code::SUCCESS, input - start); } /* end file src/lsx/lsx_validate_utf32le.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lsx/lsx_convert_latin1_to_utf8.cpp */ /* Returns a pair: the first unprocessed byte from buf and utf8_output @@ -45148,7 +52957,7 @@ lsx_convert_latin1_to_utf8(const char *latin1_input, size_t len, __m128i zero = __lsx_vldi(0); // We always write 16 bytes, of which more than the first 8 bytes // are valid. A safety margin of 8 is more than sufficient. - while (latin1_input + 16 <= end) { + while (end - latin1_input >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); uint32_t ascii = __lsx_vpickve2gr_hu(__lsx_vmskgez_b(in8), 0); if (ascii == 0xffff) { // ASCII fast path!!!! 
@@ -45191,6 +53000,8 @@ lsx_convert_latin1_to_utf8(const char *latin1_input, size_t len, return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); } /* end file src/lsx/lsx_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lsx/lsx_convert_latin1_to_utf16.cpp */ std::pair lsx_convert_latin1_to_utf16le(const char *buf, size_t len, @@ -45198,7 +53009,7 @@ lsx_convert_latin1_to_utf16le(const char *buf, size_t len, const char *end = buf + len; __m128i zero = __lsx_vldi(0); - while (buf + 16 <= end) { + while (end - buf >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); __m128i inlow = __lsx_vilvl_b(zero, in8); @@ -45218,7 +53029,7 @@ lsx_convert_latin1_to_utf16be(const char *buf, size_t len, char16_t *utf16_output) { const char *end = buf + len; __m128i zero = __lsx_vldi(0); - while (buf + 16 <= end) { + while (end - buf >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); __m128i inlow = __lsx_vilvl_b(in8, zero); @@ -45232,13 +53043,15 @@ lsx_convert_latin1_to_utf16be(const char *buf, size_t len, return std::make_pair(buf, utf16_output); } /* end file src/lsx/lsx_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lsx/lsx_convert_latin1_to_utf32.cpp */ std::pair lsx_convert_latin1_to_utf32(const char *buf, size_t len, char32_t *utf32_output) { const char *end = buf + len; - while (buf + 16 <= end) { + while (end - buf >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); __m128i zero = __lsx_vldi(0); @@ -45261,7 +53074,9 @@ lsx_convert_latin1_to_utf32(const char *buf, size_t len, return std::make_pair(buf, utf32_output); } /* end file src/lsx/lsx_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file 
src/lsx/lsx_convert_utf8_to_utf16.cpp */ // Convert up to 16 bytes from utf8 to utf16 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask @@ -45403,6 +53218,15 @@ size_t convert_masked_utf8_to_utf16(const char *input, // of the extra memory access is less important than the early branch // overhead in shorter sequences. + __m128i expected_mask = + (__m128i)v16u8{0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, + 0xf8, 0xc0, 0xc0, 0xc0, 0x0, 0x0, 0x0, 0x0}; + __m128i expected = + (__m128i)v16u8{0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, + 0xf0, 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0x0}; + __m128i check = __lsx_vseq_b(__lsx_vand_v(in, expected_mask), expected); + if (__lsx_bz_b(check)) + return 12; // Swap byte pairs // 10dddddd 10cccccc|10bbbbbb 11110aaa // 10cccccc 10dddddd|11110aaa 10bbbbbb @@ -45552,6 +53376,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, } } /* end file src/lsx/lsx_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/lsx/lsx_convert_utf8_to_utf32.cpp */ // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the // end of the code points. 
Only the least significant 12 bits of the mask @@ -45711,8 +53537,7 @@ size_t convert_masked_utf8_to_utf32(const char *input, __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); __m128i middle = __lsx_vand_v(perm, __lsx_vldi(-3777 /*0x00003f00*/)); // 00000000 00000000 0000cccc ccdddddd - __m128i cd = - __lsx_vbitsel_v(__lsx_vsrli_w(middle, 2), ascii, __lsx_vrepli_w(0x3f)); + __m128i cd = __lsx_vor_v(__lsx_vsrli_w(middle, 2), ascii); __m128i correction = __lsx_vand_v(perm, __lsx_vldi(-3520 /*0x00400000*/)); __m128i corrected = __lsx_vadd_b(perm, __lsx_vsrli_w(correction, 1)); @@ -45736,6 +53561,8 @@ size_t convert_masked_utf8_to_utf32(const char *input, } } /* end file src/lsx/lsx_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lsx/lsx_convert_utf8_to_latin1.cpp */ size_t convert_masked_utf8_to_latin1(const char *input, uint64_t utf8_end_of_code_point_mask, @@ -45813,14 +53640,16 @@ size_t convert_masked_utf8_to_latin1(const char *input, return consumed; } /* end file src/lsx/lsx_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lsx/lsx_convert_utf16_to_latin1.cpp */ template std::pair lsx_convert_utf16_to_latin1(const char16_t *buf, size_t len, char *latin1_output) { const char16_t *end = buf + len; - while (buf + 16 <= end) { + while (end - buf >= 16) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); if (!match_system(big_endian)) { @@ -45848,7 +53677,7 @@ lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, char *latin1_output) { const char16_t *start = buf; const char16_t *end = buf + len; - while (buf + 16 <= end) { + while (end - buf >= 16) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); if 
(!match_system(big_endian)) { @@ -45866,9 +53695,8 @@ lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, } else { // Let us do a scalar fallback. for (int k = 0; k < 16; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if (word <= 0xff) { *latin1_output++ = char(word); } else { @@ -45882,6 +53710,8 @@ lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, latin1_output); } /* end file src/lsx/lsx_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 /* begin file src/lsx/lsx_convert_utf16_to_utf8.cpp */ /* The vectorized algorithm works on single SSE register i.e., it @@ -45946,7 +53776,7 @@ lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { // https://github.com/simdutf/simdutf/issues/92 __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); - while (buf + 16 + safety_margin <= end) { + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); if (!match_system(big_endian)) { in = lsx_swap_bytes(in); @@ -46130,9 +53960,8 @@ lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -46146,7 +53975,7 @@ lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? 
scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -46185,7 +54014,7 @@ lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (buf + 16 + safety_margin <= end) { + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); if (!match_system(big_endian)) { in = lsx_swap_bytes(in); @@ -46370,9 +54199,8 @@ lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -46386,7 +54214,7 @@ lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? 
scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -46410,6 +54238,8 @@ lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, reinterpret_cast(utf8_output)); } /* end file src/lsx/lsx_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/lsx/lsx_convert_utf16_to_utf32.cpp */ template std::pair @@ -46422,7 +54252,7 @@ lsx_convert_utf16_to_utf32(const char16_t *buf, size_t len, __m128i v_f800 = __lsx_vldi(-2568); /*0xF800*/ __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ - while (buf + 8 <= end) { + while (end - buf >= 8) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); if (!match_system(big_endian)) { in = lsx_swap_bytes(in); @@ -46450,16 +54280,15 @@ lsx_convert_utf16_to_utf32(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? 
scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -46496,7 +54325,7 @@ lsx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, __m128i v_f800 = __lsx_vldi(-2568); /*0xF800*/ __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ - while (buf + 8 <= end) { + while (end - buf >= 8) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); if (!match_system(big_endian)) { in = lsx_swap_bytes(in); @@ -46522,16 +54351,15 @@ lsx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? 
scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -46551,7 +54379,9 @@ lsx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, reinterpret_cast(utf32_output)); } /* end file src/lsx/lsx_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lsx/lsx_convert_utf32_to_latin1.cpp */ std::pair lsx_convert_utf32_to_latin1(const char32_t *buf, size_t len, @@ -46560,7 +54390,7 @@ lsx_convert_utf32_to_latin1(const char32_t *buf, size_t len, const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; __m128i v_ff = __lsx_vrepli_w(0xFF); - while (buf + 16 <= end) { + while (end - buf >= 16) { __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); @@ -46589,7 +54419,7 @@ lsx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; __m128i v_ff = __lsx_vrepli_w(0xFF); - while (buf + 16 <= end) { + while (end - buf >= 16) { __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); @@ -46620,6 +54450,8 @@ lsx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, latin1_output); } /* end file src/lsx/lsx_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/lsx/lsx_convert_utf32_to_utf8.cpp */ std::pair lsx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { @@ -46636,7 +54468,7 @@ lsx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (buf + 16 + safety_margin < end) { + while (end - buf > std::ptrdiff_t(16 + safety_margin)) { __m128i in = 
__lsx_vld(reinterpret_cast(buf), 0); __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); @@ -46865,7 +54697,7 @@ lsx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (buf + 16 + safety_margin < end) { + while (end - buf > std::ptrdiff_t(16 + safety_margin)) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); @@ -47081,6 +54913,8 @@ lsx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, reinterpret_cast(utf8_output)); } /* end file src/lsx/lsx_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/lsx/lsx_convert_utf32_to_utf16.cpp */ template std::pair @@ -47092,7 +54926,7 @@ lsx_convert_utf32_to_utf16(const char32_t *buf, size_t len, __m128i forbidden_bytemask = __lsx_vrepli_h(0); __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ __m128i v_dfff = __lsx_vreplgr2vr_h(uint16_t(0xdfff)); - while (buf + 8 <= end) { + while (end - buf >= 8) { __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); @@ -47169,7 +55003,7 @@ lsx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ __m128i v_dfff = __lsx_vreplgr2vr_h(uint16_t(0xdfff)); - while (buf + 8 <= end) { + while (end - buf >= 8) { __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); // Check if no bits set above 16th @@ -47238,6 +55072,8 @@ lsx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, reinterpret_cast(utf16_output)); } /* end file src/lsx/lsx_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_BASE64 /* begin file src/lsx/lsx_base64.cpp */ /** * References and further reading: @@ -47768,7 +55604,7 @@ 
compress_decode_base64(char *dst, const char_type *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - triple = scalar::utf32::swap_bytes(triple); + triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 4); dst += 3; @@ -47780,7 +55616,7 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - triple = scalar::utf32::swap_bytes(triple); + triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 3); dst += 3; @@ -47833,6 +55669,7 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, return {SUCCESS, srclen, size_t(dst - dstinit)}; } /* end file src/lsx/lsx_base64.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 } // namespace } // namespace lsx @@ -47949,6 +55786,7 @@ simdutf_really_inline void buf_block_reader::advance() { } // namespace lsx } // namespace simdutf /* end file src/generic/buf_block_reader.h */ +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ namespace simdutf { namespace lsx { @@ -48255,9 +56093,21 @@ result generic_validate_utf8_with_errors(const char *input, size_t length) { reinterpret_cast(input), length); } -template -bool generic_validate_ascii(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +} // namespace utf8_validation +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace ascii_validation { + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); uint8_t 
blocks[64]{}; simd::simd8x64 running_or(blocks); while (reader.has_full_block()) { @@ -48272,14 +56122,8 @@ bool generic_validate_ascii(const uint8_t *input, size_t length) { return running_or.is_ascii(); } -bool generic_validate_ascii(const char *input, size_t length) { - return generic_validate_ascii( - reinterpret_cast(input), length); -} - -template -result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); size_t count{0}; while (reader.has_full_block()) { simd::simd8x64 in(reader.full_block()); @@ -48304,20 +56148,16 @@ result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { } } -result generic_validate_ascii_with_errors(const char *input, size_t length) { - return generic_validate_ascii_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation +} // namespace ascii_validation } // unnamed namespace } // namespace lsx } // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII -// transcoding from UTF-8 to Latin 1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + // transcoding from UTF-8 to Latin 1 /* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ - namespace simdutf { namespace lsx { namespace { @@ -48636,7 +56476,6 @@ struct validating_transcoder { } // namespace simdutf /* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ /* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ - namespace simdutf { namespace lsx { namespace { @@ -48716,9 +56555,11 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, } // namespace simdutf // namespace simdutf /* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -// transcoding from UTF-8 to 
UTF-16 -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ namespace simdutf { namespace lsx { namespace { @@ -48795,7 +56636,6 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace simdutf /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ - namespace simdutf { namespace lsx { namespace { @@ -49129,9 +56969,11 @@ struct validating_transcoder { } // namespace lsx } // namespace simdutf /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -// transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + // transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ namespace simdutf { namespace lsx { namespace { @@ -49176,7 +57018,6 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace simdutf /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ - namespace simdutf { namespace lsx { namespace { @@ -49496,11 +57337,10 @@ struct validating_transcoder { } // namespace lsx } // namespace simdutf /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -// other functions +#if SIMDUTF_FEATURE_UTF8 /* begin file src/generic/utf8.h */ - namespace simdutf { namespace lsx { namespace { @@ -49519,6 +57359,59 @@ simdutf_really_inline size_t count_code_points(const char *in, size_t size) { return count + scalar::utf8::count_code_points(in + pos, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK 
+simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif + simdutf_really_inline size_t utf16_length_from_utf8(const char *in, size_t size) { size_t pos = 0; @@ -49540,6 +57433,9 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, } // namespace lsx } // namespace simdutf /* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 /* begin file src/generic/utf16.h */ namespace simdutf { namespace lsx { @@ -49589,6 +57485,89 @@ simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t 
size) { + size_t pos = 0; + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input = input.swap_bytes(); + } + + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. + + Thus, we always have correct count for the current char: + from 1, 2 or 3 bytes. + + A trickier part is how we count surrogate pairs. Whether + we encounter a surrogate (low or high), we count it as + 3 chars and then minus 1 (`is_surrogate` is -1 or 0). + Each surrogate char yields 2. A surrogate pair, that + is a low surrogate followed by a high one, yields + the expected 4 bytes. + + It also correctly handles cases when low surrogate is + processed by the this loop, but high surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach, thanks to that for valid UTF-16 + strings it always count correctly. 
+ */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + template simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, size_t size) { @@ -49615,6 +57594,143 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { } // namespace lsx } // namespace simdutf /* end file src/generic/utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- + + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 
0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. + const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. 
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING // // Implementation-specific overrides @@ -49622,6 +57738,7 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { namespace simdutf { namespace lsx { +#if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { @@ -49648,27 +57765,35 @@ implementation::detect_encodings(const char *input, } return out; } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { return lsx::utf8_validation::generic_validate_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { return lsx::utf8_validation::generic_validate_utf8_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return lsx::utf8_validation::generic_validate_ascii(buf, len); + return lsx::ascii_validation::generic_validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return lsx::utf8_validation::generic_validate_ascii_with_errors(buf, len); + return lsx::ascii_validation::generic_validate_ascii_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING 
simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept { @@ -49676,15 +57801,23 @@ implementation::validate_utf16le(const char16_t *buf, // empty input is valid. protected the implementation from nullptr. return true; } - const char16_t *tail = lsx_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, - len - (tail - buf)); - } else { + const auto res = + lsx::utf16::validate_utf16_with_errors(buf, len); + + if (res.is_err()) { return false; } -} + if (res.count != len) { + return scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept { @@ -49692,12 +57825,19 @@ implementation::validate_utf16be(const char16_t *buf, // empty input is valid. protected the implementation from nullptr. return true; } - const char16_t *tail = lsx_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, len - (tail - buf)); - } else { + const auto res = + lsx::utf16::validate_utf16_with_errors(buf, len); + + if (res.is_err()) { return false; } + + if (res.count != len) { + return scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; } simdutf_warn_unused result implementation::validate_utf16le_with_errors( @@ -49705,10 +57845,12 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors( if (simdutf_unlikely(len == 0)) { return result(error_code::SUCCESS, 0); } - result res = lsx_validate_utf16_with_errors(buf, len); + const result res = + lsx::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); + const result scalar_res = + scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); return 
result(scalar_res.error, res.count + scalar_res.count); } else { return res; @@ -49720,16 +57862,20 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( if (simdutf_unlikely(len == 0)) { return result(error_code::SUCCESS, 0); } - result res = lsx_validate_utf16_with_errors(buf, len); + const result res = + lsx::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); + const result scalar_res = + scalar::utf16::validate_with_errors(buf + res.count, + len - res.count); return result(scalar_res.error, res.count + scalar_res.count); } else { return res; } } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -49743,7 +57889,9 @@ implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { return false; } } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -49758,7 +57906,9 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( return res; } } +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -49772,7 +57922,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) 
const noexcept { std::pair ret = @@ -49800,7 +57952,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ -49813,7 +57967,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { utf8_to_latin1::validating_transcoder converter; @@ -49830,7 +57986,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { return lsx::utf8_to_latin1::convert_valid(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { utf8_to_utf16::validating_transcoder converter; @@ -49867,7 +58025,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( return utf8_to_utf16::convert_valid(input, size, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { utf8_to_utf32::validating_transcoder converter; @@ -49884,7 +58044,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( const char *input, size_t size, char32_t *utf32_output) const noexcept { return 
utf8_to_utf32::convert_valid(input, size, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -49992,7 +58154,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( // optimization opportunity: implement a custom function. return convert_utf16le_to_latin1(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -50098,7 +58262,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { return convert_utf16be_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -50145,7 +58311,9 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( utf8_output; // Set count to the number of 8-bit code units written return ret.first; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ -50241,7 +58409,9 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( utf32_output; // Set count to the number of 8-bit code units written return ret.first; } +#endif // SIMDUTF_FEATURE_UTF16 && 
SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -50302,13 +58472,17 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( } return saved_bytes; } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { // optimization opportunity: implement a custom function. return convert_utf32_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = @@ -50417,7 +58591,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { return convert_utf16be_to_utf32(buf, len, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 void implementation::change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept { @@ -50433,33 +58609,29 @@ simdutf_warn_unused size_t implementation::count_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { return utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::latin1_length_from_utf8( const char *buf, size_t 
len) const noexcept { return count_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return length; -} - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t length) const noexcept { - return length; -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::utf8_length_from_latin1( const char *input, size_t length) const noexcept { const uint8_t *data = reinterpret_cast(input); const uint8_t *data_end = data + length; uint64_t result = 0; - while (data + 16 < data_end) { + while (data_end - data > 16) { uint64_t two_bytes = 0; __m128i input_vec = __lsx_vld(data, 0); two_bytes = @@ -50470,7 +58642,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_latin1( return result + scalar::latin1::utf8_length_from_latin1((const char *)data, data_end - data); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return utf16::utf8_length_from_utf16(input, length); @@ -50480,17 +58654,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::utf8_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return length; -} - -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return length; -} - +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); @@ 
-50500,12 +58666,16 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf16_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::utf16_length_from_utf8(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const noexcept { const __m128i v_80 = __lsx_vrepli_w(0x80); /*0x00000080*/ @@ -50535,7 +58705,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf32( return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { const __m128i v_ffff = __lsx_vldi(-2304); /*0x0000ffff*/ @@ -50551,17 +58723,16 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32( return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - +#if SIMDUTF_FEATURE_BASE64 
simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -50606,11 +58777,6 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -50655,11 +58821,6 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); -} - size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { @@ -50669,6 +58830,7 @@ size_t implementation::binary_to_base64(const char *input, size_t length, return encode_base64(output, input, length, options); } } +#endif // SIMDUTF_FEATURE_BASE64 } // namespace lsx } // namespace simdutf @@ -50690,6 +58852,7 @@ namespace { #endif using namespace simd; +#if SIMDUTF_FEATURE_UTF8 // convert vmskltz/vmskgez/vmsknz to // simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index const uint8_t lasx_1_2_utf8_bytes_mask[] = { @@ -50711,33 +58874,26 @@ const uint8_t lasx_1_2_utf8_bytes_mask[] = { 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, 255}; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { return 
__lsx_vshuf4i_b(vec, 0b10110001); } simdutf_really_inline __m256i lasx_swap_bytes(__m256i vec) { return __lasx_xvshuf4i_b(vec, 0b10110001); } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 simdutf_really_inline bool is_ascii(const simd8x64 &input) { return input.is_ascii(); } +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 -simdutf_unused simdutf_really_inline simd8 -must_be_continuation(const simd8 prev1, const simd8 prev2, - const simd8 prev3) { - simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller - // is using ^ as well. This will work fine because we only have to report - // errors for cases with 0-1 lead bytes. Multiple lead bytes implies 2 - // overlapping multibyte characters, and if that happens, there is guaranteed - // to be at least *one* lead byte that is part of only 1 other multibyte - // character. The error will be detected there. 
- return is_second_byte ^ is_third_byte ^ is_fourth_byte; -} - +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { @@ -50745,7 +58901,9 @@ must_be_2_3_continuation(const simd8 prev2, simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); return is_third_byte ^ is_fourth_byte; } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) // common functions for utf8 conversions simdutf_really_inline __m128i convert_utf8_3_byte_to_utf16(__m128i in) { // Low half contains 10bbbbbb|10cccccc @@ -50803,210 +58961,27 @@ convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { composed = __lsx_vadd_h(ascii, composed); return composed; } +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32) +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/lasx/lasx_validate_utf16.cpp */ -/* - In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. - - In a vectorized algorithm we want to examine the most significant - nibble in order to select a fast path. If none of highest nibbles - are 0xD (13), than we are sure that UTF-16 chunk in a vector - register is valid. - - Let us analyze what we need to check if the nibble is 0xD. The - value of the preceding nibble determines what we have: - - 0xd000 .. 0xd7ff - a valid word - 0xd800 .. 0xdbff - low surrogate - 0xdc00 .. 0xdfff - high surrogate - - Other constraints we have to consider: - - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) - - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - - there must not be sole low surrogate nor high surrogate - - We're going to build three bitmasks based on the 3rd nibble: - - V = valid word, - - L = low surrogate (0xd800 .. 0xdbff) - - H = high surrogate (0xdc00 .. 
0xdfff) - - 0 1 2 3 4 5 6 7 <--- word index - [ V | L | H | L | H | V | V | L ] - 1 0 0 0 0 1 1 0 - V = valid masks - 0 1 0 1 0 0 0 1 - L = low surrogate - 0 0 1 0 1 0 0 0 - H high surrogate - - - 1 0 0 0 0 1 1 0 V = valid masks - 0 1 0 1 0 0 0 0 a = L & (H >> 1) - 0 0 1 0 1 0 0 0 b = a << 1 - 1 1 1 1 1 1 1 0 c = V | a | b - ^ - the last bit can be zero, we just consume 7 - code units and recheck this word in the next iteration -*/ - -/* Returns: - - pointer to the last unprocessed character (a scalar fallback should check - the rest); - - nullptr if an error was detected. -*/ template -const char16_t *lasx_validate_utf16(const char16_t *input, size_t size) { - const char16_t *end = input + size; +simd8 utf16_gather_high_bytes(const simd16 in0, + const simd16 in1) { + if (big_endian) { + const auto mask = simd16(0x00ff); + const auto t0 = in0 & mask; + const auto t1 = in1 & mask; - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::ELEMENTS * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = simd16(input + simd16::ELEMENTS); - - if (big_endian) { - in0 = in0.swap_bytes(); - in1 = in1.swap_bytes(); - } - - const auto in = simd8(__lasx_xvpermi_d( - __lasx_xvssrlni_bu_h(in1.value, in0.value, 8), 0b11011000)); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); - if (surrogates_bitmask == 0x0) { - input += simd16::ELEMENTS * 2; - } else { - // 2. 
We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint32_t V = ~surrogates_bitmask; - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint32_t H = vH.to_bitmask(); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint32_t L = ~H & surrogates_bitmask; - - const uint32_t a = - L & (H >> 1); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint32_t b = - a << 1; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint32_t c = V | a | b; // Combine all the masks into the final one. - - if (c == 0xffffffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += simd16::ELEMENTS * 2; - } else if (c == 0x7fffffff) { - // The 31 lower code units of the input register contains valid UTF-16. - // The 31 word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. 
- input += simd16::ELEMENTS * 2 - 1; - } else { - return nullptr; - } - } + return simd16::pack(t0, t1); + } else { + return simd16::pack_shifted_right<8>(in0, in1); } - - return input; -} - -template -const result lasx_validate_utf16_with_errors(const char16_t *input, - size_t size) { - if (simdutf_unlikely(size == 0)) { - return result(error_code::SUCCESS, 0); - } - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::ELEMENTS * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = simd16(input + simd16::ELEMENTS); - - if (big_endian) { - in0 = in0.swap_bytes(); - in1 = in1.swap_bytes(); - } - const auto in = simd8(__lasx_xvpermi_d( - __lasx_xvssrlni_bu_h(in1.value, in0.value, 8), 0b11011000)); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); - if (surrogates_bitmask == 0x0) { - input += simd16::ELEMENTS * 2; - } else { - // 2. 
We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint32_t V = ~surrogates_bitmask; - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint32_t H = vH.to_bitmask(); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint32_t L = ~H & surrogates_bitmask; - - const uint32_t a = - L & (H >> 1); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint32_t b = - a << 1; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint32_t c = V | a | b; // Combine all the masks into the final one. - - if (c == 0xffffffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += simd16::ELEMENTS * 2; - } else if (c == 0x7fffffff) { - // The 31 lower code units of the input register contains valid UTF-16. - // The 31 word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. 
- input += simd16::ELEMENTS * 2 - 1; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - - return result(error_code::SUCCESS, input - start); } /* end file src/lasx/lasx_validate_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/lasx/lasx_validate_utf32le.cpp */ const char32_t *lasx_validate_utf32le(const char32_t *input, size_t size) { @@ -51094,7 +59069,9 @@ const result lasx_validate_utf32le_with_errors(const char32_t *input, return result(error_code::SUCCESS, input - start); } /* end file src/lasx/lasx_validate_utf32le.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lasx/lasx_convert_latin1_to_utf8.cpp */ /* Returns a pair: the first unprocessed byte from buf and utf8_output @@ -51110,7 +59087,7 @@ lasx_convert_latin1_to_utf8(const char *latin1_input, size_t len, // We always write 16 bytes, of which more than the first 8 bytes // are valid. A safety margin of 8 is more than sufficient. 
- while (latin1_input + 16 <= end) { + while (end - latin1_input >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); uint32_t ascii_mask = __lsx_vpickve2gr_wu(__lsx_vmskgez_b(in8), 0); if (ascii_mask == 0xFFFF) { @@ -51162,6 +59139,8 @@ lasx_convert_latin1_to_utf8(const char *latin1_input, size_t len, return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); } /* end file src/lasx/lasx_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lasx/lasx_convert_latin1_to_utf16.cpp */ std::pair lasx_convert_latin1_to_utf16le(const char *buf, size_t len, @@ -51174,7 +59153,7 @@ lasx_convert_latin1_to_utf16le(const char *buf, size_t len, buf++; } - while (buf + 32 <= end) { + while (end - buf >= 32) { __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); __m256i inlow = __lasx_vext2xv_hu_bu(in8); @@ -51240,6 +59219,8 @@ lasx_convert_latin1_to_utf16be(const char *buf, size_t len, return std::make_pair(buf, utf16_output); } /* end file src/lasx/lasx_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lasx/lasx_convert_latin1_to_utf32.cpp */ std::pair lasx_convert_latin1_to_utf32(const char *buf, size_t len, @@ -51252,7 +59233,7 @@ lasx_convert_latin1_to_utf32(const char *buf, size_t len, buf++; } - while (buf + 32 <= end) { + while (end - buf >= 32) { __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); __m256i in32_0 = __lasx_vext2xv_wu_bu(in8); @@ -51274,7 +59255,7 @@ lasx_convert_latin1_to_utf32(const char *buf, size_t len, buf += 32; } - if (buf + 16 <= end) { + if (end - buf >= 16) { __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); __m128i zero = __lsx_vldi(0); @@ -51297,7 +59278,9 @@ lasx_convert_latin1_to_utf32(const char *buf, size_t len, return std::make_pair(buf, utf32_output); } /* end file 
src/lasx/lasx_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file src/lasx/lasx_convert_utf8_to_utf16.cpp */ // Convert up to 16 bytes from utf8 to utf16 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask @@ -51439,6 +59422,15 @@ size_t convert_masked_utf8_to_utf16(const char *input, } else if (idx < 209) { // THREE (3) input code-code units if (input_utf8_end_of_code_point_mask == 0x888) { + __m128i expected_mask = + (__m128i)v16u8{0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, + 0xf8, 0xc0, 0xc0, 0xc0, 0x0, 0x0, 0x0, 0x0}; + __m128i expected = + (__m128i)v16u8{0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, + 0xf0, 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0x0}; + __m128i check = __lsx_vseq_b(__lsx_vand_v(in, expected_mask), expected); + if (__lsx_bz_b(check)) + return 12; // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but // it is easier when we can assume they are all pairs. This version does @@ -51593,6 +59585,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, } } /* end file src/lasx/lasx_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/lasx/lasx_convert_utf8_to_utf32.cpp */ // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the // end of the code points. 
Only the least significant 12 bits of the mask @@ -51763,8 +59757,7 @@ size_t convert_masked_utf8_to_utf32(const char *input, __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); __m128i middle = __lsx_vand_v(perm, __lsx_vldi(-3777 /*0x00003f00*/)); // 00000000 00000000 0000cccc ccdddddd - __m128i cd = - __lsx_vbitsel_v(__lsx_vsrli_w(middle, 2), ascii, __lsx_vrepli_w(0x3f)); + __m128i cd = __lsx_vor_v(__lsx_vsrli_w(middle, 2), ascii); __m128i correction = __lsx_vand_v(perm, __lsx_vldi(-3520 /*0x00400000*/)); __m128i corrected = __lsx_vadd_b(perm, __lsx_vsrli_w(correction, 1)); @@ -51788,6 +59781,8 @@ size_t convert_masked_utf8_to_utf32(const char *input, } } /* end file src/lasx/lasx_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lasx/lasx_convert_utf8_to_latin1.cpp */ size_t convert_masked_utf8_to_latin1(const char *input, uint64_t utf8_end_of_code_point_mask, @@ -51862,14 +59857,16 @@ size_t convert_masked_utf8_to_latin1(const char *input, return consumed; } /* end file src/lasx/lasx_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lasx/lasx_convert_utf16_to_latin1.cpp */ template std::pair lasx_convert_utf16_to_latin1(const char16_t *buf, size_t len, char *latin1_output) { const char16_t *end = buf + len; - while (buf + 16 <= end) { + while (end - buf >= 16) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); if (!match_system(big_endian)) { @@ -51897,7 +59894,7 @@ lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, char *latin1_output) { const char16_t *start = buf; const char16_t *end = buf + len; - while (buf + 16 <= end) { + while (end - buf >= 16) { __m128i in = __lsx_vld(reinterpret_cast(buf), 0); __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); if 
(!match_system(big_endian)) { @@ -51915,9 +59912,8 @@ lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, } else { // Let us do a scalar fallback. for (int k = 0; k < 16; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if (word <= 0xff) { *latin1_output++ = char(word); } else { @@ -51931,6 +59927,8 @@ lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, latin1_output); } /* end file src/lasx/lasx_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /* begin file src/lasx/lasx_convert_utf16_to_utf8.cpp */ /* The vectorized algorithm works on single LASX register i.e., it @@ -51998,7 +59996,7 @@ lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); __m256i zero = __lasx_xvldi(0); __m128i zero_128 = __lsx_vldi(0); - while (buf + 16 + safety_margin <= end) { + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); if (!match_system(big_endian)) { in = lasx_swap_bytes(in); @@ -52195,9 +60193,8 @@ lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -52211,7 +60208,7 @@ lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? 
scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -52254,7 +60251,7 @@ lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); __m256i zero = __lasx_xvldi(0); __m128i zero_128 = __lsx_vldi(0); - while (buf + 16 + safety_margin <= end) { + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); if (!match_system(big_endian)) { in = lasx_swap_bytes(in); @@ -52451,9 +60448,8 @@ lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -52467,7 +60463,7 @@ lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -52491,6 +60487,8 @@ lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, reinterpret_cast(utf8_output)); } /* end file src/lasx/lasx_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/lasx/lasx_convert_utf16_to_utf32.cpp */ template std::pair @@ -52502,7 +60500,7 @@ lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, // Performance degradation when memory address is not 32-byte aligned while (((uint64_t)utf32_output & 0x1f) && buf < end) { uint16_t word = - !match_system(big_endian) ? 
scalar::utf16::swap_bytes(buf[0]) : buf[0]; + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[0]) : buf[0]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); buf++; @@ -52513,9 +60511,8 @@ lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, } // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[1]) - : buf[1]; + uint16_t next_word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[1]) : buf[1]; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { return std::make_pair(nullptr, @@ -52530,7 +60527,7 @@ lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, __m256i v_f800 = __lasx_xvldi(-2568); /*0xF800*/ __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ - while (buf + 16 <= end) { + while (end - buf >= 16) { __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); if (!match_system(big_endian)) { in = lasx_swap_bytes(in); @@ -52559,16 +60556,15 @@ lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -52604,16 +60600,15 @@ lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, // Performance degradation when memory address is not 32-byte aligned while (((uint64_t)utf32_output & 0x1f) && buf < end) { uint16_t word = - !match_system(big_endian) ? 
scalar::utf16::swap_bytes(buf[0]) : buf[0]; + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[0]) : buf[0]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); buf++; } else if (buf + 1 < end) { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[1]) - : buf[1]; + uint16_t next_word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[1]) : buf[1]; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { return std::make_pair(result(error_code::SURROGATE, buf - start), @@ -52630,7 +60625,7 @@ lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, __m256i v_f800 = __lasx_xvldi(-2568); /*0xF800*/ __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ - while (buf + 16 <= end) { + while (end - buf >= 16) { __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); if (!match_system(big_endian)) { in = lasx_swap_bytes(in); @@ -52659,16 +60654,15 @@ lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; + uint16_t word = + !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) + ? 
scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); @@ -52688,7 +60682,9 @@ lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, reinterpret_cast(utf32_output)); } /* end file src/lasx/lasx_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /* begin file src/lasx/lasx_convert_utf32_to_latin1.cpp */ std::pair lasx_convert_utf32_to_latin1(const char32_t *buf, size_t len, @@ -52698,7 +60694,7 @@ lasx_convert_utf32_to_latin1(const char32_t *buf, size_t len, (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); __m256i v_ff = __lasx_xvrepli_w(0xFF); - while (buf + 16 <= end) { + while (end - buf >= 16) { __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); @@ -52731,7 +60727,7 @@ lasx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); __m256i v_ff = __lasx_xvrepli_w(0xFF); - while (buf + 16 <= end) { + while (end - buf >= 16) { __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); @@ -52764,6 +60760,8 @@ lasx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, latin1_output); } /* end file src/lasx/lasx_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /* begin file src/lasx/lasx_convert_utf32_to_utf8.cpp */ std::pair lasx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { @@ -52809,7 +60807,7 @@ lasx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (buf + 16 + safety_margin < end) { + while (end - buf > std::ptrdiff_t(16 + safety_margin)) { __m256i in = 
__lasx_xvld(reinterpret_cast(buf), 0); __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); @@ -53104,7 +61102,7 @@ lasx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (buf + 16 + safety_margin < end) { + while (end - buf > std::ptrdiff_t(16 + safety_margin)) { __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); @@ -53355,6 +61353,8 @@ lasx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, reinterpret_cast(utf8_output)); } /* end file src/lasx/lasx_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /* begin file src/lasx/lasx_convert_utf32_to_utf16.cpp */ template std::pair @@ -53398,7 +61398,7 @@ lasx_convert_utf32_to_utf16(const char32_t *buf, size_t len, __m256i forbidden_bytemask = __lasx_xvrepli_h(0); __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ __m256i v_dfff = __lasx_xvreplgr2vr_h(uint16_t(0xdfff)); - while (buf + 16 <= end) { + while (end - buf >= 16) { __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); @@ -53505,7 +61505,7 @@ lasx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, __m256i forbidden_bytemask = __lasx_xvrepli_h(0); __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ __m256i v_dfff = __lasx_xvreplgr2vr_h(uint16_t(0xdfff)); - while (buf + 16 <= end) { + while (end - buf >= 16) { __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); @@ -53575,6 +61575,8 @@ lasx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, reinterpret_cast(utf16_output)); } /* end file src/lasx/lasx_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_BASE64 /* begin file src/lasx/lasx_base64.cpp */ /** * 
References and further reading: @@ -54121,7 +62123,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - triple = scalar::utf32::swap_bytes(triple); + triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 4); dst += 3; @@ -54133,7 +62135,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) << 8; - triple = scalar::utf32::swap_bytes(triple); + triple = scalar::u32_swap_bytes(triple); std::memcpy(dst, &triple, 3); dst += 3; @@ -54186,6 +62188,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, return {SUCCESS, srclen, size_t(dst - dstinit)}; } /* end file src/lasx/lasx_base64.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 } // namespace } // namespace lasx @@ -54302,6 +62305,7 @@ simdutf_really_inline void buf_block_reader::advance() { } // namespace lasx } // namespace simdutf /* end file src/generic/buf_block_reader.h */ +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ namespace simdutf { namespace lasx { @@ -54608,9 +62612,21 @@ result generic_validate_utf8_with_errors(const char *input, size_t length) { reinterpret_cast(input), length); } -template -bool generic_validate_ascii(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +} // namespace utf8_validation +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace ascii_validation { + +bool generic_validate_ascii(const char *input, size_t length) { + 
buf_block_reader<64> reader(reinterpret_cast(input), length); uint8_t blocks[64]{}; simd::simd8x64 running_or(blocks); while (reader.has_full_block()) { @@ -54625,14 +62641,8 @@ bool generic_validate_ascii(const uint8_t *input, size_t length) { return running_or.is_ascii(); } -bool generic_validate_ascii(const char *input, size_t length) { - return generic_validate_ascii( - reinterpret_cast(input), length); -} - -template -result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); size_t count{0}; while (reader.has_full_block()) { simd::simd8x64 in(reader.full_block()); @@ -54657,20 +62667,16 @@ result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { } } -result generic_validate_ascii_with_errors(const char *input, size_t length) { - return generic_validate_ascii_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation +} // namespace ascii_validation } // unnamed namespace } // namespace lasx } // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII -// transcoding from UTF-8 to Latin 1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + // transcoding from UTF-8 to Latin 1 /* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ - namespace simdutf { namespace lasx { namespace { @@ -54989,7 +62995,6 @@ struct validating_transcoder { } // namespace simdutf /* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ /* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ - namespace simdutf { namespace lasx { namespace { @@ -55069,9 +63074,10 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, } // namespace simdutf // namespace simdutf /* end file 
src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -// transcoding from UTF-8 to UTF-16 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ - namespace simdutf { namespace lasx { namespace { @@ -55148,7 +63154,6 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace simdutf /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ - namespace simdutf { namespace lasx { namespace { @@ -55482,9 +63487,10 @@ struct validating_transcoder { } // namespace lasx } // namespace simdutf /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -// transcoding from UTF-8 to UTF-32 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + // transcoding from UTF-8 to UTF-32 /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ - namespace simdutf { namespace lasx { namespace { @@ -55529,7 +63535,6 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace simdutf /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ - namespace simdutf { namespace lasx { namespace { @@ -55849,11 +63854,10 @@ struct validating_transcoder { } // namespace lasx } // namespace simdutf /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -// other functions +#if SIMDUTF_FEATURE_UTF8 /* begin file src/generic/utf8.h */ - namespace simdutf { namespace lasx { namespace { @@ -55872,6 +63876,59 @@ simdutf_really_inline size_t count_code_points(const char *in, size_t size) { return count + scalar::utf8::count_code_points(in + pos, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t 
count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif + simdutf_really_inline size_t utf16_length_from_utf8(const char *in, size_t size) { size_t pos = 0; @@ -55893,6 +63950,9 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, } // namespace lasx } // namespace simdutf /* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 /* begin file src/generic/utf16.h */ namespace simdutf { namespace lasx { @@ -55942,6 +64002,89 @@ simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, size - pos); } +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t size) { + size_t pos = 0; + + 
using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 2, even though we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input = input.swap_bytes(); + } + + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. + + Thus, we always have the correct count for the current char: + 1, 2 or 3 bytes. + + A trickier part is how we count surrogate pairs. Whenever + we encounter a surrogate (low or high), we count it as + 3 bytes and then subtract 1 (`is_surrogate` is -1 or 0). + Each surrogate char yields 2. A surrogate pair, that + is a low surrogate followed by a high one, yields + the expected 4 bytes. + + It also correctly handles cases when a low surrogate is + processed by this loop, but the high surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach; thanks to that, for valid UTF-16 + strings it always counts correctly.
+ */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + template simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, size_t size) { @@ -55968,6 +64111,143 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { } // namespace lasx } // namespace simdutf /* end file src/generic/utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- + + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 
0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. + const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opposite fact holds, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contain valid UTF-16. + // The 15th word may be either a low or high surrogate. In the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate.
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING // // Implementation-specific overrides @@ -55975,6 +64255,7 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { namespace simdutf { namespace lasx { +#if SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { @@ -56001,27 +64282,35 @@ implementation::detect_encodings(const char *input, } return out; } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { return lasx::utf8_validation::generic_validate_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { return lasx::utf8_validation::generic_validate_utf8_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return lasx::utf8_validation::generic_validate_ascii(buf, len); + return lasx::ascii_validation::generic_validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return lasx::utf8_validation::generic_validate_ascii_with_errors(buf, len); + return lasx::ascii_validation::generic_validate_ascii_with_errors(buf, len); } +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING 
simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept { @@ -56029,15 +64318,22 @@ implementation::validate_utf16le(const char16_t *buf, // empty input is valid. protected the implementation from nullptr. return true; } - const char16_t *tail = lasx_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, - len - (tail - buf)); - } else { + const auto res = + lasx::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { return false; } -} + if (res.count != len) { + return scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept { @@ -56045,12 +64341,19 @@ implementation::validate_utf16be(const char16_t *buf, // empty input is valid. protected the implementation from nullptr. return true; } - const char16_t *tail = lasx_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, len - (tail - buf)); - } else { + + const auto res = + lasx::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { return false; } + + if (res.count != len) { + return scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; } simdutf_warn_unused result implementation::validate_utf16le_with_errors( @@ -56058,10 +64361,12 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors( if (simdutf_unlikely(len == 0)) { return result(error_code::SUCCESS, 0); } - result res = lasx_validate_utf16_with_errors(buf, len); + const result res = + lasx::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); + const result scalar_res = + scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); return 
result(scalar_res.error, res.count + scalar_res.count); } else { return res; @@ -56073,16 +64378,20 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( if (simdutf_unlikely(len == 0)) { return result(error_code::SUCCESS, 0); } - result res = lasx_validate_utf16_with_errors(buf, len); + const result res = + lasx::utf16::validate_utf16_with_errors(buf, len); if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); + const result scalar_res = + scalar::utf16::validate_with_errors(buf + res.count, + len - res.count); return result(scalar_res.error, res.count + scalar_res.count); } else { return res; } } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -56096,7 +64405,9 @@ implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { return false; } } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -56111,7 +64422,9 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( return res; } } +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -56125,7 +64438,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) 
const noexcept { std::pair ret = @@ -56153,7 +64468,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ -56166,7 +64483,9 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( } return converted_chars; } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { size_t pos = 0; @@ -56281,7 +64600,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( lasx::utf8_to_latin1::convert_valid(buf + pos, len - pos, latin1_output); return convert_result ? convert_size + convert_result : 0; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { utf8_to_utf16::validating_transcoder converter; @@ -56318,7 +64639,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( return utf8_to_utf16::convert_valid(input, size, utf16_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { utf8_to_utf32::validating_transcoder converter; @@ -56335,7 +64658,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( const char *input, size_t size, char32_t *utf32_output) const noexcept { return utf8_to_utf32::convert_valid(input, 
size, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -56443,7 +64768,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( // optimization opportunity: implement a custom function. return convert_utf16le_to_latin1(buf, len, latin1_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = @@ -56549,7 +64876,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { return convert_utf16be_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { if (simdutf_unlikely(len == 0)) { @@ -56596,7 +64925,9 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( utf8_output; // Set count to the number of 8-bit code units written return ret.first; } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = @@ -56692,7 +65023,9 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( utf32_output; // Set count to the number of 8-bit code units written return ret.first; } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if 
SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = @@ -56753,13 +65086,17 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( } return saved_bytes; } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { // optimization opportunity: implement a custom function. return convert_utf32_to_utf8(buf, len, utf8_output); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = @@ -56868,7 +65205,9 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { return convert_utf16be_to_utf32(buf, len, utf32_output); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 void implementation::change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept { @@ -56884,7 +65223,9 @@ simdutf_warn_unused size_t implementation::count_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { size_t pos = 0; @@ -56905,28 +65246,22 @@ implementation::count_utf8(const char *input, size_t length) const noexcept { } return count + scalar::utf8::count_code_points(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF8 +#if 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::latin1_length_from_utf8( const char *buf, size_t len) const noexcept { return count_utf8(buf, len); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return length; -} - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t length) const noexcept { - return length; -} - +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 simdutf_warn_unused size_t implementation::utf8_length_from_latin1( const char *input, size_t length) const noexcept { const uint8_t *data = reinterpret_cast(input); const uint8_t *data_end = data + length; uint64_t result = 0; - while (data + 16 < data_end) { + while (data_end - data > 16) { uint64_t two_bytes = 0; __m128i input_vec = __lsx_vld(data, 0); two_bytes = @@ -56937,7 +65272,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_latin1( return result + scalar::latin1::utf8_length_from_latin1((const char *)data, data_end - data); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return utf16::utf8_length_from_utf16(input, length); @@ -56947,17 +65284,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::utf8_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return length; -} - -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return length; -} - +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t 
implementation::utf32_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); @@ -56967,12 +65296,16 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 simdutf_warn_unused size_t implementation::utf16_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::utf16_length_from_utf8(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const noexcept { __m256i v_80 = __lasx_xvrepli_w(0x80); /*0x00000080*/ @@ -57008,7 +65341,9 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf32( return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { __m128i v_ffff = __lsx_vldi(-2304); /*0x0000ffff*/ @@ -57024,17 +65359,16 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32( return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 simdutf_warn_unused size_t implementation::utf32_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::count_code_points(input, length); } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t 
length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - +#if SIMDUTF_FEATURE_BASE64 simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -57079,11 +65413,6 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { @@ -57128,11 +65457,6 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); -} - size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { @@ -57142,6 +65466,7 @@ size_t implementation::binary_to_base64(const char *input, size_t length, return encode_base64(output, input, length, options); } } +#endif // SIMDUTF_FEATURE_BASE64 } // namespace lasx } // namespace simdutf diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h index 4bec0cf3002..cd92c8ea049 100644 --- a/deps/simdutf/simdutf.h +++ b/deps/simdutf/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2025-01-08 17:51:07 -0500. Do not edit! */ +/* auto-generated on 2025-03-17 16:12:36 -0400. Do not edit! 
*/ /* begin file include/simdutf.h */ #ifndef SIMDUTF_H #define SIMDUTF_H @@ -81,7 +81,10 @@ #if __cpp_concepts >= 201907L && __cpp_lib_span >= 202002L && \ !defined(SIMDUTF_SPAN_DISABLED) #define SIMDUTF_SPAN 1 - #endif + #endif // __cpp_concepts >= 201907L && __cpp_lib_span >= 202002L + #if __cpp_lib_atomic_ref >= 201806L + #define SIMDUTF_ATOMIC_REF 1 + #endif // __cpp_lib_atomic_ref #endif /** @@ -160,9 +163,9 @@ #elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #define SIMDUTF_IS_ARM64 1 #elif defined(__PPC64__) || defined(_M_PPC64) -// #define SIMDUTF_IS_PPC64 1 -// The simdutf library does yet support SIMD acceleration under -// POWER processors. Please see https://github.com/lemire/simdutf/issues/51 + #if defined(__VEC__) && defined(__ALTIVEC__) + #define SIMDUTF_IS_PPC64 1 + #endif #elif defined(__s390__) // s390 IBM system. Big endian. #elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 @@ -602,6 +605,14 @@ struct result { simdutf_really_inline result(error_code err, size_t pos) : error{err}, count{pos} {} + + simdutf_really_inline bool is_ok() const { + return error == error_code::SUCCESS; + } + + simdutf_really_inline bool is_err() const { + return error != error_code::SUCCESS; + } }; struct full_result { @@ -641,7 +652,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "6.0.3" +#define SIMDUTF_VERSION "6.4.0" namespace simdutf { enum { @@ -652,11 +663,11 @@ enum { /** * The minor version (major.MINOR.revision) of simdutf being used. */ - SIMDUTF_VERSION_MINOR = 0, + SIMDUTF_VERSION_MINOR = 4, /** * The revision (major.minor.REVISION) of simdutf being used. 
*/ - SIMDUTF_VERSION_REVISION = 3 + SIMDUTF_VERSION_REVISION = 0 }; } // namespace simdutf @@ -742,6 +753,13 @@ struct simdutf_riscv_hwprobe { #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17) #endif // SIMDUTF_IS_RISCV64 && defined(__linux__) +#if defined(__loongarch__) && defined(__linux__) + #include +// bits/hwcap.h +// #define HWCAP_LOONGARCH_LSX (1 << 4) +// #define HWCAP_LOONGARCH_LASX (1 << 5) +#endif + namespace simdutf { namespace internal { @@ -960,12 +978,6 @@ static inline uint32_t detect_supported_architectures() { return host_isa; } #elif defined(__loongarch__) - #if defined(__linux__) - #include - // bits/hwcap.h - // #define HWCAP_LOONGARCH_LSX (1 << 4) - // #define HWCAP_LOONGARCH_LASX (1 << 5) - #endif static inline uint32_t detect_supported_architectures() { uint32_t host_isa = instruction_set::DEFAULT; @@ -1002,6 +1014,23 @@ static inline uint32_t detect_supported_architectures() { #include #endif +// The following defines are conditionally enabled/disabled during amalgamation. +// By default all features are enabled, regular code shouldn't check them. Only +// when user code really relies of a selected subset, it's good to verify these +// flags, like: +// +// #if !SIMDUTF_FEATURE_UTF16 +// # error("Please amalgamate simdutf with UTF-16 support") +// #endif +// +#define SIMDUTF_FEATURE_DETECT_ENCODING 1 +#define SIMDUTF_FEATURE_ASCII 1 +#define SIMDUTF_FEATURE_LATIN1 1 +#define SIMDUTF_FEATURE_UTF8 1 +#define SIMDUTF_FEATURE_UTF16 1 +#define SIMDUTF_FEATURE_UTF32 1 +#define SIMDUTF_FEATURE_BASE64 1 + namespace simdutf { #if SIMDUTF_SPAN @@ -1051,6 +1080,7 @@ concept output_span_of_byte_like = requires(T &t) { } // namespace detail #endif +#if SIMDUTF_FEATURE_DETECT_ENCODING /** * Autodetect the encoding of the input, a single encoding is recommended. 
* E.g., the function might return simdutf::encoding_type::UTF8, @@ -1067,7 +1097,7 @@ simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const uint8_t *input, size_t length) noexcept { return autodetect_encoding(reinterpret_cast(input), length); } -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN /** * Autodetect the encoding of the input, a single encoding is recommended. * E.g., the function might return simdutf::encoding_type::UTF8, @@ -1085,7 +1115,7 @@ autodetect_encoding( return autodetect_encoding(reinterpret_cast(input.data()), input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Autodetect the possible encodings of the input in one pass. @@ -1104,14 +1134,16 @@ simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t *input, size_t length) noexcept { return detect_encodings(reinterpret_cast(input), length); } -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused int detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept { return detect_encodings(reinterpret_cast(input.data()), input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /** * Validate the UTF-8 string. This function may be best when you expect * the input to be almost always valid. Otherwise, consider using @@ -1124,14 +1156,16 @@ detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept { * @return true if and only if the string is valid UTF-8. 
*/ simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused bool validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept { return validate_utf8(reinterpret_cast(input.data()), input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 /** * Validate the UTF-8 string and stop on error. * @@ -1146,14 +1180,16 @@ validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept { */ simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result validate_utf8_with_errors( const detail::input_span_of_byte_like auto &input) noexcept { return validate_utf8_with_errors(reinterpret_cast(input.data()), input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII /** * Validate the ASCII string. * @@ -1164,13 +1200,13 @@ simdutf_really_inline simdutf_warn_unused result validate_utf8_with_errors( * @return true if and only if the string is valid ASCII. */ simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused bool validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept { return validate_ascii(reinterpret_cast(input.data()), input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Validate the ASCII string and stop on error. 
It might be faster than @@ -1187,14 +1223,16 @@ validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept { */ simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors( const detail::input_span_of_byte_like auto &input) noexcept { return validate_ascii_with_errors( reinterpret_cast(input.data()), input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 /** * Using native endianness; Validate the UTF-16 string. * This function may be best when you expect the input to be almost always @@ -1211,13 +1249,15 @@ simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors( */ simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused bool validate_utf16(std::span input) noexcept { return validate_utf16(input.data(), input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING /** * Validate the UTF-16LE string. This function may be best when you expect * the input to be almost always valid. Otherwise, consider using @@ -1234,13 +1274,15 @@ validate_utf16(std::span input) noexcept { */ simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused bool validate_utf16le(std::span input) noexcept { return validate_utf16le(input.data(), input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 /** * Validate the UTF-16BE string. This function may be best when you expect * the input to be almost always valid. 
Otherwise, consider using @@ -1257,12 +1299,12 @@ validate_utf16le(std::span input) noexcept { */ simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused bool validate_utf16be(std::span input) noexcept { return validate_utf16be(input.data(), input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Using native endianness; Validate the UTF-16 string and stop on error. @@ -1283,12 +1325,12 @@ validate_utf16be(std::span input) noexcept { */ simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result validate_utf16_with_errors(std::span input) noexcept { return validate_utf16_with_errors(input.data(), input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Validate the UTF-16LE string and stop on error. It might be faster than @@ -1308,12 +1350,12 @@ validate_utf16_with_errors(std::span input) noexcept { */ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result validate_utf16le_with_errors(std::span input) noexcept { return validate_utf16le_with_errors(input.data(), input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Validate the UTF-16BE string and stop on error. 
It might be faster than @@ -1333,13 +1375,15 @@ validate_utf16le_with_errors(std::span input) noexcept { */ simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result validate_utf16be_with_errors(std::span input) noexcept { return validate_utf16be_with_errors(input.data(), input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING /** * Validate the UTF-32 string. This function may be best when you expect * the input to be almost always valid. Otherwise, consider using @@ -1356,13 +1400,15 @@ validate_utf16be_with_errors(std::span input) noexcept { */ simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused bool validate_utf32(std::span input) noexcept { return validate_utf32(input.data(), input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 /** * Validate the UTF-32 string and stop on error. It might be faster than * validate_utf32 when an error is expected to occur early. @@ -1381,13 +1427,15 @@ validate_utf32(std::span input) noexcept { */ simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result validate_utf32_with_errors(std::span input) noexcept { return validate_utf32_with_errors(input.data(), input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /** * Convert Latin1 string into UTF8 string. 
* @@ -1401,7 +1449,7 @@ validate_utf32_with_errors(std::span input) noexcept { simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8( const detail::input_span_of_byte_like auto &latin1_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { @@ -1409,7 +1457,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8( reinterpret_cast(latin1_input.data()), latin1_input.size(), utf8_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert Latin1 string into UTF8 string with output limit. @@ -1425,7 +1473,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8( simdutf_warn_unused size_t convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output, size_t utf8_len) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe( const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { @@ -1439,8 +1487,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe( input.data(), input.size(), reinterpret_cast(utf8_output.data()), utf8_output.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Convert possibly Latin1 string into UTF-16LE string. 
* @@ -1453,7 +1503,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe( */ simdutf_warn_unused size_t convert_latin1_to_utf16le( const char *input, size_t length, char16_t *utf16_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le( const detail::input_span_of_byte_like auto &latin1_input, std::span utf16_output) noexcept { @@ -1461,7 +1511,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le( reinterpret_cast(latin1_input.data()), latin1_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert Latin1 string into UTF-16BE string. @@ -1475,15 +1525,36 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le( */ simdutf_warn_unused size_t convert_latin1_to_utf16be( const char *input, size_t length, char16_t *utf16_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input, std::span output) noexcept { return convert_latin1_to_utf16be(reinterpret_cast(input.data()), input.size(), output.data()); } -#endif + #endif // SIMDUTF_SPAN +/** + * Compute the number of bytes that this UTF-16 string would require in Latin1 + * format. + * + * @param length the length of the string in Latin1 code units (char) + * @return the length of the string in Latin1 code units (char) required to + * encode the UTF-16 string as Latin1 + */ +simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; +/** + * Compute the number of code units that this Latin1 string would require in + * UTF-16 format. 
+ * + * @param length the length of the string in Latin1 code units (char) + * @return the length of the string in 2-byte code units (char16_t) required to + * encode the Latin1 string as UTF-16 + */ +simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /** * Convert Latin1 string into UTF-32 string. * @@ -1496,7 +1567,7 @@ convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input, */ simdutf_warn_unused size_t convert_latin1_to_utf32( const char *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf32( const detail::input_span_of_byte_like auto &latin1_input, std::span utf32_output) noexcept { @@ -1504,8 +1575,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf32( reinterpret_cast(latin1_input.data()), latin1_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /** * Convert possibly broken UTF-8 string into latin1 string. 
* @@ -1521,7 +1594,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf32( simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1( const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&output) noexcept { @@ -1529,8 +1602,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1( input.size(), reinterpret_cast(output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Using native endianness, convert possibly broken UTF-8 string into a UTF-16 * string. @@ -1546,15 +1621,17 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1( */ simdutf_warn_unused size_t convert_utf8_to_utf16( const char *input, size_t length, char16_t *utf16_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input, std::span output) noexcept { return convert_utf8_to_utf16(reinterpret_cast(input.data()), input.size(), output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Using native endianness, convert a Latin1 string into a UTF-16 string. 
* @@ -1565,15 +1642,17 @@ convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input, */ simdutf_warn_unused size_t convert_latin1_to_utf16( const char *input, size_t length, char16_t *utf16_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input, std::span output) noexcept { return convert_latin1_to_utf16(reinterpret_cast(input.data()), input.size(), output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Convert possibly broken UTF-8 string into UTF-16LE string. * @@ -1588,7 +1667,7 @@ convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input, */ simdutf_warn_unused size_t convert_utf8_to_utf16le( const char *input, size_t length, char16_t *utf16_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input, std::span utf16_output) noexcept { @@ -1596,7 +1675,7 @@ convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input, reinterpret_cast(utf8_input.data()), utf8_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-8 string into UTF-16BE string. 
@@ -1612,7 +1691,7 @@ convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input, */ simdutf_warn_unused size_t convert_utf8_to_utf16be( const char *input, size_t length, char16_t *utf16_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input, std::span utf16_output) noexcept { @@ -1620,8 +1699,10 @@ convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input, reinterpret_cast(utf8_input.data()), utf8_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /** * Convert possibly broken UTF-8 string into latin1 string with errors. * If the string cannot be represented as Latin1, an error @@ -1640,7 +1721,7 @@ convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input, */ simdutf_warn_unused result convert_utf8_to_latin1_with_errors( const char *input, size_t length, char *latin1_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf8_to_latin1_with_errors( const detail::input_span_of_byte_like auto &utf8_input, @@ -1649,8 +1730,10 @@ convert_utf8_to_latin1_with_errors( reinterpret_cast(utf8_input.data()), utf8_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Using native endianness, convert possibly broken UTF-8 string into UTF-16 * string and stop on error. 
@@ -1668,7 +1751,7 @@ convert_utf8_to_latin1_with_errors( */ simdutf_warn_unused result convert_utf8_to_utf16_with_errors( const char *input, size_t length, char16_t *utf16_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf8_to_utf16_with_errors( const detail::input_span_of_byte_like auto &utf8_input, @@ -1677,7 +1760,7 @@ convert_utf8_to_utf16_with_errors( reinterpret_cast(utf8_input.data()), utf8_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error. @@ -1695,7 +1778,7 @@ convert_utf8_to_utf16_with_errors( */ simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( const char *input, size_t length, char16_t *utf16_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( const detail::input_span_of_byte_like auto &utf8_input, @@ -1704,7 +1787,7 @@ convert_utf8_to_utf16le_with_errors( reinterpret_cast(utf8_input.data()), utf8_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error. @@ -1722,7 +1805,7 @@ convert_utf8_to_utf16le_with_errors( */ simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( const char *input, size_t length, char16_t *utf16_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( const detail::input_span_of_byte_like auto &utf8_input, @@ -1731,8 +1814,10 @@ convert_utf8_to_utf16be_with_errors( reinterpret_cast(utf8_input.data()), utf8_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Convert possibly broken UTF-8 string into UTF-32 string. 
* @@ -1747,7 +1832,7 @@ convert_utf8_to_utf16be_with_errors( */ simdutf_warn_unused size_t convert_utf8_to_utf32( const char *input, size_t length, char32_t *utf32_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input, std::span utf32_output) noexcept { @@ -1755,7 +1840,7 @@ convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input, reinterpret_cast(utf8_input.data()), utf8_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-8 string into UTF-32 string and stop on error. @@ -1773,7 +1858,7 @@ convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input, */ simdutf_warn_unused result convert_utf8_to_utf32_with_errors( const char *input, size_t length, char32_t *utf32_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf8_to_utf32_with_errors( const detail::input_span_of_byte_like auto &utf8_input, @@ -1782,8 +1867,10 @@ convert_utf8_to_utf32_with_errors( reinterpret_cast(utf8_input.data()), utf8_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /** * Convert valid UTF-8 string into latin1 string. 
* @@ -1805,7 +1892,7 @@ convert_utf8_to_utf32_with_errors( */ simdutf_warn_unused size_t convert_valid_utf8_to_latin1( const char *input, size_t length, char *latin1_output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1( const detail::input_span_of_byte_like auto &valid_utf8_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { @@ -1813,8 +1900,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1( reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size(), latin1_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Using native endianness, convert valid UTF-8 string into a UTF-16 string. * @@ -1827,7 +1916,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1( */ simdutf_warn_unused size_t convert_valid_utf8_to_utf16( const char *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16( const detail::input_span_of_byte_like auto &valid_utf8_input, std::span utf16_output) noexcept { @@ -1835,7 +1924,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16( reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-8 string into UTF-16LE string. 
@@ -1849,7 +1938,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16( */ simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( const char *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( const detail::input_span_of_byte_like auto &valid_utf8_input, std::span utf16_output) noexcept { @@ -1857,7 +1946,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-8 string into UTF-16BE string. @@ -1871,7 +1960,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( */ simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( const char *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( const detail::input_span_of_byte_like auto &valid_utf8_input, std::span utf16_output) noexcept { @@ -1879,8 +1968,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Convert valid UTF-8 string into UTF-32 string. 
* @@ -1893,7 +1984,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( */ simdutf_warn_unused size_t convert_valid_utf8_to_utf32( const char *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32( const detail::input_span_of_byte_like auto &valid_utf8_input, std::span utf32_output) noexcept { @@ -1901,8 +1992,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32( reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /** * Return the number of bytes that this Latin1 string would require in UTF-8 * format. @@ -1913,13 +2006,13 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32( */ simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_latin1( const detail::input_span_of_byte_like auto &latin1_input) noexcept { return utf8_length_from_latin1( reinterpret_cast(latin1_input.data()), latin1_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Compute the number of bytes that this UTF-8 string would require in Latin1 @@ -1936,15 +2029,17 @@ simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_latin1( */ simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t latin1_length_from_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { return latin1_length_from_utf8( reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Compute the number of 2-byte code units that this UTF-8 string would require * in UTF-16LE format. @@ -1961,15 +2056,17 @@ simdutf_really_inline simdutf_warn_unused size_t latin1_length_from_utf8( */ simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { return utf16_length_from_utf8( reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Compute the number of 4-byte code units that this UTF-8 string would require * in UTF-32 format. @@ -1988,15 +2085,17 @@ simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf8( */ simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { return utf32_length_from_utf8( reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Using native endianness, convert possibly broken UTF-16 string into UTF-8 * string. 
@@ -2015,15 +2114,17 @@ simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf8( simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Using native endianness, convert possibly broken UTF-16 string into Latin1 * string. @@ -2041,7 +2142,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8( */ simdutf_warn_unused size_t convert_utf16_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1( std::span utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { @@ -2049,7 +2150,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1( utf16_input.data(), utf16_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16LE string into Latin1 string. 
@@ -2069,7 +2170,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1( */ simdutf_warn_unused size_t convert_utf16le_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1( std::span utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { @@ -2077,7 +2178,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1( utf16_input.data(), utf16_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16BE string into Latin1 string. @@ -2095,7 +2196,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1( */ simdutf_warn_unused size_t convert_utf16be_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_latin1( std::span utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { @@ -2103,8 +2204,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_latin1( utf16_input.data(), utf16_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Convert possibly broken UTF-16LE string into UTF-8 string. 
* @@ -2122,14 +2225,14 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_latin1( simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_utf8( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { return convert_utf16le_to_utf8(utf16_input.data(), utf16_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16BE string into UTF-8 string. @@ -2148,15 +2251,17 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_utf8( simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf8( std::span utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { return convert_utf16be_to_utf8(utf16_input.data(), utf16_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Using native endianness, convert possibly broken UTF-16 string into Latin1 * string. 
@@ -2175,7 +2280,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf8( */ simdutf_warn_unused result convert_utf16_to_latin1_with_errors( const char16_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf16_to_latin1_with_errors( std::span utf16_input, @@ -2184,7 +2289,7 @@ convert_utf16_to_latin1_with_errors( utf16_input.data(), utf16_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16LE string into Latin1 string. @@ -2203,7 +2308,7 @@ convert_utf16_to_latin1_with_errors( */ simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( const char16_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( std::span utf16_input, @@ -2212,7 +2317,7 @@ convert_utf16le_to_latin1_with_errors( utf16_input.data(), utf16_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16BE string into Latin1 string. @@ -2233,7 +2338,7 @@ convert_utf16le_to_latin1_with_errors( */ simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( const char16_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( std::span utf16_input, @@ -2242,8 +2347,10 @@ convert_utf16be_to_latin1_with_errors( utf16_input.data(), utf16_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Using native endianness, convert possibly broken UTF-16 string into UTF-8 * string and stop on error. 
@@ -2263,7 +2370,7 @@ convert_utf16be_to_latin1_with_errors( */ simdutf_warn_unused result convert_utf16_to_utf8_with_errors( const char16_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf16_to_utf8_with_errors( std::span utf16_input, @@ -2272,7 +2379,7 @@ convert_utf16_to_utf8_with_errors( utf16_input.data(), utf16_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error. @@ -2292,7 +2399,7 @@ convert_utf16_to_utf8_with_errors( */ simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( const char16_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( std::span utf16_input, @@ -2301,7 +2408,7 @@ convert_utf16le_to_utf8_with_errors( utf16_input.data(), utf16_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error. @@ -2321,7 +2428,7 @@ convert_utf16le_to_utf8_with_errors( */ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( const char16_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( std::span utf16_input, @@ -2330,7 +2437,7 @@ convert_utf16be_to_utf8_with_errors( utf16_input.data(), utf16_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Using native endianness, convert valid UTF-16 string into UTF-8 string. 
@@ -2341,13 +2448,13 @@ convert_utf16be_to_utf8_with_errors( * * @param input the UTF-16 string to convert * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion + * @param utf8_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf16_to_utf8( const char16_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { @@ -2355,8 +2462,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8( valid_utf16_input.data(), valid_utf16_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Using native endianness, convert UTF-16 string into Latin1 string. * @@ -2378,7 +2487,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8( */ simdutf_warn_unused size_t convert_valid_utf16_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { @@ -2386,7 +2495,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1( valid_utf16_input.data(), valid_utf16_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-16LE string into Latin1 string. 
@@ -2409,7 +2518,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1( */ simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( std::span valid_utf16_input, @@ -2418,7 +2527,7 @@ convert_valid_utf16le_to_latin1( valid_utf16_input.data(), valid_utf16_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-16BE string into Latin1 string. @@ -2441,7 +2550,7 @@ convert_valid_utf16le_to_latin1( */ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( std::span valid_utf16_input, @@ -2450,8 +2559,10 @@ convert_valid_utf16be_to_latin1( valid_utf16_input.data(), valid_utf16_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Convert valid UTF-16LE string into UTF-8 string. 
* @@ -2462,13 +2573,13 @@ convert_valid_utf16be_to_latin1( * * @param input the UTF-16LE string to convert * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion + * @param utf8_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( const char16_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { @@ -2476,7 +2587,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( valid_utf16_input.data(), valid_utf16_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-16BE string into UTF-8 string. 
@@ -2487,13 +2598,13 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( * * @param input the UTF-16BE string to convert * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion + * @param utf8_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( const char16_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( std::span valid_utf16_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { @@ -2501,8 +2612,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( valid_utf16_input.data(), valid_utf16_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /** * Using native endianness, convert possibly broken UTF-16 string into UTF-32 * string. @@ -2520,14 +2633,14 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( */ simdutf_warn_unused size_t convert_utf16_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf32(std::span utf16_input, std::span utf32_output) noexcept { return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16LE string into UTF-32 string. 
@@ -2545,14 +2658,14 @@ convert_utf16_to_utf32(std::span utf16_input, */ simdutf_warn_unused size_t convert_utf16le_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_utf32(std::span utf16_input, std::span utf32_output) noexcept { return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16BE string into UTF-32 string. @@ -2570,14 +2683,14 @@ convert_utf16le_to_utf32(std::span utf16_input, */ simdutf_warn_unused size_t convert_utf16be_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf32(std::span utf16_input, std::span utf32_output) noexcept { return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Using native endianness, convert possibly broken UTF-16 string into @@ -2598,14 +2711,14 @@ convert_utf16be_to_utf32(std::span utf16_input, */ simdutf_warn_unused result convert_utf16_to_utf32_with_errors( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf16_to_utf32_with_errors(std::span utf16_input, std::span utf32_output) noexcept { return convert_utf16_to_utf32_with_errors( utf16_input.data(), utf16_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error. 
@@ -2625,7 +2738,7 @@ convert_utf16_to_utf32_with_errors(std::span utf16_input, */ simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( std::span utf16_input, @@ -2633,7 +2746,7 @@ convert_utf16le_to_utf32_with_errors( return convert_utf16le_to_utf32_with_errors( utf16_input.data(), utf16_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error. @@ -2653,7 +2766,7 @@ convert_utf16le_to_utf32_with_errors( */ simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( std::span utf16_input, @@ -2661,7 +2774,7 @@ convert_utf16be_to_utf32_with_errors( return convert_utf16be_to_utf32_with_errors( utf16_input.data(), utf16_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Using native endianness, convert valid UTF-16 string into UTF-32 string. 
@@ -2673,20 +2786,20 @@ convert_utf16be_to_utf32_with_errors( * * @param input the UTF-16 string to convert * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to buffer that can hold the conversion + * @param utf32_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf16_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf32(std::span valid_utf16_input, std::span utf32_output) noexcept { return convert_valid_utf16_to_utf32( valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-16LE string into UTF-32 string. @@ -2697,20 +2810,20 @@ convert_valid_utf16_to_utf32(std::span valid_utf16_input, * * @param input the UTF-16LE string to convert * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to buffer that can hold the conversion + * @param utf32_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(std::span valid_utf16_input, std::span utf32_output) noexcept { return convert_valid_utf16le_to_utf32( valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-16BE string into UTF-32 string. 
@@ -2721,21 +2834,23 @@ convert_valid_utf16le_to_utf32(std::span valid_utf16_input, * * @param input the UTF-16BE string to convert * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to buffer that can hold the conversion + * @param utf32_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(std::span valid_utf16_input, std::span utf32_output) noexcept { return convert_valid_utf16be_to_utf32( valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Compute the number of bytes that this UTF-16LE/BE string would require in * Latin1 format. @@ -2763,14 +2878,16 @@ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; */ simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_utf16(std::span valid_utf16_input) noexcept { return utf8_length_from_utf16(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Compute the number of bytes that this UTF-16LE string would require in UTF-8 * format. 
@@ -2784,13 +2901,13 @@ utf8_length_from_utf16(std::span valid_utf16_input) noexcept { */ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_utf16le(std::span valid_utf16_input) noexcept { return utf8_length_from_utf16le(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Compute the number of bytes that this UTF-16BE string would require in UTF-8 @@ -2805,14 +2922,16 @@ utf8_length_from_utf16le(std::span valid_utf16_input) noexcept { */ simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_utf16be(std::span valid_utf16_input) noexcept { return utf8_length_from_utf16be(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Convert possibly broken UTF-32 string into UTF-8 string. * @@ -2829,14 +2948,14 @@ utf8_length_from_utf16be(std::span valid_utf16_input) noexcept { simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf8( std::span utf32_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-32 string into UTF-8 string and stop on error. 
@@ -2856,7 +2975,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf8( */ simdutf_warn_unused result convert_utf32_to_utf8_with_errors( const char32_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf32_to_utf8_with_errors( std::span utf32_input, @@ -2865,7 +2984,7 @@ convert_utf32_to_utf8_with_errors( utf32_input.data(), utf32_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-32 string into UTF-8 string. @@ -2876,13 +2995,13 @@ convert_utf32_to_utf8_with_errors( * * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion + * @param utf8_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf32_to_utf8( const char32_t *input, size_t length, char *utf8_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8( std::span valid_utf32_input, detail::output_span_of_byte_like auto &&utf8_output) noexcept { @@ -2890,8 +3009,10 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8( valid_utf32_input.data(), valid_utf32_input.size(), reinterpret_cast(utf8_output.data())); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /** * Using native endianness, convert possibly broken UTF-32 string into a UTF-16 * string. 
@@ -2908,14 +3029,14 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8( */ simdutf_warn_unused size_t convert_utf32_to_utf16( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf16(std::span utf32_input, std::span utf16_output) noexcept { return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-32 string into UTF-16LE string. @@ -2932,15 +3053,17 @@ convert_utf32_to_utf16(std::span utf32_input, */ simdutf_warn_unused size_t convert_utf32_to_utf16le( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf16le(std::span utf32_input, std::span utf16_output) noexcept { return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /** * Convert possibly broken UTF-32 string into Latin1 string. * @@ -2957,7 +3080,7 @@ convert_utf32_to_utf16le(std::span utf32_input, */ simdutf_warn_unused size_t convert_utf32_to_latin1( const char32_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1( std::span utf32_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { @@ -2965,7 +3088,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1( utf32_input.data(), utf32_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-32 string into Latin1 string and stop on error. 
@@ -2986,7 +3109,7 @@ simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1( */ simdutf_warn_unused result convert_utf32_to_latin1_with_errors( const char32_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf32_to_latin1_with_errors( std::span utf32_input, @@ -2995,7 +3118,7 @@ convert_utf32_to_latin1_with_errors( utf32_input.data(), utf32_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-32 string into Latin1 string. @@ -3013,13 +3136,13 @@ convert_utf32_to_latin1_with_errors( * * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) - * @param latin1_buffer the pointer to buffer that can hold the conversion + * @param latin1_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf32_to_latin1( const char32_t *input, size_t length, char *latin1_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1( std::span valid_utf32_input, detail::output_span_of_byte_like auto &&latin1_output) noexcept { @@ -3027,8 +3150,34 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1( valid_utf32_input.data(), valid_utf32_input.size(), reinterpret_cast(latin1_output.data())); } -#endif + #endif // SIMDUTF_SPAN +/** + * Compute the number of bytes that this UTF-32 string would require in Latin1 + * format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-32 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. 
+ * + * @param length the length of the string in 4-byte code units (char32_t) + * @return the number of bytes required to encode the UTF-32 string as Latin1 + */ +simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept; + +/** + * Compute the number of bytes that this Latin1 string would require in UTF-32 + * format. + * + * @param length the length of the string in Latin1 code units (char) + * @return the length of the string in 4-byte code units (char32_t) required to + * encode the Latin1 string as UTF-32 + */ +simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /** * Convert possibly broken UTF-32 string into UTF-16BE string. * @@ -3044,14 +3193,14 @@ simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1( */ simdutf_warn_unused size_t convert_utf32_to_utf16be( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf16be(std::span utf32_input, std::span utf16_output) noexcept { return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Using native endianness, convert possibly broken UTF-32 string into UTF-16 @@ -3072,14 +3221,14 @@ convert_utf32_to_utf16be(std::span utf32_input, */ simdutf_warn_unused result convert_utf32_to_utf16_with_errors( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf32_to_utf16_with_errors(std::span utf32_input, std::span utf16_output) noexcept { return convert_utf32_to_utf16_with_errors( utf32_input.data(), utf32_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-32 string into UTF-16LE 
string and stop on error. @@ -3099,7 +3248,7 @@ convert_utf32_to_utf16_with_errors(std::span utf32_input, */ simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( std::span utf32_input, @@ -3107,7 +3256,7 @@ convert_utf32_to_utf16le_with_errors( return convert_utf32_to_utf16le_with_errors( utf32_input.data(), utf32_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error. @@ -3127,7 +3276,7 @@ convert_utf32_to_utf16le_with_errors( */ simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( std::span utf32_input, @@ -3135,7 +3284,7 @@ convert_utf32_to_utf16be_with_errors( return convert_utf32_to_utf16be_with_errors( utf32_input.data(), utf32_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Using native endianness, convert valid UTF-32 string into a UTF-16 string. 
@@ -3146,20 +3295,20 @@ convert_utf32_to_utf16be_with_errors( * * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to buffer that can hold the conversion + * @param utf16_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf32_to_utf16( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf16(std::span valid_utf32_input, std::span utf16_output) noexcept { return convert_valid_utf32_to_utf16( valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-32 string into UTF-16LE string. @@ -3170,20 +3319,20 @@ convert_valid_utf32_to_utf16(std::span valid_utf32_input, * * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to buffer that can hold the conversion + * @param utf16_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(std::span valid_utf32_input, std::span utf16_output) noexcept { return convert_valid_utf32_to_utf16le( valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert valid UTF-32 string into UTF-16BE string. 
@@ -3194,21 +3343,23 @@ convert_valid_utf32_to_utf16le(std::span valid_utf32_input, * * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to buffer that can hold the conversion + * @param utf16_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(std::span valid_utf32_input, std::span utf16_output) noexcept { return convert_valid_utf32_to_utf16be( valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 /** * Change the endianness of the input. Can be used to go from UTF-16LE to * UTF-16BE or from UTF-16BE to UTF-16LE. 
@@ -3219,20 +3370,22 @@ convert_valid_utf32_to_utf16be(std::span valid_utf32_input, * * @param input the UTF-16 string to process * @param length the length of the string in 2-byte code units (char16_t) - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result */ void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline void change_endianness_utf16(std::span utf16_input, std::span utf16_output) noexcept { return change_endianness_utf16(utf16_input.data(), utf16_input.size(), utf16_output.data()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Compute the number of bytes that this UTF-32 string would require in UTF-8 * format. @@ -3246,14 +3399,16 @@ change_endianness_utf16(std::span utf16_input, */ simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_utf32(std::span valid_utf32_input) noexcept { return utf8_length_from_utf32(valid_utf32_input.data(), valid_utf32_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /** * Compute the number of two-byte code units that this UTF-32 string would * require in UTF-16 format. 
@@ -3267,13 +3422,13 @@ utf8_length_from_utf32(std::span valid_utf32_input) noexcept { */ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf32(std::span valid_utf32_input) noexcept { return utf16_length_from_utf32(valid_utf32_input.data(), valid_utf32_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Using native endianness; Compute the number of bytes that this UTF-16 @@ -3292,13 +3447,13 @@ utf16_length_from_utf32(std::span valid_utf32_input) noexcept { */ simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16(std::span valid_utf16_input) noexcept { return utf32_length_from_utf16(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Compute the number of bytes that this UTF-16LE string would require in UTF-32 @@ -3317,13 +3472,13 @@ utf32_length_from_utf16(std::span valid_utf16_input) noexcept { */ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16le( std::span valid_utf16_input) noexcept { return utf32_length_from_utf16le(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Compute the number of bytes that this UTF-16BE string would require in UTF-32 @@ -3342,14 +3497,16 @@ simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16le( */ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16be( std::span valid_utf16_input) noexcept { return 
utf32_length_from_utf16be(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 /** * Count the number of code points (characters) in the string assuming that * it is valid. @@ -3366,12 +3523,12 @@ simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16be( */ simdutf_warn_unused size_t count_utf16(const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t count_utf16(std::span valid_utf16_input) noexcept { return count_utf16(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Count the number of code points (characters) in the string assuming that @@ -3389,12 +3546,12 @@ count_utf16(std::span valid_utf16_input) noexcept { */ simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t count_utf16le(std::span valid_utf16_input) noexcept { return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Count the number of code points (characters) in the string assuming that @@ -3412,13 +3569,15 @@ count_utf16le(std::span valid_utf16_input) noexcept { */ simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t count_utf16be(std::span valid_utf16_input) noexcept { return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 /** * Count the number of code points (characters) in the string assuming that * it is valid. 
@@ -3433,13 +3592,13 @@ count_utf16be(std::span valid_utf16_input) noexcept { */ simdutf_warn_unused size_t count_utf8(const char *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t count_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { return count_utf8(reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Given a valid UTF-8 string having a possibly truncated last character, @@ -3456,15 +3615,17 @@ simdutf_really_inline simdutf_warn_unused size_t count_utf8( * @return the length of the string in bytes, possibly shorter by 1 to 3 bytes */ simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length); -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf8( const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { return trim_partial_utf8( reinterpret_cast(valid_utf8_input.data()), valid_utf8_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF16 /** * Given a valid UTF-16BE string having a possibly truncated last character, * this function checks the end of string. 
If the last character is truncated @@ -3481,13 +3642,13 @@ simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf8( */ simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, size_t length); -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf16be(std::span valid_utf16_input) noexcept { return trim_partial_utf16be(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Given a valid UTF-16LE string having a possibly truncated last character, @@ -3505,13 +3666,13 @@ trim_partial_utf16be(std::span valid_utf16_input) noexcept { */ simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, size_t length); -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf16le(std::span valid_utf16_input) noexcept { return trim_partial_utf16le(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Given a valid UTF-16 string having a possibly truncated last character, @@ -3529,13 +3690,18 @@ trim_partial_utf16le(std::span valid_utf16_input) noexcept { */ simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, size_t length); -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf16(std::span valid_utf16_input) noexcept { return trim_partial_utf16(valid_utf16_input.data(), valid_utf16_input.size()); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_BASE64 + #ifndef SIMDUTF_NEED_TRAILING_ZEROES + #define SIMDUTF_NEED_TRAILING_ZEROES 1 + #endif // base64_options are used to specify the base64 encoding options. 
// ASCII spaces are ' ', '\t', '\n', '\r', '\f' // garbage characters are characters that are not part of the base64 alphabet @@ -3577,14 +3743,14 @@ enum last_chunk_handling_options : uint64_t { */ simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t maximal_binary_length_from_base64( const detail::input_span_of_byte_like auto &input) noexcept { return maximal_binary_length_from_base64( reinterpret_cast(input.data()), input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Provide the maximal binary length in bytes given the base64 input. @@ -3598,12 +3764,12 @@ maximal_binary_length_from_base64( */ simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t maximal_binary_length_from_base64(std::span input) noexcept { return maximal_binary_length_from_base64(input.data(), input.size()); } -#endif + #endif // SIMDUTF_SPAN /** * Convert a base64 input to a binary output. @@ -3646,7 +3812,7 @@ maximal_binary_length_from_base64(std::span input) noexcept { * * @param input the base64 string to process * @param length the length of the string in bytes - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result (should be at least maximal_binary_length_from_base64(input, length) * bytes long). 
* @param options the base64 options to use, usually base64_default or @@ -3663,7 +3829,7 @@ simdutf_warn_unused result base64_to_binary( const char *input, size_t length, char *output, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = loose) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result base64_to_binary( const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&binary_output, @@ -3674,7 +3840,7 @@ simdutf_really_inline simdutf_warn_unused result base64_to_binary( reinterpret_cast(binary_output.data()), options, last_chunk_options); } -#endif + #endif // SIMDUTF_SPAN /** * Provide the base64 length in bytes given the length of a binary input. @@ -3699,7 +3865,7 @@ simdutf_warn_unused size_t base64_length_from_binary( * * @param input the binary to process * @param length the length of the input in bytes - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result (should be at least base64_length_from_binary(length) bytes long) * @param options the base64 options to use, can be base64_default or * base64_url, is base64_default by default. @@ -3708,7 +3874,7 @@ simdutf_warn_unused size_t base64_length_from_binary( */ size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options = base64_default) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused size_t binary_to_base64(const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&binary_output, @@ -3717,7 +3883,58 @@ binary_to_base64(const detail::input_span_of_byte_like auto &input, reinterpret_cast(input.data()), input.size(), reinterpret_cast(binary_output.data()), options); } -#endif + #endif // SIMDUTF_SPAN + + #if SIMDUTF_ATOMIC_REF +/** + * Convert a binary input to a base64 output, using atomic accesses. 
+ * This function comes with a potentially significant performance + * penalty, but it may be useful in some cases where the input and + * output buffers are shared between threads, to avoid undefined + * behavior in case of data races. + * + * The function is for advanced users. Its main use case is + * to silence sanitizer warnings. We have no documented use case + * where this function is actually necessary in terms of practical correctness. + * + * This function is only available when simdutf is compiled with + * C++20 support and __cpp_lib_atomic_ref >= 201806L. You may check + * the availability of this function by checking the macro + * SIMDUTF_ATOMIC_REF. + * + * The default option (simdutf::base64_default) uses the characters `+` and `/` + * as part of its alphabet. Further, it adds padding (`=`) at the end of the + * output to ensure that the output length is a multiple of four. + * + * The URL option (simdutf::base64_url) uses the characters `-` and `_` as part + * of its alphabet. No padding is added at the end of the output. + * + * This function always succeeds. + * + * @brief atomic_binary_to_base64 + * @param input the binary to process + * @param length the length of the input in bytes + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least base64_length_from_binary(length) bytes long) + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default.
+ * @return number of written bytes, will be equal to + * base64_length_from_binary(length, options) + */ +size_t +atomic_binary_to_base64(const char *input, size_t length, char *output, + base64_options options = base64_default) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +atomic_binary_to_base64(const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default) noexcept { + return atomic_binary_to_base64( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(binary_output.data()), options); +} + #endif // SIMDUTF_SPAN + #endif // SIMDUTF_ATOMIC_REF /** * Convert a base64 input to a binary output. @@ -3761,7 +3978,7 @@ binary_to_base64(const detail::input_span_of_byte_like auto &input, * @param input the base64 string to process, in ASCII stored as 16-bit * units * @param length the length of the string in 16-bit units - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result (should be at least maximal_binary_length_from_base64(input, length) * bytes long). * @param options the base64 options to use, can be base64_default or @@ -3780,7 +3997,7 @@ base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result base64_to_binary( std::span input, detail::output_span_of_byte_like auto &&binary_output, @@ -3790,7 +4007,7 @@ simdutf_really_inline simdutf_warn_unused result base64_to_binary( reinterpret_cast(binary_output.data()), options, last_chunk_options); } -#endif + #endif // SIMDUTF_SPAN /** * Convert a base64 input to a binary output. 
@@ -3838,7 +4055,7 @@ simdutf_really_inline simdutf_warn_unused result base64_to_binary( * @param input the base64 string to process, in ASCII stored as 8-bit * or 16-bit units * @param length the length of the string in 8-bit or 16-bit units. - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result. * @param outlen the number of bytes that can be written in the output * buffer. Upon return, it is modified to reflect how many bytes were written. @@ -3858,7 +4075,7 @@ base64_to_binary_safe(const char *input, size_t length, char *output, size_t &outlen, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe( const detail::input_span_of_byte_like auto &input, detail::output_span_of_byte_like auto &&binary_output, @@ -3873,14 +4090,14 @@ simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe( reinterpret_cast(binary_output.data()), outlen, options, last_chunk_options); } -#endif + #endif // SIMDUTF_SPAN simdutf_warn_unused result base64_to_binary_safe(const char16_t *input, size_t length, char *output, size_t &outlen, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) noexcept; -#if SIMDUTF_SPAN + #if SIMDUTF_SPAN simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe( std::span input, detail::output_span_of_byte_like auto &&binary_output, @@ -3894,7 +4111,8 @@ simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe( reinterpret_cast(binary_output.data()), outlen, options, last_chunk_options); } -#endif + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_BASE64 /** * An implementation of simdutf for a particular CPU architecture. 
@@ -3938,6 +4156,7 @@ public: */ bool supported_by_runtime_system() const; +#if SIMDUTF_FEATURE_DETECT_ENCODING /** * This function will try to detect the encoding * @param input the string to identify @@ -3955,6 +4174,7 @@ public: */ virtual int detect_encodings(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING /** * @private For internal implementation use @@ -3967,6 +4187,7 @@ public: return _required_instruction_sets; } +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING /** * Validate the UTF-8 string. * @@ -3978,7 +4199,9 @@ public: */ simdutf_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 /** * Validate the UTF-8 string and stop on errors. * @@ -3993,7 +4216,9 @@ public: */ simdutf_warn_unused virtual result validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII /** * Validate the ASCII string. * @@ -4020,7 +4245,9 @@ public: */ simdutf_warn_unused virtual result validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING /** * Validate the UTF-16LE string.This function may be best when you expect * the input to be almost always valid. Otherwise, consider using @@ -4037,7 +4264,9 @@ public: */ simdutf_warn_unused virtual bool validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 /** * Validate the UTF-16BE string. This function may be best when you expect * the input to be almost always valid. 
Otherwise, consider using @@ -4094,7 +4323,9 @@ public: simdutf_warn_unused virtual result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING /** * Validate the UTF-32 string. * @@ -4109,7 +4340,9 @@ public: */ simdutf_warn_unused virtual bool validate_utf32(const char32_t *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 /** * Validate the UTF-32 string and stop on error. * @@ -4128,7 +4361,9 @@ public: simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /** * Convert Latin1 string into UTF8 string. * @@ -4142,7 +4377,9 @@ public: simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Convert possibly Latin1 string into UTF-16LE string. * @@ -4170,7 +4407,9 @@ public: simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /** * Convert Latin1 string into UTF-32 string. * @@ -4184,7 +4423,9 @@ public: simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /** * Convert possibly broken UTF-8 string into latin1 string. 
* @@ -4243,7 +4484,9 @@ public: simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Convert possibly broken UTF-8 string into UTF-16LE string. * @@ -4313,7 +4556,9 @@ public: simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors( const char *input, size_t length, char16_t *utf16_output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Convert possibly broken UTF-8 string into UTF-32 string. * @@ -4347,7 +4592,9 @@ public: simdutf_warn_unused virtual result convert_utf8_to_utf32_with_errors(const char *input, size_t length, char32_t *utf32_output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Convert valid UTF-8 string into UTF-16LE string. * @@ -4375,7 +4622,9 @@ public: simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Convert valid UTF-8 string into UTF-32 string. * @@ -4389,7 +4638,9 @@ public: simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Compute the number of 2-byte code units that this UTF-8 string would * require in UTF-16LE format. 
@@ -4404,7 +4655,9 @@ public: */ simdutf_warn_unused virtual size_t utf16_length_from_utf8(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Compute the number of 4-byte code units that this UTF-8 string would * require in UTF-32 format. @@ -4421,7 +4674,9 @@ public: */ simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Convert possibly broken UTF-16LE string into Latin1 string. * @@ -4555,7 +4810,9 @@ public: simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Convert possibly broken UTF-16LE string into UTF-8 string. 
* @@ -4648,7 +4905,7 @@ public: * @param input the UTF-16LE string to convert * @param length the length of the string in 2-byte code units * (char16_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion + * @param utf8_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ @@ -4666,14 +4923,16 @@ public: * @param input the UTF-16BE string to convert * @param length the length of the string in 2-byte code units * (char16_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion + * @param utf8_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /** * Convert possibly broken UTF-16LE string into UTF-32 string. 
* @@ -4766,7 +5025,7 @@ public: * @param input the UTF-16LE string to convert * @param length the length of the string in 2-byte code units * (char16_t) - * @param utf32_buffer the pointer to buffer that can hold the conversion + * @param utf32_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ @@ -4784,14 +5043,16 @@ public: * @param input the UTF-16BE string to convert * @param length the length of the string in 2-byte code units * (char16_t) - * @param utf32_buffer the pointer to buffer that can hold the conversion + * @param utf32_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 /** * Compute the number of bytes that this UTF-16LE string would require in * UTF-8 format. @@ -4827,7 +5088,9 @@ public: simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /** * Convert possibly broken UTF-32 string into Latin1 string. * @@ -4844,11 +5107,12 @@ public: * @return number of written code units; 0 if input is not a valid UTF-32 * string */ - simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /** * Convert possibly broken UTF-32 string into Latin1 string and stop on error. * If the string cannot be represented as Latin1, an error is returned. 
@@ -4888,14 +5152,16 @@ public: * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units * (char32_t) - * @param latin1_buffer the pointer to buffer that can hold the conversion + * @param latin1_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Convert possibly broken UTF-32 string into UTF-8 string. * @@ -4946,14 +5212,16 @@ public: * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units * (char32_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion + * @param utf8_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Return the number of bytes that this UTF-16 string would require in Latin1 * format. @@ -4965,8 +5233,12 @@ public: * @return the number of bytes required to encode the UTF-16 string as Latin1 */ simdutf_warn_unused virtual size_t - utf16_length_from_latin1(size_t length) const noexcept = 0; + utf16_length_from_latin1(size_t length) const noexcept { + return length; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /** * Convert possibly broken UTF-32 string into UTF-16LE string. 
* @@ -5059,7 +5331,7 @@ public: * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units * (char32_t) - * @param utf16_buffer the pointer to buffer that can hold the conversion + * @param utf16_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ @@ -5077,14 +5349,16 @@ public: * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units * (char32_t) - * @param utf16_buffer the pointer to buffer that can hold the conversion + * @param utf16_buffer the pointer to a buffer that can hold the conversion * result * @return number of written code units; 0 if conversion is not possible */ simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 /** * Change the endianness of the input. Can be used to go from UTF-16LE to * UTF-16BE or from UTF-16BE to UTF-16LE. @@ -5096,12 +5370,14 @@ public: * @param input the UTF-16 string to process * @param length the length of the string in 2-byte code units * (char16_t) - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result */ virtual void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /** * Return the number of bytes that this Latin1 string would require in UTF-8 * format. 
@@ -5112,7 +5388,9 @@ public: */ simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 /** * Compute the number of bytes that this UTF-32 string would require in UTF-8 * format. @@ -5128,7 +5406,9 @@ public: simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /** * Compute the number of bytes that this UTF-32 string would require in Latin1 * format. @@ -5141,8 +5421,12 @@ public: * @return the number of bytes required to encode the UTF-32 string as Latin1 */ simdutf_warn_unused virtual size_t - latin1_length_from_utf32(size_t length) const noexcept = 0; + latin1_length_from_utf32(size_t length) const noexcept { + return length; + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 /** * Compute the number of bytes that this UTF-8 string would require in Latin1 * format. @@ -5156,7 +5440,9 @@ public: */ simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 /** * Compute the number of bytes that this UTF-16LE/BE string would require in * Latin1 format. @@ -5173,8 +5459,12 @@ public: * Latin1 */ simdutf_warn_unused virtual size_t - latin1_length_from_utf16(size_t length) const noexcept = 0; + latin1_length_from_utf16(size_t length) const noexcept { + return length; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /** * Compute the number of two-byte code units that this UTF-32 string would * require in UTF-16 format. 
@@ -5190,19 +5480,24 @@ public: simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 /** * Return the number of bytes that this UTF-32 string would require in Latin1 * format. * - * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units * (char32_t) * @return the number of bytes required to encode the UTF-32 string as Latin1 */ simdutf_warn_unused virtual size_t - utf32_length_from_latin1(size_t length) const noexcept = 0; + utf32_length_from_latin1(size_t length) const noexcept { + return length; + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 /** * Compute the number of bytes that this UTF-16LE string would require in * UTF-32 format. @@ -5244,7 +5539,9 @@ public: simdutf_warn_unused virtual size_t utf32_length_from_utf16be(const char16_t *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 /** * Count the number of code points (characters) in the string assuming that * it is valid. @@ -5280,7 +5577,9 @@ public: */ simdutf_warn_unused virtual size_t count_utf16be(const char16_t *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 /** * Count the number of code points (characters) in the string assuming that * it is valid. @@ -5295,7 +5594,9 @@ public: */ simdutf_warn_unused virtual size_t count_utf8(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_BASE64 /** * Provide the maximal binary length in bytes given the base64 input. 
* In general, if the input contains ASCII spaces, the result will be less @@ -5306,9 +5607,8 @@ public: * @param length the length of the base64 input in bytes * @return maximal number of binary bytes */ - simdutf_warn_unused virtual size_t - maximal_binary_length_from_base64(const char *input, - size_t length) const noexcept = 0; + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept; /** * Provide the maximal binary length in bytes given the base64 input. @@ -5321,9 +5621,8 @@ public: * @param length the length of the base64 input in 16-bit units * @return maximal number of binary bytes */ - simdutf_warn_unused virtual size_t - maximal_binary_length_from_base64(const char16_t *input, - size_t length) const noexcept = 0; + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept; /** * Convert a base64 input to a binary output. @@ -5347,7 +5646,7 @@ public: * * @param input the base64 string to process * @param length the length of the string in bytes - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result (should be at least maximal_binary_length_from_base64(input, length) * bytes long). * @param options the base64 options to use, can be base64_default or @@ -5386,7 +5685,7 @@ public: * * @param input the base64 string to process * @param length the length of the string in bytes - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result (should be at least maximal_binary_length_from_base64(input, length) * bytes long). 
* @param options the base64 options to use, can be base64_default or @@ -5422,7 +5721,7 @@ public: * @param input the base64 string to process, in ASCII stored as * 16-bit units * @param length the length of the string in 16-bit units - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result (should be at least maximal_binary_length_from_base64(input, length) * bytes long). * @param options the base64 options to use, can be base64_default or @@ -5461,7 +5760,7 @@ public: * * @param input the base64 string to process * @param length the length of the string in bytes - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result (should be at least maximal_binary_length_from_base64(input, length) * bytes long). * @param options the base64 options to use, can be base64_default or @@ -5482,9 +5781,8 @@ public: * base64_url, is base64_default by default. * @return number of base64 bytes */ - simdutf_warn_unused virtual size_t base64_length_from_binary( - size_t length, - base64_options options = base64_default) const noexcept = 0; + simdutf_warn_unused size_t base64_length_from_binary( + size_t length, base64_options options = base64_default) const noexcept; /** * Convert a binary input to a base64 output. @@ -5500,7 +5798,7 @@ public: * * @param input the binary to process * @param length the length of the input in bytes - * @param output the pointer to buffer that can hold the conversion + * @param output the pointer to a buffer that can hold the conversion * result (should be at least base64_length_from_binary(length) bytes long) * @param options the base64 options to use, can be base64_default or * base64_url, is base64_default by default. 
@@ -5510,6 +5808,27 @@ public: virtual size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options = base64_default) const noexcept = 0; +#endif // SIMDUTF_FEATURE_BASE64 + +#ifdef SIMDUTF_INTERNAL_TESTS + // This method is exported only in developer mode, its purpose + // is to expose some internal test procedures from the given + // implementation and then use them through our standard test + // framework. + // + // Regular users should not use it, the tests of the public + // API are enough. + + struct TestProcedure { + // display name + std::string name; + + // procedure should return whether given test pass or not + void (*procedure)(const implementation &); + }; + + virtual std::vector internal_tests() const; +#endif protected: /** @private Construct an implementation with the given name and description.