diff --git a/src/corelib/global/qfloat16.cpp b/src/corelib/global/qfloat16.cpp index 48302a57488..0eb6ec10100 100644 --- a/src/corelib/global/qfloat16.cpp +++ b/src/corelib/global/qfloat16.cpp @@ -200,20 +200,13 @@ static inline bool hasFastF16() } #if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) -#define QT_FUNCTION_TARGET_STRING_AVX512VLBW \ - QT_FUNCTION_TARGET_STRING_AVX512VL "," \ - QT_FUNCTION_TARGET_STRING_AVX512BW "," \ - QT_FUNCTION_TARGET_STRING_F16C "," \ - QT_FUNCTION_TARGET_STRING_BMI2 /* BMI2 for BZHI */ - static bool hasFastF16Avx256() { // 256-bit AVX512 don't have a performance penalty (see qstring.cpp for more info) - constexpr quint64 CpuFeatureAVX512VLBW = CpuFeatureAVX512BW | CpuFeatureAVX512VL; - return qCpuHasFeature(AVX512VLBW); + return qCpuHasFeature(ArchSkylakeAvx512); } -static QT_FUNCTION_TARGET(AVX512VLBW) +static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) noexcept { __mmask16 mask = _bzhi_u32(-1, len); @@ -222,7 +215,7 @@ void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) n _mm_mask_storeu_epi16(out, mask, f16); }; -static QT_FUNCTION_TARGET(AVX512VLBW) +static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) void qFloatFromFloat16_tail_avx256(float *out, const quint16 *in, qsizetype len) noexcept { __mmask16 mask = _bzhi_u32(-1, len); diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h index 86bb64abd90..4c00439700e 100644 --- a/src/corelib/global/qsimd_p.h +++ b/src/corelib/global/qsimd_p.h @@ -250,7 +250,6 @@ asm( // // macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc // ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell). 
-# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell" # define ARCH_HASWELL_MACROS (__AVX2__ + __BMI__ + __BMI2__ + __F16C__ + __FMA__ + __LZCNT__) # if ARCH_HASWELL_MACROS != 0 # if ARCH_HASWELL_MACROS != 6 diff --git a/src/corelib/global/qsimd_x86.cpp b/src/corelib/global/qsimd_x86.cpp index f51536259e2..9609f21ec48 100644 --- a/src/corelib/global/qsimd_x86.cpp +++ b/src/corelib/global/qsimd_x86.cpp @@ -146,8 +146,8 @@ static const struct X86Architecture x86_architectures[] = { { cpu_icelake_server, "Ice Lake (Server)" }, { cpu_icelake_client, "Ice Lake (Client)" }, { cpu_alderlake, "Alder Lake" }, - { cpu_cooperlake, "Cooper Lake" }, { cpu_cannonlake, "Cannon Lake" }, + { cpu_cooperlake, "Cooper Lake" }, { cpu_cascadelake, "Cascade Lake" }, { cpu_skylake_avx512, "Skylake (Avx512)" }, { cpu_skylake, "Skylake" }, diff --git a/src/corelib/global/qsimd_x86_p.h b/src/corelib/global/qsimd_x86_p.h index 19cc55a3d0a..421b44b11eb 100644 --- a/src/corelib/global/qsimd_x86_p.h +++ b/src/corelib/global/qsimd_x86_p.h @@ -46,81 +46,47 @@ // in CPUID Leaf 1, EDX: #define cpu_feature_sse2 (UINT64_C(1) << 0) -#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" // in CPUID Leaf 1, ECX: #define cpu_feature_sse3 (UINT64_C(1) << 1) -#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3" #define cpu_feature_ssse3 (UINT64_C(1) << 2) -#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3" #define cpu_feature_fma (UINT64_C(1) << 3) -#define QT_FUNCTION_TARGET_STRING_FMA "fma" #define cpu_feature_sse4_1 (UINT64_C(1) << 4) -#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1" #define cpu_feature_sse4_2 (UINT64_C(1) << 5) -#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2" #define cpu_feature_movbe (UINT64_C(1) << 6) -#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe" #define cpu_feature_popcnt (UINT64_C(1) << 7) -#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt" #define cpu_feature_aes (UINT64_C(1) << 8) -#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2" #define cpu_feature_avx (UINT64_C(1) << 9) -#define 
QT_FUNCTION_TARGET_STRING_AVX "avx" #define cpu_feature_f16c (UINT64_C(1) << 10) -#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx" #define cpu_feature_rdrnd (UINT64_C(1) << 11) -#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd" // in CPUID Leaf 7, Sub-leaf 0, EBX: #define cpu_feature_bmi (UINT64_C(1) << 12) -#define QT_FUNCTION_TARGET_STRING_BMI "bmi" #define cpu_feature_avx2 (UINT64_C(1) << 13) -#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2,avx" #define cpu_feature_bmi2 (UINT64_C(1) << 14) -#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2" #define cpu_feature_avx512f (UINT64_C(1) << 15) -#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f,avx" #define cpu_feature_avx512dq (UINT64_C(1) << 16) -#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq,avx512f" #define cpu_feature_rdseed (UINT64_C(1) << 17) -#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed" #define cpu_feature_avx512ifma (UINT64_C(1) << 18) -#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma,avx512f" #define cpu_feature_avx512cd (UINT64_C(1) << 19) -#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd,avx512f" #define cpu_feature_sha (UINT64_C(1) << 20) -#define QT_FUNCTION_TARGET_STRING_SHA "sha" #define cpu_feature_avx512bw (UINT64_C(1) << 21) -#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw,avx512f" #define cpu_feature_avx512vl (UINT64_C(1) << 22) -#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl,avx512f" // in CPUID Leaf 7, Sub-leaf 0, ECX: #define cpu_feature_avx512vbmi (UINT64_C(1) << 23) -#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi,avx512f" #define cpu_feature_avx512vbmi2 (UINT64_C(1) << 24) -#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2,avx512f" #define cpu_feature_shstk (UINT64_C(1) << 25) -#define QT_FUNCTION_TARGET_STRING_SHSTK "shstk" #define cpu_feature_gfni (UINT64_C(1) << 26) -#define QT_FUNCTION_TARGET_STRING_GFNI "gfni" #define cpu_feature_vaes (UINT64_C(1) << 27) -#define QT_FUNCTION_TARGET_STRING_VAES "vaes,avx2,avx,aes" #define cpu_feature_avx512vnni 
(UINT64_C(1) << 28) -#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni,avx512f" #define cpu_feature_avx512bitalg (UINT64_C(1) << 29) -#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg,avx512f" #define cpu_feature_avx512vpopcntdq (UINT64_C(1) << 30) -#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq,avx512f" // in CPUID Leaf 7, Sub-leaf 0, EDX: #define cpu_feature_hybrid (UINT64_C(1) << 31) -#define QT_FUNCTION_TARGET_STRING_HYBRID "hybrid" #define cpu_feature_ibt (UINT64_C(1) << 32) -#define QT_FUNCTION_TARGET_STRING_IBT "ibt" #define cpu_feature_avx512fp16 (UINT64_C(1) << 33) -#define QT_FUNCTION_TARGET_STRING_AVX512FP16 "avx512fp16,avx512f,f16c" // CPU architectures #define cpu_x86_64 (0 \ @@ -204,6 +170,83 @@ #define cpu_goldmont (cpu_glm) #define cpu_tremont (cpu_tnt) +// __attribute__ target strings for GCC and Clang +#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" +#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3" +#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3" +#define QT_FUNCTION_TARGET_STRING_FMA "fma" +#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1" +#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2" +#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe" +#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt" +#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2" +#define QT_FUNCTION_TARGET_STRING_AVX "avx" +#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx" +#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd" +#define QT_FUNCTION_TARGET_STRING_BMI "bmi" +#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2,avx" +#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2" +#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f,avx" +#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq,avx512f" +#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed" +#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma,avx512f" +#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd,avx512f" +#define QT_FUNCTION_TARGET_STRING_SHA "sha" +#define QT_FUNCTION_TARGET_STRING_AVX512BW 
"avx512bw,avx512f" +#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl,avx512f" +#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi,avx512f" +#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2,avx512f" +#define QT_FUNCTION_TARGET_STRING_SHSTK "shstk" +#define QT_FUNCTION_TARGET_STRING_GFNI "gfni" +#define QT_FUNCTION_TARGET_STRING_VAES "vaes,avx2,avx,aes" +#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni,avx512f" +#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg,avx512f" +#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq,avx512f" +#define QT_FUNCTION_TARGET_STRING_HYBRID "hybrid" +#define QT_FUNCTION_TARGET_STRING_IBT "ibt" +#define QT_FUNCTION_TARGET_STRING_AVX512FP16 "avx512fp16,avx512f,f16c" +#define QT_FUNCTION_TARGET_STRING_ARCH_X86_64 "sse2" +#define QT_FUNCTION_TARGET_STRING_ARCH_CORE2 QT_FUNCTION_TARGET_STRING_ARCH_X86_64 ",sse3,ssse3,cx16" +#define QT_FUNCTION_TARGET_STRING_ARCH_NHM QT_FUNCTION_TARGET_STRING_ARCH_CORE2 ",sse4.1,sse4.2,popcnt" +#define QT_FUNCTION_TARGET_STRING_ARCH_WSM QT_FUNCTION_TARGET_STRING_ARCH_NHM +#define QT_FUNCTION_TARGET_STRING_ARCH_SNB QT_FUNCTION_TARGET_STRING_ARCH_WSM ",avx" +#define QT_FUNCTION_TARGET_STRING_ARCH_IVB QT_FUNCTION_TARGET_STRING_ARCH_SNB ",f16c,rdrnd,fsgsbase" +#define QT_FUNCTION_TARGET_STRING_ARCH_HSW QT_FUNCTION_TARGET_STRING_ARCH_IVB ",avx2,fma,bmi,bmi2,lzcnt,movbe" +#define QT_FUNCTION_TARGET_STRING_ARCH_BDW QT_FUNCTION_TARGET_STRING_ARCH_HSW ",adx,rdseed" +#define QT_FUNCTION_TARGET_STRING_ARCH_BDX QT_FUNCTION_TARGET_STRING_ARCH_BDW +#define QT_FUNCTION_TARGET_STRING_ARCH_SKL QT_FUNCTION_TARGET_STRING_ARCH_BDW ",xsavec,xsaves" +#define QT_FUNCTION_TARGET_STRING_ARCH_ADL QT_FUNCTION_TARGET_STRING_ARCH_SKL ",avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker" +#define QT_FUNCTION_TARGET_STRING_ARCH_SKX QT_FUNCTION_TARGET_STRING_ARCH_SKL ",avx512f,avx512dq,avx512cd,avx512bw,avx512vl" +#define 
QT_FUNCTION_TARGET_STRING_ARCH_CLX QT_FUNCTION_TARGET_STRING_ARCH_SKX ",avx512vnni" +#define QT_FUNCTION_TARGET_STRING_ARCH_CPX QT_FUNCTION_TARGET_STRING_ARCH_CLX ",avx512bf16" +#define QT_FUNCTION_TARGET_STRING_ARCH_CNL QT_FUNCTION_TARGET_STRING_ARCH_SKX ",avx512ifma,avx512vbmi" +#define QT_FUNCTION_TARGET_STRING_ARCH_ICL QT_FUNCTION_TARGET_STRING_ARCH_CNL ",avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq" +#define QT_FUNCTION_TARGET_STRING_ARCH_ICX QT_FUNCTION_TARGET_STRING_ARCH_ICL ",pconfig" +#define QT_FUNCTION_TARGET_STRING_ARCH_TGL QT_FUNCTION_TARGET_STRING_ARCH_ICL ",avx512vp2intersect,shstk,movdiri,movdir64b,ibt,keylocker" +#define QT_FUNCTION_TARGET_STRING_ARCH_SPR QT_FUNCTION_TARGET_STRING_ARCH_TGL ",avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr" +#define QT_FUNCTION_TARGET_STRING_ARCH_SLM QT_FUNCTION_TARGET_STRING_ARCH_WSM ",rdrnd,movbe" +#define QT_FUNCTION_TARGET_STRING_ARCH_GLM QT_FUNCTION_TARGET_STRING_ARCH_SLM ",fsgsbase,rdseed,lzcnt,xsavec,xsaves" +#define QT_FUNCTION_TARGET_STRING_ARCH_TNT QT_FUNCTION_TARGET_STRING_ARCH_GLM ",clwb,gfni,cldemote,waitpkg,movdiri,movdir64b" +#define QT_FUNCTION_TARGET_STRING_ARCH_NEHALEM QT_FUNCTION_TARGET_STRING_ARCH_NHM +#define QT_FUNCTION_TARGET_STRING_ARCH_WESTMERE QT_FUNCTION_TARGET_STRING_ARCH_WSM +#define QT_FUNCTION_TARGET_STRING_ARCH_SANDYBRIDGE QT_FUNCTION_TARGET_STRING_ARCH_SNB +#define QT_FUNCTION_TARGET_STRING_ARCH_IVYBRIDGE QT_FUNCTION_TARGET_STRING_ARCH_IVB +#define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL QT_FUNCTION_TARGET_STRING_ARCH_HSW +#define QT_FUNCTION_TARGET_STRING_ARCH_BROADWELL QT_FUNCTION_TARGET_STRING_ARCH_BDW +#define QT_FUNCTION_TARGET_STRING_ARCH_SKYLAKE QT_FUNCTION_TARGET_STRING_ARCH_SKL +#define QT_FUNCTION_TARGET_STRING_ARCH_SKYLAKE_AVX512 QT_FUNCTION_TARGET_STRING_ARCH_SKX +#define QT_FUNCTION_TARGET_STRING_ARCH_CASCADELAKE QT_FUNCTION_TARGET_STRING_ARCH_CLX +#define QT_FUNCTION_TARGET_STRING_ARCH_COOPERLAKE 
QT_FUNCTION_TARGET_STRING_ARCH_CPX +#define QT_FUNCTION_TARGET_STRING_ARCH_CANNONLAKE QT_FUNCTION_TARGET_STRING_ARCH_CNL +#define QT_FUNCTION_TARGET_STRING_ARCH_ICELAKE_CLIENT QT_FUNCTION_TARGET_STRING_ARCH_ICL +#define QT_FUNCTION_TARGET_STRING_ARCH_ICELAKE_SERVER QT_FUNCTION_TARGET_STRING_ARCH_ICX +#define QT_FUNCTION_TARGET_STRING_ARCH_ALDERLAKE QT_FUNCTION_TARGET_STRING_ARCH_ADL +#define QT_FUNCTION_TARGET_STRING_ARCH_SAPPHIRERAPIDS QT_FUNCTION_TARGET_STRING_ARCH_SPR +#define QT_FUNCTION_TARGET_STRING_ARCH_TIGERLAKE QT_FUNCTION_TARGET_STRING_ARCH_TGL +#define QT_FUNCTION_TARGET_STRING_ARCH_SILVERMONT QT_FUNCTION_TARGET_STRING_ARCH_SLM +#define QT_FUNCTION_TARGET_STRING_ARCH_GOLDMONT QT_FUNCTION_TARGET_STRING_ARCH_GLM +#define QT_FUNCTION_TARGET_STRING_ARCH_TREMONT QT_FUNCTION_TARGET_STRING_ARCH_TNT + static const uint64_t _compilerCpuFeatures = 0 #ifdef __SSE2__ | cpu_feature_sse2 diff --git a/util/x86simdgen/3rdparty/x86simd_generate.pl b/util/x86simdgen/3rdparty/x86simd_generate.pl index a07f858dcc9..bf942bea53e 100755 --- a/util/x86simdgen/3rdparty/x86simd_generate.pl +++ b/util/x86simdgen/3rdparty/x86simd_generate.pl @@ -28,6 +28,7 @@ open(FH, '<', $input_conf_file) or die $!; my $i = 0; my @features; +my %feature_ids; my @architecture_names; my %architectures; my @xsaveStates; @@ -58,7 +59,7 @@ while () { @basefeatures = @{$architectures{$based}->{allfeatures}} if $based ne "<>"; @extrafeatures = @{$architectures{$arch}{features}} if defined($architectures{$arch}); @extrafeatures = (@extrafeatures, split(',', $f)); - my @allfeatures = sort (@basefeatures, @extrafeatures); + my @allfeatures = sort { $feature_ids{$a} <=> $feature_ids{$b} } (@basefeatures, @extrafeatures); $architectures{$arch} = { name => $arch, @@ -91,6 +92,7 @@ while () { $id =~ s/[^A-Z0-9_]/_/g; push @features, { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function, comment => $comment }; + $feature_ids{$name} = $i; ++$i; die("Too many features to fit a 64-bit 
integer") if $i > 64; } @@ -126,12 +128,6 @@ for (my $i = 0; $i < scalar @features; ++$i) { # Feature printf "#define cpu_feature_%-31s (UINT64_C(1) << %d)\n", lc($feature->{id}), $i; - - # Feature string names for Clang and GCC - my $str = $feature->{name} . ',' . $feature->{depends}; - $str =~ s/,$//; - printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n", - $feature->{id}, $str; } # Print the architecture list @@ -160,6 +156,33 @@ for (@architecture_names) { print ")"; } +print "\n// __attribute__ target strings for GCC and Clang\n"; +for (my $i = 0; $i < scalar @features; ++$i) { + my $feature = $features[$i]; + my $str = $feature->{name} . ',' . $feature->{depends}; + $str =~ s/,$//; + printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n", + $feature->{id}, $str; +} +for (@architecture_names) { + my $arch = $architectures{$_}; + my $base = $arch->{base}; + my $featurestr = ""; + if ($base ne "<>") { + $featurestr = "QT_FUNCTION_TARGET_STRING_ARCH_" . uc($base); + } + + my @features = @{$arch->{features}}; + #@features = map { defined($feature_ids{$_}) ? $_ : () } @features; + if (scalar @features) { + $featurestr .= ' ",' if length $featurestr; + $featurestr .= '"' unless length $featurestr; + $featurestr .= join(',', grep { length } @features); + $featurestr .= '"'; + } + printf "#define QT_FUNCTION_TARGET_STRING_ARCH_%-12s %s\n", uc($arch->{id}), $featurestr; +} + print q{ static const uint64_t _compilerCpuFeatures = 0};