SIMD: Add a haswell sub-architecture selection to our support
As the comment says, Haswell is a nice divider and is a good optimization target. I'm using -march=core-avx2 instead of -march=haswell because the latter form was only added to GCC 4.9 but we still support 4.7 and that has support for AVX2. This commit changes the AVX2-optimized code in QtGui to Haswell-optimized instead. That means, for example, that qdrawhelper_avx2.cpp can now use the FMA instructions. Change-Id: If025d476890745368955fffd153129c1716ba006 Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
parent
746f15d0c2
commit
c3a4ec5d0b
@ -108,6 +108,9 @@ QMAKE_CFLAGS_NEON += -mfpu=neon
|
|||||||
QMAKE_CFLAGS_MIPS_DSP += -mdsp
|
QMAKE_CFLAGS_MIPS_DSP += -mdsp
|
||||||
QMAKE_CFLAGS_MIPS_DSPR2 += -mdspr2
|
QMAKE_CFLAGS_MIPS_DSPR2 += -mdspr2
|
||||||
|
|
||||||
|
# -march=haswell is only supported as of GCC 4.9 and Clang 3.6, so use the older -march=core-avx2 spelling
|
||||||
|
QMAKE_CFLAGS_ARCH_HASWELL = -march=core-avx2
|
||||||
|
|
||||||
# Wrapper tools that understand .o/.a files with GIMPLE instead of machine code
|
# Wrapper tools that understand .o/.a files with GIMPLE instead of machine code
|
||||||
QMAKE_AR_LTCG = gcc-ar cqs
|
QMAKE_AR_LTCG = gcc-ar cqs
|
||||||
QMAKE_NM_LTCG = gcc-nm -P
|
QMAKE_NM_LTCG = gcc-nm -P
|
||||||
|
@ -141,6 +141,28 @@ addSimdCompiler(neon)
|
|||||||
addSimdCompiler(mips_dsp)
|
addSimdCompiler(mips_dsp)
|
||||||
addSimdCompiler(mips_dspr2)
|
addSimdCompiler(mips_dspr2)
|
||||||
|
|
||||||
|
# Haswell sub-architecture
|
||||||
|
defineTest(addSimdArch) {
|
||||||
|
name = arch_$$1
|
||||||
|
dependencies = $$2
|
||||||
|
upname = $$upper($$name)
|
||||||
|
|
||||||
|
cpu_features_missing =
|
||||||
|
for(part, dependencies) {
|
||||||
|
!contains(QT_CPU_FEATURES, $$part): cpu_features_missing = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
CONFIG += $$name
|
||||||
|
isEmpty(cpu_features_missing): QT_CPU_FEATURES += $$name
|
||||||
|
|
||||||
|
export(QT_CPU_FEATURES)
|
||||||
|
export(CONFIG)
|
||||||
|
addSimdCompiler($$name)
|
||||||
|
}
|
||||||
|
|
||||||
|
isEmpty(QMAKE_CFLAGS_ARCH_HASWELL): QMAKE_CFLAGS_ARCH_HASWELL = $$QMAKE_CFLAGS_AVX2
|
||||||
|
avx2: addSimdArch(haswell, avx2 bmi bmi2 f16c fma lzcnt popcnt)
|
||||||
|
|
||||||
# Follow the Intel compiler's lead and define profiles of AVX512 instructions
|
# Follow the Intel compiler's lead and define profiles of AVX512 instructions
|
||||||
defineTest(addAvx512Profile) {
|
defineTest(addAvx512Profile) {
|
||||||
name = $$1
|
name = $$1
|
||||||
@ -149,7 +171,7 @@ defineTest(addAvx512Profile) {
|
|||||||
varname = QMAKE_CFLAGS_$$upname
|
varname = QMAKE_CFLAGS_$$upname
|
||||||
|
|
||||||
cpu_features_missing =
|
cpu_features_missing =
|
||||||
cflags = $$QMAKE_CFLAGS_AVX512F
|
cflags = $$QMAKE_CFLAGS_ARCH_HASWELL $$QMAKE_CFLAGS_AVX512F
|
||||||
for(part, dependencies) {
|
for(part, dependencies) {
|
||||||
!CONFIG($$part): return() # Profile isn't supported by the compiler
|
!CONFIG($$part): return() # Profile isn't supported by the compiler
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
**
|
**
|
||||||
** Copyright (C) 2016 The Qt Company Ltd.
|
** Copyright (C) 2016 The Qt Company Ltd.
|
||||||
** Copyright (C) 2016 Intel Corporation.
|
** Copyright (C) 2018 Intel Corporation.
|
||||||
** Contact: https://www.qt.io/licensing/
|
** Contact: https://www.qt.io/licensing/
|
||||||
**
|
**
|
||||||
** This file is part of the QtCore module of the Qt Toolkit.
|
** This file is part of the QtCore module of the Qt Toolkit.
|
||||||
@ -232,8 +232,49 @@
|
|||||||
# define __RDRND__ 1
|
# define __RDRND__ 1
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
# if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL)
|
||||||
|
// BMI2 instructions:
|
||||||
|
// All processors that support BMI support BMI2 (and AVX2)
|
||||||
|
// (but neither MSVC nor the Intel compiler define this macro)
|
||||||
|
# define __BMI2__ 1
|
||||||
|
# endif
|
||||||
|
|
||||||
# include "qsimd_x86_p.h"
|
# include "qsimd_x86_p.h"
|
||||||
|
|
||||||
|
// Haswell sub-architecture
|
||||||
|
//
|
||||||
|
// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
|
||||||
|
// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
|
||||||
|
// sub-target for us. The first AMD processor with AVX2 support (Zen) has the
|
||||||
|
// same features.
|
||||||
|
//
|
||||||
|
// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
|
||||||
|
// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
|
||||||
|
# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell"
|
||||||
|
# if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \
|
||||||
|
defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__)
|
||||||
|
# define __haswell__ 1
|
||||||
|
# endif
|
||||||
|
|
||||||
|
// This constant does not include all CPU features found in a Haswell, only
|
||||||
|
// those that we'd have optimized code for.
|
||||||
|
// Note: this is declared static const rather than constexpr, as this file may be compiled in C mode.
|
||||||
|
QT_BEGIN_NAMESPACE
|
||||||
|
static const quint64 CpuFeatureArchHaswell = 0
|
||||||
|
| CpuFeatureSSE2
|
||||||
|
| CpuFeatureSSE3
|
||||||
|
| CpuFeatureSSSE3
|
||||||
|
| CpuFeatureSSE4_1
|
||||||
|
| CpuFeatureSSE4_2
|
||||||
|
| CpuFeatureFMA
|
||||||
|
| CpuFeaturePOPCNT
|
||||||
|
| CpuFeatureAVX
|
||||||
|
| CpuFeatureF16C
|
||||||
|
| CpuFeatureAVX2
|
||||||
|
| CpuFeatureBMI
|
||||||
|
| CpuFeatureBMI2;
|
||||||
|
QT_END_NAMESPACE
|
||||||
|
|
||||||
#endif /* Q_PROCESSOR_X86 */
|
#endif /* Q_PROCESSOR_X86 */
|
||||||
|
|
||||||
// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
|
// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
|
||||||
|
@ -127,7 +127,7 @@ SSE2_SOURCES += painting/qdrawhelper_sse2.cpp
|
|||||||
SSSE3_SOURCES += painting/qdrawhelper_ssse3.cpp
|
SSSE3_SOURCES += painting/qdrawhelper_ssse3.cpp
|
||||||
SSE4_1_SOURCES += painting/qdrawhelper_sse4.cpp \
|
SSE4_1_SOURCES += painting/qdrawhelper_sse4.cpp \
|
||||||
painting/qimagescale_sse4.cpp
|
painting/qimagescale_sse4.cpp
|
||||||
AVX2_SOURCES += painting/qdrawhelper_avx2.cpp
|
ARCH_HASWELL_SOURCES += painting/qdrawhelper_avx2.cpp
|
||||||
|
|
||||||
NEON_SOURCES += painting/qdrawhelper_neon.cpp painting/qimagescale_neon.cpp
|
NEON_SOURCES += painting/qdrawhelper_neon.cpp painting/qimagescale_neon.cpp
|
||||||
NEON_HEADERS += painting/qdrawhelper_neon_p.h
|
NEON_HEADERS += painting/qdrawhelper_neon_p.h
|
||||||
|
@ -5772,7 +5772,7 @@ static void qInitDrawhelperFunctions()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(QT_COMPILER_SUPPORTS_AVX2)
|
#if defined(QT_COMPILER_SUPPORTS_AVX2)
|
||||||
if (qCpuHasFeature(AVX2)) {
|
if (qCpuHasFeature(ArchHaswell)) {
|
||||||
extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
|
extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
|
||||||
const uchar *srcPixels, int sbpl,
|
const uchar *srcPixels, int sbpl,
|
||||||
int w, int h, int const_alpha);
|
int w, int h, int const_alpha);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user