Add AArch64 ASM to qimage_neon.cpp
Since AArch64 NEON assembly is different from Arm32 NEON, we need to write a separate version. Assembly is used rather than intrinsics because the intrinsics have trouble using the vstX and vldX instructions efficiently.
Change-Id: I5b67fc87acb2433b503e658099b742d57a9cff18
Reviewed-by: Erik Verbruggen <erik.verbruggen@theqtcompany.com>
This commit is contained in:
parent
4a4b17805c
commit
7cbbca5860
@ -2947,7 +2947,7 @@ void qInitImageConversions()
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
|
||||
#if defined(__ARM_NEON__)
|
||||
extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
|
||||
qimage_converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon;
|
||||
qimage_converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_neon;
|
||||
|
@ -35,7 +35,7 @@
|
||||
#include <private/qimage_p.h>
|
||||
#include <private/qsimd_p.h>
|
||||
|
||||
#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
|
||||
#if defined(__ARM_NEON__)
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
@ -55,6 +55,7 @@ Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, cons
|
||||
|
||||
if ((len - offsetToAlignOn8Bytes) >= 8) {
|
||||
const quint32 *const simdEnd = end - 7;
|
||||
#if !defined(Q_PROCESSOR_ARM_64)
|
||||
register uint8x8_t fullVector asm ("d3") = vdup_n_u8(0xff);
|
||||
do {
|
||||
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
|
||||
@ -76,6 +77,31 @@ Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, cons
|
||||
);
|
||||
#endif
|
||||
} while (dst < simdEnd);
|
||||
#else
|
||||
register uint8x8_t fullVector asm ("v3") = vdup_n_u8(0xff);
|
||||
do {
|
||||
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
|
||||
asm volatile (
|
||||
"ld3 { v4.8b, v5.8b, v6.8b }, [%[SRC]], #24 \n\t"
|
||||
"st4 { v3.8b, v4.8b, v5.8b, v6.8b }, [%[DST]], #32 \n\t"
|
||||
: [DST]"+r" (dst), [SRC]"+r" (src)
|
||||
: "w"(fullVector)
|
||||
: "memory", "v4", "v5", "v6"
|
||||
);
|
||||
#else
|
||||
asm volatile (
|
||||
"ld3 { v0.8b, v1.8b, v2.8b }, [%[SRC]], #24 \n\t"
|
||||
"mov v4.8b, v2.8b\n\t"
|
||||
"mov v2.8b, v0.8b\n\t"
|
||||
"mov v0.8b, v4.8b\n\t"
|
||||
"st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [%[DST]], #32 \n\t"
|
||||
: [DST]"+r" (dst), [SRC]"+r" (src)
|
||||
: "w"(fullVector)
|
||||
: "memory", "v0", "v1", "v2", "v4"
|
||||
);
|
||||
#endif
|
||||
} while (dst < simdEnd);
|
||||
#endif
|
||||
}
|
||||
|
||||
while (dst != end) {
|
||||
@ -103,4 +129,4 @@ void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::I
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
|
||||
#endif // defined(__ARM_NEON__)
|
||||
|
@ -978,9 +978,8 @@ extern "C" void qt_convert_rgb888_to_rgb32_mips_dspr2_asm(quint32 *dst, const uc
|
||||
QJpegHandler::QJpegHandler()
|
||||
: d(new QJpegHandlerPrivate(this))
|
||||
{
|
||||
#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
|
||||
#if defined(__ARM_NEON__)
|
||||
// from qimage_neon.cpp
|
||||
|
||||
if (qCpuHasFeature(NEON))
|
||||
d->rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_neon;
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user