Optimize fallback RGB888 to RGB32 conversion
Improves the conversion from RGB888 to RGB32 on platforms without SIMD versions. This includes the fallback used on non-NEON ARM devices. Besides image conversion, the routine is also used for decoding JPEG. On x86, this version is within 0.7x of the speed of the SSSE3 version. Change-Id: Id131994d7c3c4f879d89e80f9d6c435bb5535ed7 Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
This commit is contained in:
parent
bfb92c03e0
commit
59f168c5e5
@ -35,8 +35,8 @@
|
||||
#include <private/qdrawingprimitive_sse2_p.h>
|
||||
#include <private/qguiapplication_p.h>
|
||||
#include <private/qsimd_p.h>
|
||||
|
||||
#include <private/qimage_p.h>
|
||||
#include <qendian.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
@ -290,6 +290,108 @@ static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src
|
||||
}
|
||||
#endif
|
||||
|
||||
// Expands `len` packed 3-byte pixels from src_data into one quint32 per pixel
// in dest_data. For every pixel the first source byte ends up in bits 16-23,
// the second in bits 8-15, the third in bits 0-7, and the top byte is forced
// to 0xff.
//
// The work is split into three phases: single pixels until the source pointer
// is 4-byte aligned, then groups of four pixels read as three 32-bit words,
// then whatever pixels remain.
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dest_data, const uchar *src_data, int len)
{
    int done = 0;

    // Phase 1: one pixel at a time until src_data sits on a 4-byte boundary
    // (or the input is exhausted).
    for (; done < len && (quintptr(src_data) & 0x3) != 0; ++done) {
        *dest_data++ = 0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]);
        src_data += 3;
    }

    // Phase 2: 12 input bytes (four pixels) become 16 output bytes.
    // Each word goes through qFromBigEndian so that the byte that comes first
    // in memory is the most significant one, which lets plain shifts pick the
    // pixels apart regardless of host byte order.
    while (done + 3 < len) {
        const quint32 *words = (const quint32 *) src_data;
        const quint32 w0 = qFromBigEndian(words[0]);
        const quint32 w1 = qFromBigEndian(words[1]);
        const quint32 w2 = qFromBigEndian(words[2]);

        // Stray bits shifted into the top byte are masked by the 0xff alpha.
        dest_data[0] = 0xff000000 | (w0 >> 8);
        dest_data[1] = 0xff000000 | (w0 << 16) | (w1 >> 16);
        dest_data[2] = 0xff000000 | (w1 << 8) | (w2 >> 24);
        dest_data[3] = 0xff000000 | w2;

        src_data += 12;
        dest_data += 4;
        done += 4;
    }

    // Phase 3: fewer than four pixels are left; finish them individually.
    while (done < len) {
        *dest_data++ = 0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]);
        src_data += 3;
        ++done;
    }
}
|
||||
|
||||
// Expands `len` packed 3-byte pixels from src_data into one quint32 per pixel
// in dest_data. In the four-pixel fast path every output word, as laid out in
// memory, holds the three source bytes in their original order followed by a
// 0xff byte. The single-pixel paths build a 0xff-prefixed 32-bit value and
// pass it through ARGB2RGBA (presumably yielding the same layout; confirm
// against that helper's definition).
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgbx8888(quint32 *dest_data, const uchar *src_data, int len)
{
    int done = 0;

    // Phase 1: one pixel at a time until src_data sits on a 4-byte boundary
    // (or the input is exhausted).
    for (; done < len && (quintptr(src_data) & 0x3) != 0; ++done) {
        *dest_data++ = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]));
        src_data += 3;
    }

    // Phase 2: 12 input bytes (four pixels) become 16 output bytes.
    // The words are used in native byte order, so the shift directions and
    // the position of the 0xff filler depend on the host endianness.
    while (done + 3 < len) {
        const quint32 *words = (const quint32 *) src_data;
        const quint32 w0 = words[0];
        const quint32 w1 = words[1];
        const quint32 w2 = words[2];

#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
        // First byte in memory is the least significant: filler goes on top.
        dest_data[0] = 0xff000000 | w0;
        dest_data[1] = 0xff000000 | (w0 >> 24) | (w1 << 8);
        dest_data[2] = 0xff000000 | (w1 >> 16) | (w2 << 16);
        dest_data[3] = 0xff000000 | (w2 >> 8);
#else
        // First byte in memory is the most significant: filler goes at the bottom.
        dest_data[0] = 0xff | w0;
        dest_data[1] = 0xff | (w0 << 24) | (w1 >> 8);
        dest_data[2] = 0xff | (w1 << 16) | (w2 >> 16);
        dest_data[3] = 0xff | (w2 << 8);
#endif

        src_data += 12;
        dest_data += 4;
        done += 4;
    }

    // Phase 3: fewer than four pixels are left; finish them individually.
    while (done < len) {
        *dest_data++ = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]));
        src_data += 3;
        ++done;
    }
}
|
||||
|
||||
typedef void (QT_FASTCALL *Rgb888ToRgbConverter)(quint32 *dst, const uchar *src, int len);
|
||||
|
||||
template <bool rgbx>
|
||||
static void convert_RGB888_to_RGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
|
||||
{
|
||||
Q_ASSERT(src->format == QImage::Format_RGB888);
|
||||
if (rgbx)
|
||||
Q_ASSERT(dest->format == QImage::Format_RGBX8888 || dest->format == QImage::Format_RGBA8888 || dest->format == QImage::Format_RGBA8888_Premultiplied);
|
||||
else
|
||||
Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied);
|
||||
Q_ASSERT(src->width == dest->width);
|
||||
Q_ASSERT(src->height == dest->height);
|
||||
|
||||
const uchar *src_data = (uchar *) src->data;
|
||||
quint32 *dest_data = (quint32 *) dest->data;
|
||||
|
||||
Rgb888ToRgbConverter line_converter= rgbx ? qt_convert_rgb888_to_rgbx8888 : qt_convert_rgb888_to_rgb32;
|
||||
|
||||
for (int i = 0; i < src->height; ++i) {
|
||||
line_converter(dest_data, src_data, src->width);
|
||||
src_data += src->bytes_per_line;
|
||||
dest_data = (quint32 *)((uchar*)dest_data + dest->bytes_per_line);
|
||||
}
|
||||
}
|
||||
|
||||
extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
|
||||
|
||||
static void convert_ARGB_to_RGBx(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
|
||||
@ -2052,6 +2154,9 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
convert_RGB888_to_RGB<false>,
|
||||
convert_RGB888_to_RGB<false>,
|
||||
convert_RGB888_to_RGB<false>,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
@ -2061,12 +2166,10 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0, 0, 0, 0, 0, 0, 0
|
||||
convert_RGB888_to_RGB<true>,
|
||||
convert_RGB888_to_RGB<true>,
|
||||
convert_RGB888_to_RGB<true>,
|
||||
0, 0, 0, 0, 0, 0
|
||||
}, // Format_RGB888
|
||||
|
||||
{
|
||||
|
@ -69,18 +69,10 @@ extern "C" {
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// Plain per-pixel expansion from 24 to 32 bits per pixel: each group of three
// source bytes is handed to qRgb() and the result stored as one quint32.
// `len` is the number of pixels to convert.
void QT_FASTCALL convert_rgb888_to_rgb32_C(quint32 *dst, const uchar *src, int len)
{
    for (int px = 0; px < len; ++px, src += 3)
        dst[px] = qRgb(src[0], src[1], src[2]);
}
|
||||
|
||||
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dst, const uchar *src, int len);
|
||||
typedef void (QT_FASTCALL *Rgb888ToRgb32Converter)(quint32 *dst, const uchar *src, int len);
|
||||
|
||||
static Rgb888ToRgb32Converter rgb888ToRgb32ConverterPtr = convert_rgb888_to_rgb32_C;
|
||||
static Rgb888ToRgb32Converter rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32;
|
||||
|
||||
struct my_error_mgr : public jpeg_error_mgr {
|
||||
jmp_buf setjmp_buffer;
|
||||
@ -1008,10 +1000,8 @@ QJpegHandler::QJpegHandler()
|
||||
#endif
|
||||
|
||||
#if defined(QT_COMPILER_SUPPORTS_SSSE3)
|
||||
// from qimage_ssse3.cpp
|
||||
|
||||
if (false) {
|
||||
} else if (qCpuHasFeature(SSSE3)) {
|
||||
// from qimage_ssse3.cpp
|
||||
if (qCpuHasFeature(SSSE3)) {
|
||||
rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_ssse3;
|
||||
}
|
||||
#endif // QT_COMPILER_SUPPORTS_SSSE3
|
||||
|
@ -734,6 +734,13 @@ void tst_QImage::convertToFormat_data()
|
||||
QTest::newRow("blue rgb888 -> argb32") << int(QImage::Format_RGB888) << 0xff0000ff
|
||||
<< int(QImage::Format_ARGB32) << 0xff0000ff;
|
||||
|
||||
QTest::newRow("red rgb888 -> rgbx8888") << int(QImage::Format_RGB888) << 0xffff0000
|
||||
<< int(QImage::Format_RGBX8888) << 0xffff0000;
|
||||
QTest::newRow("green rgb888 -> rgbx8888") << int(QImage::Format_RGB888) << 0xff00ff00
|
||||
<< int(QImage::Format_RGBX8888) << 0xff00ff00;
|
||||
QTest::newRow("blue rgb888 -> rgbx8888") << int(QImage::Format_RGB888) << 0xff0000ff
|
||||
<< int(QImage::Format_RGBX8888) << 0xff0000ff;
|
||||
|
||||
QTest::newRow("semired argb32 -> rgb888") << int(QImage::Format_ARGB32) << 0x7fff0000u
|
||||
<< int(QImage::Format_RGB888) << 0xffff0000;
|
||||
QTest::newRow("semigreen argb32 -> rgb888") << int(QImage::Format_ARGB32) << 0x7f00ff00u
|
||||
|
@ -43,6 +43,9 @@ private slots:
|
||||
void convertRgb888ToRgb32_data();
|
||||
void convertRgb888ToRgb32();
|
||||
|
||||
void convertRgb888ToRgbx8888_data();
|
||||
void convertRgb888ToRgbx8888();
|
||||
|
||||
void convertRgb32ToRgb888_data();
|
||||
void convertRgb32ToRgb888();
|
||||
|
||||
@ -74,8 +77,8 @@ void tst_QImageConversion::convertRgb888ToRgb32_data()
|
||||
// 16 pixels, minimum for the SSSE3 implementation
|
||||
QTest::newRow("width: 16px; height: 5000px;") << generateImageRgb888(16, 5000);
|
||||
|
||||
// 50 pixels, more realistic use case
|
||||
QTest::newRow("width: 50px; height: 5000px;") << generateImageRgb888(50, 5000);
|
||||
// 200 pixels, more realistic use case
|
||||
QTest::newRow("width: 200px; height: 5000px;") << generateImageRgb888(200, 5000);
|
||||
|
||||
// 2000 pixels -> typical values for pictures
|
||||
QTest::newRow("width: 2000px; height: 2000px;") << generateImageRgb888(2000, 2000);
|
||||
@ -93,6 +96,23 @@ void tst_QImageConversion::convertRgb888ToRgb32()
|
||||
}
|
||||
}
|
||||
|
||||
void tst_QImageConversion::convertRgb888ToRgbx8888_data()
|
||||
{
|
||||
convertRgb888ToRgb32_data();
|
||||
}
|
||||
|
||||
void tst_QImageConversion::convertRgb888ToRgbx8888()
|
||||
{
|
||||
QFETCH(QImage, inputImage);
|
||||
|
||||
QBENCHMARK {
|
||||
volatile QImage output = inputImage.convertToFormat(QImage::Format_RGBX8888);
|
||||
// we need the volatile and the following to make sure the compiler does not do
|
||||
// anything stupid :)
|
||||
(void)output;
|
||||
}
|
||||
}
|
||||
|
||||
void tst_QImageConversion::convertRgb32ToRgb888_data()
|
||||
{
|
||||
QTest::addColumn<QImage>("inputImage");
|
||||
|
Loading…
x
Reference in New Issue
Block a user