Optimize fallback RGB888 to RGB32 conversion

Improves the conversion from RGB888 to RGB32 on platforms that have no SIMD
implementation. This includes the fallback used on ARM devices without NEON.

Besides image conversion, the routine is also used for decoding JPEG images.

On x86 this version is within 0.7x of the speed of the SSSE3 version.

Change-Id: Id131994d7c3c4f879d89e80f9d6c435bb5535ed7
Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
This commit is contained in:
Allan Sandfeld Jensen 2015-02-19 11:20:52 +01:00
parent bfb92c03e0
commit 59f168c5e5
4 changed files with 143 additions and 23 deletions

View File

@ -35,8 +35,8 @@
#include <private/qdrawingprimitive_sse2_p.h>
#include <private/qguiapplication_p.h>
#include <private/qsimd_p.h>
#include <private/qimage_p.h>
#include <qendian.h>
QT_BEGIN_NAMESPACE
@ -290,6 +290,108 @@ static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src
}
#endif
// Converts one run of packed RGB888 pixels into 32-bit pixels.
//
// dest_data: receives len values of the form 0xffRRGGBB.
// src_data:  3 * len bytes, three bytes per pixel (first byte ends up in
//            bits 16-23, second in bits 8-15, third in bits 0-7).
// len:       number of pixels to convert.
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dest_data, const uchar *src_data, int len)
{
    const quint32 opaque = 0xff000000;
    int done = 0;

    // Head: go pixel by pixel until the source pointer is 4-byte aligned
    // (or the input runs out), so the 32-bit loads below are aligned.
    for (; done < len && (quintptr(src_data) & 0x3) != 0; ++done) {
        *dest_data++ = opaque | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]);
        src_data += 3;
    }

    // Body: three 32-bit words (12 input bytes) carry exactly four pixels
    // and produce 16 output bytes.
    while (len - done >= 4) {
        const quint32 *words = reinterpret_cast<const quint32 *>(src_data);
        // Interpret each word as big-endian so the first byte in memory
        // becomes the most significant byte of the value.
        const quint32 w0 = qFromBigEndian(words[0]); // bytes R0 G0 B0 R1
        const quint32 w1 = qFromBigEndian(words[1]); // bytes G1 B1 R2 G2
        const quint32 w2 = qFromBigEndian(words[2]); // bytes B2 R3 G3 B3

        // Stray bits that land in the top byte do not matter: OR-ing with
        // the opaque mask forces that byte to 0xff in every case.
        dest_data[0] = opaque | (w0 >> 8);
        dest_data[1] = opaque | (w0 << 16) | (w1 >> 16);
        dest_data[2] = opaque | (w1 << 8) | (w2 >> 24);
        dest_data[3] = opaque | w2;

        src_data += 12;
        dest_data += 4;
        done += 4;
    }

    // Tail: fewer than four pixels are left, convert them one at a time.
    while (done < len) {
        *dest_data++ = opaque | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]);
        src_data += 3;
        ++done;
    }
}
// Converts one run of packed RGB888 pixels into 32-bit RGBX8888 pixels.
//
// dest_data: receives len 32-bit pixels; the four-pixel loop below writes
//            each one so that its bytes in memory are the three source
//            bytes followed by 0xff.
// src_data:  3 * len bytes, three bytes per pixel.
// len:       number of pixels to convert.
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgbx8888(quint32 *dest_data, const uchar *src_data, int len)
{
    int done = 0;

    // Head: single pixels until the source pointer is 4-byte aligned
    // (or the input runs out). Each pixel is built as 0xffRRGGBB and then
    // passed through ARGB2RGBA().
    for (; done < len && (quintptr(src_data) & 0x3) != 0; ++done) {
        *dest_data++ = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]));
        src_data += 3;
    }

    // Body: 12 input bytes (three native-endian words) become four output
    // pixels (16 bytes). No byte swap is needed because the output keeps
    // the source byte order; only the constant 0xff byte is inserted.
    while (len - done >= 4) {
        const quint32 *words = reinterpret_cast<const quint32 *>(src_data);
        const quint32 w0 = words[0]; // bytes in memory: p0 p0 p0 p1
        const quint32 w1 = words[1]; // bytes in memory: p1 p1 p2 p2
        const quint32 w2 = words[2]; // bytes in memory: p2 p3 p3 p3
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
        // Little endian: the last byte in memory is the top byte of the
        // value, so the 0xff filler goes into bits 24-31.
        dest_data[0] = 0xff000000 | w0;
        dest_data[1] = 0xff000000 | (w0 >> 24) | (w1 << 8);
        dest_data[2] = 0xff000000 | (w1 >> 16) | (w2 << 16);
        dest_data[3] = 0xff000000 | (w2 >> 8);
#else
        // Big endian: the last byte in memory is the low byte of the
        // value, so the 0xff filler goes into bits 0-7.
        dest_data[0] = 0xff | w0;
        dest_data[1] = 0xff | (w0 << 24) | (w1 >> 8);
        dest_data[2] = 0xff | (w1 << 16) | (w2 >> 16);
        dest_data[3] = 0xff | (w2 << 8);
#endif
        src_data += 12;
        dest_data += 4;
        done += 4;
    }

    // Tail: fewer than four pixels are left, convert them one at a time.
    while (done < len) {
        *dest_data++ = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]));
        src_data += 3;
        ++done;
    }
}
// Signature of a scanline converter: reads len RGB888 pixels from src and
// writes len 32-bit pixels to dst.
typedef void (QT_FASTCALL *Rgb888ToRgbConverter)(quint32 *dst, const uchar *src, int len);

// Converts a whole RGB888 image into a 32-bit image, one scanline at a time.
//
// rgbx == true  selects qt_convert_rgb888_to_rgbx8888 (RGBX8888 / RGBA8888 /
//               RGBA8888_Premultiplied destinations).
// rgbx == false selects qt_convert_rgb888_to_rgb32 (RGB32 / ARGB32 /
//               ARGB32_Premultiplied destinations).
//
// dest and src must have equal width and height; the conversion flags
// argument is ignored.
template <bool rgbx>
static void convert_RGB888_to_RGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
    Q_ASSERT(src->format == QImage::Format_RGB888);
    if (rgbx) {
        Q_ASSERT(dest->format == QImage::Format_RGBX8888 || dest->format == QImage::Format_RGBA8888 || dest->format == QImage::Format_RGBA8888_Premultiplied);
    } else {
        Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied);
    }
    Q_ASSERT(src->width == dest->width);
    Q_ASSERT(src->height == dest->height);

    // Pick the per-line routine once; rgbx is a compile-time constant.
    Rgb888ToRgbConverter convert_line;
    if (rgbx)
        convert_line = qt_convert_rgb888_to_rgbx8888;
    else
        convert_line = qt_convert_rgb888_to_rgb32;

    // Track both rows as byte pointers so each side can advance by its own
    // bytes_per_line stride.
    const uchar *src_row = (uchar *) src->data;
    uchar *dest_row = (uchar *) dest->data;
    for (int rows_left = src->height; rows_left > 0; --rows_left) {
        convert_line((quint32 *) dest_row, src_row, src->width);
        src_row += src->bytes_per_line;
        dest_row += dest->bytes_per_line;
    }
}
extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
static void convert_ARGB_to_RGBx(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
@ -2052,6 +2154,9 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat
0,
0,
0,
convert_RGB888_to_RGB<false>,
convert_RGB888_to_RGB<false>,
convert_RGB888_to_RGB<false>,
0,
0,
0,
@ -2061,12 +2166,10 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat
0,
0,
0,
0,
0,
0,
0,
0,
0, 0, 0, 0, 0, 0, 0
convert_RGB888_to_RGB<true>,
convert_RGB888_to_RGB<true>,
convert_RGB888_to_RGB<true>,
0, 0, 0, 0, 0, 0
}, // Format_RGB888
{

View File

@ -69,18 +69,10 @@ extern "C" {
QT_BEGIN_NAMESPACE
// Plain per-pixel fallback: reads len packed 3-byte pixels from src and
// writes one qRgb() value per pixel to dst.
void QT_FASTCALL convert_rgb888_to_rgb32_C(quint32 *dst, const uchar *src, int len)
{
    // Expand 24->32 bpp: every three source bytes become one 32-bit pixel.
    for (int remaining = len; remaining > 0; --remaining, src += 3, ++dst)
        *dst = qRgb(src[0], src[1], src[2]);
}
// Generic RGB888 -> RGB32 scanline converter; only declared here, the
// definition is not part of this file section.
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dst, const uchar *src, int len);
// Signature shared by the RGB888 -> RGB32 scanline converters:
// reads len 3-byte pixels from src and writes len 32-bit pixels to dst.
typedef void (QT_FASTCALL *Rgb888ToRgb32Converter)(quint32 *dst, const uchar *src, int len);
// Converter used by this file; the QJpegHandler constructor reassigns it
// to qt_convert_rgb888_to_rgb32_ssse3 when qCpuHasFeature(SSSE3) is true.
// NOTE(review): the pointer is defined twice below with different
// initializers. This looks like the pre-change and post-change lines of
// the same declaration; only one definition can remain -- confirm which
// (presumably the qt_convert_rgb888_to_rgb32 one).
static Rgb888ToRgb32Converter rgb888ToRgb32ConverterPtr = convert_rgb888_to_rgb32_C;
static Rgb888ToRgb32Converter rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32;
struct my_error_mgr : public jpeg_error_mgr {
jmp_buf setjmp_buffer;
@ -1008,10 +1000,8 @@ QJpegHandler::QJpegHandler()
#endif
#if defined(QT_COMPILER_SUPPORTS_SSSE3)
// from qimage_ssse3.cpp
if (false) {
} else if (qCpuHasFeature(SSSE3)) {
// from qimage_ssse3.cpp
if (qCpuHasFeature(SSSE3)) {
rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_ssse3;
}
#endif // QT_COMPILER_SUPPORTS_SSSE3

View File

@ -734,6 +734,13 @@ void tst_QImage::convertToFormat_data()
QTest::newRow("blue rgb888 -> argb32") << int(QImage::Format_RGB888) << 0xff0000ff
<< int(QImage::Format_ARGB32) << 0xff0000ff;
QTest::newRow("red rgb888 -> rgbx8888") << int(QImage::Format_RGB888) << 0xffff0000
<< int(QImage::Format_RGBX8888) << 0xffff0000;
QTest::newRow("green rgb888 -> rgbx8888") << int(QImage::Format_RGB888) << 0xff00ff00
<< int(QImage::Format_RGBX8888) << 0xff00ff00;
QTest::newRow("blue rgb888 -> rgbx8888") << int(QImage::Format_RGB888) << 0xff0000ff
<< int(QImage::Format_RGBX8888) << 0xff0000ff;
QTest::newRow("semired argb32 -> rgb888") << int(QImage::Format_ARGB32) << 0x7fff0000u
<< int(QImage::Format_RGB888) << 0xffff0000;
QTest::newRow("semigreen argb32 -> rgb888") << int(QImage::Format_ARGB32) << 0x7f00ff00u

View File

@ -43,6 +43,9 @@ private slots:
void convertRgb888ToRgb32_data();
void convertRgb888ToRgb32();
void convertRgb888ToRgbx8888_data();
void convertRgb888ToRgbx8888();
void convertRgb32ToRgb888_data();
void convertRgb32ToRgb888();
@ -74,8 +77,8 @@ void tst_QImageConversion::convertRgb888ToRgb32_data()
// 16 pixels, minimum for the SSSE3 implementation
QTest::newRow("width: 16px; height: 5000px;") << generateImageRgb888(16, 5000);
// 50 pixels, more realistic use case
QTest::newRow("width: 50px; height: 5000px;") << generateImageRgb888(50, 5000);
// 200 pixels, more realistic use case
QTest::newRow("width: 200px; height: 5000px;") << generateImageRgb888(200, 5000);
// 2000 pixels -> typical values for pictures
QTest::newRow("width: 2000px; height: 2000px;") << generateImageRgb888(2000, 2000);
@ -93,6 +96,23 @@ void tst_QImageConversion::convertRgb888ToRgb32()
}
}
// Data provider for convertRgb888ToRgbx8888(): reuses the rows registered
// by convertRgb888ToRgb32_data(), so both benchmarks run on the same
// RGB888 input images.
void tst_QImageConversion::convertRgb888ToRgbx8888_data()
{
convertRgb888ToRgb32_data();
}
// Benchmarks QImage::convertToFormat(QImage::Format_RGBX8888) on the
// "inputImage" column of the current data row.
void tst_QImageConversion::convertRgb888ToRgbx8888()
{
// Fetch the input image for the current data row.
QFETCH(QImage, inputImage);
QBENCHMARK {
volatile QImage output = inputImage.convertToFormat(QImage::Format_RGBX8888);
// The volatile qualifier and the otherwise unused reference below are
// there so the compiler does not optimize the conversion away.
(void)output;
}
}
void tst_QImageConversion::convertRgb32ToRgb888_data()
{
QTest::addColumn<QImage>("inputImage");