Clean up convert_ARGB_to_ARGB_PM_inplace_sse2
Change the function to follow the standard SIMD pattern so that it can use the ALIGNMENT_PROLOGUE_16BYTES and SIMD_EPILOGUE helpers. This should also improve performance, because the vector loop now uses aligned loads and stores instead of unaligned ones.

Change-Id: I14a48b82e3f3de83bd7572aa82bed07f28ad944c
Reviewed-by: Erik Verbruggen <erik.verbruggen@qt.io>
This commit is contained in:
parent
0382bb2ab8
commit
506aa694a9
@ -51,51 +51,66 @@ bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionF
|
||||
{
|
||||
Q_ASSERT(data->format == QImage::Format_ARGB32 || data->format == QImage::Format_RGBA8888);
|
||||
|
||||
// extra pixels on each line
|
||||
const int spare = data->width & 3;
|
||||
// width in pixels of the pad at the end of each line
|
||||
const int pad = (data->bytes_per_line >> 2) - data->width;
|
||||
const int iter = data->width >> 2;
|
||||
int height = data->height;
|
||||
const int width = data->width;
|
||||
const int height = data->height;
|
||||
const int bpl = data->bytes_per_line;
|
||||
|
||||
const __m128i alphaMask = _mm_set1_epi32(0xff000000);
|
||||
const __m128i nullVector = _mm_setzero_si128();
|
||||
const __m128i half = _mm_set1_epi16(0x80);
|
||||
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
|
||||
|
||||
__m128i *d = reinterpret_cast<__m128i*>(data->data);
|
||||
while (height--) {
|
||||
const __m128i *end = d + iter;
|
||||
|
||||
for (; d != end; ++d) {
|
||||
const __m128i srcVector = _mm_loadu_si128(d);
|
||||
uchar *d = data->data;
|
||||
for (int y = 0; y < height; ++y) {
|
||||
int i = 0;
|
||||
quint32 *d32 = reinterpret_cast<quint32 *>(d);
|
||||
ALIGNMENT_PROLOGUE_16BYTES(d, i, width) {
|
||||
const quint32 p = d32[i];
|
||||
if (p <= 0x00ffffff)
|
||||
d32[i] = 0;
|
||||
else if (p < 0xff000000)
|
||||
d32[i] = qPremultiply(p);
|
||||
}
|
||||
__m128i *d128 = reinterpret_cast<__m128i *>(d32 + i);
|
||||
for (; i < (width - 3); i += 4) {
|
||||
const __m128i srcVector = _mm_load_si128(d128);
|
||||
#ifdef __SSE4_1__
|
||||
if (_mm_testc_si128(srcVector, alphaMask)) {
|
||||
// opaque, data is unchanged
|
||||
} else if (_mm_testz_si128(srcVector, alphaMask)) {
|
||||
// fully transparent
|
||||
_mm_store_si128(d128, nullVector);
|
||||
} else {
|
||||
const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
|
||||
#else
|
||||
const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
|
||||
if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
|
||||
// opaque, data is unchanged
|
||||
} else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) == 0xffff) {
|
||||
// fully transparent
|
||||
_mm_storeu_si128(d, nullVector);
|
||||
_mm_store_si128(d128, nullVector);
|
||||
} else {
|
||||
#endif
|
||||
__m128i alphaChannel = _mm_srli_epi32(srcVector, 24);
|
||||
alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16));
|
||||
|
||||
__m128i result;
|
||||
BYTE_MUL_SSE2(result, srcVector, alphaChannel, colorMask, half);
|
||||
result = _mm_or_si128(_mm_andnot_si128(alphaMask, result), srcVectorAlpha);
|
||||
_mm_storeu_si128(d, result);
|
||||
_mm_store_si128(d128, result);
|
||||
}
|
||||
d128++;
|
||||
}
|
||||
|
||||
QRgb *p = reinterpret_cast<QRgb*>(d);
|
||||
QRgb *pe = p+spare;
|
||||
for (; p != pe; ++p) {
|
||||
if (*p < 0x00ffffff)
|
||||
*p = 0;
|
||||
else if (*p < 0xff000000)
|
||||
*p = qPremultiply(*p);
|
||||
SIMD_EPILOGUE(i, width, 3) {
|
||||
const quint32 p = d32[i];
|
||||
if (p <= 0x00ffffff)
|
||||
d32[i] = 0;
|
||||
else if (p < 0xff000000)
|
||||
d32[i] = qPremultiply(p);
|
||||
}
|
||||
|
||||
d = reinterpret_cast<__m128i*>(p+pad);
|
||||
d += bpl;
|
||||
}
|
||||
|
||||
if (data->format == QImage::Format_ARGB32)
|
||||
|
Loading…
x
Reference in New Issue
Block a user