Remove unfinished NEON version of vectorized rotate sampling

A vectorized codepath for rotating transforms was added in 5.3.0, but
it was only properly tested for SSE2. The NEON version remains unfinished.
Since it never worked, this patch removes the NEON version.

Task-number: QTBUG-39445
Change-Id: Ifbce0e03781d217ad976c6b18ac88381055cba66
Reviewed-by: Gunnar Sletta <gunnar.sletta@jollamobile.com>
This commit is contained in:
Allan Sandfeld Jensen 2014-06-05 11:18:47 +02:00 committed by The Qt Project
parent a1c5198387
commit 3441738d5f

View File

@ -1798,71 +1798,6 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
}
fx = v_fx.i[0];
fy = v_fy.i[0];
#elif defined(__ARM_NEON__)
BILINEAR_ROTATE_BOUNDS_PROLOG
const int16x8_t colorMask = vdupq_n_s16(0x00ff);
const int16x8_t invColorMask = vmvnq_s16(colorMask);
const int16x8_t v_256 = vdupq_n_s16(256);
int32x4_t v_fdx = vdupq_n_s32(fdx*4);
int32x4_t v_fdy = vdupq_n_s32(fdy*4);
const uchar *textureData = data->texture.imageData;
const int bytesPerLine = data->texture.bytesPerLine;
union Vect_buffer { int32x4_t vect; quint32 i[4]; };
Vect_buffer v_fx, v_fy;
for (int i = 0; i < 4; i++) {
v_fx.i[i] = fx;
v_fy.i[i] = fy;
fx += fdx;
fy += fdy;
}
const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
while (b < boundedEnd) {
if (fdx > 0 && (v_fx.i[3] >> 16) >= image_x2)
break;
if (fdx < 0 && (v_fx.i[3] >> 16) < image_x1)
break;
if (fdy > 0 && (v_fy.i[3] >> 16) >= image_y2)
break;
if (fdy < 0 && (v_fy.i[3] >> 16) < image_y1)
break;
Vect_buffer tl, tr, bl, br;
Vect_buffer v_fx_shifted, v_fy_shifted;
v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
v_fy_shifted.vect = vshrq_n_s32(v_fy.vect, 16);
for (int i = 0; i < 4; i++) {
const int x1 = v_fx_shifted.i[i];
const int y1 = v_fy_shifted.i[i];
const uchar *sl = textureData + bytesPerLine * y1;
const uint *s1 = (const uint *)sl;
const uint *s2 = (const uint *)(sl + bytesPerLine);
tl.i[i] = s1[x1];
tr.i[i] = s1[x1+1];
bl.i[i] = s2[x1];
br.i[i] = s2[x1+1];
}
int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12);
int32x4_t v_disty = vshrq_n_s32(vandq_s32(v_fy.vect, v_ffff_mask), 12);
v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16));
int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
interpolate_4_pixels_16_neon(vreinterpretq_s16_s32(tl.vect), vreinterpretq_s16_s32(tr.vect), vreinterpretq_s16_s32(bl.vect), vreinterpretq_s16_s32(br.vect), vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, colorMask, invColorMask, v_256, b);
b+=4;
v_fx.vect = vaddq_s32(v_fx.vect, v_fdx);
v_fy.vect = vaddq_s32(v_fy.vect, v_fdy);
}
fx = v_fx.i[0];
fy = v_fy.i[0];
#endif
}