// Review summary of the patch below. These lines sit ahead of the first "diff --git" header and
// are outside every hunk; the patch text itself is unchanged.
//
// What the patch does, file by file:
//  * qcolortrclut_p.h: adds the member `ushort m_unclampedToLinear = Resolution` to the LUT class.
//  * qcolortrclut.cpp: setFromTransferFunction() and setFromTransferGenericFunction() round
//    fun.apply(i * iRes) * (255 * 256) to an int, set m_unclampedToLinear to i for an index whose
//    rounded value is above 65280 (only when i is below the current member value), and store
//    qBound(0, val, 65280) in m_toLinear[i]. setFromGamma() gets the same int-based clamping but
//    does not touch m_unclampedToLinear.
//  * qcolortransform.cpp, integer loaders (one pair using __m128i, one pair using uint32x4_t):
//    vRangeMax holds lut[n]->m_unclampedToLinear per channel (lut[0] and lut[2] swapped when
//    isARGB) and QColorTrcLut::Resolution in the fourth lane. When no lane of v is greater than
//    vRangeMax the existing m_toLinear lookup is used; otherwise x/y/z are computed with
//    trc[n].applyExtended(index * (1.f / QColorTrcLut::Resolution)).
//  * qcolortransform.cpp, QRgbaFloat32 loaders: the "over" test compares against
//    m_unclampedToLinear * (1 / Resolution) per channel, with INFINITY in the fourth lane,
//    replacing the former comparison against 1.0f.
//  * qcolortransform.cpp: new helper test_all_zero(uint32x4_t); with Q_PROCESSOR_ARM_64 it checks
//    vaddvq_u32(p) == 0, otherwise it reduces with two vpadd_u32 steps and checks lane 0.
//  * tst_qcolorspace.cpp: hdrColorSpaces() builds bt2020linear with the
//    (Primaries::Bt2020, TransferFunction::Linear) constructor and adds four 1x1 image checks
//    (Format_RGBA32FPx4, Format_RGBA32FPx4_Premultiplied, Format_ARGB32,
//    Format_ARGB32_Premultiplied) expecting redF() == 64.f from Bt2100Pq and 12.f from Bt2100Hlg.
//
// NOTE(review): the range checks are strict greater-than, while m_unclampedToLinear is itself an
//   index whose LUT entry was clamped to 65280, so that exact index still takes the LUT path and
//   yields the clamped value - confirm this one-entry tolerance is intended.
// NOTE(review): the applyExtended() branches assign only buffer[i].x/.y/.z, whereas the LUT
//   branches store four floats starting at &buffer[i].x - confirm the fourth component is not
//   read by later stages.
// NOTE(review): test_all_zero() adds the lanes together; in the visible hunks it is only fed
//   vcgtq_u32() results. Presumably those are per-lane masks (0 or all ones), for which the sum
//   cannot wrap to zero - it would not be a safe "all zero" test for arbitrary lane values.
// NOTE(review): m_unclampedToLinear is only ever lowered in the visible hunks; no reset back to
//   Resolution is shown for a LUT that is filled a second time - verify against the callers.
// NOTE(review): template argument lists look stripped in this copy of the patch (for example
//   "template static void", "std::unique_ptr m_toLinear", "reinterpret_cast(&p)", "isArgb()");
//   presumably an extraction artifact - check against the original change before applying.
diff --git a/src/gui/painting/qcolortransform.cpp b/src/gui/painting/qcolortransform.cpp index f54dac9f268..52fbebfa5db 100644 --- a/src/gui/painting/qcolortransform.cpp +++ b/src/gui/painting/qcolortransform.cpp @@ -448,6 +448,12 @@ static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetyp const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); constexpr bool isARGB = isArgb(); + const __m128i vRangeMax = _mm_setr_epi32(isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear, + isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear, + QColorTrcLut::Resolution); for (qsizetype i = 0; i < len; ++i) { __m128i v; loadP(src[i], v); @@ -468,12 +474,19 @@ static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetyp const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = isARGB ? 
_mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); - vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); + if (_mm_movemask_epi8(_mm_cmpgt_epi32(v, vRangeMax)) == 0) { + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); + vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); - _mm_storeu_ps(&buffer[i].x, vf); + _mm_storeu_ps(&buffer[i].x, vf); + } else { + constexpr float f = 1.f / QColorTrcLut::Resolution; + buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f); + buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f); + buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f); + } } } @@ -483,7 +496,11 @@ void loadPremultiplied(QColorVector *buffer, const QRgbaFloat32 *s const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256)); const __m128 vZero = _mm_set1_ps(0.0f); - const __m128 vOne = _mm_set1_ps(1.0f); + const float factor = 1.f / float(QColorTrcLut::Resolution); + const __m128 vRangeMax = _mm_setr_ps(d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear * factor, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear * factor, + d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear * factor, + INFINITY); for (qsizetype i = 0; i < len; ++i) { __m128 vf = _mm_loadu_ps(&src[i].r); // Approximate 1/a: @@ -499,7 +516,7 @@ void loadPremultiplied(QColorVector *buffer, const QRgbaFloat32 *s // LUT const __m128 under = _mm_cmplt_ps(vf, vZero); - const __m128 over = _mm_cmpgt_ps(vf, vOne); + const __m128 over = _mm_cmpgt_ps(vf, vRangeMax); if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) { // Within 
gamut __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); @@ -556,17 +573,30 @@ void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len { constexpr bool isARGB = isArgb(); const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); + const __m128i vRangeMax = _mm_setr_epi32(isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear, + isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear, + QColorTrcLut::Resolution); for (qsizetype i = 0; i < len; ++i) { __m128i v; loadPU(src[i], v); const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); - __m128 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); - _mm_storeu_ps(&buffer[i].x, vf); + if (_mm_movemask_epi8(_mm_cmpgt_epi32(v, vRangeMax)) == 0) { + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); + __m128 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); + _mm_storeu_ps(&buffer[i].x, vf); + } else { + constexpr float f = 1.f / QColorTrcLut::Resolution; + buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f); + buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f); + buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f); + } } } @@ -576,11 +606,15 @@ void loadUnpremultiplied(QColorVector *buffer, const QRgbaFloat32 const __m128 vTrcRes = 
_mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); const __m128 vZero = _mm_set1_ps(0.0f); - const __m128 vOne = _mm_set1_ps(1.0f); + const float factor = 1.f / float(QColorTrcLut::Resolution); + const __m128 vRangeMax = _mm_setr_ps(d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear * factor, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear * factor, + d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear * factor, + INFINITY); for (qsizetype i = 0; i < len; ++i) { __m128 vf = _mm_loadu_ps(&src[i].r); const __m128 under = _mm_cmplt_ps(vf, vZero); - const __m128 over = _mm_cmpgt_ps(vf, vOne); + const __m128 over = _mm_cmpgt_ps(vf, vRangeMax); if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) { // Within gamut __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); @@ -618,11 +652,28 @@ inline void loadP(const QRgba64 &p, uint32x4_t &v) v = vmovl_u16(vreinterpret_u16_u64(vld1_u64(reinterpret_cast(&p)))); } +static inline bool test_all_zero(uint32x4_t p) +{ +#if defined(Q_PROCESSOR_ARM_64) + return vaddvq_u32(p) == 0; +#else + const uint32x2_t tmp = vpadd_u32(vget_low_u32(p), vget_high_u32(p)); + return vget_lane_u32(vpadd_u32(tmp, tmp), 0) == 0; +#endif +} + template static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) { constexpr bool isARGB = isArgb(); const float iFF00 = 1.0f / (255 * 256); + const uint32x4_t vRangeMax = { + isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear, + isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear, + QColorTrcLut::Resolution }; for (qsizetype i = 0; i < len; ++i) { uint32x4_t v; loadP(src[i], v); @@ -648,12 +699,19 @@ static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetyp const int ridx = isARGB ? 
vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0); const int gidx = vgetq_lane_u32(v, 1); const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); - vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); + if (test_all_zero(vcgtq_u32(v, vRangeMax))) { + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); + vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); - vst1q_f32(&buffer[i].x, vf); + vst1q_f32(&buffer[i].x, vf); + } else { + constexpr float f = 1.f / QColorTrcLut::Resolution; + buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f); + buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f); + buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f); + } } } @@ -682,17 +740,31 @@ void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len { constexpr bool isARGB = isArgb(); const float iFF00 = 1.0f / (255 * 256); + const uint32x4_t vRangeMax = { + isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear, + isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear, + QColorTrcLut::Resolution }; for (qsizetype i = 0; i < len; ++i) { uint32x4_t v; loadPU(src[i], v); const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0); const int gidx = vgetq_lane_u32(v, 1); const int bidx = isARGB ? 
vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); - float32x4_t vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); - vst1q_f32(&buffer[i].x, vf); + if (test_all_zero(vcgtq_u32(v, vRangeMax))) { + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); + float32x4_t vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); + vst1q_f32(&buffer[i].x, vf); + } else { + constexpr float f = 1.f / QColorTrcLut::Resolution; + buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f); + buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f); + buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f); + } } } #else diff --git a/src/gui/painting/qcolortrclut.cpp b/src/gui/painting/qcolortrclut.cpp index 1357aa41a69..798f6a8b423 100644 --- a/src/gui/painting/qcolortrclut.cpp +++ b/src/gui/painting/qcolortrclut.cpp @@ -37,8 +37,10 @@ void QColorTrcLut::setFromGamma(float gamma, Direction dir) if (dir & ToLinear) { if (!m_toLinear) m_toLinear.reset(new ushort[Resolution + 1]); - for (int i = 0; i <= Resolution; ++i) - m_toLinear[i] = ushort(qRound(qBound(0.f, qPow(i * iRes, gamma), 1.f) * (255 * 256))); + for (int i = 0; i <= Resolution; ++i) { + const int val = qRound(qPow(i * iRes, gamma) * (255 * 256)); + m_toLinear[i] = qBound(0, val, 65280); + } } if (dir & FromLinear) { @@ -56,8 +58,12 @@ void QColorTrcLut::setFromTransferFunction(const QColorTransferFunction &fun, Di if (dir & ToLinear) { if (!m_toLinear) m_toLinear.reset(new ushort[Resolution + 1]); - for (int i = 0; i <= Resolution; ++i) - m_toLinear[i] = ushort(qRound(qBound(0.f, fun.apply(i * iRes), 1.f) * (255 * 256))); + for (int i 
= 0; i <= Resolution; ++i) { + const int val = qRound(fun.apply(i * iRes)* (255 * 256)); + if (val > 65280 && i < m_unclampedToLinear) + m_unclampedToLinear = i; + m_toLinear[i] = qBound(0, val, 65280); + } } if (dir & FromLinear) { @@ -75,8 +81,12 @@ void QColorTrcLut::setFromTransferGenericFunction(const QColorTransferGenericFun if (dir & ToLinear) { if (!m_toLinear) m_toLinear.reset(new ushort[Resolution + 1]); - for (int i = 0; i <= Resolution; ++i) - m_toLinear[i] = ushort(qRound(qBound(0.f, fun.apply(i * iRes), 1.f) * (255 * 256))); + for (int i = 0; i <= Resolution; ++i) { + const int val = qRound(fun.apply(i * iRes) * (255 * 256)); + if (val > 65280 && i < m_unclampedToLinear) + m_unclampedToLinear = i; + m_toLinear[i] = qBound(0, val, 65280); + } } if (dir & FromLinear) { diff --git a/src/gui/painting/qcolortrclut_p.h b/src/gui/painting/qcolortrclut_p.h index 15f7348836c..220b504a83e 100644 --- a/src/gui/painting/qcolortrclut_p.h +++ b/src/gui/painting/qcolortrclut_p.h @@ -206,6 +206,7 @@ public: // and to keep the tables small enough to fit in most inner caches. 
std::unique_ptr m_toLinear; // [0->Resolution] -> [0-65280] std::unique_ptr m_fromLinear; // [0->Resolution] -> [0-65280] + ushort m_unclampedToLinear = Resolution; private: QColorTrcLut() = default; diff --git a/tests/auto/gui/painting/qcolorspace/tst_qcolorspace.cpp b/tests/auto/gui/painting/qcolorspace/tst_qcolorspace.cpp index 04887a3350a..e23b3d21a11 100644 --- a/tests/auto/gui/painting/qcolorspace/tst_qcolorspace.cpp +++ b/tests/auto/gui/painting/qcolorspace/tst_qcolorspace.cpp @@ -1068,9 +1068,7 @@ void tst_QColorSpace::scaleAlphaValue() void tst_QColorSpace::hdrColorSpaces() { - QColorSpace bt2020linear = QColorSpace::Bt2020; - bt2020linear.setTransferFunction(QColorSpace::TransferFunction::Linear); - + QColorSpace bt2020linear(QColorSpace::Primaries::Bt2020, QColorSpace::TransferFunction::Linear); QColorTransform pqToLinear = QColorSpace(QColorSpace::Bt2100Pq).transformationToColorSpace(bt2020linear); QColorTransform hlgToLinear = QColorSpace(QColorSpace::Bt2100Hlg).transformationToColorSpace(bt2020linear); @@ -1085,6 +1083,47 @@ void tst_QColorSpace::hdrColorSpaces() QCOMPARE(pqToLinear.map(maxWhite).redF(), 64.f); QCOMPARE(pqToLinear.map(maxWhite).greenF(), 64.f); QCOMPARE(pqToLinear.map(maxWhite).blueF(), 64.f); + + { + QImage image(1, 1, QImage::Format_RGBA32FPx4); + image.setPixel(0, 0, qRgba(255, 255, 255, 255)); + image.setColorSpace(QColorSpace::Bt2100Pq); + QImage image2 = image.convertedToColorSpace(bt2020linear); + QCOMPARE(image2.pixelColor(0, 0).redF(), 64.f); + image.setColorSpace(QColorSpace::Bt2100Hlg); + image2 = image.convertedToColorSpace(bt2020linear); + QCOMPARE(image2.pixelColor(0, 0).redF(), 12.f); + } + { + QImage image(1, 1, QImage::Format_RGBA32FPx4_Premultiplied); + image.setPixel(0, 0, qRgba(255, 255, 255, 255)); + image.setColorSpace(QColorSpace::Bt2100Pq); + QImage image2 = image.convertedToColorSpace(bt2020linear); + QCOMPARE(image2.pixelColor(0, 0).redF(), 64.f); + image.setColorSpace(QColorSpace::Bt2100Hlg); + image2 = 
image.convertedToColorSpace(bt2020linear); + QCOMPARE(image2.pixelColor(0, 0).redF(), 12.f); + } + { + QImage image(1, 1, QImage::Format_ARGB32); + image.setPixel(0, 0, qRgba(255, 255, 255, 255)); + image.setColorSpace(QColorSpace::Bt2100Pq); + QImage image2 = image.convertedToColorSpace(bt2020linear, QImage::Format_RGBA32FPx4); + QCOMPARE(image2.pixelColor(0, 0).redF(), 64.f); + image.setColorSpace(QColorSpace::Bt2100Hlg); + image2 = image.convertedToColorSpace(bt2020linear, QImage::Format_RGBA32FPx4); + QCOMPARE(image2.pixelColor(0, 0).redF(), 12.f); + } + { + QImage image(1, 1, QImage::Format_ARGB32_Premultiplied); + image.setPixel(0, 0, qRgba(255, 255, 255, 255)); + image.setColorSpace(QColorSpace::Bt2100Pq); + QImage image2 = image.convertedToColorSpace(bt2020linear, QImage::Format_RGBA32FPx4); + QCOMPARE(image2.pixelColor(0, 0).redF(), 64.f); + image.setColorSpace(QColorSpace::Bt2100Hlg); + image2 = image.convertedToColorSpace(bt2020linear, QImage::Format_RGBA32FPx4); + QCOMPARE(image2.pixelColor(0, 0).redF(), 12.f); + } } QTEST_MAIN(tst_QColorSpace)