QSharedPointer: optimize casts on rvalue shared pointers

When we are casting an rvalue QSharedPointer, we do not need to
pay the cost for the atomic refcount increment / decrement. Optimize
this by adding rvalue overloads that handle this specific case
directly.

Note that this is arguably a micro-optimization since in most cases
the cost to create the pointer in the first place is going to dwarf
the cost for the atomic increment / decrement. But it starts to matter
for situations like `someConstObject.ptrGetter().dynamicCast()` - in
the common case the `ptrGetter()` returns by value and the cast can
then operate on an rvalue.

On my system, the benchmark speaks for itself:

```
./tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr -perf -perfcounter cycles,instructions -iterations 100000 objectCast objectCast_rvalue
********* Start testing of tst_QSharedPointer *********
Config: Using QtTest library 6.9.0, Qt 6.9.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 14.2.1 20240805), arch unknown
PASS   : tst_QSharedPointer::initTestCase()
PASS   : tst_QSharedPointer::objectCast()
RESULT : tst_QSharedPointer::objectCast():
     147.05521 CPU cycles per iteration (total: 14,705,522, iterations: 100000)
     147.00058 instructions per iteration, 1.000 instr/cycle (total: 14,700,058, iterations: 100000)
PASS   : tst_QSharedPointer::objectCast_rvalue()
RESULT : tst_QSharedPointer::objectCast_rvalue():
     52.00227 CPU cycles per iteration (total: 5,200,227, iterations: 100000)
     110.00056 instructions per iteration, 2.115 instr/cycle (total: 11,000,057, iterations: 100000)
PASS   : tst_QSharedPointer::cleanupTestCase()
Totals: 4 passed, 0 failed, 0 skipped, 0 blacklisted, 45ms
********* Finished testing of tst_QSharedPointer *********

./tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr -perf -perfcounter cycles,instructions -iterations 100000 dynamicCast dynamicCast_rvalue
********* Start testing of tst_QSharedPointer *********
Config: Using QtTest library 6.9.0, Qt 6.9.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 14.2.1 20240802), arch unknown
PASS   : tst_QSharedPointer::initTestCase()
PASS   : tst_QSharedPointer::dynamicCast()
RESULT : tst_QSharedPointer::dynamicCast():
     148.34457 CPU cycles per iteration (total: 14,834,457, iterations: 100000)
     120.00057 instructions per iteration, 0.809 instr/cycle (total: 12,000,058, iterations: 100000)
PASS   : tst_QSharedPointer::dynamicCast_rvalue()
RESULT : tst_QSharedPointer::dynamicCast_rvalue():
     25.00210 CPU cycles per iteration (total: 2,500,211, iterations: 100000)
     81.00057 instructions per iteration, 3.240 instr/cycle (total: 8,100,058, iterations: 100000)
PASS   : tst_QSharedPointer::cleanupTestCase()
Totals: 4 passed, 0 failed, 0 skipped, 0 blacklisted, 45ms
********* Finished testing of tst_QSharedPointer *********

./tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr -perf -perfcounter cycles,instructions -iterations 100000 staticCast staticCast_rvalue
********* Start testing of tst_QSharedPointer *********
Config: Using QtTest library 6.9.0, Qt 6.9.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 14.2.1 20240802), arch unknown
PASS   : tst_QSharedPointer::initTestCase()
PASS   : tst_QSharedPointer::staticCast()
RESULT : tst_QSharedPointer::staticCast():
     142.95894 CPU cycles per iteration (total: 14,295,894, iterations: 100000)
     54.00057 instructions per iteration, 0.378 instr/cycle (total: 5,400,058, iterations: 100000)
PASS   : tst_QSharedPointer::staticCast_rvalue()
RESULT : tst_QSharedPointer::staticCast_rvalue():
     14.00205 CPU cycles per iteration (total: 1,400,205, iterations: 100000)
     22.00056 instructions per iteration, 1.571 instr/cycle (total: 2,200,057, iterations: 100000)
PASS   : tst_QSharedPointer::cleanupTestCase()
Totals: 4 passed, 0 failed, 0 skipped, 0 blacklisted, 50ms
********* Finished testing of tst_QSharedPointer *********

./tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr -perf -perfcounter cycles,instructions -iterations 100000 constCast constCast_rvalue
********* Start testing of tst_QSharedPointer *********
Config: Using QtTest library 6.9.0, Qt 6.9.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 14.2.1 20240802), arch unknown
PASS   : tst_QSharedPointer::initTestCase()
PASS   : tst_QSharedPointer::constCast()
RESULT : tst_QSharedPointer::constCast():
     142.38115 CPU cycles per iteration (total: 14,238,116, iterations: 100000)
     54.00057 instructions per iteration, 0.379 instr/cycle (total: 5,400,058, iterations: 100000)
PASS   : tst_QSharedPointer::constCast_rvalue()
RESULT : tst_QSharedPointer::constCast_rvalue():
     13.00243 CPU cycles per iteration (total: 1,300,243, iterations: 100000)
     22.00057 instructions per iteration, 1.692 instr/cycle (total: 2,200,058, iterations: 100000)
PASS   : tst_QSharedPointer::cleanupTestCase()
Totals: 4 passed, 0 failed, 0 skipped, 0 blacklisted, 42ms
********* Finished testing of tst_QSharedPointer *********
```

[ChangeLog][QtCore][QSharedPointer] Optimized casts on rvalue shared
pointers.

Change-Id: I7dfb4d92253d6c60286d3903bc7aef66acab5689
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Milian Wolff 2024-08-06 10:51:35 +02:00
parent 7a65abb50e
commit e95fb04202
3 changed files with 180 additions and 4 deletions

View File

@ -50,13 +50,21 @@ class QVariant;
// Forward declarations of the free-function casts on QSharedPointer.
// Every cast is declared twice: once taking the source by const reference
// and once taking it by rvalue reference.
template <class X, class T>
QSharedPointer<X> qSharedPointerCast(const QSharedPointer<T> &ptr);
template <class X, class T>
QSharedPointer<X> qSharedPointerCast(QSharedPointer<T> &&ptr);
template <class X, class T>
QSharedPointer<X> qSharedPointerDynamicCast(const QSharedPointer<T> &ptr);
template <class X, class T>
QSharedPointer<X> qSharedPointerDynamicCast(QSharedPointer<T> &&ptr);
template <class X, class T>
QSharedPointer<X> qSharedPointerConstCast(const QSharedPointer<T> &ptr);
template <class X, class T>
QSharedPointer<X> qSharedPointerConstCast(QSharedPointer<T> &&ptr);
// The object cast is only declared when QT_NO_QOBJECT is not defined.
#ifndef QT_NO_QOBJECT
template <class X, class T>
QSharedPointer<X> qSharedPointerObjectCast(const QSharedPointer<T> &ptr);
template <class X, class T>
QSharedPointer<X> qSharedPointerObjectCast(QSharedPointer<T> &&ptr);
#endif
namespace QtPrivate {
@ -67,6 +75,7 @@ namespace QtSharedPointer {
template <class T> class ExternalRefCount;
// Helpers the cast functions use to build their QSharedPointer<X> result from
// an already-converted raw pointer and the source shared pointer.
// copyAndSetPointer() takes the source by const reference, movePointer() by
// rvalue reference.
template <class X, class Y> QSharedPointer<X> copyAndSetPointer(X * ptr, const QSharedPointer<Y> &src);
template <class X, class Y> QSharedPointer<X> movePointer(X * ptr, QSharedPointer<Y> &&src);
// used in debug mode to verify the reuse of pointers
Q_CORE_EXPORT void internalSafetyCheckAdd(const void *, const volatile void *);
@ -369,29 +378,53 @@ public:
{ QSharedPointer copy(t, deleter); swap(copy); }
template <class X>
QSharedPointer<X> staticCast() const
QSharedPointer<X> staticCast() const &
{
return qSharedPointerCast<X, T>(*this);
}
template <class X>
QSharedPointer<X> dynamicCast() const
QSharedPointer<X> staticCast() &&
{
return qSharedPointerCast<X, T>(std::move(*this));
}
// Lvalue-qualified overload of dynamicCast(): passes *this as a const lvalue
// to qSharedPointerDynamicCast<X, T>() and returns its result.
template <class X>
QSharedPointer<X> dynamicCast() const &
{
return qSharedPointerDynamicCast<X, T>(*this);
}
template <class X>
QSharedPointer<X> constCast() const
QSharedPointer<X> dynamicCast() &&
{
return qSharedPointerDynamicCast<X, T>(std::move(*this));
}
// Lvalue-qualified overload of constCast(): passes *this as a const lvalue
// to qSharedPointerConstCast<X, T>() and returns its result.
template <class X>
QSharedPointer<X> constCast() const &
{
return qSharedPointerConstCast<X, T>(*this);
}
// Rvalue-qualified overload of constCast(), selected when the cast is invoked
// on a temporary or a std::move()d pointer: hands *this to
// qSharedPointerConstCast<X, T>() as an rvalue.
template <class X>
QSharedPointer<X> constCast() &&
{
return qSharedPointerConstCast<X, T>(std::move(*this));
}
#ifndef QT_NO_QOBJECT
template <class X>
QSharedPointer<X> objectCast() const
QSharedPointer<X> objectCast() const &
{
return qSharedPointerObjectCast<X, T>(*this);
}
// Rvalue-qualified overload of objectCast(), selected when the cast is invoked
// on a temporary or a std::move()d pointer: hands *this to
// qSharedPointerObjectCast<X, T>() as an rvalue.
template <class X>
QSharedPointer<X> objectCast() &&
{
return qSharedPointerObjectCast<X, T>(std::move(*this));
}
#endif
inline void clear() { QSharedPointer copy; swap(copy); }
@ -519,6 +552,7 @@ private:
template <class X> friend class QSharedPointer;
template <class X> friend class QWeakPointer;
template <class X, class Y> friend QSharedPointer<X> QtSharedPointer::copyAndSetPointer(X * ptr, const QSharedPointer<Y> &src);
template <class X, class Y> friend QSharedPointer<X> QtSharedPointer::movePointer(X * ptr, QSharedPointer<Y> &&src);
void ref() const noexcept { d->weakref.ref(); d->strongref.ref(); }
inline void internalSet(Data *o, T *actual)
@ -848,6 +882,16 @@ namespace QtSharedPointer {
result.internalSet(src.d, ptr);
return result;
}
// Builds a QSharedPointer<X> that points at ptr and takes over src's d pointer
// (the block holding the reference counts). No ref()/deref() call is made
// here: d is handed over as-is and src ends up with both d and value cleared.
// ptr is supplied by the caller; the cast functions pass the already-converted
// result of src.data().
template <class X, class T>
Q_INLINE_TEMPLATE QSharedPointer<X> movePointer(X *ptr, QSharedPointer<T> &&src)
{
QSharedPointer<X> result;
// take d from src and null it there in one step
result.d = std::exchange(src.d, nullptr);
result.value.reset(ptr);
// src gave up its d, so it must not keep pointing at the object either
src.value.reset(nullptr);
return result;
}
}
// cast operators
@ -858,6 +902,12 @@ Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerCast(const QSharedPointer<T> &
return QtSharedPointer::copyAndSetPointer(ptr, src);
}
// Rvalue overload of qSharedPointerCast(): static_casts the pointer held by
// src to X* and transfers src into the result through movePointer().
template <class X, class T>
Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerCast(QSharedPointer<T> &&src)
{
// read and convert the pointer first; movePointer() clears src afterwards
X *ptr = static_cast<X *>(src.data()); // if you get an error in this line, the cast is invalid
return QtSharedPointer::movePointer(ptr, std::move(src));
}
template <class X, class T>
Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerCast(const QWeakPointer<T> &src)
{
return qSharedPointerCast<X, T>(src.toStrongRef());
@ -872,6 +922,14 @@ Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerDynamicCast(const QSharedPoint
return QtSharedPointer::copyAndSetPointer(ptr, src);
}
// Rvalue overload of qSharedPointerDynamicCast().
// When the dynamic_cast succeeds, src is transferred into the returned pointer
// through movePointer(). When it fails, an empty QSharedPointer<X> is returned
// and src is not touched.
template <class X, class T>
Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerDynamicCast(QSharedPointer<T> &&src)
{
    if (X *converted = dynamic_cast<X *>(src.data())) // if you get an error in this line, the cast is invalid
        return QtSharedPointer::movePointer(converted, std::move(src));
    return QSharedPointer<X>();
}
template <class X, class T>
Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerDynamicCast(const QWeakPointer<T> &src)
{
return qSharedPointerDynamicCast<X, T>(src.toStrongRef());
@ -884,6 +942,12 @@ Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerConstCast(const QSharedPointer
return QtSharedPointer::copyAndSetPointer(ptr, src);
}
// Rvalue overload of qSharedPointerConstCast(): const_casts the pointer held
// by src to X* and transfers src into the result through movePointer().
template <class X, class T>
Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerConstCast(QSharedPointer<T> &&src)
{
// read and convert the pointer first; movePointer() clears src afterwards
X *ptr = const_cast<X *>(src.data()); // if you get an error in this line, the cast is invalid
return QtSharedPointer::movePointer(ptr, std::move(src));
}
template <class X, class T>
Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerConstCast(const QWeakPointer<T> &src)
{
return qSharedPointerConstCast<X, T>(src.toStrongRef());
@ -906,6 +970,14 @@ Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerObjectCast(const QSharedPointe
return QtSharedPointer::copyAndSetPointer(ptr, src);
}
// Rvalue overload of qSharedPointerObjectCast().
// When qobject_cast succeeds, src is transferred into the returned pointer
// through movePointer(). When it fails, an empty QSharedPointer<X> is returned
// and src is not touched.
template <class X, class T>
Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerObjectCast(QSharedPointer<T> &&src)
{
    if (X *converted = qobject_cast<X *>(src.data()))
        return QtSharedPointer::movePointer(converted, std::move(src));
    return QSharedPointer<X>();
}
template <class X, class T>
Q_INLINE_TEMPLATE QSharedPointer<X> qSharedPointerObjectCast(const QWeakPointer<T> &src)
{
return qSharedPointerObjectCast<X>(src.toStrongRef());
@ -919,6 +991,12 @@ qobject_cast(const QSharedPointer<T> &src)
}
// Rvalue overload of qobject_cast() for QSharedPointer: strips the pointer
// from X via QtSharedPointer::RemovePointer and forwards src, as an rvalue,
// to qSharedPointerObjectCast().
template <class X, class T>
inline QSharedPointer<typename QtSharedPointer::RemovePointer<X>::Type>
qobject_cast(QSharedPointer<T> &&src)
{
    using Target = typename QtSharedPointer::RemovePointer<X>::Type;
    return qSharedPointerObjectCast<Target, T>(std::move(src));
}
template <class X, class T>
inline QSharedPointer<typename QtSharedPointer::RemovePointer<X>::Type>
qobject_cast(const QWeakPointer<T> &src)
{
return qSharedPointerObjectCast<typename QtSharedPointer::RemovePointer<X>::Type, T>(src);

View File

@ -1041,6 +1041,11 @@ void tst_QSharedPointer::objectCast()
// again:
ptr = qobject_cast<QSharedPointer<OtherObject> >(baseptr);
QVERIFY(ptr == data);
// again:
ptr = qobject_cast<OtherObject *>(std::move(baseptr));
QVERIFY(ptr == data);
QVERIFY(!baseptr);
}
safetyCheck();
@ -1067,6 +1072,11 @@ void tst_QSharedPointer::objectCast()
// again:
ptr = qobject_cast<QSharedPointer<const OtherObject> >(baseptr);
QVERIFY(ptr == data);
// again:
ptr = qobject_cast<const OtherObject *>(std::move(baseptr));
QVERIFY(ptr == data);
QVERIFY(!baseptr);
}
safetyCheck();
@ -1116,10 +1126,12 @@ void tst_QSharedPointer::objectCastFailureNoLeak()
auto ptr = QSharedPointer<QObject>::create();
auto qptr = QPointer(ptr.data());
auto ptr2 = ptr.objectCast<tst_QSharedPointer>();
auto ptr3 = std::move(ptr).objectCast<tst_QSharedPointer>();
QVERIFY(ptr);
QVERIFY(qptr);
QVERIFY(!ptr2);
QVERIFY(!ptr3);
ptr.reset();
QVERIFY(!ptr);
@ -1405,6 +1417,12 @@ void tst_QSharedPointer::dynamicCast()
}
QCOMPARE(int(refCountData(baseptr)->weakref.loadRelaxed()), 1);
QCOMPARE(int(refCountData(baseptr)->strongref.loadRelaxed()), 1);
{
QSharedPointer<DerivedData> derivedptr = std::move(baseptr).dynamicCast<DerivedData>();
QCOMPARE(derivedptr.data(), aData);
}
QVERIFY(!baseptr);
}
void tst_QSharedPointer::dynamicCastDifferentPointers()
@ -1451,6 +1469,12 @@ void tst_QSharedPointer::dynamicCastDifferentPointers()
QCOMPARE(otherbaseptr.data(), nakedptr);
QCOMPARE(static_cast<DiffPtrDerivedData*>(otherbaseptr.data()), aData);
}
{
QSharedPointer<DiffPtrDerivedData> derivedptr = std::move(baseptr).dynamicCast<DiffPtrDerivedData>();
QCOMPARE(derivedptr.data(), aData);
}
QVERIFY(!baseptr);
}
void tst_QSharedPointer::dynamicCastVirtualBase()

View File

@ -32,6 +32,14 @@ BOOST_NORETURN void boost::throw_exception(const std::exception &, const boost::
# define ONLY_IF_BOOST(x) QSKIP("This benchmark requires Boost.SharedPtr.")
#endif
// Minimal QObject subclass. The staticCast/dynamicCast/objectCast benchmarks
// use it as the derived type that is cast to QObject and back.
class SomeQObjectType : public QObject
{
Q_OBJECT
public:
// inherit QObject's constructors unchanged
using QObject::QObject;
~SomeQObjectType() override = default;
};
class tst_QSharedPointer : public QObject
{
Q_OBJECT
@ -104,6 +112,72 @@ private:
[[maybe_unused]] auto copy = sp;
}
}
private Q_SLOTS:
// Benchmarks constCast() invoked on lvalues: each iteration casts source from
// const QObject to QObject and back, storing the result in source again.
void constCast()
{
auto source = QSharedPointer<const QObject>::create();
QBENCHMARK {
auto copy = source.constCast<QObject>();
source = copy.constCast<const QObject>();
}
}
// Benchmarks constCast() invoked on std::move()d pointers: each iteration
// casts source from const QObject to QObject and back, so the rvalue-qualified
// overload is the one being measured.
void constCast_rvalue()
{
auto source = QSharedPointer<const QObject>::create();
QBENCHMARK {
auto moved = std::move(source).constCast<QObject>();
// assigning the result back gives source a value again for the next iteration
source = std::move(moved).constCast<const QObject>();
}
}
// Benchmarks staticCast() invoked on lvalues: each iteration casts source from
// SomeQObjectType to QObject and back, storing the result in source again.
void staticCast()
{
auto source = QSharedPointer<SomeQObjectType>::create();
QBENCHMARK {
auto copy = source.staticCast<QObject>();
source = copy.staticCast<SomeQObjectType>();
}
}
// Benchmarks staticCast() invoked on std::move()d pointers: each iteration
// casts source from SomeQObjectType to QObject and back, so the
// rvalue-qualified overload is the one being measured.
void staticCast_rvalue()
{
auto source = QSharedPointer<SomeQObjectType>::create();
QBENCHMARK {
auto moved = std::move(source).staticCast<QObject>();
// assigning the result back gives source a value again for the next iteration
source = std::move(moved).staticCast<SomeQObjectType>();
}
}
// Benchmarks dynamicCast() invoked on lvalues: each iteration casts source
// from SomeQObjectType to QObject and back, storing the result in source again.
void dynamicCast()
{
auto source = QSharedPointer<SomeQObjectType>::create();
QBENCHMARK {
auto copy = source.dynamicCast<QObject>();
source = copy.dynamicCast<SomeQObjectType>();
}
}
// Benchmarks dynamicCast() invoked on std::move()d pointers: each iteration
// casts source from SomeQObjectType to QObject and back, so the
// rvalue-qualified overload is the one being measured.
void dynamicCast_rvalue()
{
auto source = QSharedPointer<SomeQObjectType>::create();
QBENCHMARK {
auto moved = std::move(source).dynamicCast<QObject>();
// assigning the result back gives source a value again for the next iteration
source = std::move(moved).dynamicCast<SomeQObjectType>();
}
}
// Benchmarks objectCast() invoked on lvalues: each iteration casts source from
// SomeQObjectType to QObject and back, storing the result in source again.
void objectCast()
{
auto source = QSharedPointer<SomeQObjectType>::create();
QBENCHMARK {
auto copy = source.objectCast<QObject>();
source = copy.objectCast<SomeQObjectType>();
}
}
// Benchmarks objectCast() invoked on std::move()d pointers: each iteration
// casts source from SomeQObjectType to QObject and back, so the
// rvalue-qualified overload is the one being measured.
void objectCast_rvalue()
{
auto source = QSharedPointer<SomeQObjectType>::create();
QBENCHMARK {
auto moved = std::move(source).objectCast<QObject>();
// assigning the result back gives source a value again for the next iteration
source = std::move(moved).objectCast<SomeQObjectType>();
}
}
};
QTEST_MAIN(tst_QSharedPointer)