From e95fb04202b3f786037c10942fd4e912644fd7d2 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Tue, 6 Aug 2024 10:51:35 +0200 Subject: [PATCH] QSharedPointer: optimize casts on rvalue shared pointers When we are casting an rvalue QSharedPointer, we do not need to pay the cost for the atomic refcount increment / decrement. Optimize this by adding rvalue overloads that handle this specific case directly. Note that this is arguably a micro optimization since in most cases the cost to create the pointer in the first place is going to dwarf the cost for the atomic increment / decrement. But it starts to matter for situations like `someConstObject.ptrGetter().dynamicCast()` - in the common case the `ptrGetter()` returns by value and the cast can then operate on an rvalue. On my system, the benchmark speaks for itself: ``` ./tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr -perf -perfcounter cycles,instructions -iterations 100000 objectCast objectCast_rvalue ********* Start testing of tst_QSharedPointer ********* Config: Using QtTest library 6.9.0, Qt 6.9.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 14.2.1 20240805), arch unknown PASS : tst_QSharedPointer::initTestCase() PASS : tst_QSharedPointer::objectCast() RESULT : tst_QSharedPointer::objectCast(): 147.05521 CPU cycles per iteration (total: 14,705,522, iterations: 100000) 147.00058 instructions per iteration, 1.000 instr/cycle (total: 14,700,058, iterations: 100000) PASS : tst_QSharedPointer::objectCast_rvalue() RESULT : tst_QSharedPointer::objectCast_rvalue(): 52.00227 CPU cycles per iteration (total: 5,200,227, iterations: 100000) 110.00056 instructions per iteration, 2.115 instr/cycle (total: 11,000,057, iterations: 100000) PASS : tst_QSharedPointer::cleanupTestCase() Totals: 4 passed, 0 failed, 0 skipped, 0 blacklisted, 45ms ********* Finished testing of tst_QSharedPointer ********* ./tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr -perf -perfcounter cycles,instructions -iterations 100000 dynamicCast dynamicCast_rvalue ********* Start testing of tst_QSharedPointer ********* Config: Using QtTest library 6.9.0, Qt 6.9.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 14.2.1 20240802), arch unknown PASS : tst_QSharedPointer::initTestCase() PASS : tst_QSharedPointer::dynamicCast() RESULT : tst_QSharedPointer::dynamicCast(): 148.34457 CPU cycles per iteration (total: 14,834,457, iterations: 100000) 120.00057 instructions per iteration, 0.809 instr/cycle (total: 12,000,058, iterations: 100000) PASS : tst_QSharedPointer::dynamicCast_rvalue() RESULT : tst_QSharedPointer::dynamicCast_rvalue(): 25.00210 CPU cycles per iteration (total: 2,500,211, iterations: 100000) 81.00057 instructions per iteration, 3.240 instr/cycle (total: 8,100,058, iterations: 100000) PASS : tst_QSharedPointer::cleanupTestCase() Totals: 4 passed, 0 failed, 0 skipped, 0 blacklisted, 45ms ********* Finished testing of tst_QSharedPointer ********* ./tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr -perf -perfcounter cycles,instructions -iterations 100000 staticCast staticCast_rvalue ********* Start testing of tst_QSharedPointer ********* Config: Using QtTest library 6.9.0, Qt 6.9.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 14.2.1 20240802), arch unknown PASS : tst_QSharedPointer::initTestCase() PASS : tst_QSharedPointer::staticCast() RESULT : tst_QSharedPointer::staticCast(): 142.95894 CPU cycles per iteration (total: 14,295,894, iterations: 100000) 54.00057 instructions per iteration, 0.378 instr/cycle (total: 5,400,058, iterations: 100000) PASS : tst_QSharedPointer::staticCast_rvalue() RESULT : tst_QSharedPointer::staticCast_rvalue(): 14.00205 CPU cycles per iteration (total: 1,400,205, iterations: 100000) 22.00056 instructions per iteration, 1.571 instr/cycle (total: 2,200,057, iterations: 100000) PASS : tst_QSharedPointer::cleanupTestCase() Totals: 4 passed, 0 failed, 0 skipped, 0 blacklisted, 50ms ********* Finished testing of tst_QSharedPointer ********* ./tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr -perf -perfcounter cycles,instructions -iterations 100000 constCast constCast_rvalue ********* Start testing of tst_QSharedPointer ********* Config: Using QtTest library 6.9.0, Qt 6.9.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 14.2.1 20240802), arch unknown PASS : tst_QSharedPointer::initTestCase() PASS : tst_QSharedPointer::constCast() RESULT : tst_QSharedPointer::constCast(): 142.38115 CPU cycles per iteration (total: 14,238,116, iterations: 100000) 54.00057 instructions per iteration, 0.379 instr/cycle (total: 5,400,058, iterations: 100000) PASS : tst_QSharedPointer::constCast_rvalue() RESULT : tst_QSharedPointer::constCast_rvalue(): 13.00243 CPU cycles per iteration (total: 1,300,243, iterations: 100000) 22.00057 instructions per iteration, 1.692 instr/cycle (total: 2,200,058, iterations: 100000) PASS : tst_QSharedPointer::cleanupTestCase() Totals: 4 passed, 0 failed, 0 skipped, 0 blacklisted, 42ms ********* Finished testing of tst_QSharedPointer ********* ``` [ChangeLog][QtCore][QSharedPointer] Optimized casts on rvalue shared pointers. Change-Id: I7dfb4d92253d6c60286d3903bc7aef66acab5689 Reviewed-by: Thiago Macieira --- src/corelib/tools/qsharedpointer_impl.h | 86 ++++++++++++++++++- .../qsharedpointer/tst_qsharedpointer.cpp | 24 ++++++ .../qsharedpointer/tst_bench_shared_ptr.cpp | 74 ++++++++++++++++ 3 files changed, 180 insertions(+), 4 deletions(-) diff --git a/src/corelib/tools/qsharedpointer_impl.h b/src/corelib/tools/qsharedpointer_impl.h index 9816c6198fc..4a6a2398b0f 100644 --- a/src/corelib/tools/qsharedpointer_impl.h +++ b/src/corelib/tools/qsharedpointer_impl.h @@ -50,13 +50,21 @@ class QVariant; template QSharedPointer qSharedPointerCast(const QSharedPointer &ptr); template +QSharedPointer qSharedPointerCast(QSharedPointer &&ptr); +template QSharedPointer qSharedPointerDynamicCast(const QSharedPointer &ptr); template +QSharedPointer qSharedPointerDynamicCast(QSharedPointer &&ptr); +template QSharedPointer qSharedPointerConstCast(const QSharedPointer &ptr); +template +QSharedPointer qSharedPointerConstCast(QSharedPointer &&ptr); #ifndef QT_NO_QOBJECT template QSharedPointer qSharedPointerObjectCast(const QSharedPointer &ptr); +template +QSharedPointer qSharedPointerObjectCast(QSharedPointer &&ptr); #endif namespace QtPrivate { @@ -67,6 +75,7 @@ namespace QtSharedPointer { template class ExternalRefCount; template QSharedPointer copyAndSetPointer(X * ptr, const QSharedPointer &src); + template QSharedPointer movePointer(X * ptr, QSharedPointer &&src); // used in debug mode to verify the reuse of pointers Q_CORE_EXPORT void internalSafetyCheckAdd(const void *, const volatile void *); @@ -369,29 +378,53 @@ public: { QSharedPointer copy(t, deleter); swap(copy); } template - QSharedPointer staticCast() const + QSharedPointer staticCast() const & { return qSharedPointerCast(*this); } template - QSharedPointer dynamicCast() const + QSharedPointer staticCast() && + { + return qSharedPointerCast(std::move(*this)); + } + + template + QSharedPointer dynamicCast() const & { return qSharedPointerDynamicCast(*this); } template - QSharedPointer constCast() const + QSharedPointer dynamicCast() && + { + return qSharedPointerDynamicCast(std::move(*this)); + } + + template + QSharedPointer constCast() const & { return qSharedPointerConstCast(*this); } + template + QSharedPointer constCast() && + { + return qSharedPointerConstCast(std::move(*this)); + } + #ifndef QT_NO_QOBJECT template - QSharedPointer objectCast() const + QSharedPointer objectCast() const & { return qSharedPointerObjectCast(*this); } + + template + QSharedPointer objectCast() && + { + return qSharedPointerObjectCast(std::move(*this)); + } #endif inline void clear() { QSharedPointer copy; swap(copy); } @@ -519,6 +552,7 @@ private: template friend class QSharedPointer; template friend class QWeakPointer; template friend QSharedPointer QtSharedPointer::copyAndSetPointer(X * ptr, const QSharedPointer &src); + template friend QSharedPointer QtSharedPointer::movePointer(X * ptr, QSharedPointer &&src); void ref() const noexcept { d->weakref.ref(); d->strongref.ref(); } inline void internalSet(Data *o, T *actual) @@ -848,6 +882,16 @@ namespace QtSharedPointer { result.internalSet(src.d, ptr); return result; } + + template + Q_INLINE_TEMPLATE QSharedPointer movePointer(X *ptr, QSharedPointer &&src) + { + QSharedPointer result; + result.d = std::exchange(src.d, nullptr); + result.value.reset(ptr); + src.value.reset(nullptr); + return result; + } } // cast operators @@ -858,6 +902,12 @@ Q_INLINE_TEMPLATE QSharedPointer qSharedPointerCast(const QSharedPointer & return QtSharedPointer::copyAndSetPointer(ptr, src); } template +Q_INLINE_TEMPLATE QSharedPointer qSharedPointerCast(QSharedPointer &&src) +{ + X *ptr = static_cast(src.data()); // if you get an error in this line, the cast is invalid + return QtSharedPointer::movePointer(ptr, std::move(src)); +} +template Q_INLINE_TEMPLATE QSharedPointer qSharedPointerCast(const QWeakPointer &src) { return qSharedPointerCast(src.toStrongRef()); @@ -872,6 +922,14 @@ Q_INLINE_TEMPLATE QSharedPointer qSharedPointerDynamicCast(const QSharedPoint return QtSharedPointer::copyAndSetPointer(ptr, src); } template +Q_INLINE_TEMPLATE QSharedPointer qSharedPointerDynamicCast(QSharedPointer &&src) +{ + X *ptr = dynamic_cast(src.data()); // if you get an error in this line, the cast is invalid + if (!ptr) + return QSharedPointer(); + return QtSharedPointer::movePointer(ptr, std::move(src)); +} +template Q_INLINE_TEMPLATE QSharedPointer qSharedPointerDynamicCast(const QWeakPointer &src) { return qSharedPointerDynamicCast(src.toStrongRef()); @@ -884,6 +942,12 @@ Q_INLINE_TEMPLATE QSharedPointer qSharedPointerConstCast(const QSharedPointer return QtSharedPointer::copyAndSetPointer(ptr, src); } template +Q_INLINE_TEMPLATE QSharedPointer qSharedPointerConstCast(QSharedPointer &&src) +{ + X *ptr = const_cast(src.data()); // if you get an error in this line, the cast is invalid + return QtSharedPointer::movePointer(ptr, std::move(src)); +} +template Q_INLINE_TEMPLATE QSharedPointer qSharedPointerConstCast(const QWeakPointer &src) { return qSharedPointerConstCast(src.toStrongRef()); @@ -906,6 +970,14 @@ Q_INLINE_TEMPLATE QSharedPointer qSharedPointerObjectCast(const QSharedPointe return QtSharedPointer::copyAndSetPointer(ptr, src); } template +Q_INLINE_TEMPLATE QSharedPointer qSharedPointerObjectCast(QSharedPointer &&src) +{ + X *ptr = qobject_cast(src.data()); + if (!ptr) + return QSharedPointer(); + return QtSharedPointer::movePointer(ptr, std::move(src)); +} +template Q_INLINE_TEMPLATE QSharedPointer qSharedPointerObjectCast(const QWeakPointer &src) { return qSharedPointerObjectCast(src.toStrongRef()); @@ -919,6 +991,12 @@ qobject_cast(const QSharedPointer &src) } template inline QSharedPointer::Type> +qobject_cast(QSharedPointer &&src) +{ + return qSharedPointerObjectCast::Type, T>(std::move(src)); +} +template +inline QSharedPointer::Type> qobject_cast(const QWeakPointer &src) { return qSharedPointerObjectCast::Type, T>(src); diff --git a/tests/auto/corelib/tools/qsharedpointer/tst_qsharedpointer.cpp b/tests/auto/corelib/tools/qsharedpointer/tst_qsharedpointer.cpp index 1579d04bdf0..8d80ebb29f8 100644 --- a/tests/auto/corelib/tools/qsharedpointer/tst_qsharedpointer.cpp +++ b/tests/auto/corelib/tools/qsharedpointer/tst_qsharedpointer.cpp @@ -1041,6 +1041,11 @@ void tst_QSharedPointer::objectCast() // again: ptr = qobject_cast >(baseptr); QVERIFY(ptr == data); + + // again: + ptr = qobject_cast(std::move(baseptr)); + QVERIFY(ptr == data); + QVERIFY(!baseptr); } safetyCheck(); @@ -1067,6 +1072,11 @@ void tst_QSharedPointer::objectCast() // again: ptr = qobject_cast >(baseptr); QVERIFY(ptr == data); + + // again: + ptr = qobject_cast(std::move(baseptr)); + QVERIFY(ptr == data); + QVERIFY(!baseptr); } safetyCheck(); @@ -1116,10 +1126,12 @@ void tst_QSharedPointer::objectCastFailureNoLeak() auto ptr = QSharedPointer::create(); auto qptr = QPointer(ptr.data()); auto ptr2 = ptr.objectCast(); + auto ptr3 = std::move(ptr).objectCast(); QVERIFY(ptr); QVERIFY(qptr); QVERIFY(!ptr2); + QVERIFY(!ptr3); ptr.reset(); QVERIFY(!ptr); @@ -1405,6 +1417,12 @@ void tst_QSharedPointer::dynamicCast() } QCOMPARE(int(refCountData(baseptr)->weakref.loadRelaxed()), 1); QCOMPARE(int(refCountData(baseptr)->strongref.loadRelaxed()), 1); + + { + QSharedPointer derivedptr = std::move(baseptr).dynamicCast(); + QCOMPARE(derivedptr.data(), aData); + } + QVERIFY(!baseptr); } void tst_QSharedPointer::dynamicCastDifferentPointers() @@ -1451,6 +1469,12 @@ void tst_QSharedPointer::dynamicCastDifferentPointers() QCOMPARE(otherbaseptr.data(), nakedptr); QCOMPARE(static_cast(otherbaseptr.data()), aData); } + + { + QSharedPointer derivedptr = std::move(baseptr).dynamicCast(); + QCOMPARE(derivedptr.data(), aData); + } + QVERIFY(!baseptr); } void tst_QSharedPointer::dynamicCastVirtualBase() diff --git a/tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr.cpp b/tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr.cpp index 6197863601e..726eea963b2 100644 --- a/tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr.cpp +++ b/tests/benchmarks/corelib/tools/qsharedpointer/tst_bench_shared_ptr.cpp @@ -32,6 +32,14 @@ BOOST_NORETURN void boost::throw_exception(const std::exception &, const boost:: # define ONLY_IF_BOOST(x) QSKIP("This benchmark requires Boost.SharedPtr.") #endif +class SomeQObjectType : public QObject +{ + Q_OBJECT +public: + using QObject::QObject; + ~SomeQObjectType() override = default; +}; + class tst_QSharedPointer : public QObject { Q_OBJECT @@ -104,6 +112,72 @@ private: [[maybe_unused]] auto copy = sp; } } + +private Q_SLOTS: + void constCast() + { + auto source = QSharedPointer::create(); + QBENCHMARK { + auto copy = source.constCast(); + source = copy.constCast(); + } + } + void constCast_rvalue() + { + auto source = QSharedPointer::create(); + QBENCHMARK { + auto moved = std::move(source).constCast(); + source = std::move(moved).constCast(); + } + } + void staticCast() + { + auto source = QSharedPointer::create(); + QBENCHMARK { + auto copy = source.staticCast(); + source = copy.staticCast(); + } + } + void staticCast_rvalue() + { + auto source = QSharedPointer::create(); + QBENCHMARK { + auto moved = std::move(source).staticCast(); + source = std::move(moved).staticCast(); + } + } + void dynamicCast() + { + auto source = QSharedPointer::create(); + QBENCHMARK { + auto copy = source.dynamicCast(); + source = copy.dynamicCast(); + } + } + void dynamicCast_rvalue() + { + auto source = QSharedPointer::create(); + QBENCHMARK { + auto moved = std::move(source).dynamicCast(); + source = std::move(moved).dynamicCast(); + } + } + void objectCast() + { + auto source = QSharedPointer::create(); + QBENCHMARK { + auto copy = source.objectCast(); + source = copy.objectCast(); + } + } + void objectCast_rvalue() + { + auto source = QSharedPointer::create(); + QBENCHMARK { + auto moved = std::move(source).objectCast(); + source = std::move(moved).objectCast(); + } + } }; QTEST_MAIN(tst_QSharedPointer)