Revert "Cleanup QSemaphore and make it always 64bit large"

This reverts commit ff69227a49119c00f703cf89c9d72db7eeeeaa66. Reason for revert: 64-bit atomics on 32-bit systems are often (but not always) worse than the 32-bit semaphore as it was implemented. Plus the High32 and Low32 functions are returning the same thing, forgetting the endianness check. Change-Id: I5d5ade6e9bc7086600ff2302546385151e32142b Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
2020-09-06 21:58:36 +00:00 · 2020-09-06 21:58:36 +00:00 · 475464906c
commit 475464906c
parent e33d40a884
2 changed files with 101 additions and 57 deletions
--- a/src/corelib/thread/qsemaphore.cpp
+++ b/src/corelib/thread/qsemaphore.cpp
@ -126,19 +126,29 @@ using namespace QtFutex;
    unspecified, but it's likely single-token threads will get woken up first.
 */
-static const quint64 futexNeedsWakeAllBit =
+#if defined(FUTEX_OP) && QT_POINTER_SIZE > 4
-        Q_UINT64_C(1) << (sizeof(quint64) * CHAR_BIT - 1);
+static constexpr bool futexHasWaiterCount = true;
 #else
 static constexpr bool futexHasWaiterCount = false;
 #endif
-static int futexAvailCounter(quint64 v)
+static const quintptr futexNeedsWakeAllBit =
        Q_UINT64_C(1) << (sizeof(quintptr) * CHAR_BIT - 1);
 static int futexAvailCounter(quintptr v)
 {
    // the low 31 bits
    if (futexHasWaiterCount) {
        // the high bit of the low word isn't used
        Q_ASSERT((v & 0x80000000U) == 0);
        // so we can be a little faster
        return int(unsigned(v));
    }
    return int(v & 0x7fffffffU);
 }
-static bool futexNeedsWake(quint64 v)
+static bool futexNeedsWake(quintptr v)
 {
    // If we're counting waiters, the number of waiters is stored in the low 31
    // bits of the high word (that is, bits 32-62). If we're not, then we use
@ -147,22 +157,26 @@ static bool futexNeedsWake(quint64 v)
    return v >> 31;
 }
-static QBasicAtomicInteger<quint32> *futexLow32(QBasicAtomicInteger<quint64> *ptr)
+static QBasicAtomicInteger<quint32> *futexLow32(QBasicAtomicInteger<quintptr> *ptr)
 {
    auto result = reinterpret_cast<QBasicAtomicInteger<quint32> *>(ptr);
 #if Q_BYTE_ORDER == Q_BIG_ENDIAN && QT_POINTER_SIZE > 4
    ++result;
 #endif
    return result;
 }
-static QBasicAtomicInteger<quint32> *futexHigh32(QBasicAtomicInteger<quint64> *ptr)
+static QBasicAtomicInteger<quint32> *futexHigh32(QBasicAtomicInteger<quintptr> *ptr)
 {
    auto result = reinterpret_cast<QBasicAtomicInteger<quint32> *>(ptr);
 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN && QT_POINTER_SIZE > 4
    ++result;
 #endif
    return result;
 }
 template <bool IsTimed> bool
-futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quint64> &u, quint64 curValue, quint64 nn, int timeout)
+futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quintptr> &u, quintptr curValue, quintptr nn, int timeout)
 {
    QDeadlineTimer timer(IsTimed ? QDeadlineTimer(timeout) : QDeadlineTimer());
    qint64 remainingTime = timeout * Q_INT64_C(1000) * 1000;
@ -174,7 +188,7 @@ futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quint64> &u, quint64 curValue,
    forever {
        if (futexAvailCounter(curValue) >= n) {
            // try to acquire
-            quint64 newValue = curValue - nn;
+            quintptr newValue = curValue - nn;
            if (u.testAndSetOrdered(curValue, newValue, curValue))
                return true;        // succeeded!
            continue;
@ -187,11 +201,14 @@ futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quint64> &u, quint64 curValue,
        // indicate we're waiting
 start_wait:
        auto ptr = futexLow32(&u);
-        if (n > 1) {
+        if (n > 1 || !futexHasWaiterCount) {
            u.fetchAndOrRelaxed(futexNeedsWakeAllBit);
            curValue |= futexNeedsWakeAllBit;
            if (n > 1 && futexHasWaiterCount) {
                ptr = futexHigh32(&u);
-            curValue >>= 32;
+                //curValue >>= 32;  // but this is UB in 32-bit, so roundabout:
                curValue = quint64(curValue) >> 32;
            }
        }
        if (IsTimed && remainingTime > 0) {
@ -208,17 +225,18 @@ start_wait:
    }
 }
-template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quint64> &u, int n, int timeout)
+template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quintptr> &u, int n, int timeout)
 {
    // Try to acquire without waiting (we still loop because the testAndSet
    // call can fail).
-    quint64 nn = unsigned(n);
+    quintptr nn = unsigned(n);
    if (futexHasWaiterCount)
        nn |= quint64(nn) << 32;    // token count replicated in high word
-    quint64 curValue = u.loadAcquire();
+    quintptr curValue = u.loadAcquire();
    while (futexAvailCounter(curValue) >= n) {
        // try to acquire
-        quint64 newValue = curValue - nn;
+        quintptr newValue = curValue - nn;
        if (u.testAndSetOrdered(curValue, newValue, curValue))
            return true;        // succeeded!
    }
@ -226,7 +244,8 @@ template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quint6
        return false;
    // we need to wait
-    quint64 oneWaiter = quint64(Q_UINT64_C(1) << 32); // zero on 32-bit
+    quintptr oneWaiter = quintptr(Q_UINT64_C(1) << 32); // zero on 32-bit
    if (futexHasWaiterCount) {
        // increase the waiter count
        u.fetchAndAddRelaxed(oneWaiter);
@ -238,13 +257,16 @@ template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quint6
        // Also adjust nn to subtract oneWaiter when we succeed in acquiring.
        nn += oneWaiter;
    }
    if (futexSemaphoreTryAcquire_loop<IsTimed>(u, curValue, nn, timeout))
        return true;
    if (futexHasWaiterCount) {
        // decrement the number of threads waiting
        Q_ASSERT(futexHigh32(&u)->loadRelaxed() & 0x7fffffffU);
        u.fetchAndSubRelaxed(oneWaiter);
    }
    return false;
 }
@ -268,8 +290,9 @@ QSemaphore::QSemaphore(int n)
 {
    Q_ASSERT_X(n >= 0, "QSemaphore", "parameter 'n' must be non-negative");
    if (futexAvailable()) {
-        quint64 nn = unsigned(n);
+        quintptr nn = unsigned(n);
-        nn |= nn << 32;    // token count replicated in high word
+        if (futexHasWaiterCount)
            nn |= quint64(nn) << 32;    // token count replicated in high word
        u.storeRelaxed(nn);
    } else {
        d = new QSemaphorePrivate(n);
@ -328,13 +351,33 @@ void QSemaphore::release(int n)
    Q_ASSERT_X(n >= 0, "QSemaphore::release", "parameter 'n' must be non-negative");
    if (futexAvailable()) {
-        quint64 nn = unsigned(n);
+        quintptr nn = unsigned(n);
-        nn |= nn << 32;    // token count replicated in high word
+        if (futexHasWaiterCount)
-        quint64 prevValue = u.fetchAndAddRelease(nn);
+            nn |= quint64(nn) << 32;    // token count replicated in high word
        quintptr prevValue = u.fetchAndAddRelease(nn);
        if (futexNeedsWake(prevValue)) {
 #ifdef FUTEX_OP
            if (!futexHasWaiterCount) {
                /*
-               The single-token waiters wait on the low half
+                   On 32-bit systems, all waiters are waiting on the same address,
                   so we'll wake them all and ask the kernel to clear the high bit.
                   atomic {
                      int oldval = u;
                      u = oldval & ~(1 << 31);
                      futexWake(u, INT_MAX);
                      if (oldval == 0)       // impossible condition
                          futexWake(u, INT_MAX);
                   }
                */
                quint32 op = FUTEX_OP_ANDN | FUTEX_OP_OPARG_SHIFT;
                quint32 oparg = 31;
                quint32 cmp = FUTEX_OP_CMP_EQ;
                quint32 cmparg = 0;
                futexWakeOp(u, INT_MAX, INT_MAX, u, FUTEX_OP(op, oparg, cmp, cmparg));
            } else {
                /*
                   On 64-bit systems, the single-token waiters wait on the low half
                   and the multi-token waiters wait on the upper half. So we ask
                   the kernel to wake up n single-token waiters and all multi-token
                   waiters (if any), then clear the multi-token wait bit.
@ -352,6 +395,7 @@ void QSemaphore::release(int n)
                quint32 cmp = FUTEX_OP_CMP_LT;
                quint32 cmparg = 0;
                futexWakeOp(*futexLow32(&u), n, INT_MAX, *futexHigh32(&u), FUTEX_OP(op, oparg, cmp, cmparg));
            }
 #else
            // Unset the bit and wake everyone. There are two possibibilies
            // under which a thread can set the bit between the AND and the
--- a/src/corelib/thread/qsemaphore.h
+++ b/src/corelib/thread/qsemaphore.h
@ -67,7 +67,7 @@ private:
    union {
        QSemaphorePrivate *d;
-        QBasicAtomicInteger<quint64> u;
+        QBasicAtomicInteger<quintptr> u;        // ### Qt6: make 64-bit
    };
 };