MSVC x86: implement add_overflow for quint64
There's no 64-bit ADD instruction on 32-bit x86, so we make do with ADD+ADC.
This is what Clang generates. ICC uses the two as well, but then performs some
subtractions to find out if it overflowed. GCC for some inexplicable reason
attempts to use SSE2 if that's enabled; otherwise it performs the subtractions
like ICC.

Alternative implementation which generates better code, but violates strict
aliasing:

    uint *low = reinterpret_cast<uint *>(r);
    uint *high = low + 1;
    return _addcarry_u32(_addcarry_u32(0, unsigned(v1), unsigned(v2), low),
                         v1 >> 32, v2 >> 32, high);

Manual testing shows this works. tst_qnumeric passes in debug mode. MSVC 2017
15.9 still miscompiles in release mode (reported to MS as [1]).

[1] https://developercommunity.visualstudio.com/content/problem/409039/-addcarry-u32-wrong-results-with-constant-inputs.html

Change-Id: I61ce366d57bc46c89db5fffd15704d53ebd4af3c
Reviewed-by: Thomas Miller <thomaslmiller91@gmail.com>
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
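As an aside for readers who haven't seen the carry-chain trick: the sketch
below is not part of the patch; the helper name and the plain-arithmetic carry
detection are illustrative stand-ins for the _addcarry_u32 intrinsic. It shows
in portable C++ how adding the low 32-bit halves, feeding the resulting carry
into the addition of the high halves, and returning the final carry detects
64-bit overflow.

    // Illustration only: portable equivalent of the chained _addcarry_u32 calls.
    #include <cassert>
    #include <cstdint>

    static bool add_overflow_u64_via_u32(std::uint64_t v1, std::uint64_t v2,
                                         std::uint64_t *r)
    {
        // Low halves: 32-bit add; it wrapped iff the sum is smaller than an operand.
        std::uint32_t low = std::uint32_t(v1) + std::uint32_t(v2);
        std::uint32_t carry = low < std::uint32_t(v1);

        // High halves: add with the incoming carry, then detect the outgoing carry.
        std::uint32_t h1 = std::uint32_t(v1 >> 32), h2 = std::uint32_t(v2 >> 32);
        std::uint32_t high = h1 + h2 + carry;
        bool overflow = high < h1 || (carry && high == h1);

        *r = (std::uint64_t(high) << 32) | low;
        return overflow;
    }

    int main()
    {
        std::uint64_t r;
        assert(!add_overflow_u64_via_u32(1, 2, &r) && r == 3);                // no overflow
        assert(add_overflow_u64_via_u32(~std::uint64_t(0), 1, &r) && r == 0); // wraps to 0
    }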
parent 9ab04795e2
commit a7cea16005
@@ -373,10 +373,18 @@ template <> inline bool add_overflow(unsigned v1, unsigned v2, unsigned *r)
 
 // 32-bit mul_overflow is fine with the generic code above
 
-#  if defined(Q_PROCESSOR_X86_64)
 template <> inline bool add_overflow(quint64 v1, quint64 v2, quint64 *r)
-{ return _addcarry_u64(0, v1, v2, reinterpret_cast<unsigned __int64 *>(r)); }
-#  endif // x86-64
+{
+#  if defined(Q_PROCESSOR_X86_64)
+    return _addcarry_u64(0, v1, v2, reinterpret_cast<unsigned __int64 *>(r));
+#  else
+    uint low, high;
+    uchar carry = _addcarry_u32(0, unsigned(v1), unsigned(v2), &low);
+    carry = _addcarry_u32(carry, v1 >> 32, v2 >> 32, &high);
+    *r = (quint64(high) << 32) | low;
+    return carry;
+#  endif // !x86-64
+}
 #  endif // MSVC X86
 #endif // !GCC
 }