diff --git a/config.tests/arch/arch.cpp b/config.tests/arch/arch.cpp
index 44ec7214320..334e48f30d3 100644
--- a/config.tests/arch/arch.cpp
+++ b/config.tests/arch/arch.cpp
@@ -239,7 +239,7 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:"
 #endif
 
 // -- ARM --
-#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(_M_ARM64)
 " neon"
 #endif
 #ifdef __IWMMXT__
diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp
index 2bd1aed573e..1d3214b273e 100644
--- a/src/corelib/global/qsimd.cpp
+++ b/src/corelib/global/qsimd.cpp
@@ -151,7 +151,7 @@ static inline quint64 detectProcessorFeatures()
         features |= CpuFeatureAES;
     return features;
 #endif
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#if defined(__ARM_NEON__)
     features |= CpuFeatureNEON;
 #endif
 #if defined(__ARM_FEATURE_CRC32)
diff --git a/src/corelib/global/qsimd.h b/src/corelib/global/qsimd.h
index 4ef925ca337..8ee3e9b15d8 100644
--- a/src/corelib/global/qsimd.h
+++ b/src/corelib/global/qsimd.h
@@ -34,7 +34,7 @@
 
 #define QT_COMPILER_USES(feature) (1/QT_COMPILER_USES_##feature == 1)
 
-#if defined(Q_PROCESSOR_ARM) && defined(__ARM_NEON) || defined(__ARM_NEON__)
+#if defined(Q_PROCESSOR_ARM) && defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(_M_ARM64)
 #  include <arm_neon.h>
 #  define QT_COMPILER_USES_neon 1
 #else
diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h
index 012eb6cf4f4..b9cd296c9e8 100644
--- a/src/corelib/global/qsimd_p.h
+++ b/src/corelib/global/qsimd_p.h
@@ -257,7 +257,7 @@ static_assert(ARCH_SKX_MACROS, "Undeclared identifiers indicate which features a
 
 // NEON intrinsics
 // note: as of GCC 4.9, does not support function targets for ARM
-#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(_M_ARM64)
 #if defined(Q_CC_CLANG)
 #define QT_FUNCTION_TARGET_STRING_NEON      "neon"
 #else
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 9a09675eb7e..c4c4c528ccd 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -749,7 +749,11 @@ const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
                                    [=](qsizetype i) { return n + i; });
 #  endif
 #elif defined(__ARM_NEON__)
+#ifdef _MSC_VER
+    const uint16x8_t vmask = { 0x0008000400020001ULL, 0x0080004000200010ULL };
+#else
     const uint16x8_t vmask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
+#endif
     const uint16x8_t ch_vec = vdupq_n_u16(c);
     for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
         uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
@@ -1290,7 +1294,11 @@ static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
 #  elif defined(__ARM_NEON__)
     if (l >= 8) {
         const char16_t *end = a + l;
+#ifdef _MSC_VER
+        const uint16x8_t mask = { 0x0008000400020001ULL, 0x0080004000200010ULL };
+#else
         const uint16x8_t mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
+#endif
         while (end - a > 7) {
             uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
             uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 9fc3318d716..57e0e30d9c8 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -351,7 +351,12 @@ static void simdCompareAscii(const qchar8_t *&src8, const qchar8_t *end8, const
 static inline bool simdEncodeAscii(uchar *&dst, const char16_t *&nextAscii, const char16_t *&src, const char16_t *end)
 {
     uint16x8_t maxAscii = vdupq_n_u16(0x7f);
-    uint16x8_t mask1 = { 1,      1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
+#ifdef _MSC_VER
+    uint16_t mask1t[8] = { 1, 1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
+    uint16x8_t mask1 = vld1q_u16(mask1t);
+#else
+    uint16x8_t mask1 = { 1, 1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
+#endif
     uint16x8_t mask2 = vshlq_n_u16(mask1, 1);
 
     // do sixteen characters at a time
@@ -389,7 +394,12 @@ static inline bool simdDecodeAscii(char16_t *&dst, const uchar *&nextAscii, cons
 {
     // do eight characters at a time
     uint8x8_t msb_mask = vdup_n_u8(0x80);
+#ifdef _MSC_VER
+    uint8_t add_maskt[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
+    uint8x8_t add_mask = vld1_u8(add_maskt);
+#else
     uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
+#endif
     for ( ; end - src >= 8; src += 8, dst += 8) {
         uint8x8_t c = vld1_u8(src);
         uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask));
@@ -425,7 +435,12 @@ static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end,
 
     // do eight characters at a time
     uint8x8_t msb_mask = vdup_n_u8(0x80);
+#ifdef _MSC_VER
+    uint8_t add_maskt[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
+    uint8x8_t add_mask = vld1_u8(add_maskt);
+#else
     uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
+#endif
     for ( ; end - src >= 8; src += 8) {
         uint8x8_t c = vld1_u8(src);
         uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask));
diff --git a/src/gui/painting/qcolortransform.cpp b/src/gui/painting/qcolortransform.cpp
index 52fbebfa5db..14200ccf9d5 100644
--- a/src/gui/painting/qcolortransform.cpp
+++ b/src/gui/painting/qcolortransform.cpp
@@ -662,18 +662,27 @@ static inline bool test_all_zero(uint32x4_t p)
 #endif
 }
 
+static inline uint32x4_t vsetq_u32(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+#ifdef _MSC_VER
+    return uint32x4_t{ (uint64_t(b) << 32) | a, (uint64_t(d) << 32) | c };
+#else
+    return uint32x4_t{ a, b, c, d };
+#endif
+}
+
 template<typename T>
 static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
 {
     constexpr bool isARGB = isArgb<T>();
     const float iFF00 = 1.0f / (255 * 256);
-    const uint32x4_t vRangeMax = {
-        isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
-              : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
-        d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
-        isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
-              : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
-        QColorTrcLut::Resolution };
+    const uint32x4_t vRangeMax = vsetq_u32(
+            isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
+                   : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
+            d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
+            isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
+                   : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
+            QColorTrcLut::Resolution);
     for (qsizetype i = 0; i < len; ++i) {
         uint32x4_t v;
         loadP<T>(src[i], v);
@@ -740,13 +749,13 @@ void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len
 {
     constexpr bool isARGB = isArgb<T>();
     const float iFF00 = 1.0f / (255 * 256);
-    const uint32x4_t vRangeMax = {
-        isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
-               : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
-        d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
-        isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
-               : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
-        QColorTrcLut::Resolution };
+    const uint32x4_t vRangeMax = vsetq_u32(
+            isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
+                   : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
+            d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
+            isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
+                   : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
+            QColorTrcLut::Resolution);
     for (qsizetype i = 0; i < len; ++i) {
         uint32x4_t v;
         loadPU<T>(src[i], v);
diff --git a/src/gui/painting/qcolortrclut_p.h b/src/gui/painting/qcolortrclut_p.h
index 220b504a83e..da5e9e60793 100644
--- a/src/gui/painting/qcolortrclut_p.h
+++ b/src/gui/painting/qcolortrclut_p.h
@@ -18,13 +18,14 @@
 #include <QtGui/private/qtguiglobal_p.h>
 #include <QtGui/qrgb.h>
 #include <QtGui/qrgba64.h>
+#include <QtCore/private/qsimd_p.h>
 
 #include <cmath>
 #include <memory>
 
 #if defined(__SSE2__)
 #include <emmintrin.h>
-#elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#elif defined(__ARM_NEON__)
 #include <arm_neon.h>
 #endif
 
@@ -75,7 +76,7 @@ public:
         QRgba64 rgba64;
         _mm_storel_epi64(reinterpret_cast<__m128i *>(&rgba64), v);
         return rgba64;
-#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+#elif defined(__ARM_NEON__) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
         uint8x8_t v8 = vreinterpret_u8_u32(vmov_n_u32(rgb32));
         uint16x4_t v16 = vget_low_u16(vmovl_u8(v8));
         const uint16x4_t vidx = vshl_n_u16(v16, ShiftUp);
@@ -145,7 +146,7 @@ public:
         v = _mm_srli_epi16(v, 8);
         v = _mm_packus_epi16(v, v);
         return _mm_cvtsi128_si32(v);
-#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+#elif defined(__ARM_NEON__) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
         uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64));
         v = vsub_u16(v, vshr_n_u16(v, 8));
         const uint16x4_t vidx = vshr_n_u16(v, ShiftDown);
@@ -236,7 +237,7 @@ private:
         QRgba64 rgba64;
         _mm_storel_epi64(reinterpret_cast<__m128i *>(&rgba64), v);
         return rgba64;
-#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+#elif defined(__ARM_NEON__) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
         uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64));
         v = vsub_u16(v, vshr_n_u16(v, 8));
         const uint16x4_t vidx = vshr_n_u16(v, ShiftDown);
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index 1fceb83710b..f8de0a85116 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -1008,8 +1008,18 @@ void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h,
 class QSimdNeon
 {
 public:
-    typedef int32x4_t Int32x4;
-    typedef float32x4_t Float32x4;
+    struct Int32x4 {
+        Int32x4() = default;
+        Int32x4(int32x4_t v) : v(v) {}
+        int32x4_t v;
+        operator int32x4_t() const { return v; }
+    };
+    struct Float32x4 {
+        Float32x4() = default;
+        Float32x4(float32x4_t v) : v(v) {};
+        float32x4_t v;
+        operator float32x4_t() const { return v; }
+    };
 
     union Vect_buffer_i { Int32x4 v; int i[4]; };
     union Vect_buffer_f { Float32x4 v; float f[4]; };
@@ -1059,7 +1069,9 @@ const uint * QT_FASTCALL qt_fetchUntransformed_888_neon(uint *buffer, const Oper
 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
 static inline uint32x4_t vrgba2argb(uint32x4_t srcVector)
 {
-#if defined(Q_PROCESSOR_ARM_64)
+#if defined(Q_PROCESSOR_ARM_64) && defined(_MSC_VER)
+    const uint8x16_t rgbaMask  = { 0x0704050603000102ULL, 0x0F0C0D0E0B08090AULL };
+#elif defined(Q_PROCESSOR_ARM_64)
     const uint8x16_t rgbaMask  = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
 #else
     const uint8x8_t rgbaMask  = { 2, 1, 0, 3, 6, 5, 4, 7 };
@@ -1079,7 +1091,11 @@ template<bool RGBA>
 static inline void convertARGBToARGB32PM_neon(uint *buffer, const uint *src, int count)
 {
     int i = 0;
+#ifdef _MSC_VER
+    const uint8x8_t shuffleMask = { 0x0707070703030303ULL };
+#else
     const uint8x8_t shuffleMask = { 3, 3, 3, 3, 7, 7, 7, 7};
+#endif
     const uint32x4_t blendMask = vdupq_n_u32(0xff000000);
 
     for (; i < count - 3; i += 4) {
@@ -1131,7 +1147,11 @@ static inline void convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src
     if (count <= 0)
         return;
 
+#ifdef _MSC_VER
+    const uint8x8_t shuffleMask = { 0x0707070703030303ULL };
+#else
     const uint8x8_t shuffleMask = { 3, 3, 3, 3, 7, 7, 7, 7};
+#endif
     const uint64x2_t blendMask = vdupq_n_u64(Q_UINT64_C(0xffff000000000000));
 
     int i = 0;
diff --git a/src/gui/painting/qpixellayout.cpp b/src/gui/painting/qpixellayout.cpp
index 4f2f0ae13a0..31646d2f23d 100644
--- a/src/gui/painting/qpixellayout.cpp
+++ b/src/gui/painting/qpixellayout.cpp
@@ -1087,7 +1087,9 @@ static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 *buffer, const uint *
         return;
 
     const uint32x4_t amask = vdupq_n_u32(0xff000000);
-#if defined(Q_PROCESSOR_ARM_64)
+#if defined(Q_PROCESSOR_ARM_64) && defined(_MSC_VER)
+    const uint8x16_t rgbaMask  = { 0x0704050603000102ULL, 0x0F0C0D0E0B08090AULL };
+#elif defined(Q_PROCESSOR_ARM_64)
     const uint8x16_t rgbaMask  = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
 #else
     const uint8x8_t rgbaMask  = { 2, 1, 0, 3, 6, 5, 4, 7 };
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h
index 6fd7d7ff2c1..6809f1d52cf 100644
--- a/src/gui/painting/qrgba64_p.h
+++ b/src/gui/painting/qrgba64_p.h
@@ -232,7 +232,11 @@ static inline uint toArgb32(QRgba64 rgba64)
 #elif defined __ARM_NEON__
     uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+#ifdef _MSC_VER
+    const uint8x8_t shuffleMask = { 0x0706010003020504ULL };
+#else
     const uint8x8_t shuffleMask = { 4, 5, 2, 3, 0, 1, 6, 7 };
+#endif
     v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask));
 #else
     v = vext_u16(v, v, 3);