]> git.saurik.com Git - apple/libpthread.git/blobdiff - src/pthread_mutex.c
libpthread-301.20.1.tar.gz
[apple/libpthread.git] / src / pthread_mutex.c
index 794b6ff2403d8ebc2d6a129aeed856d8d0e95ada..4f1d06fd534a811f290fa184010db46b303b2b14 100644 (file)
@@ -2,14 +2,14 @@
  * Copyright (c) 2000-2003, 2007, 2008 Apple Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * compliance with the License. Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this
  * file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -17,7 +17,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_LICENSE_HEADER_END@
  */
 /*
  * -- Mutex variable support
  */
 
+#include "resolver.h"
 #include "internal.h"
 #include "kern/kern_trace.h"
-#include <sys/syscall.h>
+
+extern int __unix_conforming;
+
+#ifndef BUILDING_VARIANT /* [ */
 
 #ifdef PLOCKSTAT
 #include "plockstat.h"
+/* This function is never called and exists to provide never-fired dtrace
+ * probes so that user d scripts don't get errors.
+ */
+PTHREAD_NOEXPORT PTHREAD_USED
+void
+_plockstat_never_fired(void)
+{
+       PLOCKSTAT_MUTEX_SPIN(NULL);
+       PLOCKSTAT_MUTEX_SPUN(NULL, 0, 0);
+}
 #else /* !PLOCKSTAT */
 #define        PLOCKSTAT_MUTEX_SPIN(x)
 #define        PLOCKSTAT_MUTEX_SPUN(x, y, z)
 #define        PLOCKSTAT_MUTEX_RELEASE(x, y)
 #endif /* PLOCKSTAT */
 
-extern int __unix_conforming;
+#define BLOCK_FAIL_PLOCKSTAT    0
+#define BLOCK_SUCCESS_PLOCKSTAT 1
 
-#ifndef BUILDING_VARIANT
-PTHREAD_NOEXPORT int __mtx_markprepost(_pthread_mutex *mutex, uint32_t oupdateval, int firstfit);
-#endif /* BUILDING_VARIANT */
+#define PTHREAD_MUTEX_INIT_UNUSED 1
+
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock);
+
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_unlock_slow(pthread_mutex_t *omutex);
+
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_corruption_abort(_pthread_mutex *mutex);
+
+
+PTHREAD_ALWAYS_INLINE
+static inline int _pthread_mutex_init(_pthread_mutex *mutex,
+               const pthread_mutexattr_t *attr, uint32_t static_type);
 
 #define DEBUG_TRACE_POINTS 0
 
 #if DEBUG_TRACE_POINTS
-extern int __syscall(int number, ...);
-#define DEBUG_TRACE(x, a, b, c, d) __syscall(SYS_kdebug_trace, TRACE_##x, a, b, c, d)
+#include <sys/kdebug.h>
+#define DEBUG_TRACE(x, a, b, c, d) kdebug_trace(TRACE_##x, a, b, c, d)
 #else
 #define DEBUG_TRACE(x, a, b, c, d) do { } while(0)
 #endif
 
-#include <machine/cpu_capabilities.h>
+typedef union mutex_seq {
+       uint32_t seq[2];
+       struct { uint32_t lgenval; uint32_t ugenval; };
+       struct { uint32_t mgen; uint32_t ugen; };
+       uint64_t seq_LU;
+       uint64_t _Atomic atomic_seq_LU;
+} mutex_seq;
 
-static int _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint32_t static_type);
+_Static_assert(sizeof(mutex_seq) == 2 * sizeof(uint32_t),
+               "Incorrect mutex_seq size");
 
 #if !__LITTLE_ENDIAN__
 #error MUTEX_GETSEQ_ADDR assumes little endian layout of 2 32-bit sequence words
 #endif
 
-static void
-MUTEX_GETSEQ_ADDR(_pthread_mutex *mutex,
-                 volatile uint64_t **seqaddr)
+PTHREAD_ALWAYS_INLINE
+static inline void
+MUTEX_GETSEQ_ADDR(_pthread_mutex *mutex, mutex_seq **seqaddr)
 {
-       if (mutex->mtxopts.options.misalign) {
-               *seqaddr = (volatile uint64_t *)&mutex->m_seq[1];
-       } else {
-               *seqaddr = (volatile uint64_t *)&mutex->m_seq[0];
-       }
+       // 64-bit aligned address inside m_seq array (&m_seq[0] for aligned mutex)
+       // We don't require more than byte alignment on OS X. rdar://22278325
+       *seqaddr = (void *)(((uintptr_t)mutex->m_seq + 0x7ul) & ~0x7ul);
 }
 
-static void
-MUTEX_GETTID_ADDR(_pthread_mutex *mutex,
-                                 volatile uint64_t **tidaddr)
+PTHREAD_ALWAYS_INLINE
+static inline void
+MUTEX_GETTID_ADDR(_pthread_mutex *mutex, uint64_t **tidaddr)
 {
-       if (mutex->mtxopts.options.misalign) {
-               *tidaddr = (volatile uint64_t *)&mutex->m_tid[1];
-       } else {
-               *tidaddr = (volatile uint64_t *)&mutex->m_tid[0];
-       }
+       // 64-bit aligned address inside m_tid array (&m_tid[0] for aligned mutex)
+       // We don't require more than byte alignment on OS X. rdar://22278325
+       *tidaddr = (void*)(((uintptr_t)mutex->m_tid + 0x7ul) & ~0x7ul);
 }
 
-#ifndef BUILDING_VARIANT /* [ */
+PTHREAD_ALWAYS_INLINE
+static inline void
+mutex_seq_load(mutex_seq *seqaddr, mutex_seq *oldseqval)
+{
+       oldseqval->seq_LU = seqaddr->seq_LU;
+}
 
-#define BLOCK_FAIL_PLOCKSTAT    0
-#define BLOCK_SUCCESS_PLOCKSTAT 1
+PTHREAD_ALWAYS_INLINE
+static inline void
+mutex_seq_atomic_load_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval)
+{
+       oldseqval->seq_LU = os_atomic_load(&seqaddr->atomic_seq_LU, relaxed);
+}
 
-/* This function is never called and exists to provide never-fired dtrace
- * probes so that user d scripts don't get errors.
- */
-__private_extern__ __attribute__((used)) void
-_plockstat_never_fired(void) 
+#define mutex_seq_atomic_load(seqaddr, oldseqval, m) \
+               mutex_seq_atomic_load_##m(seqaddr, oldseqval)
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval,
+               mutex_seq *newseqval)
 {
-       PLOCKSTAT_MUTEX_SPIN(NULL);
-       PLOCKSTAT_MUTEX_SPUN(NULL, 0, 0);
+       return os_atomic_cmpxchgv(&seqaddr->atomic_seq_LU, oldseqval->seq_LU,
+                       newseqval->seq_LU, &oldseqval->seq_LU, relaxed);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval,
+               mutex_seq *newseqval)
+{
+       return os_atomic_cmpxchgv(&seqaddr->atomic_seq_LU, oldseqval->seq_LU,
+                       newseqval->seq_LU, &oldseqval->seq_LU, acquire);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+mutex_seq_atomic_cmpxchgv_release(mutex_seq *seqaddr, mutex_seq *oldseqval,
+               mutex_seq *newseqval)
+{
+       return os_atomic_cmpxchgv(&seqaddr->atomic_seq_LU, oldseqval->seq_LU,
+                       newseqval->seq_LU, &oldseqval->seq_LU, release);
 }
 
+#define mutex_seq_atomic_cmpxchgv(seqaddr, oldseqval, newseqval, m)\
+               mutex_seq_atomic_cmpxchgv_##m(seqaddr, oldseqval, newseqval)
 
 /*
  * Initialize a mutex variable, possibly with additional attributes.
  * Public interface - so don't trust the lock - initialize it first.
  */
+PTHREAD_NOEXPORT_VARIANT
 int
 pthread_mutex_init(pthread_mutex_t *omutex, const pthread_mutexattr_t *attr)
 {
 #if 0
        /* conformance tests depend on not having this behavior */
        /* The test for this behavior is optional */
-       if (mutex->sig == _PTHREAD_MUTEX_SIG)
+       if (_pthread_mutex_check_signature(mutex))
                return EBUSY;
 #endif
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       LOCK_INIT(mutex->lock);
+       _PTHREAD_LOCK_INIT(mutex->lock);
        return (_pthread_mutex_init(mutex, attr, 0x7));
 }
 
+PTHREAD_NOEXPORT_VARIANT
 int
 pthread_mutex_getprioceiling(const pthread_mutex_t *omutex, int *prioceiling)
 {
        int res = EINVAL;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       if (mutex->sig == _PTHREAD_MUTEX_SIG) {
-               LOCK(mutex->lock);
+       if (_pthread_mutex_check_signature(mutex)) {
+               _PTHREAD_LOCK(mutex->lock);
                *prioceiling = mutex->prioceiling;
                res = 0;
-               UNLOCK(mutex->lock);
+               _PTHREAD_UNLOCK(mutex->lock);
        }
        return res;
 }
 
+PTHREAD_NOEXPORT_VARIANT
 int
-pthread_mutex_setprioceiling(pthread_mutex_t *omutex, int prioceiling, int *old_prioceiling)
+pthread_mutex_setprioceiling(pthread_mutex_t *omutex, int prioceiling,
+               int *old_prioceiling)
 {
        int res = EINVAL;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       if (mutex->sig == _PTHREAD_MUTEX_SIG) {
-               LOCK(mutex->lock);
-               if (prioceiling >= -999 || prioceiling <= 999) {
+       if (_pthread_mutex_check_signature(mutex)) {
+               _PTHREAD_LOCK(mutex->lock);
+               if (prioceiling >= -999 && prioceiling <= 999) {
                        *old_prioceiling = mutex->prioceiling;
-                       mutex->prioceiling = prioceiling;
+                       mutex->prioceiling = (int16_t)prioceiling;
                        res = 0;
                }
-               UNLOCK(mutex->lock);
+               _PTHREAD_UNLOCK(mutex->lock);
        }
        return res;
 }
 
+
 int
-pthread_mutexattr_getprioceiling(const pthread_mutexattr_t *attr, int *prioceiling)
+pthread_mutexattr_getprioceiling(const pthread_mutexattr_t *attr,
+               int *prioceiling)
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
@@ -237,7 +304,7 @@ pthread_mutexattr_setprioceiling(pthread_mutexattr_t *attr, int prioceiling)
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
-               if (prioceiling >= -999 || prioceiling <= 999) {
+               if (prioceiling >= -999 && prioceiling <= 999) {
                        attr->prioceiling = prioceiling;
                        res = 0;
                }
@@ -296,23 +363,6 @@ pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type)
        return res;
 }
 
-// XXX remove
-void
-cthread_yield(void) 
-{
-       sched_yield();
-}
-
-void
-pthread_yield_np(void) 
-{
-       sched_yield();
-}
-
-
-/*
- * Temp: till pshared is fixed correctly
- */
 int
 pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared)
 {
@@ -325,18 +375,28 @@ pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared)
 
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
 #if __DARWIN_UNIX03
-               if (( pshared == PTHREAD_PROCESS_PRIVATE) || (pshared == PTHREAD_PROCESS_SHARED))
+               if (( pshared == PTHREAD_PROCESS_PRIVATE) ||
+                               (pshared == PTHREAD_PROCESS_SHARED))
 #else /* __DARWIN_UNIX03 */
                if ( pshared == PTHREAD_PROCESS_PRIVATE)
 #endif /* __DARWIN_UNIX03 */
                {
-                       attr->pshared = pshared; 
+                       attr->pshared = pshared;
                        res = 0;
                }
        }
        return res;
 }
 
+PTHREAD_NOEXPORT PTHREAD_NOINLINE PTHREAD_NORETURN
+int
+_pthread_mutex_corruption_abort(_pthread_mutex *mutex)
+{
+       PTHREAD_ABORT("pthread_mutex corruption: mutex owner changed in the "
+                       "middle of lock/unlock");
+}
+
+
 /*
  * Sequence numbers and TID:
  *
@@ -346,40 +406,50 @@ pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared)
  * the unlock path will then transition to D=[L5 U4 TID0] and then finally
  * E=[L5 U5 TID0].
  *
- * If a contender comes in after B, the mutex will instead transition to E=[L6+KE U4 TID0]
- * and then F=[L6+KE U4 TID940]. If a contender comes in after C, it will transition to
- * F=[L6+KE U4 TID940] directly. In both cases, the contender will enter the kernel with either
- * mutexwait(U4, TID0) or mutexwait(U4, TID940). The first owner will unlock the mutex
- * by first updating the owner to G=[L6+KE U4 TID-1] and then doing the actual unlock to
- * H=[L6+KE U5 TID=-1] before entering the kernel with mutexdrop(U5, -1) to signal the next waiter
- * (potentially as a prepost). When the waiter comes out of the kernel, it will update the owner to
- * I=[L6+KE U5 TID941]. An unlock at this point is simply J=[L6 U5 TID0] and then K=[L6 U6 TID0].
+ * If a contender comes in after B, the mutex will instead transition to
+ * E=[L6+KE U4 TID0] and then F=[L6+KE U4 TID940]. If a contender comes in after
+ * C, it will transition to F=[L6+KE U4 TID940] directly. In both cases, the
+ * contender will enter the kernel with either mutexwait(U4, TID0) or
+ * mutexwait(U4, TID940). The first owner will unlock the mutex by first
+ * updating the owner to G=[L6+KE U4 TID-1] and then doing the actual unlock to
+ * H=[L6+KE U5 TID=-1] before entering the kernel with mutexdrop(U5, -1) to
+ * signal the next waiter (potentially as a prepost). When the waiter comes out
+ * of the kernel, it will update the owner to I=[L6+KE U5 TID941]. An unlock at
+ * this point is simply J=[L6 U5 TID0] and then K=[L6 U6 TID0].
  *
- * At various points along these timelines, since the sequence words and TID are written independently,
- * a thread may get preempted and another thread might see inconsistent data. In the worst case, another
- * thread may see the TID in the SWITCHING (-1) state or unlocked (0) state for longer because the
- * owning thread was preempted.
+ * At various points along these timelines, since the sequence words and TID are
+ * written independently, a thread may get preempted and another thread might
+ * see inconsistent data. In the worst case, another thread may see the TID in
+ * the SWITCHING (-1) state or unlocked (0) state for longer because the owning
+ * thread was preempted.
+ */
 
 /*
- * Drop the mutex unlock references from cond_wait. or mutex_unlock.
+ * Drop the mutex unlock references from cond_wait or mutex_unlock.
  */
-__private_extern__ int
-__mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
+               uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
 {
-       bool firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT);
-       uint32_t lgenval, ugenval, flags;
-       uint64_t oldtid, newtid;
-       volatile uint64_t *tidaddr;
-       MUTEX_GETTID_ADDR(mutex, &tidaddr);
-
-       flags = mutex->mtxopts.value;
+       bool firstfit = (mutex->mtxopts.options.policy ==
+                       _PTHREAD_MUTEX_POLICY_FIRSTFIT);
+       uint32_t flags = mutex->mtxopts.value;
        flags &= ~_PTHREAD_MTX_OPT_NOTIFY; // no notification by default
 
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid, newtid;
+
        if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) {
                uint64_t selfid = _pthread_selfid_direct();
-
-               if (*tidaddr != selfid) {
-                       //PTHREAD_ABORT("dropping recur or error mutex not owned by the thread\n");
+               if (os_atomic_load(tidaddr, relaxed) != selfid) {
                        PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, EPERM);
                        return EPERM;
                } else if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE &&
@@ -392,39 +462,36 @@ __mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32
                }
        }
 
-       uint64_t oldval64, newval64;
-       volatile uint64_t *seqaddr;
-       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-
        bool clearprepost, clearnotify, spurious;
        do {
-               oldval64 = *seqaddr;
-               oldtid = *tidaddr;
-               lgenval = (uint32_t)oldval64;
-               ugenval = (uint32_t)(oldval64 >> 32);
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
 
                clearprepost = false;
                clearnotify = false;
                spurious = false;
 
-               int numwaiters = diff_genseq(lgenval, ugenval); // pending waiters
-
+               // pending waiters
+               int numwaiters = diff_genseq(oldseq.lgenval, oldseq.ugenval);
                if (numwaiters == 0) {
-                       // spurious unlock; do not touch tid
+                       // spurious unlock (unlock of unlocked lock)
                        spurious = true;
                } else {
-                       ugenval += PTHRW_INC;
+                       newseq.ugenval += PTHRW_INC;
 
-                       if ((lgenval & PTHRW_COUNT_MASK) == (ugenval & PTHRW_COUNT_MASK)) {
-                               // our unlock sequence matches to lock sequence, so if the CAS is successful, the mutex is unlocked
+                       if ((oldseq.lgenval & PTHRW_COUNT_MASK) ==
+                                       (newseq.ugenval & PTHRW_COUNT_MASK)) {
+                               // our unlock sequence matches the lock sequence, so if the
+                               // CAS is successful, the mutex is unlocked
 
                                /* do not reset Ibit, just K&E */
-                               lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
+                               newseq.lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
                                clearnotify = true;
                                newtid = 0; // clear owner
                        } else {
                                if (firstfit) {
-                                       lgenval &= ~PTH_RWL_EBIT; // reset E bit so another can acquire meanwhile
+                                       // reset E bit so another can acquire meanwhile
+                                       newseq.lgenval &= ~PTH_RWL_EBIT;
                                        newtid = 0;
                                } else {
                                        newtid = PTHREAD_MTX_TID_SWITCHING;
@@ -432,41 +499,39 @@ __mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32
                                // need to signal others waiting for mutex
                                flags |= _PTHREAD_MTX_OPT_NOTIFY;
                        }
-                       
+
                        if (newtid != oldtid) {
-                               // We're giving up the mutex one way or the other, so go ahead and update the owner to SWITCHING
-                               // or 0 so that once the CAS below succeeds, there is no stale ownership information.
-                               // If the CAS of the seqaddr fails, we may loop, but it's still valid for the owner
-                               // to be SWITCHING/0
-                               if (!OSAtomicCompareAndSwap64(oldtid, newtid, (volatile int64_t *)tidaddr)) {
+                               // We're giving up the mutex one way or the other, so go ahead
+                               // and update the owner to SWITCHING or 0 so that once the CAS
+                               // below succeeds, there is no stale ownership information. If
+                               // the CAS of the seqaddr fails, we may loop, but it's still
+                               // valid for the owner to be SWITCHING/0
+                               if (!os_atomic_cmpxchg(tidaddr, oldtid, newtid, relaxed)) {
                                        // we own this mutex, nobody should be updating it except us
-                                       __builtin_trap();
+                                       return _pthread_mutex_corruption_abort(mutex);
                                }
                        }
                }
 
                if (clearnotify || spurious) {
                        flags &= ~_PTHREAD_MTX_OPT_NOTIFY;
-                       if (firstfit && ((lgenval & PTH_RWL_PBIT) != 0)) {
+                       if (firstfit && (newseq.lgenval & PTH_RWL_PBIT)) {
                                clearprepost = true;
-                               lgenval &= ~PTH_RWL_PBIT;
+                               newseq.lgenval &= ~PTH_RWL_PBIT;
                        }
                }
-               
-               newval64 = (((uint64_t)ugenval) << 32);
-               newval64 |= lgenval;
-
-       } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)seqaddr) != TRUE);
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release));
 
        if (clearprepost) {
-                __psynch_cvclrprepost(mutex, lgenval, ugenval, 0, 0, lgenval, (flags | _PTHREAD_MTX_OPT_MUTEX));
+               __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0,
+                               newseq.lgenval, flags | _PTHREAD_MTX_OPT_MUTEX);
        }
 
        if (mgenp != NULL) {
-               *mgenp = lgenval;
+               *mgenp = newseq.lgenval;
        }
        if (ugenp != NULL) {
-               *ugenp = ugenval;
+               *ugenp = newseq.ugenval;
        }
        if (pmtxp != NULL) {
                *pmtxp = (uint32_t *)mutex;
@@ -478,32 +543,46 @@ __mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32
        return 0;
 }
 
-static int
-__mtx_updatebits(_pthread_mutex *mutex, uint64_t selfid)
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp,
+               uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
+{
+       return _pthread_mutex_unlock_updatebits(mutex, flagsp, pmtxp, mgenp, ugenp);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 {
        int res = 0;
-       int firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT);
-       int isebit = 0;
+       bool firstfit = (mutex->mtxopts.options.policy ==
+                       _PTHREAD_MUTEX_POLICY_FIRSTFIT);
+       bool isebit = false, updated = false;
 
-       uint32_t lgenval, ugenval;
-       uint64_t oldval64, newval64;
-       volatile uint64_t *seqaddr;
+       mutex_seq *seqaddr;
        MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-       uint64_t oldtid;
-       volatile uint64_t *tidaddr;
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid;
 
        do {
-               do {
-                       oldval64 = *seqaddr;
-                       oldtid = *tidaddr;
-                       lgenval = (uint32_t)oldval64;
-                       ugenval = (uint32_t)(oldval64 >> 32);
+               if (firstfit && isebit && updated) {
+                       mutex_seq_atomic_load(seqaddr, &oldseq, relaxed);
+               }
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
 
+               if (isebit && !(oldseq.lgenval & PTH_RWL_EBIT)) {
                        // E bit was set on first pass through the loop but is no longer
                        // set. Apparently we spin until it arrives.
                        // XXX: verify this is desired behavior.
-               } while (isebit && (lgenval & PTH_RWL_EBIT) == 0);
+                       continue;
+               }
 
                if (isebit) {
                        // first fit mutex now has the E bit set. Return 1.
@@ -512,162 +591,179 @@ __mtx_updatebits(_pthread_mutex *mutex, uint64_t selfid)
                }
 
                if (firstfit) {
-                       isebit = (lgenval & PTH_RWL_EBIT) != 0;
-               } else if ((lgenval & (PTH_RWL_KBIT|PTH_RWL_EBIT)) == (PTH_RWL_KBIT|PTH_RWL_EBIT)) {
+                       isebit = (oldseq.lgenval & PTH_RWL_EBIT);
+               } else if ((oldseq.lgenval & (PTH_RWL_KBIT|PTH_RWL_EBIT)) ==
+                               (PTH_RWL_KBIT|PTH_RWL_EBIT)) {
                        // fairshare mutex and the bits are already set, just update tid
                        break;
                }
 
                // either first fit or no E bit set
                // update the bits
-               lgenval |= PTH_RWL_KBIT | PTH_RWL_EBIT;
-
-               newval64 = (((uint64_t)ugenval) << 32);
-               newval64 |= lgenval;
+               newseq.lgenval |= PTH_RWL_KBIT | PTH_RWL_EBIT;
 
-               // set s and b bit
-               // Retry if CAS fails, or if it succeeds with firstfit and E bit already set
-       } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)seqaddr) != TRUE ||
-                (firstfit && isebit));
+               // Retry if CAS fails, or if it succeeds with firstfit and E bit
+               // already set
+       } while (!(updated = mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       relaxed)) || (firstfit && isebit));
 
        if (res == 0) {
-               if (!OSAtomicCompareAndSwap64Barrier(oldtid, selfid, (volatile int64_t *)tidaddr)) {
+               if (!os_atomic_cmpxchg(tidaddr, oldtid, selfid, relaxed)) {
                        // we own this mutex, nobody should be updating it except us
-                       __builtin_trap();
+                       return _pthread_mutex_corruption_abort(mutex);
                }
        }
 
        return res;
 }
 
-int
-__mtx_markprepost(_pthread_mutex *mutex, uint32_t updateval, int firstfit)
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_markprepost(_pthread_mutex *mutex, uint32_t updateval)
 {
-       uint32_t flags;
-       uint32_t lgenval, ugenval;
-       uint64_t oldval64, newval64;
-
-       volatile uint64_t *seqaddr;
+       mutex_seq *seqaddr;
        MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
 
-       if (firstfit != 0 && (updateval & PTH_RWL_PBIT) != 0) {
-               int clearprepost;
-               do {                            
-                       clearprepost = 0;
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
 
-                       flags = mutex->mtxopts.value;
-
-                       oldval64 = *seqaddr;
-                       lgenval = (uint32_t)oldval64;
-                       ugenval = (uint32_t)(oldval64 >> 32);
+       bool clearprepost;
+       do {
+               clearprepost = false;
+               newseq = oldseq;
 
-                       /* update the bits */
-                       if ((lgenval & PTHRW_COUNT_MASK) == (ugenval & PTHRW_COUNT_MASK)) {
-                               clearprepost = 1;       
-                               lgenval &= ~PTH_RWL_PBIT;
-                       } else {
-                               lgenval |= PTH_RWL_PBIT;
-                       }
-                       newval64 = (((uint64_t)ugenval) << 32);
-                       newval64 |= lgenval;
-               } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)seqaddr) != TRUE);
-               
-               if (clearprepost != 0) {
-                       __psynch_cvclrprepost(mutex, lgenval, ugenval, 0, 0, lgenval, (flags | _PTHREAD_MTX_OPT_MUTEX));
+               /* update the bits */
+               if ((oldseq.lgenval & PTHRW_COUNT_MASK) ==
+                               (oldseq.ugenval & PTHRW_COUNT_MASK)) {
+                       clearprepost = true;
+                       newseq.lgenval &= ~PTH_RWL_PBIT;
+               } else {
+                       newseq.lgenval |= PTH_RWL_PBIT;
                }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, relaxed));
+
+       if (clearprepost) {
+               __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0,
+                               newseq.lgenval, mutex->mtxopts.value | _PTHREAD_MTX_OPT_MUTEX);
        }
+
        return 0;
 }
 
-static inline bool
-_pthread_mutex_check_init_fast(_pthread_mutex *mutex)
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_check_init_slow(pthread_mutex_t *omutex)
 {
-       return (mutex->sig == _PTHREAD_MUTEX_SIG);
+       int res = EINVAL;
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+
+       if (_pthread_mutex_check_signature_init(mutex)) {
+               _PTHREAD_LOCK(mutex->lock);
+               if (_pthread_mutex_check_signature_init(mutex)) {
+                       // initialize a statically initialized mutex to provide
+                       // compatibility for misbehaving applications.
+                       // (unlock should not be the first operation on a mutex)
+                       res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf));
+               } else if (_pthread_mutex_check_signature(mutex)) {
+                       res = 0;
+               }
+               _PTHREAD_UNLOCK(mutex->lock);
+       } else if (_pthread_mutex_check_signature(mutex)) {
+               res = 0;
+       }
+       if (res != 0) {
+               PLOCKSTAT_MUTEX_ERROR(omutex, res);
+       }
+       return res;
 }
 
-static int __attribute__((noinline))
+PTHREAD_ALWAYS_INLINE
+static inline int
 _pthread_mutex_check_init(pthread_mutex_t *omutex)
 {
        int res = 0;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       
-       if (mutex->sig != _PTHREAD_MUTEX_SIG) {
-               res = EINVAL;
-               if ((mutex->sig & _PTHREAD_MUTEX_SIG_init_MASK) == _PTHREAD_MUTEX_SIG_CMP) {
-                       LOCK(mutex->lock);
-                       if ((mutex->sig & _PTHREAD_MUTEX_SIG_init_MASK) == _PTHREAD_MUTEX_SIG_CMP) {
-                               // initialize a statically initialized mutex to provide
-                               // compatibility for misbehaving applications.
-                               // (unlock should not be the first operation on a mutex)
-                               res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf));
-                       } else if (mutex->sig == _PTHREAD_MUTEX_SIG) {
-                               res = 0;
-                       }
-                       UNLOCK(mutex->lock);
-               }
-               if (res != 0) {
-                       PLOCKSTAT_MUTEX_ERROR(omutex, res);
-               }
+
+       if (!_pthread_mutex_check_signature(mutex)) {
+               return _pthread_mutex_check_init_slow(omutex);
        }
        return res;
 }
 
+PTHREAD_NOINLINE
 static int
-_pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
+_pthread_mutex_lock_wait(pthread_mutex_t *omutex, mutex_seq newseq,
+               uint64_t oldtid)
 {
-       int res;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
 
-       if (os_slowpath(!_pthread_mutex_check_init_fast(mutex))) {
-               res = _pthread_mutex_check_init(omutex);
-               if (res != 0) {
-                       return res;
-               }
-       }
-
-       uint64_t oldtid;
-       volatile uint64_t *tidaddr;
+       uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t selfid = _pthread_selfid_direct();
 
+       PLOCKSTAT_MUTEX_BLOCK(omutex);
+       do {
+               uint32_t updateval;
+               do {
+                       updateval = __psynch_mutexwait(omutex, newseq.lgenval,
+                                       newseq.ugenval, oldtid, mutex->mtxopts.value);
+                       oldtid = os_atomic_load(tidaddr, relaxed);
+               } while (updateval == (uint32_t)-1);
+
+               // returns 0 on successful update; in firstfit it may fail with 1
+       } while (_pthread_mutex_lock_updatebits(mutex, selfid) == 1);
+       PLOCKSTAT_MUTEX_BLOCKED(omutex, BLOCK_SUCCESS_PLOCKSTAT);
+
+       return 0;
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock)
+{
+       int res, recursive = 0;
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+
+       res = _pthread_mutex_check_init(omutex);
+       if (res != 0) return res;
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid, selfid = _pthread_selfid_direct();
+
        if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) {
-               if (*tidaddr == selfid) {
+               if (os_atomic_load(tidaddr, relaxed) == selfid) {
                        if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) {
                                if (mutex->mtxopts.options.lock_count < USHRT_MAX) {
                                        mutex->mtxopts.options.lock_count++;
-                                       PLOCKSTAT_MUTEX_ACQUIRE(omutex, 1, 0);
+                                       recursive = 1;
                                        res = 0;
                                } else {
                                        res = EAGAIN;
-                                       PLOCKSTAT_MUTEX_ERROR(omutex, res);
                                }
                        } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */
                                // <rdar://problem/16261552> as per OpenGroup, trylock cannot
                                // return EDEADLK on a deadlock, it should return EBUSY.
                                res = EBUSY;
-                               PLOCKSTAT_MUTEX_ERROR(omutex, res);
                        } else  { /* PTHREAD_MUTEX_ERRORCHECK */
                                res = EDEADLK;
-                               PLOCKSTAT_MUTEX_ERROR(omutex, res);
                        }
-                       return res;
+                       goto out;
                }
        }
 
-       uint64_t oldval64, newval64;
-       volatile uint64_t *seqaddr;
-       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-
-       uint32_t lgenval, ugenval;
-       bool gotlock = false;
-
+       bool gotlock;
        do {
-               oldval64 = *seqaddr;
-               oldtid = *tidaddr;
-               lgenval = (uint32_t)oldval64;
-               ugenval = (uint32_t)(oldval64 >> 32);
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
 
-               gotlock = ((lgenval & PTH_RWL_EBIT) == 0);
+               gotlock = ((oldseq.lgenval & PTH_RWL_EBIT) == 0);
 
                if (trylock && !gotlock) {
                        // A trylock on a held lock will fail immediately. But since
@@ -676,57 +772,107 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
                } else {
                        // Increment the lock sequence number and force the lock into E+K
                        // mode, whether "gotlock" is true or not.
-                       lgenval += PTHRW_INC;
-                       lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
+                       newseq.lgenval += PTHRW_INC;
+                       newseq.lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
                }
-
-               newval64 = (((uint64_t)ugenval) << 32);
-               newval64 |= lgenval;
-               
-               // Set S and B bit
-       } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)seqaddr) == FALSE);
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire));
 
        if (gotlock) {
-               if (!OSAtomicCompareAndSwap64Barrier(oldtid, selfid, (volatile int64_t *)tidaddr)) {
-                       while (!OSAtomicCompareAndSwap64Barrier(*tidaddr, selfid, (volatile int64_t *)tidaddr));
-               }
+               os_atomic_store(tidaddr, selfid, relaxed);
                res = 0;
                DEBUG_TRACE(psynch_mutex_ulock, omutex, lgenval, ugenval, selfid);
-               PLOCKSTAT_MUTEX_ACQUIRE(omutex, 0, 0);
        } else if (trylock) {
                res = EBUSY;
-               DEBUG_TRACE(psynch_mutex_utrylock_failed, omutex, lgenval, ugenval, oldtid);
-               PLOCKSTAT_MUTEX_ERROR(omutex, res);
+               DEBUG_TRACE(psynch_mutex_utrylock_failed, omutex, lgenval, ugenval,
+                               oldtid);
        } else {
-               PLOCKSTAT_MUTEX_BLOCK(omutex);
-               do {
-                       uint32_t updateval;
-                       do {
-                               updateval = __psynch_mutexwait(omutex, lgenval, ugenval, oldtid, mutex->mtxopts.value);
-                               oldtid = *tidaddr;
-                       } while (updateval == (uint32_t)-1);
-
-                       // returns 0 on succesful update; in firstfit it may fail with 1
-               } while (__mtx_updatebits(mutex, selfid) == 1);
-               res = 0;
-               PLOCKSTAT_MUTEX_BLOCKED(omutex, BLOCK_SUCCESS_PLOCKSTAT);
+               res = _pthread_mutex_lock_wait(omutex, newseq, oldtid);
        }
 
        if (res == 0 && mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) {
                mutex->mtxopts.options.lock_count = 1;
        }
 
-       PLOCKSTAT_MUTEX_ACQUIRE(omutex, 0, 0);
+out:
+#if PLOCKSTAT
+       if (res == 0) {
+               PLOCKSTAT_MUTEX_ACQUIRE(omutex, recursive, 0);
+       } else {
+               PLOCKSTAT_MUTEX_ERROR(omutex, res);
+       }
+#endif
 
        return res;
 }
 
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
+{
+#if PLOCKSTAT || DEBUG_TRACE_POINTS
+       if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED() ||
+                       DEBUG_TRACE_POINTS) {
+               return _pthread_mutex_lock_slow(omutex, trylock);
+       }
+#endif
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
+               return _pthread_mutex_lock_slow(omutex, trylock);
+       }
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t selfid = _pthread_selfid_direct();
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) {
+               return _pthread_mutex_lock_slow(omutex, trylock);
+       }
+
+       bool gotlock;
+       do {
+               newseq = oldseq;
+
+               gotlock = ((oldseq.lgenval & PTH_RWL_EBIT) == 0);
+
+               if (trylock && !gotlock) {
+                       // A trylock on a held lock will fail immediately. But since
+                       // we did not load the sequence words atomically, perform a
+                       // no-op CAS64 to ensure that nobody has unlocked concurrently.
+               } else if (os_likely(gotlock)) {
+                       // Uncontended: increment the lock sequence number and force the
+                       // lock into E+K mode; a blocking lock on a held mutex goes below.
+                       newseq.lgenval += PTHRW_INC;
+                       newseq.lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
+               } else {
+                       return _pthread_mutex_lock_slow(omutex, trylock);
+               }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       acquire)));
+
+       if (os_likely(gotlock)) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+               return 0;
+       } else if (trylock) {
+               return EBUSY;
+       } else {
+               __builtin_trap();
+       }
+}
+
+PTHREAD_NOEXPORT_VARIANT
 int
 pthread_mutex_lock(pthread_mutex_t *mutex)
 {
        return _pthread_mutex_lock(mutex, false);
 }
 
+PTHREAD_NOEXPORT_VARIANT
 int
 pthread_mutex_trylock(pthread_mutex_t *mutex)
 {
@@ -737,67 +883,141 @@ pthread_mutex_trylock(pthread_mutex_t *mutex)
  * Unlock a mutex.
  * TODO: Priority inheritance stuff
  */
-int
-pthread_mutex_unlock(pthread_mutex_t *omutex)
+
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq,
+               uint32_t flags)
 {
        int res;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       uint32_t mtxgen, mtxugen, flags;
 
-       // Initialize static mutexes for compatibility with misbehaving
-       // applications (unlock should not be the first operation on a mutex).
-       if (os_slowpath(!_pthread_mutex_check_init_fast(mutex))) {
-               res = _pthread_mutex_check_init(omutex);
+       uint32_t updateval;
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+
+       updateval = __psynch_mutexdrop(omutex, newseq.lgenval, newseq.ugenval,
+                       os_atomic_load(tidaddr, relaxed), flags);
+
+       if (updateval == (uint32_t)-1) {
+               res = errno;
+
+               if (res == EINTR) {
+                       res = 0;
+               }
                if (res != 0) {
-                       return res;
+                       PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res);
                }
-       }
-
-       res = __mtx_droplock(mutex, &flags, NULL, &mtxgen, &mtxugen);
-       if (res != 0) {
                return res;
+       } else if ((mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT)
+                       && (updateval & PTH_RWL_PBIT)) {
+               return _pthread_mutex_markprepost(mutex, updateval);
        }
 
-       if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) {
-               uint32_t updateval;
-               int firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT);
-               volatile uint64_t *tidaddr;
-               MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       return 0;
+}
 
-               updateval = __psynch_mutexdrop(omutex, mtxgen, mtxugen, *tidaddr, flags);
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_unlock_slow(pthread_mutex_t *omutex)
+{
+       int res;
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+       mutex_seq newseq;
+       uint32_t flags;
 
-               if (updateval == (uint32_t)-1) {
-                       res = errno;
+       // Initialize static mutexes for compatibility with misbehaving
+       // applications (unlock should not be the first operation on a mutex).
+       res = _pthread_mutex_check_init(omutex);
+       if (res != 0) return res;
 
-                       if (res == EINTR) {
-                               res = 0;
-                       }
-                       if (res != 0) {
-                               PTHREAD_ABORT("__p_mutexdrop failed with error %d\n", res);
-                       }
-                       return res;
-               } else if (firstfit == 1) {
-                       if ((updateval & PTH_RWL_PBIT) != 0) {
-                               __mtx_markprepost(mutex, updateval, firstfit);
-                       }
-               }
+       res = _pthread_mutex_unlock_updatebits(mutex, &flags, NULL, &newseq.lgenval,
+                       &newseq.ugenval);
+       if (res != 0) return res;
+
+       if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) {
+               return _pthread_mutex_unlock_drop(omutex, newseq, flags);
        } else {
-               volatile uint64_t *tidaddr;
+               uint64_t *tidaddr; // only read by the trace point below
                MUTEX_GETTID_ADDR(mutex, &tidaddr);
-               DEBUG_TRACE(psynch_mutex_uunlock, omutex, mtxgen, mtxugen, *tidaddr);
+               DEBUG_TRACE(psynch_mutex_uunlock, omutex, newseq.lgenval,
+                               newseq.ugenval, os_atomic_load(tidaddr, relaxed));
        }
 
        return 0;
 }
 
+PTHREAD_NOEXPORT_VARIANT
 int
-_pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint32_t static_type)
+pthread_mutex_unlock(pthread_mutex_t *omutex)
 {
+#if PLOCKSTAT || DEBUG_TRACE_POINTS
+       if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED() ||
+                       DEBUG_TRACE_POINTS) {
+               return _pthread_mutex_unlock_slow(omutex);
+       }
+#endif
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
+               return _pthread_mutex_unlock_slow(omutex);
+       }
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       int numwaiters = diff_genseq(oldseq.lgenval, oldseq.ugenval);
+       if (os_unlikely(numwaiters == 0)) {
+               // spurious unlock (unlock of unlocked lock)
+               return 0;
+       }
+
+       // We're giving up the mutex one way or the other, so go ahead and
+       // update the owner to 0 so that once the CAS below succeeds, there
+       // is no stale ownership information. If the CAS of the seqaddr
+       // fails, we may loop, but it's still valid for the owner to be
+       // SWITCHING/0
+       os_atomic_store(tidaddr, 0, relaxed);
+
+       do {
+               newseq = oldseq;
+               newseq.ugenval += PTHRW_INC;
+
+               if (os_likely((oldseq.lgenval & PTHRW_COUNT_MASK) ==
+                               (newseq.ugenval & PTHRW_COUNT_MASK))) {
+                       // our unlock sequence matches the lock sequence, so if the
+                       // CAS is successful, the mutex is unlocked
+
+                       // do not reset Ibit, just K&E
+                       newseq.lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
+               } else {
+                       return _pthread_mutex_unlock_slow(omutex);
+               }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       release)));
+
+       return 0;
+}
+
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
+               uint32_t static_type)
+{
+       mutex->mtxopts.value = 0;
+       mutex->mtxopts.options.mutex = 1;
        if (attr) {
                if (attr->sig != _PTHREAD_MUTEX_ATTR_SIG) {
                        return EINVAL;
                }
-               mutex->prioceiling = attr->prioceiling;
+               mutex->prioceiling = (int16_t)attr->prioceiling;
                mutex->mtxopts.options.protocol = attr->protocol;
                mutex->mtxopts.options.policy = attr->policy;
                mutex->mtxopts.options.type = attr->type;
@@ -828,31 +1048,61 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint
                }
                mutex->mtxopts.options.pshared = _PTHREAD_DEFAULT_PSHARED;
        }
-       
-       mutex->mtxopts.options.notify = 0;
-       mutex->mtxopts.options.unused = 0;
-       mutex->mtxopts.options.hold = 0;
-       mutex->mtxopts.options.mutex = 1;
-       mutex->mtxopts.options.lock_count = 0;
-
-       mutex->m_tid[0] = 0;
-       mutex->m_tid[1] = 0;
-       mutex->m_seq[0] = 0;
-       mutex->m_seq[1] = 0;
-       mutex->m_seq[2] = 0;
-       mutex->prioceiling = 0;
        mutex->priority = 0;
 
-       mutex->mtxopts.options.misalign = (((uintptr_t)&mutex->m_seq[0]) & 0x7) != 0;
-       
-       // Ensure all contents are properly set before setting signature.
-       OSMemoryBarrier();
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
-       mutex->sig = _PTHREAD_MUTEX_SIG;
+#if PTHREAD_MUTEX_INIT_UNUSED
+       if ((uint32_t*)tidaddr != mutex->m_tid) {
+               mutex->mtxopts.options.misalign = 1;
+               __builtin_memset(mutex->m_tid, 0xff, sizeof(mutex->m_tid));
+       }
+       __builtin_memset(mutex->m_mis, 0xff, sizeof(mutex->m_mis));
+#endif // PTHREAD_MUTEX_INIT_UNUSED
+       *tidaddr = 0;
+       *seqaddr = (mutex_seq){ };
+
+       long sig = _PTHREAD_MUTEX_SIG;
+       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL &&
+                       mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FAIRSHARE) {
+               // rdar://18148854 _pthread_mutex_lock & pthread_mutex_unlock fastpath
+               sig = _PTHREAD_MUTEX_SIG_fast;
+       }
+
+#if PTHREAD_MUTEX_INIT_UNUSED
+       // For detecting copied mutexes and smashes during debugging
+       uint32_t sig32 = (uint32_t)sig;
+#if defined(__LP64__)
+       uintptr_t guard = ~(uintptr_t)mutex; // use ~ to hide from leaks
+       __builtin_memcpy(mutex->_reserved, &guard, sizeof(guard));
+       mutex->_reserved[2] = sig32;
+       mutex->_reserved[3] = sig32;
+       mutex->_pad = sig32;
+#else
+       mutex->_reserved[0] = sig32;
+#endif
+#endif // PTHREAD_MUTEX_INIT_UNUSED
+
+       // Ensure all contents are properly set before setting signature.
+#if defined(__LP64__)
+       // For binary compatibility reasons we cannot require natural alignment of
+       // the 64bit 'sig' long value in the struct. rdar://problem/21610439
+       uint32_t *sig32_ptr = (uint32_t*)&mutex->sig;
+       uint32_t *sig32_val = (uint32_t*)&sig;
+       *(sig32_ptr + 1) = *(sig32_val + 1);
+       os_atomic_store(sig32_ptr, *sig32_val, release);
+#else
+       os_atomic_store2o(mutex, sig, sig, release);
+#endif
 
        return 0;
 }
 
+PTHREAD_NOEXPORT_VARIANT
 int
 pthread_mutex_destroy(pthread_mutex_t *omutex)
 {
@@ -860,32 +1110,32 @@ pthread_mutex_destroy(pthread_mutex_t *omutex)
 
        int res = EINVAL;
 
-       LOCK(mutex->lock);
-       if (mutex->sig == _PTHREAD_MUTEX_SIG) {
-               uint32_t lgenval, ugenval;
-               uint64_t oldval64;
-               volatile uint64_t *seqaddr;
+       _PTHREAD_LOCK(mutex->lock);
+       if (_pthread_mutex_check_signature(mutex)) {
+               mutex_seq *seqaddr;
                MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-               volatile uint64_t *tidaddr;
+
+               mutex_seq seq;
+               mutex_seq_load(seqaddr, &seq);
+
+               uint64_t *tidaddr;
                MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
-               oldval64 = *seqaddr;
-               lgenval = (uint32_t)oldval64;
-               ugenval = (uint32_t)(oldval64 >> 32);
-               if ((*tidaddr == (uint64_t)0) &&
-                   ((lgenval & PTHRW_COUNT_MASK) == (ugenval & PTHRW_COUNT_MASK))) {
+               if ((os_atomic_load(tidaddr, relaxed) == 0) &&
+                               (seq.lgenval & PTHRW_COUNT_MASK) ==
+                               (seq.ugenval & PTHRW_COUNT_MASK)) {
                        mutex->sig = _PTHREAD_NO_SIG;
                        res = 0;
                } else {
                        res = EBUSY;
                }
-       } else if ((mutex->sig & _PTHREAD_MUTEX_SIG_init_MASK ) == _PTHREAD_MUTEX_SIG_CMP) {
+       } else if (_pthread_mutex_check_signature_init(mutex)) {
                mutex->sig = _PTHREAD_NO_SIG;
                res = 0;
        }
-       UNLOCK(mutex->lock);
-       
-       return res;     
+       _PTHREAD_UNLOCK(mutex->lock);
+
+       return res;
 }
 
 #endif /* !BUILDING_VARIANT ] */
@@ -909,4 +1159,3 @@ pthread_mutexattr_destroy(pthread_mutexattr_t *attr)
        return 0;
 }
 
-