diff --git a/src/pthread_mutex.c b/src/pthread_mutex.c
index d214739ab8ac1e63f70733539215ab84420f157f..edc97ee38cbcdb10948eed3da81fd43d4f748084 100644 (file)
@@ -2,14 +2,14 @@
  * Copyright (c) 2000-2003, 2007, 2008 Apple Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * compliance with the License. Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this
  * file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -17,7 +17,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_LICENSE_HEADER_END@
  */
 /*
 #include "resolver.h"
 #include "internal.h"
 #include "kern/kern_trace.h"
-#include <sys/syscall.h>
 
-#include "os/atomic.h"
+#ifndef BUILDING_VARIANT /* [ */
 
 #ifdef PLOCKSTAT
 #include "plockstat.h"
+/* This function is never called and exists to provide never-fired DTrace
+ * probes so that user D scripts don't get errors.
+ */
+PTHREAD_NOEXPORT PTHREAD_USED
+void
+_plockstat_never_fired(void)
+{
+       PLOCKSTAT_MUTEX_SPIN(NULL);
+       PLOCKSTAT_MUTEX_SPUN(NULL, 0, 0);
+}
 #else /* !PLOCKSTAT */
 #define        PLOCKSTAT_MUTEX_SPIN(x)
 #define        PLOCKSTAT_MUTEX_SPUN(x, y, z)
 #define        PLOCKSTAT_MUTEX_RELEASE(x, y)
 #endif /* PLOCKSTAT */
 
-extern int __unix_conforming;
+#define BLOCK_FAIL_PLOCKSTAT    0
+#define BLOCK_SUCCESS_PLOCKSTAT 1
+
+#define PTHREAD_MUTEX_INIT_UNUSED 1
 
-#ifndef BUILDING_VARIANT
+PTHREAD_NOEXPORT PTHREAD_WEAK
+int _pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock);
 
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
-int
-_pthread_mutex_unlock_slow(pthread_mutex_t *omutex);
+int _pthread_mutex_fairshare_lock_slow(_pthread_mutex *mutex, bool trylock);
 
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
-int
-_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock);
+int _pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock);
 
-PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into _pthread_mutex_lock
-int
-_pthread_mutex_lock_wait(pthread_mutex_t *omutex, uint64_t newval64, uint64_t oldtid);
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex);
 
-#endif /* BUILDING_VARIANT */
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex);
 
-#define DEBUG_TRACE_POINTS 0
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_corruption_abort(_pthread_mutex *mutex);
+
+extern int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT;
+
+
+int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT =
+               _PTHREAD_MTX_OPT_POLICY_DEFAULT;
+
+static inline bool
+_pthread_mutex_policy_validate(int policy)
+{
+       return (policy >= 0 && policy < _PTHREAD_MUTEX_POLICY_LAST);
+}
+
+static inline int
+_pthread_mutex_policy_to_opt(int policy)
+{
+       switch (policy) {
+       case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP:
+               return _PTHREAD_MTX_OPT_POLICY_FAIRSHARE;
+       case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP:
+               return _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+       default:
+               __builtin_unreachable();
+       }
+}
+
+PTHREAD_NOEXPORT
+void
+_pthread_mutex_global_init(const char *envp[],
+               struct _pthread_registration_data *registration_data)
+{
+
+       int opt = _PTHREAD_MTX_OPT_POLICY_DEFAULT;
+       if (registration_data->mutex_default_policy) {
+               int policy = registration_data->mutex_default_policy;
+               if (_pthread_mutex_policy_validate(policy)) {
+                       opt = _pthread_mutex_policy_to_opt(policy);
+               }
+       }
+
+       const char *envvar = _simple_getenv(envp, "PTHREAD_MUTEX_DEFAULT_POLICY");
+       if (envvar) {
+               int policy = envvar[0] - '0';
+               if (_pthread_mutex_policy_validate(policy)) {
+                       opt = _pthread_mutex_policy_to_opt(policy);
+               }
+       }
+
+       if (opt != __pthread_mutex_default_opt_policy) {
+               __pthread_mutex_default_opt_policy = opt;
+       }
+}
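
An aside on the override above: _pthread_mutex_global_init consults only the
first character of PTHREAD_MUTEX_DEFAULT_POLICY. A minimal standalone sketch
of that parse (not libpthread code; the digits 1 and 3 correspond to
PTHREAD_MUTEX_POLICY_FAIRSHARE_NP and PTHREAD_MUTEX_POLICY_FIRSTFIT_NP from
<pthread.h>):

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	const char *envvar = getenv("PTHREAD_MUTEX_DEFAULT_POLICY");
	if (envvar == NULL) {
		printf("no override; built-in default applies\n");
		return 0;
	}
	int policy = envvar[0] - '0';	// only the first digit is consulted
	switch (policy) {
	case 1:				// PTHREAD_MUTEX_POLICY_FAIRSHARE_NP
		printf("default policy: fairshare\n");
		break;
	case 3:				// PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
		printf("default policy: firstfit\n");
		break;
	default:
		printf("policy %d not recognized; override ignored\n", policy);
		break;
	}
	return 0;
}
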
 
-#if DEBUG_TRACE_POINTS
-extern int __syscall(int number, ...);
-#define DEBUG_TRACE(x, a, b, c, d) __syscall(SYS_kdebug_trace, TRACE_##x, a, b, c, d)
-#else
-#define DEBUG_TRACE(x, a, b, c, d) do { } while(0)
-#endif
 
-#include <machine/cpu_capabilities.h>
 
-static inline int _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint32_t static_type);
+PTHREAD_ALWAYS_INLINE
+static inline int _pthread_mutex_init(_pthread_mutex *mutex,
+               const pthread_mutexattr_t *attr, uint32_t static_type);
+
+typedef union mutex_seq {
+       uint32_t seq[2];
+       struct { uint32_t lgenval; uint32_t ugenval; };
+       struct { uint32_t mgen; uint32_t ugen; };
+       uint64_t seq_LU;
+       uint64_t _Atomic atomic_seq_LU;
+} mutex_seq;
+
+_Static_assert(sizeof(mutex_seq) == 2 * sizeof(uint32_t),
+               "Incorrect mutex_seq size");
 
 #if !__LITTLE_ENDIAN__
 #error MUTEX_GETSEQ_ADDR assumes little endian layout of 2 32-bit sequence words
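
To see why the layout matters: mutex_seq overlays the two 32-bit generation
counters on a single 64-bit word so that both can be updated by one
compare-and-swap, and the #error above pins down the byte order that makes
seq[0]/lgenval the low half. A small demonstration of the aliasing (a sketch,
not libpthread code; requires C11 anonymous structs and a little-endian
target):

#include <assert.h>
#include <stdint.h>

typedef union {
	uint32_t seq[2];
	struct { uint32_t lgenval; uint32_t ugenval; };
	uint64_t seq_LU;
} demo_seq;

int
main(void)
{
	demo_seq s = { .lgenval = 5, .ugenval = 4 };
	// On little-endian targets, L=5 / U=4 packs as 0x0000000400000005.
	assert(s.seq_LU == 0x0000000400000005ull);
	assert(s.seq[0] == s.lgenval && s.seq[1] == s.ugenval);
	return 0;
}
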
@@ -106,45 +176,67 @@ static inline int _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutex
 
 PTHREAD_ALWAYS_INLINE
 static inline void
-MUTEX_GETSEQ_ADDR(_pthread_mutex *mutex,
-                 volatile uint64_t **seqaddr)
+MUTEX_GETSEQ_ADDR(_pthread_mutex *mutex, mutex_seq **seqaddr)
 {
-       // addr of m_seq[1] for misaligned, m_seq[0] for aligned mutex struct
-       *seqaddr = (volatile uint64_t *)(((uintptr_t)&mutex->m_seq[1]) & ~0x7ul);
+       // 64-bit aligned address inside m_seq array (&m_seq[0] for aligned mutex)
+       // We don't require more than byte alignment on OS X. rdar://22278325
+       *seqaddr = (void *)(((uintptr_t)mutex->m_seq + 0x7ul) & ~0x7ul);
 }
 
 PTHREAD_ALWAYS_INLINE
 static inline void
-MUTEX_GETTID_ADDR(_pthread_mutex *mutex,
-                                 volatile uint64_t **tidaddr)
+MUTEX_GETTID_ADDR(_pthread_mutex *mutex, uint64_t **tidaddr)
 {
-       // addr of m_tid[1] for misaligned, m_tid[0] for aligned mutex struct
-       *tidaddr = (volatile uint64_t *)(((uintptr_t)&mutex->m_tid[1]) & ~0x7ul);
+       // 64-bit aligned address inside m_tid array (&m_tid[0] for aligned mutex)
+       // We don't require more than byte alignment on OS X. rdar://22278325
+       *tidaddr = (void*)(((uintptr_t)mutex->m_tid + 0x7ul) & ~0x7ul);
 }
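
Both getters above round the address up with (addr + 7) & ~7. Because the
in-structure storage reserves an extra slot, the rounded-up pointer always
stays inside the array whether or not the pthread_mutex_t itself happens to
be 8-byte aligned. A sketch of the arithmetic (not libpthread code):

#include <assert.h>
#include <stdint.h>

static uintptr_t
align_up_8(uintptr_t addr)
{
	return (addr + 0x7ul) & ~0x7ul;
}

int
main(void)
{
	assert(align_up_8(0x1000) == 0x1000);	// already aligned: unchanged
	assert(align_up_8(0x1004) == 0x1008);	// 4-byte aligned: next boundary
	assert(align_up_8(0x1001) == 0x1008);	// any misalignment rounds up
	return 0;
}
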
 
-#ifndef BUILDING_VARIANT /* [ */
-#ifndef OS_UP_VARIANT_ONLY
+PTHREAD_ALWAYS_INLINE
+static inline void
+mutex_seq_load(mutex_seq *seqaddr, mutex_seq *oldseqval)
+{
+       oldseqval->seq_LU = seqaddr->seq_LU;
+}
 
-#define BLOCK_FAIL_PLOCKSTAT    0
-#define BLOCK_SUCCESS_PLOCKSTAT 1
+#define mutex_seq_atomic_load(seqaddr, oldseqval, m) \
+               mutex_seq_atomic_load_##m(seqaddr, oldseqval)
 
-#ifdef PLOCKSTAT
-/* This function is never called and exists to provide never-fired dtrace
- * probes so that user d scripts don't get errors.
- */
-PTHREAD_NOEXPORT PTHREAD_USED
-void
-_plockstat_never_fired(void) 
+PTHREAD_ALWAYS_INLINE PTHREAD_USED
+static inline bool
+mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval,
+               mutex_seq *newseqval)
 {
-       PLOCKSTAT_MUTEX_SPIN(NULL);
-       PLOCKSTAT_MUTEX_SPUN(NULL, 0, 0);
+       return os_atomic_cmpxchgv(&seqaddr->atomic_seq_LU, oldseqval->seq_LU,
+                       newseqval->seq_LU, &oldseqval->seq_LU, relaxed);
+}
+
+PTHREAD_ALWAYS_INLINE PTHREAD_USED
+static inline bool
+mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval,
+               mutex_seq *newseqval)
+{
+       return os_atomic_cmpxchgv(&seqaddr->atomic_seq_LU, oldseqval->seq_LU,
+                       newseqval->seq_LU, &oldseqval->seq_LU, acquire);
+}
+
+PTHREAD_ALWAYS_INLINE PTHREAD_USED
+static inline bool
+mutex_seq_atomic_cmpxchgv_release(mutex_seq *seqaddr, mutex_seq *oldseqval,
+               mutex_seq *newseqval)
+{
+       return os_atomic_cmpxchgv(&seqaddr->atomic_seq_LU, oldseqval->seq_LU,
+                       newseqval->seq_LU, &oldseqval->seq_LU, release);
 }
-#endif // PLOCKSTAT
+
+#define mutex_seq_atomic_cmpxchgv(seqaddr, oldseqval, newseqval, m)\
+               mutex_seq_atomic_cmpxchgv_##m(seqaddr, oldseqval, newseqval)
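
The "cmpxchgv" flavor used throughout this file both attempts the swap and,
on failure, hands back the value it lost to, so retry loops need no extra
load. A sketch of the same shape using C11 <stdatomic.h> in place of the
libplatform os_atomic macros:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool
seq_cmpxchgv_acquire(_Atomic uint64_t *target, uint64_t *expected,
		uint64_t desired)
{
	// On failure, *expected is refreshed with the current value of *target.
	return atomic_compare_exchange_strong_explicit(target, expected,
			desired, memory_order_acquire, memory_order_relaxed);
}

int
main(void)
{
	_Atomic uint64_t seq = 5;
	uint64_t snapshot = 4;		// stale snapshot: first CAS must fail
	bool ok = seq_cmpxchgv_acquire(&seq, &snapshot, 6);
	// ok == false and snapshot == 5: retry with the refreshed snapshot.
	ok = seq_cmpxchgv_acquire(&seq, &snapshot, 6);
	return ok ? 0 : 1;
}
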
 
 /*
  * Initialize a mutex variable, possibly with additional attributes.
  * Public interface - so don't trust the lock - initialize it first.
  */
+PTHREAD_NOEXPORT_VARIANT
 int
 pthread_mutex_init(pthread_mutex_t *omutex, const pthread_mutexattr_t *attr)
 {
@@ -155,43 +247,48 @@ pthread_mutex_init(pthread_mutex_t *omutex, const pthread_mutexattr_t *attr)
                return EBUSY;
 #endif
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       LOCK_INIT(mutex->lock);
+       _PTHREAD_LOCK_INIT(mutex->lock);
        return (_pthread_mutex_init(mutex, attr, 0x7));
 }
 
+PTHREAD_NOEXPORT_VARIANT
 int
 pthread_mutex_getprioceiling(const pthread_mutex_t *omutex, int *prioceiling)
 {
        int res = EINVAL;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
        if (_pthread_mutex_check_signature(mutex)) {
-               LOCK(mutex->lock);
+               _PTHREAD_LOCK(mutex->lock);
                *prioceiling = mutex->prioceiling;
                res = 0;
-               UNLOCK(mutex->lock);
+               _PTHREAD_UNLOCK(mutex->lock);
        }
        return res;
 }
 
+PTHREAD_NOEXPORT_VARIANT
 int
-pthread_mutex_setprioceiling(pthread_mutex_t *omutex, int prioceiling, int *old_prioceiling)
+pthread_mutex_setprioceiling(pthread_mutex_t *omutex, int prioceiling,
+               int *old_prioceiling)
 {
        int res = EINVAL;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
        if (_pthread_mutex_check_signature(mutex)) {
-               LOCK(mutex->lock);
-               if (prioceiling >= -999 || prioceiling <= 999) {
+               _PTHREAD_LOCK(mutex->lock);
+               if (prioceiling >= -999 && prioceiling <= 999) {
                        *old_prioceiling = mutex->prioceiling;
-                       mutex->prioceiling = prioceiling;
+                       mutex->prioceiling = (int16_t)prioceiling;
                        res = 0;
                }
-               UNLOCK(mutex->lock);
+               _PTHREAD_UNLOCK(mutex->lock);
        }
        return res;
 }
 
+
 int
-pthread_mutexattr_getprioceiling(const pthread_mutexattr_t *attr, int *prioceiling)
+pthread_mutexattr_getprioceiling(const pthread_mutexattr_t *attr,
+               int *prioceiling)
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
@@ -212,6 +309,25 @@ pthread_mutexattr_getprotocol(const pthread_mutexattr_t *attr, int *protocol)
        return res;
 }
 
+int
+pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t *attr, int *policy)
+{
+       int res = EINVAL;
+       if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
+               switch (attr->opt) {
+               case _PTHREAD_MTX_OPT_POLICY_FAIRSHARE:
+                       *policy = PTHREAD_MUTEX_POLICY_FAIRSHARE_NP;
+                       res = 0;
+                       break;
+               case _PTHREAD_MTX_OPT_POLICY_FIRSTFIT:
+                       *policy = PTHREAD_MUTEX_POLICY_FIRSTFIT_NP;
+                       res = 0;
+                       break;
+               }
+       }
+       return res;
+}
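
A usage sketch for the two *_np policy calls (assumes a libpthread new enough
to export them, along with the PTHREAD_MUTEX_POLICY_*_NP constants in
<pthread.h>):

#include <pthread.h>
#include <stdio.h>

int
main(void)
{
	pthread_mutexattr_t attr;
	pthread_mutex_t m;
	int policy = -1;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setpolicy_np(&attr, PTHREAD_MUTEX_POLICY_FIRSTFIT_NP);
	pthread_mutexattr_getpolicy_np(&attr, &policy);
	printf("policy = %d (firstfit)\n", policy);

	pthread_mutex_init(&m, &attr);
	pthread_mutexattr_destroy(&attr);

	pthread_mutex_lock(&m);
	pthread_mutex_unlock(&m);
	pthread_mutex_destroy(&m);
	return 0;
}
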
+
 int
 pthread_mutexattr_gettype(const pthread_mutexattr_t *attr, int *type)
 {
@@ -239,7 +355,7 @@ pthread_mutexattr_init(pthread_mutexattr_t *attr)
 {
        attr->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING;
        attr->protocol = _PTHREAD_DEFAULT_PROTOCOL;
-       attr->policy = _PTHREAD_MUTEX_POLICY_FAIRSHARE;
+       attr->opt = __pthread_mutex_default_opt_policy;
        attr->type = PTHREAD_MUTEX_DEFAULT;
        attr->sig = _PTHREAD_MUTEX_ATTR_SIG;
        attr->pshared = _PTHREAD_DEFAULT_PSHARED;
@@ -251,7 +367,7 @@ pthread_mutexattr_setprioceiling(pthread_mutexattr_t *attr, int prioceiling)
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
-               if (prioceiling >= -999 || prioceiling <= 999) {
+               if (prioceiling >= -999 && prioceiling <= 999) {
                        attr->prioceiling = prioceiling;
                        res = 0;
                }
@@ -281,12 +397,18 @@ pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *attr, int policy)
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
+               // <rdar://problem/35844519> the first-fit implementation was broken
+               // pre-Liberty so this mapping exists to ensure that the old first-fit
+               // define (2) is no longer valid when used on older systems.
                switch (policy) {
-                       case _PTHREAD_MUTEX_POLICY_FAIRSHARE:
-                       case _PTHREAD_MUTEX_POLICY_FIRSTFIT:
-                               attr->policy = policy;
-                               res = 0;
-                               break;
+               case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP:
+                       attr->opt = _PTHREAD_MTX_OPT_POLICY_FAIRSHARE;
+                       res = 0;
+                       break;
+               case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP:
+                       attr->opt = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+                       res = 0;
+                       break;
                }
        }
        return res;
@@ -310,23 +432,6 @@ pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type)
        return res;
 }
 
-// XXX remove
-void
-cthread_yield(void) 
-{
-       sched_yield();
-}
-
-void
-pthread_yield_np(void) 
-{
-       sched_yield();
-}
-
-
-/*
- * Temp: till pshared is fixed correctly
- */
 int
 pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared)
 {
@@ -339,28 +444,135 @@ pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared)
 
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
 #if __DARWIN_UNIX03
-               if (( pshared == PTHREAD_PROCESS_PRIVATE) || (pshared == PTHREAD_PROCESS_SHARED))
+               if (( pshared == PTHREAD_PROCESS_PRIVATE) ||
+                               (pshared == PTHREAD_PROCESS_SHARED))
 #else /* __DARWIN_UNIX03 */
                if ( pshared == PTHREAD_PROCESS_PRIVATE)
 #endif /* __DARWIN_UNIX03 */
                {
-                       attr->pshared = pshared; 
+                       attr->pshared = pshared;
                        res = 0;
                }
        }
        return res;
 }
 
-PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+PTHREAD_NOEXPORT PTHREAD_NOINLINE PTHREAD_NORETURN
 int
-_pthread_mutex_corruption_abort(_pthread_mutex *mutex);
+_pthread_mutex_corruption_abort(_pthread_mutex *mutex)
+{
+       PTHREAD_ABORT("pthread_mutex corruption: mutex owner changed in the "
+                       "middle of lock/unlock");
+}
+
 
 PTHREAD_NOINLINE
-int
-_pthread_mutex_corruption_abort(_pthread_mutex *mutex)
+static int
+_pthread_mutex_check_init_slow(_pthread_mutex *mutex)
+{
+       int res = EINVAL;
+
+       if (_pthread_mutex_check_signature_init(mutex)) {
+               _PTHREAD_LOCK(mutex->lock);
+               if (_pthread_mutex_check_signature_init(mutex)) {
+                       // initialize a statically initialized mutex to provide
+                       // compatibility for misbehaving applications.
+                       // (unlock should not be the first operation on a mutex)
+                       res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf));
+               } else if (_pthread_mutex_check_signature(mutex)) {
+                       res = 0;
+               }
+               _PTHREAD_UNLOCK(mutex->lock);
+       } else if (_pthread_mutex_check_signature(mutex)) {
+               res = 0;
+       }
+       if (res != 0) {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res);
+       }
+       return res;
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_check_init(_pthread_mutex *mutex)
+{
+       int res = 0;
+       if (!_pthread_mutex_check_signature(mutex)) {
+               return _pthread_mutex_check_init_slow(mutex);
+       }
+       return res;
+}
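
The lazy-init path above exists because some applications make an operation
other than lock the first use of a statically initialized mutex. A sketch of
such (technically non-conforming) code, which this path keeps working:

#include <pthread.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

int
main(void)
{
	// Unlock as the very first operation: undefined per POSIX, but the
	// slow path above recognizes the static-init signature and finishes
	// initialization instead of failing outright.
	pthread_mutex_unlock(&m);

	pthread_mutex_lock(&m);
	pthread_mutex_unlock(&m);
	return 0;
}
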
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_is_fairshare(_pthread_mutex *mutex)
+{
+       return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FAIRSHARE);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_is_firstfit(_pthread_mutex *mutex)
+{
+       return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_is_recursive(_pthread_mutex *mutex)
+{
+       return (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE);
+}
+
+PTHREAD_ALWAYS_INLINE
+static int
+_pthread_mutex_lock_handle_options(_pthread_mutex *mutex, bool trylock,
+               uint64_t *tidaddr)
+{
+       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) {
+               // NORMAL does not do EDEADLK checking
+               return 0;
+       }
+
+       uint64_t selfid = _pthread_selfid_direct();
+       if (os_atomic_load(tidaddr, relaxed) == selfid) {
+               if (_pthread_mutex_is_recursive(mutex)) {
+                       if (mutex->mtxopts.options.lock_count < USHRT_MAX) {
+                               mutex->mtxopts.options.lock_count += 1;
+                               return mutex->mtxopts.options.lock_count;
+                       } else {
+                               return -EAGAIN;
+                       }
+               } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */
+                       // <rdar://problem/16261552> as per OpenGroup, trylock cannot
+                       // return EDEADLK on a deadlock, it should return EBUSY.
+                       return -EBUSY;
+               } else { /* PTHREAD_MUTEX_ERRORCHECK */
+                       return -EDEADLK;
+               }
+       }
+
+       // Not recursive, or recursive but first lock.
+       return 0;
+}
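
The sign convention here (positive = recursive lock handled, negative =
negated errno, zero = fall through to the acquire path) is internal, but its
user-visible effect is easy to demonstrate with the public API (sketch):

#include <assert.h>
#include <errno.h>
#include <pthread.h>

int
main(void)
{
	pthread_mutexattr_t attr;
	pthread_mutex_t m;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
	pthread_mutex_init(&m, &attr);
	pthread_mutexattr_destroy(&attr);

	pthread_mutex_lock(&m);
	assert(pthread_mutex_lock(&m) == EDEADLK);	// self-deadlock detected
	assert(pthread_mutex_trylock(&m) == EBUSY);	// EBUSY, not EDEADLK
	pthread_mutex_unlock(&m);

	pthread_mutex_destroy(&m);
	return 0;
}
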
+
+PTHREAD_ALWAYS_INLINE
+static int
+_pthread_mutex_unlock_handle_options(_pthread_mutex *mutex, uint64_t *tidaddr)
 {
-       PTHREAD_ABORT("pthread_mutex corruption: mutex %p owner changed in the middle of lock/unlock");
-       return EINVAL; // NOTREACHED
+       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) {
+               // NORMAL does not do EDEADLK checking
+               return 0;
+       }
+
+       uint64_t selfid = _pthread_selfid_direct();
+       if (os_atomic_load(tidaddr, relaxed) != selfid) {
+               return -EPERM;
+       } else if (_pthread_mutex_is_recursive(mutex) &&
+                       --mutex->mtxopts.options.lock_count) {
+               return 1;
+       }
+       return 0;
 }
 
 /*
@@ -372,100 +584,95 @@ _pthread_mutex_corruption_abort(_pthread_mutex *mutex)
  * the unlock path will then transition to D=[L5 U4 TID0] and then finally
  * E=[L5 U5 TID0].
  *
- * If a contender comes in after B, the mutex will instead transition to E=[L6+KE U4 TID0]
- * and then F=[L6+KE U4 TID940]. If a contender comes in after C, it will transition to
- * F=[L6+KE U4 TID940] directly. In both cases, the contender will enter the kernel with either
- * mutexwait(U4, TID0) or mutexwait(U4, TID940). The first owner will unlock the mutex
- * by first updating the owner to G=[L6+KE U4 TID-1] and then doing the actual unlock to
- * H=[L6+KE U5 TID=-1] before entering the kernel with mutexdrop(U5, -1) to signal the next waiter
- * (potentially as a prepost). When the waiter comes out of the kernel, it will update the owner to
- * I=[L6+KE U5 TID941]. An unlock at this point is simply J=[L6 U5 TID0] and then K=[L6 U6 TID0].
+ * If a contender comes in after B, the mutex will instead transition to
+ * E=[L6+KE U4 TID0] and then F=[L6+KE U4 TID940]. If a contender comes in after
+ * C, it will transition to F=[L6+KE U4 TID940] directly. In both cases, the
+ * contender will enter the kernel with either mutexwait(U4, TID0) or
+ * mutexwait(U4, TID940). The first owner will unlock the mutex by first
+ * updating the owner to G=[L6+KE U4 TID-1] and then doing the actual unlock to
+ * H=[L6+KE U5 TID=-1] before entering the kernel with mutexdrop(U5, -1) to
+ * signal the next waiter (potentially as a prepost). When the waiter comes out
+ * of the kernel, it will update the owner to I=[L6+KE U5 TID941]. An unlock at
+ * this point is simply J=[L6 U5 TID0] and then K=[L6 U6 TID0].
  *
- * At various points along these timelines, since the sequence words and TID are written independently,
- * a thread may get preempted and another thread might see inconsistent data. In the worst case, another
- * thread may see the TID in the SWITCHING (-1) state or unlocked (0) state for longer because the
- * owning thread was preempted.
+ * At various points along these timelines, since the sequence words and TID are
+ * written independently, a thread may get preempted and another thread might
+ * see inconsistent data. In the worst case, another thread may see the TID in
+ * the SWITCHING (-1) state or unlocked (0) state for longer because the owning
+ * thread was preempted.
  */
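
A back-of-the-envelope model of the uncontended timeline above (not
libpthread code; L and U are treated as plain counters in PTHRW_INC units and
the kernel is ignored):

#include <assert.h>
#include <stdint.h>

struct model { uint32_t L, U; uint64_t tid; int KE; };

int
main(void)
{
	struct model m = { 4, 4, 0, 0 };	// A=[L4 U4 TID0]: idle
	m.L += 1; m.KE = 1;			// B=[L5+KE U4 TID0]: ticket taken
	m.tid = 940;				// C=[L5+KE U4 TID940]: owner set
	assert(m.L - m.U == 1);			// one outstanding acquisition
	m.KE = 0; m.tid = 0;			// D=[L5 U4 TID0]: owner cleared
	m.U += 1;				// E=[L5 U5 TID0]: unlock catches up
	assert(m.L - m.U == 0);			// L == U: fully unlocked
	return 0;
}
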
 
 /*
- * Drop the mutex unlock references from cond_wait. or mutex_unlock.
+ * Drop the mutex unlock references from cond_wait or mutex_unlock.
  */
 PTHREAD_ALWAYS_INLINE
 static inline int
-_pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
+_pthread_mutex_fairshare_unlock_updatebits(_pthread_mutex *mutex,
+               uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
 {
-       bool firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT);
-       uint32_t lgenval, ugenval, flags;
-       uint64_t oldtid, newtid;
-       volatile uint64_t *tidaddr;
-       MUTEX_GETTID_ADDR(mutex, &tidaddr);
-
-       flags = mutex->mtxopts.value;
+       uint32_t flags = mutex->mtxopts.value;
        flags &= ~_PTHREAD_MTX_OPT_NOTIFY; // no notification by default
 
-       if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) {
-               uint64_t selfid = _pthread_selfid_direct();
-
-               if (*tidaddr != selfid) {
-                       //PTHREAD_ABORT("dropping recur or error mutex not owned by the thread");
-                       PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, EPERM);
-                       return EPERM;
-               } else if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE &&
-                          --mutex->mtxopts.options.lock_count) {
-                       PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1);
-                       if (flagsp != NULL) {
-                               *flagsp = flags;
-                       }
-                       return 0;
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid, newtid;
+
+       int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr);
+       if (res > 0) {
+               // Valid recursive unlock
+               if (flagsp) {
+                       *flagsp = flags;
                }
+               PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1);
+               return 0;
+       } else if (res < 0) {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res);
+               return -res;
        }
 
-       uint64_t oldval64, newval64;
-       volatile uint64_t *seqaddr;
-       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-
-       bool clearprepost, clearnotify, spurious;
+       bool clearnotify, spurious;
        do {
-               oldval64 = *seqaddr;
-               oldtid = *tidaddr;
-               lgenval = (uint32_t)oldval64;
-               ugenval = (uint32_t)(oldval64 >> 32);
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
 
-               clearprepost = false;
                clearnotify = false;
                spurious = false;
 
-               int numwaiters = diff_genseq(lgenval, ugenval); // pending waiters
-
+               // pending waiters
+               int numwaiters = diff_genseq(oldseq.lgenval, oldseq.ugenval);
                if (numwaiters == 0) {
-                       // spurious unlock; do not touch tid
+                       // spurious unlock (unlock of unlocked lock)
                        spurious = true;
                } else {
-                       ugenval += PTHRW_INC;
+                       newseq.ugenval += PTHRW_INC;
 
-                       if ((lgenval & PTHRW_COUNT_MASK) == (ugenval & PTHRW_COUNT_MASK)) {
-                               // our unlock sequence matches to lock sequence, so if the CAS is successful, the mutex is unlocked
+                       if ((oldseq.lgenval & PTHRW_COUNT_MASK) ==
+                                       (newseq.ugenval & PTHRW_COUNT_MASK)) {
+                               // our unlock sequence matches to lock sequence, so if the
+                               // CAS is successful, the mutex is unlocked
 
                                /* do not reset Ibit, just K&E */
-                               lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
+                               newseq.lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
                                clearnotify = true;
                                newtid = 0; // clear owner
                        } else {
-                               if (firstfit) {
-                                       lgenval &= ~PTH_RWL_EBIT; // reset E bit so another can acquire meanwhile
-                                       newtid = 0;
-                               } else {
-                                       newtid = PTHREAD_MTX_TID_SWITCHING;
-                               }
+                               newtid = PTHREAD_MTX_TID_SWITCHING;
                                // need to signal others waiting for mutex
                                flags |= _PTHREAD_MTX_OPT_NOTIFY;
                        }
-                       
+
                        if (newtid != oldtid) {
-                               // We're giving up the mutex one way or the other, so go ahead and update the owner to SWITCHING
-                               // or 0 so that once the CAS below succeeds, there is no stale ownership information.
-                               // If the CAS of the seqaddr fails, we may loop, but it's still valid for the owner
-                               // to be SWITCHING/0
+                               // We're giving up the mutex one way or the other, so go ahead
+                               // and update the owner to SWITCHING or 0 so that once the CAS
+                               // below succeeds, there is no stale ownership information. If
+                               // the CAS of the seqaddr fails, we may loop, but it's still
+                               // valid for the owner to be SWITCHING/0
                                if (!os_atomic_cmpxchg(tidaddr, oldtid, newtid, relaxed)) {
                                        // we own this mutex, nobody should be updating it except us
                                        return _pthread_mutex_corruption_abort(mutex);
@@ -475,26 +682,17 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, uint32
 
                if (clearnotify || spurious) {
                        flags &= ~_PTHREAD_MTX_OPT_NOTIFY;
-                       if (firstfit && ((lgenval & PTH_RWL_PBIT) != 0)) {
-                               clearprepost = true;
-                               lgenval &= ~PTH_RWL_PBIT;
-                       }
                }
-               
-               newval64 = (((uint64_t)ugenval) << 32);
-               newval64 |= lgenval;
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release));
 
-       } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, release));
-
-       if (clearprepost) {
-                __psynch_cvclrprepost(mutex, lgenval, ugenval, 0, 0, lgenval, (flags | _PTHREAD_MTX_OPT_MUTEX));
-       }
+       PTHREAD_TRACE(psynch_mutex_unlock_updatebits, mutex, oldseq.lgenval,
+                       newseq.lgenval, oldtid);
 
        if (mgenp != NULL) {
-               *mgenp = lgenval;
+               *mgenp = newseq.lgenval;
        }
        if (ugenp != NULL) {
-               *ugenp = ugenval;
+               *ugenp = newseq.ugenval;
        }
        if (pmtxp != NULL) {
                *pmtxp = (uint32_t *)mutex;
@@ -506,236 +704,111 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, uint32
        return 0;
 }
 
-PTHREAD_NOEXPORT
-int
-__mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
-{
-       return _pthread_mutex_unlock_updatebits(mutex, flagsp, pmtxp, mgenp, ugenp);
-}
-
 PTHREAD_ALWAYS_INLINE
 static inline int
-_pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
+_pthread_mutex_fairshare_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 {
-       int res = 0;
-       int firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT);
-       int isebit = 0;
+       bool firstfit = _pthread_mutex_is_firstfit(mutex);
+       bool gotlock = true;
 
-       uint32_t lgenval, ugenval;
-       uint64_t oldval64, newval64;
-       volatile uint64_t *seqaddr;
+       mutex_seq *seqaddr;
        MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-       uint64_t oldtid;
-       volatile uint64_t *tidaddr;
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
        do {
-               do {
-                       oldval64 = *seqaddr;
-                       oldtid = *tidaddr;
-                       lgenval = (uint32_t)oldval64;
-                       ugenval = (uint32_t)(oldval64 >> 32);
-
-                       // E bit was set on first pass through the loop but is no longer
-                       // set. Apparently we spin until it arrives.
-                       // XXX: verify this is desired behavior.
-               } while (isebit && (lgenval & PTH_RWL_EBIT) == 0);
-
-               if (isebit) {
-                       // first fit mutex now has the E bit set. Return 1.
-                       res = 1;
-                       break;
-               }
+               newseq = oldseq;
 
                if (firstfit) {
-                       isebit = (lgenval & PTH_RWL_EBIT) != 0;
-               } else if ((lgenval & (PTH_RWL_KBIT|PTH_RWL_EBIT)) == (PTH_RWL_KBIT|PTH_RWL_EBIT)) {
-                       // fairshare mutex and the bits are already set, just update tid
+                       // firstfit locks can have the lock stolen out from under a locker
+                       // between the unlock from the kernel and this lock path. When this
+                       // happens, we still want to set the K bit before leaving the loop
+                       // (or notice if the lock unlocks while we try to update).
+                       gotlock = !is_rwl_ebit_set(oldseq.lgenval);
+               } else if ((oldseq.lgenval & (PTH_RWL_KBIT | PTH_RWL_EBIT)) == 
+                               (PTH_RWL_KBIT | PTH_RWL_EBIT)) {
+                       // bits are already set, just update the owner tidaddr
                        break;
                }
 
-               // either first fit or no E bit set
-               // update the bits
-               lgenval |= PTH_RWL_KBIT | PTH_RWL_EBIT;
-
-               newval64 = (((uint64_t)ugenval) << 32);
-               newval64 |= lgenval;
-
-               // set s and b bit
-               // Retry if CAS fails, or if it succeeds with firstfit and E bit already set
-       } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, acquire) || (firstfit && isebit));
+               newseq.lgenval |= PTH_RWL_KBIT | PTH_RWL_EBIT;
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       acquire));
 
-       if (res == 0) {
-               if (!os_atomic_cmpxchg(tidaddr, oldtid, selfid, relaxed)) {
-                       // we own this mutex, nobody should be updating it except us
-                       return _pthread_mutex_corruption_abort(mutex);
-               }
+       if (gotlock) {
+               os_atomic_store(tidaddr, selfid, relaxed);
        }
 
-       return res;
+       PTHREAD_TRACE(psynch_mutex_lock_updatebits, mutex, oldseq.lgenval,
+                       newseq.lgenval, 0);
+
+       // failing to take the lock in firstfit returns 1 to force the caller
+       // to wait in the kernel
+       return gotlock ? 0 : 1;
 }
 
 PTHREAD_NOINLINE
 static int
-__mtx_markprepost(_pthread_mutex *mutex, uint32_t updateval, int firstfit)
+_pthread_mutex_fairshare_lock_wait(_pthread_mutex *mutex, mutex_seq newseq,
+               uint64_t oldtid)
 {
-       uint32_t flags;
-       uint32_t lgenval, ugenval;
-       uint64_t oldval64, newval64;
-
-       volatile uint64_t *seqaddr;
-       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-
-       if (firstfit != 0 && (updateval & PTH_RWL_PBIT) != 0) {
-               int clearprepost;
-               do {                            
-                       clearprepost = 0;
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t selfid = _pthread_selfid_direct();
 
-                       flags = mutex->mtxopts.value;
+       PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex);
+       do {
+               uint32_t updateval;
+               do {
+                       updateval = __psynch_mutexwait(mutex, newseq.lgenval,
+                                       newseq.ugenval, oldtid, mutex->mtxopts.value);
+                       oldtid = os_atomic_load(tidaddr, relaxed);
+               } while (updateval == (uint32_t)-1);
 
-                       oldval64 = *seqaddr;
-                       lgenval = (uint32_t)oldval64;
-                       ugenval = (uint32_t)(oldval64 >> 32);
+               // returns 0 on successful update; in firstfit it may fail with 1
+       } while (_pthread_mutex_fairshare_lock_updatebits(mutex, selfid) == 1);
+       PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT);
 
-                       /* update the bits */
-                       if ((lgenval & PTHRW_COUNT_MASK) == (ugenval & PTHRW_COUNT_MASK)) {
-                               clearprepost = 1;       
-                               lgenval &= ~PTH_RWL_PBIT;
-                       } else {
-                               lgenval |= PTH_RWL_PBIT;
-                       }
-                       newval64 = (((uint64_t)ugenval) << 32);
-                       newval64 |= lgenval;
-               } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, release));
-               
-               if (clearprepost != 0) {
-                       __psynch_cvclrprepost(mutex, lgenval, ugenval, 0, 0, lgenval, (flags | _PTHREAD_MTX_OPT_MUTEX));
-               }
-       }
        return 0;
 }
 
-PTHREAD_NOINLINE
-static int
-_pthread_mutex_check_init_slow(pthread_mutex_t *omutex)
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_fairshare_lock_slow(_pthread_mutex *omutex, bool trylock)
 {
-       int res = EINVAL;
+       int res, recursive = 0;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
 
-       if (_pthread_mutex_check_signature_init(mutex)) {
-               LOCK(mutex->lock);
-               if (_pthread_mutex_check_signature_init(mutex)) {
-                       // initialize a statically initialized mutex to provide
-                       // compatibility for misbehaving applications.
-                       // (unlock should not be the first operation on a mutex)
-                       res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf));
-               } else if (_pthread_mutex_check_signature(mutex)) {
-                       res = 0;
-               }
-               UNLOCK(mutex->lock);
-       } else if (_pthread_mutex_check_signature(mutex)) {
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid, selfid = _pthread_selfid_direct();
+
+       res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr);
+       if (res > 0) {
+               recursive = 1;
                res = 0;
+               goto out;
+       } else if (res < 0) {
+               res = -res;
+               goto out;
        }
-       if (res != 0) {
-               PLOCKSTAT_MUTEX_ERROR(omutex, res);
-       }
-       return res;
-}
-
-PTHREAD_ALWAYS_INLINE
-static inline int
-_pthread_mutex_check_init(pthread_mutex_t *omutex)
-{
-       int res = 0;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       
-       if (!_pthread_mutex_check_signature(mutex)) {
-               return _pthread_mutex_check_init_slow(omutex);
-       }
-       return res;
-}
-
-PTHREAD_NOINLINE
-int
-_pthread_mutex_lock_wait(pthread_mutex_t *omutex, uint64_t newval64, uint64_t oldtid)
-{
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       uint32_t lgenval = (uint32_t)newval64;
-       uint32_t ugenval = (uint32_t)(newval64 >> 32);
-
-       volatile uint64_t *tidaddr;
-       MUTEX_GETTID_ADDR(mutex, &tidaddr);
-       uint64_t selfid = _pthread_selfid_direct();
-
-       PLOCKSTAT_MUTEX_BLOCK(omutex);
-       do {
-               uint32_t updateval;
-               do {
-                       updateval = __psynch_mutexwait(omutex, lgenval, ugenval, oldtid, mutex->mtxopts.value);
-                       oldtid = *tidaddr;
-               } while (updateval == (uint32_t)-1);
-
-               // returns 0 on succesful update; in firstfit it may fail with 1
-       } while (_pthread_mutex_lock_updatebits(mutex, selfid) == 1);
-       PLOCKSTAT_MUTEX_BLOCKED(omutex, BLOCK_SUCCESS_PLOCKSTAT);
-
-       return 0;
-}
-
-int
-_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock)
-{
-       int res;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-
-       res = _pthread_mutex_check_init(omutex);
-       if (res != 0) {
-               return res;
-       }
-
-       uint64_t oldtid;
-       volatile uint64_t *tidaddr;
-       MUTEX_GETTID_ADDR(mutex, &tidaddr);
-       uint64_t selfid = _pthread_selfid_direct();
-
-       if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) {
-               if (*tidaddr == selfid) {
-                       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) {
-                               if (mutex->mtxopts.options.lock_count < USHRT_MAX) {
-                                       mutex->mtxopts.options.lock_count++;
-                                       PLOCKSTAT_MUTEX_ACQUIRE(omutex, 1, 0);
-                                       res = 0;
-                               } else {
-                                       res = EAGAIN;
-                                       PLOCKSTAT_MUTEX_ERROR(omutex, res);
-                               }
-                       } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */
-                               // <rdar://problem/16261552> as per OpenGroup, trylock cannot
-                               // return EDEADLK on a deadlock, it should return EBUSY.
-                               res = EBUSY;
-                               PLOCKSTAT_MUTEX_ERROR(omutex, res);
-                       } else  { /* PTHREAD_MUTEX_ERRORCHECK */
-                               res = EDEADLK;
-                               PLOCKSTAT_MUTEX_ERROR(omutex, res);
-                       }
-                       return res;
-               }
-       }
-
-       uint64_t oldval64, newval64;
-       volatile uint64_t *seqaddr;
-       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-
-       uint32_t lgenval, ugenval;
-       bool gotlock = false;
 
+       bool gotlock;
        do {
-               oldval64 = *seqaddr;
-               oldtid = *tidaddr;
-               lgenval = (uint32_t)oldval64;
-               ugenval = (uint32_t)(oldval64 >> 32);
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
 
-               gotlock = ((lgenval & PTH_RWL_EBIT) == 0);
+               gotlock = ((oldseq.lgenval & PTH_RWL_EBIT) == 0);
 
                if (trylock && !gotlock) {
                        // A trylock on a held lock will fail immediately. But since
@@ -744,265 +817,695 @@ _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock)
                } else {
                        // Increment the lock sequence number and force the lock into E+K
                        // mode, whether "gotlock" is true or not.
-                       lgenval += PTHRW_INC;
-                       lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
+                       newseq.lgenval += PTHRW_INC;
+                       newseq.lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
                }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire));
 
-               newval64 = (((uint64_t)ugenval) << 32);
-               newval64 |= lgenval;
-               
-               // Set S and B bit
-       } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, acquire));
+       PTHREAD_TRACE(psynch_mutex_lock_updatebits, omutex, oldseq.lgenval,
+                       newseq.lgenval, 0);
 
        if (gotlock) {
                os_atomic_store(tidaddr, selfid, relaxed);
                res = 0;
-               DEBUG_TRACE(psynch_mutex_ulock, omutex, lgenval, ugenval, selfid);
-               PLOCKSTAT_MUTEX_ACQUIRE(omutex, 0, 0);
+               PTHREAD_TRACE(psynch_mutex_ulock, omutex, newseq.lgenval,
+                               newseq.ugenval, selfid);
        } else if (trylock) {
                res = EBUSY;
-               DEBUG_TRACE(psynch_mutex_utrylock_failed, omutex, lgenval, ugenval, oldtid);
-               PLOCKSTAT_MUTEX_ERROR(omutex, res);
+               PTHREAD_TRACE(psynch_mutex_utrylock_failed, omutex, newseq.lgenval,
+                               newseq.ugenval, oldtid);
        } else {
-               res = _pthread_mutex_lock_wait(omutex, newval64, oldtid);
+               PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, omutex,
+                               newseq.lgenval, newseq.ugenval, oldtid);
+               res = _pthread_mutex_fairshare_lock_wait(mutex, newseq, oldtid);
+               PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, omutex,
+                               newseq.lgenval, newseq.ugenval, oldtid);
        }
 
-       if (res == 0 && mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) {
+       if (res == 0 && _pthread_mutex_is_recursive(mutex)) {
                mutex->mtxopts.options.lock_count = 1;
        }
 
-       PLOCKSTAT_MUTEX_ACQUIRE(omutex, 0, 0);
+out:
+#if PLOCKSTAT
+       if (res == 0) {
+               PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0);
+       } else {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res);
+       }
+#endif
 
        return res;
 }
 
-#endif // OS_UP_VARIANT_ONLY
-
-PTHREAD_ALWAYS_INLINE
+PTHREAD_NOINLINE
 static inline int
-_pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
+_pthread_mutex_fairshare_lock(_pthread_mutex *mutex, bool trylock)
 {
-#if PLOCKSTAT || DEBUG_TRACE_POINTS
-       if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED() ||
-                       DEBUG_TRACE_POINTS) {
-               return _pthread_mutex_lock_slow(omutex, trylock);
+#if ENABLE_USERSPACE_TRACE
+       return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
+#elif PLOCKSTAT
+       if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
+               return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
        }
 #endif
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       if (!_pthread_mutex_check_signature_fast(mutex)) {
-               return _pthread_mutex_lock_slow(omutex, trylock);
-       }
 
-       uint64_t oldtid;
-       volatile uint64_t *tidaddr;
+       uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t selfid = _pthread_selfid_direct();
 
-       uint64_t oldval64, newval64;
-       volatile uint64_t *seqaddr;
+       mutex_seq *seqaddr;
        MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
 
-       uint32_t lgenval, ugenval;
-       bool gotlock = false;
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) {
+               return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
+       }
 
+       bool gotlock;
        do {
-               oldval64 = *seqaddr;
-               oldtid = *tidaddr;
-               lgenval = (uint32_t)oldval64;
-               ugenval = (uint32_t)(oldval64 >> 32);
+               newseq = oldseq;
 
-               gotlock = ((lgenval & PTH_RWL_EBIT) == 0);
+               gotlock = ((oldseq.lgenval & PTH_RWL_EBIT) == 0);
 
                if (trylock && !gotlock) {
                        // A trylock on a held lock will fail immediately. But since
                        // we did not load the sequence words atomically, perform a
                        // no-op CAS64 to ensure that nobody has unlocked concurrently.
-               } else {
+               } else if (os_likely(gotlock)) {
                        // Increment the lock sequence number and force the lock into E+K
                        // mode, whether "gotlock" is true or not.
-                       lgenval += PTHRW_INC;
-                       lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
+                       newseq.lgenval += PTHRW_INC;
+                       newseq.lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
+               } else {
+                       return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
                }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       acquire)));
 
-               newval64 = (((uint64_t)ugenval) << 32);
-               newval64 |= lgenval;
-
-               // Set S and B bit
-       } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, acquire));
-
-       if (os_fastpath(gotlock)) {
+       if (os_likely(gotlock)) {
                os_atomic_store(tidaddr, selfid, relaxed);
                return 0;
        } else if (trylock) {
                return EBUSY;
        } else {
-               return _pthread_mutex_lock_wait(omutex, newval64, oldtid);
+               __builtin_trap();
        }
 }
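
This function also illustrates the dispatch pattern used throughout the file:
an inlinable fast path that handles only the uncontended CAS, falling back to
a PTHREAD_NOINLINE slow path for contention, tracing, and error reporting.
In miniature (a sketch, not libpthread code):

#include <stdatomic.h>

static int lock_slow(_Atomic int *l);

static inline int
lock_fast(_Atomic int *l)
{
	int expected = 0;
	if (atomic_compare_exchange_strong_explicit(l, &expected, 1,
			memory_order_acquire, memory_order_relaxed)) {
		return 0;		// uncontended: handled entirely inline
	}
	return lock_slow(l);		// contention, tracing, errors: big path
}

static int
lock_slow(_Atomic int *l)
{
	int expected;
	do {				// naive spin stands in for __psynch_mutexwait
		expected = 0;
	} while (!atomic_compare_exchange_weak_explicit(l, &expected, 1,
			memory_order_acquire, memory_order_relaxed));
	return 0;
}

int
main(void)
{
	_Atomic int l = 0;
	return lock_fast(&l);
}
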
 
-PTHREAD_NOEXPORT_VARIANT
-int
-pthread_mutex_lock(pthread_mutex_t *mutex)
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_fairshare_unlock_drop(_pthread_mutex *mutex, mutex_seq newseq,
+               uint32_t flags)
 {
-       return _pthread_mutex_lock(mutex, false);
+       int res;
+       uint32_t updateval;
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+
+       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_START, mutex, newseq.lgenval,
+                       newseq.ugenval, os_atomic_load(tidaddr, relaxed));
+
+       updateval = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval,
+                       os_atomic_load(tidaddr, relaxed), flags);
+
+       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_END, mutex, updateval, 0, 0);
+
+       if (updateval == (uint32_t)-1) {
+               res = errno;
+
+               if (res == EINTR) {
+                       res = 0;
+               }
+               if (res != 0) {
+                       PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res);
+               }
+               return res;
+       }
+
+       return 0;
 }
 
-PTHREAD_NOEXPORT_VARIANT
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
-pthread_mutex_trylock(pthread_mutex_t *mutex)
+_pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex)
 {
-       return _pthread_mutex_lock(mutex, true);
-}
+       int res;
+       mutex_seq newseq;
+       uint32_t flags;
 
-#ifndef OS_UP_VARIANT_ONLY
-/*
- * Unlock a mutex.
- * TODO: Priority inheritance stuff
- */
+       res = _pthread_mutex_fairshare_unlock_updatebits(mutex, &flags, NULL,
+                       &newseq.lgenval, &newseq.ugenval);
+       if (res != 0) return res;
+
+       if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) {
+               return _pthread_mutex_fairshare_unlock_drop(mutex, newseq, flags);
+       } else {
+               uint64_t *tidaddr;
+               MUTEX_GETTID_ADDR(mutex, &tidaddr);
+               PTHREAD_TRACE(psynch_mutex_uunlock, mutex, newseq.lgenval,
+                               newseq.ugenval, os_atomic_load(tidaddr, relaxed));
+       }
+
+       return 0;
+}
 
 PTHREAD_NOINLINE
 static int
-_pthread_mutex_unlock_drop(pthread_mutex_t *omutex, uint64_t newval64, uint32_t flags)
+_pthread_mutex_fairshare_unlock(_pthread_mutex *mutex)
 {
-       int res;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       uint32_t lgenval = (uint32_t)newval64;
-       uint32_t ugenval = (uint32_t)(newval64 >> 32);
+#if ENABLE_USERSPACE_TRACE
+       return _pthread_mutex_fairshare_unlock_slow(mutex);
+#elif PLOCKSTAT
+       if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
+               return _pthread_mutex_fairshare_unlock_slow(mutex);
+       }
+#endif
 
-       uint32_t updateval;
-       int firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT);
-       volatile uint64_t *tidaddr;
+       uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
-       updateval = __psynch_mutexdrop(omutex, lgenval, ugenval, *tidaddr, flags);
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
 
-       if (updateval == (uint32_t)-1) {
-               res = errno;
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       int numwaiters = diff_genseq(oldseq.lgenval, oldseq.ugenval);
+       if (os_unlikely(numwaiters == 0)) {
+               // spurious unlock (unlock of unlocked lock)
+               return 0;
+       }
+
+       // We're giving up the mutex one way or the other, so go ahead and
+       // update the owner to 0 so that once the CAS below succeeds, there
+       // is no stale ownership information. If the CAS of the seqaddr
+       // fails, we may loop, but it's still valid for the owner to be
+       // SWITCHING/0
+       os_atomic_store(tidaddr, 0, relaxed);
+
+       do {
+               newseq = oldseq;
+               newseq.ugenval += PTHRW_INC;
+
+               if (os_likely((oldseq.lgenval & PTHRW_COUNT_MASK) ==
+                               (newseq.ugenval & PTHRW_COUNT_MASK))) {
+                       // if we succeed in performing the CAS we can be sure of a fast
+                       // path (only needing the CAS) unlock, if:
+                       //   a. our lock and unlock sequence are equal
+                       //   b. we don't need to clear an unlock prepost from the kernel
+
+                       // do not reset Ibit, just K&E
+                       newseq.lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
+               } else {
+                       return _pthread_mutex_fairshare_unlock_slow(mutex);
+               }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       release)));
+
+       return 0;
+}
+
+#pragma mark firstfit
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_firstfit_unlock_updatebits(_pthread_mutex *mutex,
+               uint32_t *flagsp, uint32_t **mutexp, uint32_t *lvalp, uint32_t *uvalp)
+{
+       uint32_t flags = mutex->mtxopts.value & ~_PTHREAD_MTX_OPT_NOTIFY;
+       bool kernel_wake;
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid;
+
+       int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr);
+       if (res > 0) {
+               // Valid recursive unlock
+               if (flagsp) {
+                       *flagsp = flags;
+               }
+               PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1);
+               return 0;
+       } else if (res < 0) {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res);
+               return -res;
+       }
+
+       do {
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
+               // One or more kernel waiters means we need to do a wake.
+               kernel_wake = diff_genseq(oldseq.lgenval, oldseq.ugenval) > 0;
+               newseq.lgenval &= ~PTH_RWL_EBIT;
+
+               if (kernel_wake) {
+                       // Going to the kernel post-unlock removes a single waiter unlock
+                       // from the mutex counts.
+                       newseq.ugenval += PTHRW_INC;
+               }
+
+               if (oldtid != 0) {
+                       if (!os_atomic_cmpxchg(tidaddr, oldtid, 0, relaxed)) {
+                               return _pthread_mutex_corruption_abort(mutex);
+                       }
+               }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release));
+
+       PTHREAD_TRACE(psynch_ffmutex_unlock_updatebits, mutex, oldseq.lgenval,
+                       newseq.lgenval, newseq.ugenval);
+
+       if (kernel_wake) {
+               // We choose to return this out via flags because the condition
+               // variable also uses this to determine whether to do a kernel wake
+               // when beginning a cvwait.
+               flags |= _PTHREAD_MTX_OPT_NOTIFY;
+       }
+       if (lvalp) {
+               *lvalp = newseq.lgenval;
+       }
+       if (uvalp) {
+               *uvalp = newseq.ugenval;
+       }
+       if (mutexp) {
+               *mutexp = (uint32_t *)mutex;
+       }
+       if (flagsp) {
+               *flagsp = flags;
+       }
+       return 0;
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+static int
+_pthread_mutex_firstfit_wake(_pthread_mutex *mutex, mutex_seq newseq,
+               uint32_t flags)
+{
+       PTHREAD_TRACE(psynch_ffmutex_wake, mutex, newseq.lgenval, newseq.ugenval,
+                       0);
+       int res = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval, 0,
+                       flags);
 
+       if (res == -1) {
+               res = errno;
                if (res == EINTR) {
                        res = 0;
                }
                if (res != 0) {
-                       PTHREAD_ABORT("__p_mutexdrop failed with error %d", res);
+                       PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res);
                }
                return res;
-       } else if (firstfit == 1) {
-               if ((updateval & PTH_RWL_PBIT) != 0) {
-                       __mtx_markprepost(mutex, updateval, firstfit);
+       }
+       return 0;
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex)
+{
+       mutex_seq newseq;
+       uint32_t flags;
+       int res;
+
+       res = _pthread_mutex_firstfit_unlock_updatebits(mutex, &flags, NULL,
+                       &newseq.lgenval, &newseq.ugenval);
+       if (res != 0) return res;
+
+       if (flags & _PTHREAD_MTX_OPT_NOTIFY) {
+               return _pthread_mutex_firstfit_wake(mutex, newseq, flags);
+       }
+       return 0;
+}
+
+PTHREAD_ALWAYS_INLINE
+static bool
+_pthread_mutex_firstfit_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid,
+               mutex_seq *newseqp)
+{
+       bool gotlock;
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex,
+                       oldseq.lgenval, oldseq.ugenval, 0);
+
+       do {
+               newseq = oldseq;
+               gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+
+               if (gotlock) {
+                       // If we see the E-bit cleared, we should just attempt to take it.
+                       newseq.lgenval |= PTH_RWL_EBIT;
+               } else {
+                       // If we failed to get the lock then we need to put ourselves back
+                       // in the queue of waiters. The previous unlocker that woke us out
+                       // of the kernel consumed the S-count for our previous wake. So
+                       // take another ticket on L and go back into the kernel to sleep.
+                       newseq.lgenval += PTHRW_INC;
                }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire));
+
+       if (gotlock) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+       }
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex,
+                       newseq.lgenval, newseq.ugenval, 0);
+
+       if (newseqp) {
+               *newseqp = newseq;
        }
+       return gotlock;
+}
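
The acquire side is the mirror image. A companion sketch under the same assumptions, reusing toy_mutex_seq and the TOY_* stand-in constants from the release sketch above:

/*
 * Sketch only: the first-fit acquire update. Returns true if the
 * E-bit was taken; false means a ticket was queued on L and the
 * caller must go to the kernel to sleep.
 */
static bool
toy_lock_updatebits(toy_mutex_seq *m)
{
	uint64_t oldval = atomic_load_explicit(&m->seq, memory_order_relaxed);
	uint64_t newval;
	bool gotlock;
	do {
		uint32_t lgen = (uint32_t)oldval;
		uint32_t ugen = (uint32_t)(oldval >> 32);
		gotlock = (lgen & TOY_EBIT) == 0;
		if (gotlock) {
			lgen |= TOY_EBIT;  /* free: just take it */
		} else {
			lgen += TOY_INC;   /* contended: take a ticket on L */
		}
		newval = ((uint64_t)ugen << 32) | lgen;
	} while (!atomic_compare_exchange_weak_explicit(&m->seq, &oldval,
			newval, memory_order_acquire, memory_order_relaxed));
	return gotlock;
}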
+
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_firstfit_lock_wait(_pthread_mutex *mutex, mutex_seq newseq,
+               uint64_t oldtid)
+{
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t selfid = _pthread_selfid_direct();
+
+       PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex);
+       do {
+               uint32_t uval;
+               do {
+                       PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_START, mutex,
+                                       newseq.lgenval, newseq.ugenval, mutex->mtxopts.value);
+                       uval = __psynch_mutexwait(mutex, newseq.lgenval, newseq.ugenval,
+                                       oldtid, mutex->mtxopts.value);
+                       PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_END, mutex,
+                                       uval, 0, 0);
+                       oldtid = os_atomic_load(tidaddr, relaxed);
+               } while (uval == (uint32_t)-1);
+       } while (!_pthread_mutex_firstfit_lock_updatebits(mutex, selfid, &newseq));
+       PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT);
 
        return 0;
 }
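
The loop nesting above is the important part: the inner loop retries the syscall when it reports failure, and the outer loop re-runs the bit update because in first-fit a wake is only a hint; a barging locker may take the E-bit first. Schematically, with toy_kernel_wait() as a hypothetical stand-in for __psynch_mutexwait() (not a real API), building on toy_lock_updatebits() from the sketch above:

/* Shape of the wait loop only; toy_kernel_wait() is a hypothetical
 * stand-in for __psynch_mutexwait(). */
extern uint32_t toy_kernel_wait(toy_mutex_seq *m);

static void
toy_lock_wait(toy_mutex_seq *m)
{
	do {
		uint32_t uval;
		do {
			uval = toy_kernel_wait(m);  /* block until an unlock wakes us */
		} while (uval == (uint32_t)-1); /* retry interrupted waits */
		/* Woken does not mean owned: another thread may have barged
		 * in and set the E-bit, in which case we queue again. */
	} while (!toy_lock_updatebits(m));
}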
 
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
-_pthread_mutex_unlock_slow(pthread_mutex_t *omutex)
+_pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock)
 {
-       int res;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       uint32_t mtxgen, mtxugen, flags;
+       int res, recursive = 0;
 
-       // Initialize static mutexes for compatibility with misbehaving
-       // applications (unlock should not be the first operation on a mutex).
-       res = _pthread_mutex_check_init(omutex);
-       if (res != 0) {
-               return res;
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid, selfid = _pthread_selfid_direct();
+
+       res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr);
+       if (res > 0) {
+               recursive = 1;
+               res = 0;
+               goto out;
+       } else if (res < 0) {
+               res = -res;
+               goto out;
        }
 
-       res = _pthread_mutex_unlock_updatebits(mutex, &flags, NULL, &mtxgen, &mtxugen);
-       if (res != 0) {
-               return res;
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex,
+                       oldseq.lgenval, oldseq.ugenval, 0);
+
+       bool gotlock;
+       do {
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
+
+               gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+               if (trylock && !gotlock) {
+                       // We still want to perform the CAS here, even though it won't
+                       // change anything, so that it fails if someone unlocked while
+                       // we were in the loop.
+               } else if (gotlock) {
+                       // In first-fit, getting the lock simply adds the E-bit
+                       newseq.lgenval |= PTH_RWL_EBIT;
+               } else {
+                       // Failed to get the lock: increment the L-val and go to
+                       // the kernel to sleep.
+                       newseq.lgenval += PTHRW_INC;
+               }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire));
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex,
+                       newseq.lgenval, newseq.ugenval, 0);
+
+       if (gotlock) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+               res = 0;
+               PTHREAD_TRACE(psynch_mutex_ulock, mutex, newseq.lgenval,
+                               newseq.ugenval, selfid);
+       } else if (trylock) {
+               res = EBUSY;
+               PTHREAD_TRACE(psynch_mutex_utrylock_failed, mutex, newseq.lgenval,
+                               newseq.ugenval, oldtid);
+       } else {
+               PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, mutex,
+                               newseq.lgenval, newseq.ugenval, oldtid);
+               res = _pthread_mutex_firstfit_lock_wait(mutex, newseq, oldtid);
+               PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, mutex,
+                               newseq.lgenval, newseq.ugenval, oldtid);
        }
 
-       if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) {
-               uint64_t newval64;
-               newval64 = (((uint64_t)mtxugen) << 32);
-               newval64 |= mtxgen;
-               return _pthread_mutex_unlock_drop(omutex, newval64, flags);
+       if (res == 0 && _pthread_mutex_is_recursive(mutex)) {
+               mutex->mtxopts.options.lock_count = 1;
+       }
+
+out:
+#if PLOCKSTAT
+       if (res == 0) {
+               PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0);
        } else {
-               volatile uint64_t *tidaddr;
-               MUTEX_GETTID_ADDR(mutex, &tidaddr);
-               DEBUG_TRACE(psynch_mutex_uunlock, omutex, mtxgen, mtxugen, *tidaddr);
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res);
        }
+#endif
+       return res;
+}
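
The lock_count seeding above is what makes recursive mutexes work: an owner relock bumps the count instead of touching the seq words, and only the outermost unlock releases the mutex. A standard-API illustration (nothing here is specific to this implementation):

#include <assert.h>
#include <pthread.h>

/* Owner relocks bump an internal lock_count rather than the seq words;
 * only the outermost unlock releases the mutex for other threads. */
static void
recursive_example(void)
{
	pthread_mutexattr_t attr;
	pthread_mutex_t m;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
	pthread_mutex_init(&m, &attr);
	pthread_mutexattr_destroy(&attr);

	assert(pthread_mutex_lock(&m) == 0);    /* lock_count == 1 */
	assert(pthread_mutex_lock(&m) == 0);    /* owner relock: count == 2 */
	assert(pthread_mutex_unlock(&m) == 0);  /* count back to 1 */
	assert(pthread_mutex_unlock(&m) == 0);  /* real release */
	pthread_mutex_destroy(&m);
}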
 
-       return 0;
+#pragma mark fast path
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp,
+               uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
+{
+       if (_pthread_mutex_is_fairshare(mutex)) {
+               return _pthread_mutex_fairshare_unlock_updatebits(mutex, flagsp,
+                               pmtxp, mgenp, ugenp);
+       }
+       return _pthread_mutex_firstfit_unlock_updatebits(mutex, flagsp, pmtxp,
+                       mgenp, ugenp);
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock)
+{
+       int res;
+
+       res = _pthread_mutex_check_init(mutex);
+       if (res != 0) return res;
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
+       }
+       return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
 }
 
-#endif // OS_UP_VARIANT_ONLY
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+static int
+_pthread_mutex_unlock_init_slow(_pthread_mutex *mutex)
+{
+       int res;
+
+       // Initialize static mutexes for compatibility with misbehaving
+       // applications (unlock should not be the first operation on a mutex).
+       res = _pthread_mutex_check_init(mutex);
+       if (res != 0) return res;
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_unlock_slow(mutex);
+       }
+       return _pthread_mutex_firstfit_unlock_slow(mutex);
+}
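
The compatibility case this slow path exists for looks like the following. POSIX leaves unlock-before-lock undefined, but statically initialized mutexes are lazily set up here rather than faulting; the return value is whatever the implementation chooses to report for the spurious unlock.

#include <pthread.h>
#include <stdio.h>

/* First operation on a static mutex is an unlock: the init-slow path
 * materializes the mutex, then the unlock is treated as spurious. */
static pthread_mutex_t misbehaved = PTHREAD_MUTEX_INITIALIZER;

static void
unlock_first(void)
{
	int res = pthread_mutex_unlock(&misbehaved);
	printf("unlock before any lock returned %d\n", res);
}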
 
 PTHREAD_NOEXPORT_VARIANT
 int
 pthread_mutex_unlock(pthread_mutex_t *omutex)
 {
-#if PLOCKSTAT || DEBUG_TRACE_POINTS
-       if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED() ||
-                       DEBUG_TRACE_POINTS) {
-               return _pthread_mutex_unlock_slow(omutex);
-       }
-#endif
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       if (!_pthread_mutex_check_signature_fast(mutex)) {
-               return _pthread_mutex_unlock_slow(omutex);
+       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
+               return _pthread_mutex_unlock_init_slow(mutex);
        }
 
-       volatile uint64_t *tidaddr;
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_unlock(mutex);
+       }
+
+#if ENABLE_USERSPACE_TRACE
+       return _pthread_mutex_firstfit_unlock_slow(mutex);
+#elif PLOCKSTAT
+       if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
+               return _pthread_mutex_firstfit_unlock_slow(mutex);
+       }
+#endif
+
+       /*
+        * This is the first-fit fast path. The fairshare fast-ish path is in
+        * _pthread_mutex_fairshare_unlock().
+        */
+       uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
-       uint64_t oldval64, newval64;
-       volatile uint64_t *seqaddr;
+       mutex_seq *seqaddr;
        MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
 
-       uint32_t lgenval, ugenval;
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
 
-       do {
-               oldval64 = *seqaddr;
-               lgenval = (uint32_t)oldval64;
-               ugenval = (uint32_t)(oldval64 >> 32);
+       // We're giving up the mutex one way or the other, so go ahead and
+       // update the owner to 0 so that once the CAS below succeeds, there
+       // is no stale ownership information. If the CAS of the seqaddr
+       // fails, we may loop, but it's still valid for the owner to be
+       // SWITCHING/0
+       os_atomic_store(tidaddr, 0, relaxed);
 
-               int numwaiters = diff_genseq(lgenval, ugenval); // pending waiters
+       do {
+               newseq = oldseq;
 
-               if (numwaiters == 0) {
-                       // spurious unlock; do not touch tid
+               if (diff_genseq(oldseq.lgenval, oldseq.ugenval) == 0) {
+                       // No outstanding waiters in the kernel, so we can
+                       // simply drop the E-bit and return.
+                       newseq.lgenval &= ~PTH_RWL_EBIT;
                } else {
-                       ugenval += PTHRW_INC;
+                       return _pthread_mutex_firstfit_unlock_slow(mutex);
+               }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       release)));
+
+       return 0;
+}
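
Condensed, the fast path above is: publish "no owner", then a single release-CAS that only succeeds in the no-waiter case. A toy rendering under the same assumptions as the earlier sketches, reusing toy_mutex_seq and the TOY_* macros (it returns false where the real code tail-calls the slow path):

typedef struct {
	toy_mutex_seq    s;
	_Atomic uint64_t tid;  /* owner thread id, 0 when unowned */
} toy_mutex;

static bool
toy_unlock_fastpath(toy_mutex *m)
{
	uint64_t oldval = atomic_load_explicit(&m->s.seq, memory_order_relaxed);

	/* Clear ownership first so a successful CAS leaves no stale tid. */
	atomic_store_explicit(&m->tid, 0, memory_order_relaxed);

	for (;;) {
		uint32_t lgen = (uint32_t)oldval;
		uint32_t ugen = (uint32_t)(oldval >> 32);
		if (((lgen & TOY_COUNT_MASK) - (ugen & TOY_COUNT_MASK)) != 0)
			return false;  /* kernel waiters: defer to the slow path */
		uint64_t newval = ((uint64_t)ugen << 32) | (lgen & ~TOY_EBIT);
		if (atomic_compare_exchange_weak_explicit(&m->s.seq, &oldval,
				newval, memory_order_release, memory_order_relaxed))
			return true;
	}
}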
 
-                       if ((lgenval & PTHRW_COUNT_MASK) == (ugenval & PTHRW_COUNT_MASK)) {
-                               // our unlock sequence matches to lock sequence, so if the CAS is successful, the mutex is unlocked
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_firstfit_lock(pthread_mutex_t *omutex, bool trylock)
+{
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
+               return _pthread_mutex_lock_init_slow(mutex, trylock);
+       }
 
-                               /* do not reset Ibit, just K&E */
-                               lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
-                       } else {
-                               return _pthread_mutex_unlock_slow(omutex);
-                       }
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_lock(mutex, trylock);
+       }
 
-                       // We're giving up the mutex one way or the other, so go ahead and update the owner
-                       // to 0 so that once the CAS below succeeds, there is no stale ownership information.
-                       // If the CAS of the seqaddr fails, we may loop, but it's still valid for the owner
-                       // to be SWITCHING/0
-                       os_atomic_store(tidaddr, 0, relaxed);
-               }
+#if ENABLE_USERSPACE_TRACE
+       return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+#elif PLOCKSTAT
+       if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
+               return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+       }
+#endif
+
+       /*
+        * This is the first-fit fast path. The fairshare fast-ish path is in
+        * _pthread_mutex_fairshare_lock().
+        */
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t selfid = _pthread_selfid_direct();
 
-               newval64 = (((uint64_t)ugenval) << 32);
-               newval64 |= lgenval;
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
 
-       } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, release));
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
 
-       return 0;
+       if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) {
+               return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+       }
+
+       bool gotlock;
+       do {
+               newseq = oldseq;
+               gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+
+               if (trylock && !gotlock) {
+                       // A trylock on a held lock will fail immediately. But since
+                       // we did not load the sequence words atomically, perform a
+                       // no-op CAS64 to ensure that nobody has unlocked concurrently.
+               } else if (os_likely(gotlock)) {
+                       // In first-fit, getting the lock simply adds the E-bit
+                       newseq.lgenval |= PTH_RWL_EBIT;
+               } else {
+                       return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+               }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       acquire)));
+
+       if (os_likely(gotlock)) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+               return 0;
+       } else if (trylock) {
+               return EBUSY;
+       } else {
+               __builtin_trap();
+       }
+}
+
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+       return _pthread_mutex_firstfit_lock(mutex, false);
 }
 
-#ifndef OS_UP_VARIANT_ONLY
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+       return _pthread_mutex_firstfit_lock(mutex, true);
+}
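
A standard-API usage note for the trylock entry point: on this path EBUSY is the expected failure for a healthy, contended mutex, and any other nonzero result is a real error.

#include <errno.h>
#include <pthread.h>

/* Try the lock; fall back to other work if someone else holds it. */
static int
try_or_skip(pthread_mutex_t *m)
{
	int res = pthread_mutex_trylock(m);
	if (res == 0) {
		/* ... critical section ... */
		pthread_mutex_unlock(m);
		return 0;
	}
	return (res == EBUSY) ? -1 : res; /* -1: busy; otherwise an error */
}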
 
 
+PTHREAD_ALWAYS_INLINE
 static inline int
-_pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint32_t static_type)
+_pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
+               uint32_t static_type)
 {
+       mutex->mtxopts.value = 0;
+       mutex->mtxopts.options.mutex = 1;
        if (attr) {
                if (attr->sig != _PTHREAD_MUTEX_ATTR_SIG) {
                        return EINVAL;
                }
-               mutex->prioceiling = attr->prioceiling;
+               mutex->prioceiling = (int16_t)attr->prioceiling;
                mutex->mtxopts.options.protocol = attr->protocol;
-               mutex->mtxopts.options.policy = attr->policy;
+               mutex->mtxopts.options.policy = attr->opt;
                mutex->mtxopts.options.type = attr->type;
                mutex->mtxopts.options.pshared = attr->pshared;
        } else {
@@ -1025,44 +1528,51 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint
                mutex->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING;
                mutex->mtxopts.options.protocol = _PTHREAD_DEFAULT_PROTOCOL;
                if (static_type != 3) {
-                       mutex->mtxopts.options.policy = _PTHREAD_MUTEX_POLICY_FAIRSHARE;
+                       mutex->mtxopts.options.policy = __pthread_mutex_default_opt_policy;
                } else {
-                       mutex->mtxopts.options.policy = _PTHREAD_MUTEX_POLICY_FIRSTFIT;
+                       mutex->mtxopts.options.policy = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
                }
                mutex->mtxopts.options.pshared = _PTHREAD_DEFAULT_PSHARED;
        }
-       
-       mutex->mtxopts.options.notify = 0;
-       mutex->mtxopts.options.unused = 0;
-       mutex->mtxopts.options.hold = 0;
-       mutex->mtxopts.options.mutex = 1;
-       mutex->mtxopts.options.lock_count = 0;
-
-       mutex->m_tid[0] = 0;
-       mutex->m_tid[1] = 0;
-       mutex->m_seq[0] = 0;
-       mutex->m_seq[1] = 0;
-       mutex->m_seq[2] = 0;
-       mutex->prioceiling = 0;
        mutex->priority = 0;
 
-       mutex->mtxopts.options.misalign = (((uintptr_t)&mutex->m_seq[0]) & 0x7ul) != 0;
-       if (mutex->mtxopts.options.misalign) {
-               mutex->m_tid[0] = ~0u;
-       } else {
-               mutex->m_seq[2] = ~0u;
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+
+#if PTHREAD_MUTEX_INIT_UNUSED
+       if ((uint32_t*)tidaddr != mutex->m_tid) {
+               mutex->mtxopts.options.misalign = 1;
+               __builtin_memset(mutex->m_tid, 0xff, sizeof(mutex->m_tid));
        }
+       __builtin_memset(mutex->m_mis, 0xff, sizeof(mutex->m_mis));
+#endif // PTHREAD_MUTEX_INIT_UNUSED
+       *tidaddr = 0;
+       *seqaddr = (mutex_seq){ };
 
        long sig = _PTHREAD_MUTEX_SIG;
        if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL &&
-                       mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FAIRSHARE) {
+                       (_pthread_mutex_is_fairshare(mutex) ||
+                        _pthread_mutex_is_firstfit(mutex))) {
                // rdar://18148854 _pthread_mutex_lock & pthread_mutex_unlock fastpath
                sig = _PTHREAD_MUTEX_SIG_fast;
        }
 
-       // unused, purely for detecting copied mutexes and smashes during debugging:
-       mutex->reserved2[0] = ~(uintptr_t)mutex; // use ~ to hide from leaks
-       mutex->reserved2[1] = (uintptr_t)sig;
+#if PTHREAD_MUTEX_INIT_UNUSED
+       // For detecting copied mutexes and smashes during debugging
+       uint32_t sig32 = (uint32_t)sig;
+#if defined(__LP64__)
+       uintptr_t guard = ~(uintptr_t)mutex; // use ~ to hide from leaks
+       __builtin_memcpy(mutex->_reserved, &guard, sizeof(guard));
+       mutex->_reserved[2] = sig32;
+       mutex->_reserved[3] = sig32;
+       mutex->_pad = sig32;
+#else
+       mutex->_reserved[0] = sig32;
+#endif
+#endif // PTHREAD_MUTEX_INIT_UNUSED
 
        // Ensure all contents are properly set before setting signature.
 #if defined(__LP64__)
@@ -1070,7 +1580,7 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint
        // the 64bit 'sig' long value in the struct. rdar://problem/21610439
        uint32_t *sig32_ptr = (uint32_t*)&mutex->sig;
        uint32_t *sig32_val = (uint32_t*)&sig;
-       *(sig32_ptr+1) = *(sig32_val+1);
+       *(sig32_ptr + 1) = *(sig32_val + 1);
        os_atomic_store(sig32_ptr, *sig32_val, release);
 #else
        os_atomic_store2o(mutex, sig, sig, release);
@@ -1079,6 +1589,7 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint
        return 0;
 }
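
Attribute-driven initialization exercises the attr branch above: the attribute's sig is validated, then its type, protocol, policy, and pshared values land in mtxopts. A standard-API example:

#include <pthread.h>

/* Initialize an error-checking mutex; a NULL attr would instead take
 * the defaults branch above (default protocol, policy, and pshared). */
static int
init_errorcheck(pthread_mutex_t *m)
{
	pthread_mutexattr_t attr;
	int res;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
	res = pthread_mutex_init(m, &attr);
	pthread_mutexattr_destroy(&attr);
	return res;
}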
 
+PTHREAD_NOEXPORT_VARIANT
 int
 pthread_mutex_destroy(pthread_mutex_t *omutex)
 {
@@ -1086,20 +1597,20 @@ pthread_mutex_destroy(pthread_mutex_t *omutex)
 
        int res = EINVAL;
 
-       LOCK(mutex->lock);
+       _PTHREAD_LOCK(mutex->lock);
        if (_pthread_mutex_check_signature(mutex)) {
-               uint32_t lgenval, ugenval;
-               uint64_t oldval64;
-               volatile uint64_t *seqaddr;
+               mutex_seq *seqaddr;
                MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-               volatile uint64_t *tidaddr;
+
+               mutex_seq seq;
+               mutex_seq_load(seqaddr, &seq);
+
+               uint64_t *tidaddr;
                MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
-               oldval64 = *seqaddr;
-               lgenval = (uint32_t)oldval64;
-               ugenval = (uint32_t)(oldval64 >> 32);
-               if ((*tidaddr == (uint64_t)0) &&
-                   ((lgenval & PTHRW_COUNT_MASK) == (ugenval & PTHRW_COUNT_MASK))) {
+               if ((os_atomic_load(tidaddr, relaxed) == 0) &&
+                               (seq.lgenval & PTHRW_COUNT_MASK) ==
+                               (seq.ugenval & PTHRW_COUNT_MASK)) {
                        mutex->sig = _PTHREAD_NO_SIG;
                        res = 0;
                } else {
@@ -1109,16 +1620,13 @@ pthread_mutex_destroy(pthread_mutex_t *omutex)
                mutex->sig = _PTHREAD_NO_SIG;
                res = 0;
        }
-       UNLOCK(mutex->lock);
-       
-       return res;     
-}
+       _PTHREAD_UNLOCK(mutex->lock);
 
-#endif // OS_UP_VARIANT_ONLY
+       return res;
+}
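
The destroy check above (owner tid zero and matched L/U counts) is what rejects tearing down a busy mutex; a small illustration of the resulting behavior:

#include <assert.h>
#include <errno.h>
#include <pthread.h>

/* Destroying a held mutex fails the tid/seq check above and reports
 * EBUSY; after the unlock the same call clears the signature. */
static void
destroy_example(void)
{
	pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

	pthread_mutex_lock(&m);
	assert(pthread_mutex_destroy(&m) == EBUSY);
	pthread_mutex_unlock(&m);
	assert(pthread_mutex_destroy(&m) == 0);
}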
 
 #endif /* !BUILDING_VARIANT ] */
 
-#ifndef OS_UP_VARIANT_ONLY
 /*
  * Destroy a mutex attribute structure.
  */
@@ -1138,4 +1646,3 @@ pthread_mutexattr_destroy(pthread_mutexattr_t *attr)
        return 0;
 }
 
-#endif // OS_UP_VARIANT_ONLY