xnu-3789.70.16.tar.gz
[apple/xnu.git] / osfmk / i386 / i386_lock.s
index ed2710ccca30e80c1016643144175a943bdccd03..f54e040a1a78037a95a7da3630c55894fe5a0c27 100644 (file)
@@ -1,16 +1,19 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -20,7 +23,7 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  * the terms and conditions for use and redistribution.
  */
 
-#include <cpus.h>
 #include <mach_rt.h>
-#include <platforms.h>
 #include <mach_ldebug.h>
 #include <i386/asm.h>
-#include <kern/etap_options.h>
-
+#include <i386/eflags.h>
+#include <i386/trap.h>
+#include <config_dtrace.h>
+#include <i386/mp.h>
+       
 #include "assym.s"
 
-/*
- *     When performance isn't the only concern, it's
- *     nice to build stack frames...
- */
-#define        BUILD_STACK_FRAMES   ((MACH_LDEBUG || ETAP_LOCK_TRACE) && MACH_KDB)
+#define        PAUSE           rep; nop
 
-#if    BUILD_STACK_FRAMES
+#include <i386/pal_lock_asm.h>
 
-#define        L_PC            4(%ebp)
-#define        L_ARG0          8(%ebp)
-#define        L_ARG1          12(%ebp)
+#define LEAF_ENTRY(name)       \
+       Entry(name)
 
-#define SWT_HI          -4(%ebp)
-#define SWT_LO          -8(%ebp)
-#define MISSED          -12(%ebp)
+#define LEAF_ENTRY2(n1,n2)     \
+       Entry(n1);              \
+       Entry(n2)
 
-#else   /* BUILD_STACK_FRAMES */
+#define LEAF_RET               \
+       ret
 
-#undef FRAME
-#undef EMARF
-#define        FRAME
-#define        EMARF
-#define        L_PC            (%esp)
-#define        L_ARG0          4(%esp)
-#define        L_ARG1          8(%esp)
+/* Non-leaf routines always have a stack frame: */
 
-#endif   /* BUILD_STACK_FRAMES */
+#define NONLEAF_ENTRY(name)    \
+       Entry(name);            \
+       FRAME
 
+#define NONLEAF_ENTRY2(n1,n2)  \
+       Entry(n1);              \
+       Entry(n2);              \
+       FRAME
 
-#define        M_ILK                   (%edx)
-#define        M_LOCKED                1(%edx)
-#define        M_WAITERS               2(%edx)
-#define        M_PROMOTED_PRI  4(%edx)
-#if    MACH_LDEBUG
-#define        M_TYPE                  6(%edx)
-#define        M_PC                    10(%edx)
-#define        M_THREAD                14(%edx)
-#endif /* MACH_LDEBUG */
+#define NONLEAF_RET            \
+       EMARF;                  \
+       ret
 
-#include <i386/AT386/mp/mp.h>
-#if    (NCPUS > 1)
-#define        CX(addr,reg)    addr(,reg,4)
-#else
-#define        CPU_NUMBER(reg)
-#define        CX(addr,reg)    addr
-#endif /* (NCPUS > 1) */
+
+/* For x86_64, the varargs ABI requires that %al indicate
+ * how many SSE registers contain arguments. In our case, 0 */
+#define ALIGN_STACK()          and  $0xFFFFFFFFFFFFFFF0, %rsp ;
+#define LOAD_STRING_ARG0(label)        leaq label(%rip), %rdi ;
+#define LOAD_ARG1(x)           mov x, %esi ;
+#define LOAD_PTR_ARG1(x)       mov x, %rsi ;
+#define CALL_PANIC()           xorb %al,%al ; call EXT(panic) ;
+
+#define        CHECK_UNLOCK(current, owner)                            \
+       cmp     current, owner                          ;       \
+       je      1f                                      ;       \
+       ALIGN_STACK()                                   ;       \
+       LOAD_STRING_ARG0(2f)                            ;       \
+       CALL_PANIC()                                    ;       \
+       hlt                                             ;       \
+       .data                                           ;       \
+2:     String  "Mutex unlock attempted from non-owner thread"; \
+       .text                                           ;       \
+1:
 
 #if    MACH_LDEBUG
 /*
  *  Routines for general lock debugging.
  */
-#define        S_TYPE          4(%edx)
-#define        S_PC            8(%edx)
-#define        S_THREAD        12(%edx)
-#define        S_DURATIONH     16(%edx)
-#define        S_DURATIONL     20(%edx)
 
 /* 
  * Checks for expected lock types and calls "panic" on
 #define        CHECK_MUTEX_TYPE()                                      \
        cmpl    $ MUTEX_TAG,M_TYPE                      ;       \
        je      1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
+       ALIGN_STACK()                                   ;       \
+       LOAD_STRING_ARG0(2f)                            ;       \
+       CALL_PANIC()                                    ;       \
        hlt                                             ;       \
        .data                                           ;       \
 2:     String  "not a mutex!"                          ;       \
        .text                                           ;       \
 1:
 
-#define        CHECK_SIMPLE_LOCK_TYPE()                                \
-       cmpl    $ SIMPLE_LOCK_TAG,S_TYPE                ;       \
-       je      1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
-       hlt                                             ;       \
-       .data                                           ;       \
-2:     String  "not a simple lock!"                    ;       \
-       .text                                           ;       \
-1:
-
 /*
  * If one or more simplelocks are currently held by a thread,
  * an attempt to acquire a mutex will cause this check to fail
  * (since a mutex lock may context switch, holding a simplelock
  * is not a good thing).
  */
-#if    0 /*MACH_RT - 11/12/99 - lion@apple.com disable check for now*/
+#if    MACH_RT
 #define CHECK_PREEMPTION_LEVEL()                               \
-       movl    $ CPD_PREEMPTION_LEVEL,%eax             ;       \
-       cmpl    $0,%gs:(%eax)                           ;       \
+       cmpl    $0,%gs:CPU_HIBERNATE                    ;       \
+       jne     1f                                      ;       \
+       cmpl    $0,%gs:CPU_PREEMPTION_LEVEL             ;       \
        je      1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
+       ALIGN_STACK()                                   ;       \
+       movl    %gs:CPU_PREEMPTION_LEVEL, %eax          ;       \
+       LOAD_ARG1(%eax)                                 ;       \
+       LOAD_STRING_ARG0(2f)                            ;       \
+       CALL_PANIC()                                    ;       \
        hlt                                             ;       \
        .data                                           ;       \
-2:     String  "preemption_level != 0!"                ;       \
+2:     String  "preemption_level(%d) != 0!"            ;       \
        .text                                           ;       \
 1:
 #else  /* MACH_RT */
 #define        CHECK_PREEMPTION_LEVEL()
 #endif /* MACH_RT */
 
-#define        CHECK_NO_SIMPLELOCKS()                                  \
-       movl    $ CPD_SIMPLE_LOCK_COUNT,%eax            ;       \
-       cmpl    $0,%gs:(%eax)                           ;       \
-       je      1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
-       hlt                                             ;       \
-       .data                                           ;       \
-2:     String  "simple_locks_held!"                    ;       \
-       .text                                           ;       \
-1:
-
-/* 
- * Verifies return to the correct thread in "unlock" situations.
- */
-#define        CHECK_THREAD(thd)                                       \
-       movl    $ CPD_ACTIVE_THREAD,%eax                        ;       \
-       movl    %gs:(%eax),%ecx                         ;       \
-       testl   %ecx,%ecx                               ;       \
-       je      1f                                      ;       \
-       cmpl    %ecx,thd                                ;       \
-       je      1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
-       hlt                                             ;       \
-       .data                                           ;       \
-2:     String  "wrong thread!"                         ;       \
-       .text                                           ;       \
-1:
-
-#define        CHECK_MYLOCK(thd)                                       \
-       movl    $ CPD_ACTIVE_THREAD,%eax                        ;       \
-       movl    %gs:(%eax),%ecx                         ;       \
-       testl   %ecx,%ecx                               ;       \
-       je      1f                                      ;       \
-       cmpl    %ecx,thd                                ;       \
+#define        CHECK_MYLOCK(current, owner)                            \
+       cmp     current, owner                          ;       \
        jne     1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
+       ALIGN_STACK()                                   ;       \
+       LOAD_STRING_ARG0(2f)                            ;       \
+       CALL_PANIC()                                    ;       \
        hlt                                             ;       \
        .data                                           ;       \
-2:     String  "mylock attempt!"                       ;       \
+2:     String  "Attempt to recursively lock a non-recursive lock";     \
        .text                                           ;       \
 1:
 
-#define        METER_SIMPLE_LOCK_LOCK(reg)                             \
-       pushl   reg                                     ;       \
-       call    EXT(meter_simple_lock)                  ;       \
-       popl    reg
-
-#define        METER_SIMPLE_LOCK_UNLOCK(reg)                           \
-       pushl   reg                                     ;       \
-       call    EXT(meter_simple_unlock)                ;       \
-       popl    reg
-
 #else  /* MACH_LDEBUG */
 #define        CHECK_MUTEX_TYPE()
-#define        CHECK_SIMPLE_LOCK_TYPE
-#define        CHECK_THREAD(thd)
 #define CHECK_PREEMPTION_LEVEL()
-#define        CHECK_NO_SIMPLELOCKS()
 #define        CHECK_MYLOCK(thd)
-#define        METER_SIMPLE_LOCK_LOCK(reg)
-#define        METER_SIMPLE_LOCK_UNLOCK(reg)
 #endif /* MACH_LDEBUG */
 
+#define PREEMPTION_DISABLE                             \
+       incl    %gs:CPU_PREEMPTION_LEVEL
+
+#define        PREEMPTION_LEVEL_DEBUG 1        
+#if    PREEMPTION_LEVEL_DEBUG
+#define        PREEMPTION_ENABLE                               \
+       decl    %gs:CPU_PREEMPTION_LEVEL        ;       \
+       js      17f                             ;       \
+       jnz     19f                             ;       \
+       testl   $AST_URGENT,%gs:CPU_PENDING_AST ;       \
+       jz      19f                             ;       \
+       PUSHF                                   ;       \
+       testl   $EFL_IF, S_PC                   ;       \
+       jz      18f                             ;       \
+       POPF                                    ;       \
+       int     $(T_PREEMPT)                    ;       \
+       jmp     19f                             ;       \
+17:                                                    \
+       call    _preemption_underflow_panic     ;       \
+18:                                                    \
+       POPF                                    ;       \
+19:
+#else
+#define        PREEMPTION_ENABLE                               \
+       decl    %gs:CPU_PREEMPTION_LEVEL        ;       \
+       jnz     19f                             ;       \
+       testl   $AST_URGENT,%gs:CPU_PENDING_AST ;       \
+       jz      19f                             ;       \
+       PUSHF                                   ;       \
+       testl   $EFL_IF, S_PC                   ;       \
+       jz      18f                             ;       \
+       POPF                                    ;       \
+       int     $(T_PREEMPT)                    ;       \
+       jmp     19f                             ;       \
+18:                                                    \
+       POPF                                    ;       \
+19:
+#endif
+
+
+#if    CONFIG_DTRACE
+
+       .globl  _lockstat_probe
+       .globl  _lockstat_probemap
+
+/*
+ * LOCKSTAT_LABEL creates a dtrace symbol which contains
+ * a pointer into the lock code function body. At that
+ * point is a "ret" instruction that can be patched into
+ * a "nop"
+ */
+
+#define        LOCKSTAT_LABEL(lab) \
+       .data                                       ;\
+       .globl  lab                                 ;\
+       lab:                                        ;\
+       .quad 9f                                    ;\
+       .text                                       ;\
+       9:
+
+#define LOCKSTAT_RECORD(id, lck) \
+       push    %rbp                                ;       \
+       mov     %rsp,%rbp                           ;       \
+       movl    _lockstat_probemap + (id * 4)(%rip),%eax ;  \
+       test    %eax,%eax                           ;       \
+       je              9f                          ;       \
+       mov             lck, %rsi                   ;       \
+       mov             %rax, %rdi                  ;       \
+       mov             $0, %rdx                    ;       \
+       mov             $0, %rcx                    ;       \
+       mov             $0, %r8                     ;       \
+       mov             $0, %r9                     ;       \
+       call    *_lockstat_probe(%rip)              ;       \
+9:     leave
+       /* ret - left to subsequent code, e.g. return values */
+
+#endif /* CONFIG_DTRACE */
+
+/*
+ * For most routines, the hw_lock_t pointer is loaded into a
+ * register initially, and then either a byte or register-sized
+ * word is loaded/stored to the pointer
+ */
 
 /*
- *     void hw_lock_init(hw_lock_t)
+ *     void hw_lock_byte_init(volatile uint8_t *)
  *
- *     Initialize a hardware lock.
+ *     Initialize a hardware byte lock.
  */
-ENTRY(hw_lock_init)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-       xorl    %eax,%eax
-       movb    %al,0(%edx)             /* clear the lock */
-       EMARF
-       ret
+LEAF_ENTRY(hw_lock_byte_init)
+       movb    $0, (%rdi)              /* clear the lock */
+       LEAF_RET
 
 /*
- *     void hw_lock_lock(hw_lock_t)
- *     unsigned int hw_lock_to(hw_lock_t, unsigned int)
+ *     void    hw_lock_byte_lock(uint8_t *lock_byte)
  *
- *     Acquire lock, spinning until it becomes available.
- *     XXX:  For now, we don't actually implement the timeout.
+ *     Acquire byte sized lock operand, spinning until it becomes available.
  *     MACH_RT:  also return with preemption disabled.
  */
-ENTRY2(hw_lock_lock,hw_lock_to)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
 
-1:     DISABLE_PREEMPTION(%eax)
-       movb    $1,%cl
-       xchgb   0(%edx),%cl             /* try to acquire the HW lock */
-       testb   %cl,%cl                 /* success? */
+LEAF_ENTRY(hw_lock_byte_lock)
+       PREEMPTION_DISABLE              /* not re-enabled here: held while spinning and on return */
+       movl    $1, %ecx                /* %cl = value stored when the lock is taken */
+1:
+       movb    (%rdi), %al             /* Load current lock byte */
+       testb   %al,%al                 /* lock locked? */
+       jne     3f                      /* branch if so */
+       lock; cmpxchg %cl,(%rdi)        /* %al is 0 here: store %cl only if the byte is still 0 */
        jne     3f
-       movl    $1,%eax                 /* In case this was a timeout call */
-       EMARF                           /* if yes, then nothing left to do */
-       ret
-
-3:     ENABLE_PREEMPTION(%eax)         /* no reason we can't be preemptable now */
-
-       movb    $1,%cl
-2:     testb   %cl,0(%edx)             /* spin checking lock value in cache */
-       jne     2b                      /* non-zero means locked, keep spinning */
-       jmp     1b                      /* zero means unlocked, try to grab it */
+       LEAF_RET                        /* exchange succeeded: lock acquired, nothing left to do */
+3:
+       PAUSE                           /* pause for hyper-threading */
+       jmp     1b                      /* reload the byte and try again */
 
 /*
- *     void hw_lock_unlock(hw_lock_t)
+ *     void hw_lock_byte_unlock(uint8_t *lock_byte)
  *
- *     Unconditionally release lock.
+ *     Unconditionally release byte sized lock operand.
  *     MACH_RT:  release preemption level.
  */
-ENTRY(hw_lock_unlock)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-       xorl    %eax,%eax
-       xchgb   0(%edx),%al             /* clear the lock... a mov instruction */
-                                       /* ...might be cheaper and less paranoid */
-       ENABLE_PREEMPTION(%eax)
-       EMARF
-       ret
+
+LEAF_ENTRY(hw_lock_byte_unlock)
+       movb $0, (%rdi)         /* Clear the lock byte */
+       PREEMPTION_ENABLE
+       LEAF_RET
 
 /*
- *     unsigned int hw_lock_try(hw_lock_t)
- *     MACH_RT:  returns with preemption disabled on success.
+ * Reader-writer lock fastpaths. These currently exist for the
+ * shared lock acquire, the exclusive lock acquire, the shared to
+ * exclusive upgrade and the release paths (where they reduce overhead
+ * considerably) -- these are by far the most frequently used routines
+ *
+ * The following should reflect the layout of the bitfield embedded within
+ * the lck_rw_t structure (see i386/locks.h).
  */
-ENTRY(hw_lock_try)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-
-       DISABLE_PREEMPTION(%eax)
-       movb    $1,%cl
-       xchgb   0(%edx),%cl             /* try to acquire the HW lock */
-       testb   %cl,%cl                 /* success? */
-       jne     1f                      /* if yes, let the caller know */
+#define LCK_RW_INTERLOCK       (0x1 << 16)
 
-       movl    $1,%eax                 /* success */
-       EMARF
-       ret
+#define LCK_RW_PRIV_EXCL       (0x1 << 24)
+#define LCK_RW_WANT_UPGRADE    (0x2 << 24)
+#define LCK_RW_WANT_WRITE      (0x4 << 24)
+#define LCK_R_WAITING          (0x8 << 24)
+#define LCK_W_WAITING          (0x10 << 24)
 
-1:     ENABLE_PREEMPTION(%eax)         /* failure:  release preemption... */
-       xorl    %eax,%eax               /* ...and return failure */
-       EMARF
-       ret     
+#define LCK_RW_SHARED_MASK     (0xffff)
 
 /*
- *     unsigned int hw_lock_held(hw_lock_t)
- *     MACH_RT:  doesn't change preemption state.
- *     N.B.  Racy, of course.
+ * For most routines, the lck_rw_t pointer is loaded into a
+ * register initially, and the flags bitfield loaded into another
+ * register and examined
  */
-ENTRY(hw_lock_held)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
+#define        RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
+/*
+ *     void lck_rw_lock_shared(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_lock_shared)
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* Load thread pointer */
+       incl    TH_RWLOCK_COUNT(%rcx)           /* Increment count before atomic CAS */
+1:
+       mov     (%rdi), %eax            /* Load state bitfield and interlock */
+       testl   $(RW_LOCK_SHARED_MASK), %eax    /* Eligible for fastpath? */
+       jne     3f
 
-       movb    $1,%cl
-       testb   %cl,0(%edx)             /* check lock value */
-       jne     1f                      /* non-zero means locked */
-       xorl    %eax,%eax               /* tell caller:  lock wasn't locked */
-       EMARF
-       ret     
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       incl    %ecx                            /* Increment reader refcount */
+       lock
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
+       jne     2f
+
+#if    CONFIG_DTRACE
+       /*
+        * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_ACQUIRE
+        * Implemented by swapping between return and no-op instructions.
+        * See bsd/dev/dtrace/lockstat.c.
+        */
+       LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point)
+       ret
+       /*
+       Fall thru when patched, counting on lock pointer in %rdi
+       */
+       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, %rdi)
+#endif
+       ret
+2:
+       PAUSE
+       jmp     1b
+3:
+       jmp     EXT(lck_rw_lock_shared_gen)
 
-1:     movl    $1,%eax                 /* tell caller:  lock was locked */
-       EMARF
+
+       
+#define        RW_TRY_LOCK_SHARED_MASK (LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
+/*
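+ *     boolean_t lck_rw_try_lock_shared(lck_rw_t *)
+ *
+ *             Returns TRUE (1) if a reader reference was taken, FALSE (0)
+ *             if LCK_RW_WANT_UPGRADE or LCK_RW_WANT_WRITE is set.
+ *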
+ */
+Entry(lck_rw_try_lock_shared)
+1:
+       mov     (%rdi), %eax            /* Load state bitfield and interlock */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     2f
+       testl   $(RW_TRY_LOCK_SHARED_MASK), %eax
+       jne     3f                      /* lock is busy */
+
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       incl    %ecx                            /* Increment reader refcount */
+       lock
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
+       jne     2f
+
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* Load thread pointer */
+       incl    TH_RWLOCK_COUNT(%rcx)           /* Increment count on success. */
+       /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
+
+#if    CONFIG_DTRACE
+       movl    $1, %eax
+       /*
+        * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
+        * Implemented by swapping between return and no-op instructions.
+        * See bsd/dev/dtrace/lockstat.c.
+        */
+       LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point)
+       ret
+       /* Fall thru when patched, counting on lock pointer in %rdi  */
+       LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, %rdi)
+#endif
+       movl    $1, %eax                        /* return TRUE */
+       ret
+2:
+       PAUSE
+       jmp     1b
+3:
+       xorl    %eax, %eax
        ret
+
        
+#define        RW_LOCK_EXCLUSIVE_HELD  (LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)
+/*
+ *     int lck_rw_grab_shared(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_grab_shared)
+1:
+       mov     (%rdi), %eax            /* Load state bitfield and interlock */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     5f                      /* interlock set: pause and reload */
+       testl   $(RW_LOCK_EXCLUSIVE_HELD), %eax /* WANT_WRITE or WANT_UPGRADE set? */
+       jne     3f                      /* yes: decide at 3 whether a grab is still allowed */
+2:     
+       movl    %eax, %ecx              /* original value in %eax for cmpxchgl */
+       incl    %ecx                    /* Increment reader refcount */
+       lock
+       cmpxchgl %ecx, (%rdi)           /* Attempt atomic exchange */
+       jne     4f                      /* state changed underneath us: report failure, no retry */
 
+       movl    $1, %eax                /* return success */
+       ret
+3:
+       testl   $(LCK_RW_SHARED_MASK), %eax     /* is the reader count non-zero? */
+       je      4f                      /* reader count is zero: fail */
+       testl   $(LCK_RW_PRIV_EXCL), %eax
+       je      2b                      /* LCK_RW_PRIV_EXCL clear: go take a reference at 2 */
+4:
+       xorl    %eax, %eax              /* return failure */
+       ret
+5:
+       PAUSE
+       jmp     1b
 
-#if    0
 
+       
+#define        RW_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | \
+                               LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
+/*
+ *     void lck_rw_lock_exclusive(lck_rw_t*)
+ *
+ */
+Entry(lck_rw_lock_exclusive)
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* Load thread pointer */
+       incl    TH_RWLOCK_COUNT(%rcx)           /* Increment count before atomic CAS */
+1:
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and shared count */
+       testl   $(RW_LOCK_EXCLUSIVE_MASK), %eax         /* Eligible for fastpath? */
+       jne     3f                                      /* no, go slow */
 
-ENTRY(_usimple_lock_init)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-       xorl    %eax,%eax
-       movb    %al,USL_INTERLOCK(%edx) /* unlock the HW lock */
-       EMARF
+       movl    %eax, %ecx                              /* original value in %eax for cmpxchgl */
+       orl     $(LCK_RW_WANT_WRITE), %ecx
+       lock
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
+       jne     2f
+
+#if    CONFIG_DTRACE
+       /*
+        * Dtrace lockstat event: LS_LCK_RW_LOCK_EXCL_ACQUIRE
+        * Implemented by swapping between return and no-op instructions.
+        * See bsd/dev/dtrace/lockstat.c.
+        */
+       LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point)
        ret
+       /* Fall thru when patched, counting on lock pointer in %rdi  */
+       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, %rdi)
+#endif
+       ret
+2:
+       PAUSE
+       jmp     1b
+3:
+       jmp     EXT(lck_rw_lock_exclusive_gen)
 
-ENTRY(_simple_lock)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-
-       CHECK_SIMPLE_LOCK_TYPE()
 
-       DISABLE_PREEMPTION(%eax)
+       
+#define        RW_TRY_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
+/*
+ *     boolean_t lck_rw_try_lock_exclusive(lck_rw_t *)
+ *
+ *             Tries to get a write lock.
+ *
+ *             Returns FALSE if the lock is not held on return.
+ */
+Entry(lck_rw_try_lock_exclusive)
+1:
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and shared count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     2f
+       testl   $(RW_TRY_LOCK_EXCLUSIVE_MASK), %eax
+       jne     3f                              /* can't get it */
+
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       orl     $(LCK_RW_WANT_WRITE), %ecx
+       lock
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
+       jne     2f
+
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* Load thread pointer */
+       incl    TH_RWLOCK_COUNT(%rcx)           /* Increment count on success. */
+       /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
+
+#if    CONFIG_DTRACE
+       movl    $1, %eax
+       /*
+        * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
+        * Implemented by swapping between return and no-op instructions.
+        * See bsd/dev/dtrace/lockstat.c.
+        */
+       LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point)
+       ret
+       /* Fall thru when patched, counting on lock pointer in %rdi  */
+       LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, %rdi)
+#endif
+       movl    $1, %eax                        /* return TRUE */
+       ret
+2:
+       PAUSE
+       jmp     1b
+3:
+       xorl    %eax, %eax                      /* return FALSE */
+       ret     
 
-sl_get_hw:
-       movb    $1,%cl
-       xchgb   USL_INTERLOCK(%edx),%cl /* try to acquire the HW lock */
-       testb   %cl,%cl                 /* did we succeed? */
 
-#if    MACH_LDEBUG
-       je      5f
-       CHECK_MYLOCK(S_THREAD)
-       jmp     sl_get_hw
-5:
-#else  /* MACH_LDEBUG */
-       jne     sl_get_hw               /* no, try again */
-#endif /* MACH_LDEBUG */
 
-#if    MACH_LDEBUG
-       movl    L_PC,%ecx
-       movl    %ecx,S_PC
-       movl    $ CPD_ACTIVE_THREAD,%eax
-       movl    %gs:(%eax),%ecx
-       movl    %ecx,S_THREAD
-       incl    CX(EXT(simple_lock_count),%eax)
-#if 0
-       METER_SIMPLE_LOCK_LOCK(%edx)
+/*
+ *     boolean_t lck_rw_lock_shared_to_exclusive(lck_rw_t*)
+ *
+ *     fastpath can be taken if
+ *     the current rw_shared_count == 1
+ *     AND the interlock is clear
+ *     AND RW_WANT_UPGRADE is not set
+ *
+ *     note that RW_WANT_WRITE could be set, but will not
+ *     be indicative of an exclusive hold since we have
+ *     a read count on the lock that we have not yet released
+ *     we can blow by that state since the lck_rw_lock_exclusive
+ *     function will block until rw_shared_count == 0 and 
+ *     RW_WANT_UPGRADE is clear... it does this check behind
+ *     the interlock which we are also checking for
+ *
+ *     to make the transition we must be able to atomically
+ *     set RW_WANT_UPGRADE and get rid of the read count we hold
+ */
+Entry(lck_rw_lock_shared_to_exclusive)
+1:
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and shared count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     7f
+       testl   $(LCK_RW_WANT_UPGRADE), %eax
+       jne     2f
+
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       orl     $(LCK_RW_WANT_UPGRADE), %ecx    /* ask for WANT_UPGRADE */
+       decl    %ecx                            /* and shed our read count */
+       lock
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
+       jne     7f
+                                               /* we now own the WANT_UPGRADE */
+       testl   $(LCK_RW_SHARED_MASK), %ecx     /* check to see if all of the readers are drained */
+       jne     8f                              /* if not, we need to go wait */
+
+#if    CONFIG_DTRACE
+       movl    $1, %eax
+       /*
+        * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
+        * Implemented by swapping between return and no-op instructions.
+        * See bsd/dev/dtrace/lockstat.c.
+        */
+       LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point)
+       ret
+    /* Fall thru when patched, counting on lock pointer in %rdi  */
+    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, %rdi)
 #endif
-#if    NCPUS == 1
-       pushf
-       pushl   %edx
-       cli
-       call    EXT(lock_stack_push)
-       popl    %edx
-       popfl
-#endif /* NCPUS == 1 */
-#endif /* MACH_LDEBUG */
-
-       EMARF
+       movl    $1, %eax                        /* return success */
        ret
+       
+2:                                             /* someone else already holds WANT_UPGRADE */
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       decl    %ecx                            /* shed our read count */
+       testl   $(LCK_RW_SHARED_MASK), %ecx
+       jne     3f                              /* other readers remain: leave LCK_W_WAITING as is */
+       andl    $(~LCK_W_WAITING), %ecx         /* we were the last reader, so clear the wait indicator */
+3:     
+       lock
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
+       jne     7f
 
-ENTRY(_simple_lock_try)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
+       mov     %eax, %esi                      /* put old flags as second arg */
+                                               /* lock is already in %rdi */
+       call    EXT(lck_rw_lock_shared_to_exclusive_failure)
+       ret                                     /* and pass the failure return along */ 
+7:
+       PAUSE
+       jmp     1b
+8:
+       jmp     EXT(lck_rw_lock_shared_to_exclusive_success)
 
-       CHECK_SIMPLE_LOCK_TYPE()
 
-       DISABLE_PREEMPTION(%eax)
+       
+       .cstring
+rwl_release_error_str:
+       .asciz  "Releasing non-exclusive RW lock without a reader refcount!"
+       .text
+       
+/*
+ *     lck_rw_type_t lck_rw_done(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_done)                             /* in: %rdi = lock pointer */
+1:
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and reader count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     7f                              /* wait for interlock to clear */
+
+       movl    %eax, %ecx                      /* keep original value in %eax for cmpxchgl */
+       testl   $(LCK_RW_SHARED_MASK), %ecx     /* if reader count == 0, must be exclusive lock */
+       je      2f
+       decl    %ecx                            /* Decrement reader count */
+       testl   $(LCK_RW_SHARED_MASK), %ecx     /* if reader count has now gone to 0, check for waiters */
+       je      4f
+       jmp     6f                              /* readers remain: just store the decremented count */
+2:     
+       testl   $(LCK_RW_WANT_UPGRADE), %ecx
+       je      3f                              /* not held via WANT_UPGRADE: check WANT_WRITE */
+       andl    $(~LCK_RW_WANT_UPGRADE), %ecx   /* release the upgrade ownership bit */
+       jmp     4f
+3:     
+       testl   $(LCK_RW_WANT_WRITE), %ecx
+       je      8f                              /* lock is not 'owned', go panic */
+       andl    $(~LCK_RW_WANT_WRITE), %ecx     /* release the write ownership bit */
+4:     
+       /*
+        * test the original values to match what
+        * lck_rw_done_gen is going to do to determine
+        * which wakeups need to happen...
+        *
+        * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
+        */
+       testl   $(LCK_W_WAITING), %eax
+       je      5f                              /* no writer waiting: only clear LCK_R_WAITING */
+       andl    $(~LCK_W_WAITING), %ecx
 
-       movb    $1,%cl
-       xchgb   USL_INTERLOCK(%edx),%cl /* try to acquire the HW lock */
-       testb   %cl,%cl                 /* did we succeed? */
-       jne     1f                      /* no, return failure */
+       testl   $(LCK_RW_PRIV_EXCL), %eax
+       jne     6f                              /* priv_excl and a writer was waiting: leave LCK_R_WAITING set */
+5:     
+       andl    $(~LCK_R_WAITING), %ecx
+6:     
+       lock
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
+       jne     7f                              /* lock word changed since the load: retry from the top */
 
-#if    MACH_LDEBUG
-       movl    L_PC,%ecx
-       movl    %ecx,S_PC
-       movl    $ CPD_ACTIVE_THREAD,%eax
-       movl    %gs:(%eax),%ecx
-       movl    %ecx,S_THREAD
-       incl    CX(EXT(simple_lock_count),%eax)
-#if 0
-       METER_SIMPLE_LOCK_LOCK(%edx)
-#endif
-#if    NCPUS == 1
-       pushf
-       pushl   %edx
-       cli
-       call    EXT(lock_stack_push)
-       popl    %edx
-       popfl
-#endif /* NCPUS == 1 */
-#endif /* MACH_LDEBUG */
+       mov     %eax,%esi       /* old flags in %rsi */
+                               /* lock is in %rdi already */
+       call    EXT(lck_rw_done_gen)    
+       ret                     /* %eax is not touched after the call, so lck_rw_done_gen's result is returned */
+7:
+       PAUSE
+       jmp     1b
+8:
+       ALIGN_STACK()
+       LOAD_STRING_ARG0(rwl_release_error_str)
+       CALL_PANIC()            /* no reader count and neither ownership bit set */
+       
 
-       movl    $1,%eax                 /* return success */
+       
+/*
+ *     lck_rw_type_t lck_rw_lock_exclusive_to_shared(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_lock_exclusive_to_shared)         /* in: %rdi = lock pointer */
+1:
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and reader count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     6f                              /* wait for interlock to clear */
+
+       movl    %eax, %ecx                      /* keep original value in %eax for cmpxchgl */
+       incl    %ecx                            /* Increment reader count */
+
+       testl   $(LCK_RW_WANT_UPGRADE), %ecx
+       je      2f                              /* WANT_UPGRADE not set: drop WANT_WRITE instead */
+       andl    $(~LCK_RW_WANT_UPGRADE), %ecx   /* give up the upgrade ownership bit */
+       jmp     3f
+2:     
+       andl    $(~LCK_RW_WANT_WRITE), %ecx     /* give up the write ownership bit */
+3:     
+       /*
+        * test the original values to match what
+        * lck_rw_lock_exclusive_to_shared_gen is going to do to determine
+        * which wakeups need to happen...
+        *
+        * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
+        */
+       testl   $(LCK_W_WAITING), %eax
+       je      4f                              /* no writer waiting: clear LCK_R_WAITING */
+       testl   $(LCK_RW_PRIV_EXCL), %eax
+       jne     5f                              /* priv_excl and a writer is waiting: leave LCK_R_WAITING set */
+4:     
+       andl    $(~LCK_R_WAITING), %ecx
+5:     
+       lock
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
+       jne     6f                              /* lock word changed since the load: retry from the top */
 
-       EMARF
+       mov     %eax,%esi                       /* old flags as second argument; lock is still in %rdi */
+       call    EXT(lck_rw_lock_exclusive_to_shared_gen)
        ret
+6:
+       PAUSE
+       jmp     1b
 
-1:
-       ENABLE_PREEMPTION(%eax)
 
-       xorl    %eax,%eax               /* and return failure */
 
-       EMARF
+/*
+ *     int lck_rw_grab_want(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_grab_want)                        /* in: %rdi = lock pointer; out: %eax = 1 if WANT_WRITE was set by us, else 0 */
+1:
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and reader count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     3f                              /* wait for interlock to clear */
+       testl   $(LCK_RW_WANT_WRITE), %eax      /* want_write has been grabbed by someone else */
+       jne     2f                              /* go return failure */
+       
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       orl     $(LCK_RW_WANT_WRITE), %ecx
+       lock
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
+       jne     2f                              /* lost the race: report failure rather than retrying */
+                                               /* we now own want_write */
+       movl    $1, %eax                        /* return success */
        ret
+2:
+       xorl    %eax, %eax                      /* return failure */
+       ret
+3:
+       PAUSE
+       jmp     1b
 
-ENTRY(_simple_unlock)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
+       
+#define        RW_LOCK_SHARED_OR_UPGRADE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE)
+/*
+ *     int lck_rw_held_read_or_upgrade(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_held_read_or_upgrade)             /* in: %rdi = lock pointer */
+       mov     (%rdi), %eax                    /* single unlocked read of the state word */
+       andl    $(RW_LOCK_SHARED_OR_UPGRADE_MASK), %eax /* nonzero if any reader, WANT_UPGRADE or interlock bit is set */
+       ret
 
-       CHECK_SIMPLE_LOCK_TYPE()
-       CHECK_THREAD(S_THREAD)
 
-#if    MACH_LDEBUG
-       xorl    %eax,%eax
-       movl    %eax,S_THREAD           /* disown thread */
-       MP_DISABLE_PREEMPTION(%eax)
-       CPU_NUMBER(%eax)
-       decl    CX(EXT(simple_lock_count),%eax)
-       MP_ENABLE_PREEMPTION(%eax)
-#if 0
-       METER_SIMPLE_LOCK_UNLOCK(%edx)
-#endif
-#if    NCPUS == 1
-       pushf
-       pushl   %edx
-       cli
-       call    EXT(lock_stack_pop)
-       popl    %edx
-       popfl
-#endif /* NCPUS == 1 */
-#endif /* MACH_LDEBUG */
+       
+/*
+ * N.B.: On x86, statistics are currently recorded for all indirect mutexes.
+ * Also, only the acquire attempt count (GRP_MTX_STAT_UTIL) is maintained
+ * as a 64-bit quantity (this matches the existing PowerPC implementation,
+ * and the new x86 specific statistics are also maintained as 32-bit
+ * quantities).
+ *
+ *
+ * Enable this preprocessor define to record only the first miss.
+ * By default, we count every miss, hence multiple misses may be
+ * recorded for a single lock acquire attempt via lck_mtx_lock.
+ */
+#undef LOG_FIRST_MISS_ALONE    
 
-       xorb    %cl,%cl
-       xchgb   USL_INTERLOCK(%edx),%cl /* unlock the HW lock */
+/*
+ * This preprocessor define controls whether the R-M-W update of the
+ * per-group statistics elements are atomic (LOCK-prefixed)
+ * Enabled by default.
+ */
+#define ATOMIC_STAT_UPDATES 1
 
-       ENABLE_PREEMPTION(%eax)
+#if defined(ATOMIC_STAT_UPDATES)
+#define LOCK_IF_ATOMIC_STAT_UPDATES lock
+#else
+#define LOCK_IF_ATOMIC_STAT_UPDATES
+#endif /* ATOMIC_STAT_UPDATES */
 
-       EMARF
-       ret
 
-#endif /* 0 */
+/*
+ * For most routines, the lck_mtx_t pointer is loaded into a
+ * register initially, and the owner field checked for indirection.
+ * Eventually the lock owner is loaded into a register and examined.
+ */
 
+#define M_OWNER                MUTEX_OWNER
+#define M_PTR          MUTEX_PTR
+#define M_STATE                MUTEX_STATE     
+       
 
-ENTRY(mutex_init)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-       xorl    %eax,%eax
-       movb    %al,M_ILK               /* clear interlock */
-       movb    %al,M_LOCKED            /* clear locked flag */
-       movw    %ax,M_WAITERS           /* init waiter count */
-       movw    %ax,M_PROMOTED_PRI
+#define LMTX_ENTER_EXTENDED                                    \
+       mov     M_PTR(%rdx), %rdx       /* follow the indirection */    ;       \
+       xor     %r11, %r11              /* clear the flag bits tested by the LOG_FIRST_MISS_ALONE variants */   ;       \
+       mov     MUTEX_GRP(%rdx), %r10   /* %r10 = group pointer used by the LMTX_UPDATE_* macros */     ;       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incq    GRP_MTX_STAT_UTIL(%r10) /* count this acquire attempt (64-bit counter) */
+
+
+#if    LOG_FIRST_MISS_ALONE
+#define LMTX_UPDATE_MISS                                       \
+       test    $1, %r11                /* bit 0 of %r11: miss already counted */       ;       \
+       jnz     11f                                     ;       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_MISS(%r10)                 ;       \
+       or      $1, %r11                /* remember that the miss was counted */        ;       \
+11:
+#else
+#define LMTX_UPDATE_MISS                                       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_MISS(%r10) /* 32-bit counter; bumped on every expansion */
+#endif
+       
 
-#if    MACH_LDEBUG
-       movl    $ MUTEX_TAG,M_TYPE      /* set lock type */
-       movl    %eax,M_PC               /* init caller pc */
-       movl    %eax,M_THREAD           /* and owning thread */
+#if    LOG_FIRST_MISS_ALONE
+#define LMTX_UPDATE_WAIT                                       \
+       test    $2, %r11                /* bit 1 of %r11: wait already counted */       ;       \
+       jnz     11f                                     ;       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_WAIT(%r10)                 ;       \
+       or      $2, %r11                /* remember that the wait was counted */        ;       \
+11:
+#else
+#define LMTX_UPDATE_WAIT                                       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_WAIT(%r10) /* 32-bit counter; bumped on every expansion */
 #endif
-#if    ETAP_LOCK_TRACE
-       movl    L_ARG1,%ecx             /* fetch event type */
-       pushl   %ecx                    /* push event type */
-       pushl   %edx                    /* push mutex address */
-       call    EXT(etap_mutex_init)    /* init ETAP data */
-       addl    $8,%esp
-#endif /* ETAP_LOCK_TRACE */
 
-       EMARF
-       ret
 
-ENTRY2(mutex_lock,_mutex_lock)
-       FRAME
+/*
+ * Record the "direct wait" statistic, which indicates if a
+ * miss proceeded to block directly without spinning--occurs
+ * if the owner of the mutex isn't running on another processor
+ * at the time of the check.
+ */
+#define LMTX_UPDATE_DIRECT_WAIT                                        \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_DIRECT_WAIT(%r10)  /* %r10 = group pointer loaded by LMTX_ENTER_EXTENDED */
 
-#if    ETAP_LOCK_TRACE
-       subl    $12,%esp                /* make room for locals */
-       movl    $0,SWT_HI               /* set wait time to zero (HI) */
-       movl    $0,SWT_LO               /* set wait time to zero (LO) */
-       movl    $0,MISSED               /* clear local miss marker */
-#endif /* ETAP_LOCK_TRACE */
+       
+#define LMTX_CALLEXT1(func_name)               \
+       cmp     %rdx, %rdi      /* equal: skip saving %r10/%r11 */      ;       \
+       je      12f                     ;       \
+       push    %r10            /* otherwise preserve %r10/%r11 across the call */      ;       \
+       push    %r11                    ;       \
+12:    push    %rdi                    ;       \
+       push    %rdx                    ;       \
+       mov     %rdx, %rdi      /* first argument = %rdx */     ;       \
+       call    EXT(func_name)          ;       \
+       pop     %rdx                    ;       \
+       pop     %rdi                    ;       \
+       cmp     %rdx, %rdi      /* same test as on entry, on the restored values */     ;       \
+       je      12f                     ;       \
+       pop     %r11                    ;       \
+       pop     %r10                    ;       \
+12:
+       
+#define LMTX_CALLEXT2(func_name, reg)          \
+       cmp     %rdx, %rdi      /* equal: skip saving %r10/%r11 */      ;       \
+       je      12f                     ;       \
+       push    %r10            /* otherwise preserve %r10/%r11 across the call */      ;       \
+       push    %r11                    ;       \
+12:    push    %rdi                    ;       \
+       push    %rdx                    ;       \
+       mov     reg, %rsi       /* second argument = reg (copied before %rdi is overwritten) */ ;       \
+       mov     %rdx, %rdi      /* first argument = %rdx */     ;       \
+       call    EXT(func_name)          ;       \
+       pop     %rdx                    ;       \
+       pop     %rdi                    ;       \
+       cmp     %rdx, %rdi      /* same test as on entry, on the restored values */     ;       \
+       je      12f                     ;       \
+       pop     %r11                    ;       \
+       pop     %r10                    ;       \
+12:
+
+
+#define M_WAITERS_MSK          0x0000ffff
+#define M_PRIORITY_MSK         0x00ff0000
+#define M_ILOCKED_MSK          0x01000000
+#define M_MLOCKED_MSK          0x02000000
+#define M_PROMOTED_MSK         0x04000000
+#define M_SPIN_MSK             0x08000000
 
-       movl    L_ARG0,%edx             /* fetch lock pointer */
+/*
+ *     void lck_mtx_assert(lck_mtx_t* l, unsigned int)
+ *     Takes the address of a lock, and an assertion type as parameters.
+ *     The assertion can take one of two forms determined by the type
+ *     parameter: either the lock is held by the current thread, and the
+ *     type is LCK_MTX_ASSERT_OWNED, or it isn't and the type is
+ *     LCK_MTX_ASSERT_NOTOWNED. Calls panic on assertion failure.
+ *     
+ */
 
-       CHECK_MUTEX_TYPE()
-       CHECK_NO_SIMPLELOCKS()
-       CHECK_PREEMPTION_LEVEL()
+NONLEAF_ENTRY(lck_mtx_assert)                  /* in: %rdi = lock, %rsi = assertion type */
+        mov    %rdi, %rdx                      /* Load lock address */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax     /* Load current thread */
+
+       mov     M_STATE(%rdx), %ecx
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     0f
+       mov     M_PTR(%rdx), %rdx               /* If so, take indirection */
+0:     
+       mov     M_OWNER(%rdx), %rcx             /* Load owner */
+       cmp     $(MUTEX_ASSERT_OWNED), %rsi     /* Assert ownership? */
+       jne     2f                              /* no: handle the other assertion type at 2 */
+       cmp     %rax, %rcx                      /* Current thread match? */
+       jne     3f                              /* no, go panic */
+       testl   $(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(%rdx)
+       je      3f                              /* owner matches but neither lock bit is set: panic */
+1:                                             /* yes, we own it */
+       NONLEAF_RET
+2:
+       cmp     %rax, %rcx                      /* Current thread match? */
+       jne     1b                              /* No, return */
+       ALIGN_STACK()
+       LOAD_PTR_ARG1(%rdx)
+       LOAD_STRING_ARG0(mutex_assert_owned_str)        /* current thread is the owner but ownership was not asserted */
+       jmp     4f
+3:
+       ALIGN_STACK()
+       LOAD_PTR_ARG1(%rdx)
+       LOAD_STRING_ARG0(mutex_assert_not_owned_str)    /* ownership was asserted but the check failed */
+4:
+       CALL_PANIC()
+
+
+lck_mtx_destroyed:                             /* shared panic target; jumped to when M_STATE equals MUTEX_DESTROYED */
+       ALIGN_STACK()
+       LOAD_PTR_ARG1(%rdx)                     /* expects the mutex pointer in %rdx */
+       LOAD_STRING_ARG0(mutex_interlock_destroyed_str)
+       CALL_PANIC()
+       
 
-ml_retry:
-       DISABLE_PREEMPTION(%eax)
+.cstring       /* constant panic format strings: keep them read-only, in the same section as rwl_release_error_str */
+mutex_assert_not_owned_str:
+       .asciz  "mutex (%p) not owned\n"
+mutex_assert_owned_str:
+       .asciz  "mutex (%p) owned\n"
+mutex_interlock_destroyed_str:
+       .asciz  "trying to interlock destroyed mutex (%p)"
+.text
 
-ml_get_hw:
-       movb    $1,%cl
-       xchgb   %cl,M_ILK
-       testb   %cl,%cl                 /* did we succeed? */
-       jne     ml_get_hw               /* no, try again */
 
-       movb    $1,%cl
-       xchgb   %cl,M_LOCKED            /* try to set locked flag */
-       testb   %cl,%cl                 /* is the mutex locked? */
-       jne     ml_fail                 /* yes, we lose */
 
-       pushl   %edx
-       call    EXT(mutex_lock_acquire)
-       addl    $4,%esp
-       movl    L_ARG0,%edx
+/*
+ * lck_mtx_lock()
+ * lck_mtx_try_lock()
+ * lck_mtx_unlock()
+ * lck_mtx_lock_spin()
+ * lck_mtx_lock_spin_always()
+ * lck_mtx_try_lock_spin()
+ * lck_mtx_try_lock_spin_always()
+ * lck_mtx_convert_spin()
+ */
+NONLEAF_ENTRY(lck_mtx_lock_spin_always)        /* same as lck_mtx_lock_spin but skips CHECK_PREEMPTION_LEVEL */
+       mov     %rdi, %rdx              /* fetch lock pointer */
+       jmp     Llmls_avoid_check
+       
+NONLEAF_ENTRY(lck_mtx_lock_spin)               /* in: %rdi = lock pointer */
+       mov     %rdi, %rdx              /* fetch lock pointer */
 
+       CHECK_PREEMPTION_LEVEL()
+Llmls_avoid_check:
+       mov     M_STATE(%rdx), %ecx
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* is the interlock or mutex held */
+       jnz     Llmls_slow
+Llmls_try:                             /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_SPIN_MSK), %ecx     /* new state: interlock held in spin mode */
+
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     Llmls_busy_disabled     /* state changed since the snapshot: undo and respin */
+
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of interlock */
 #if    MACH_LDEBUG
-       movl    L_PC,%ecx
-       movl    %ecx,M_PC
-       movl    $ CPD_ACTIVE_THREAD,%eax
-       movl    %gs:(%eax),%ecx
-       movl    %ecx,M_THREAD
-       testl   %ecx,%ecx
-       je      3f
-       incl    TH_MUTEX_COUNT(%ecx)
-3:
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:     
+#endif /* MACH_LDEBUG */
+
+       /* return with the interlock held and preemption disabled */
+       leave
+#if    CONFIG_DTRACE
+       LOCKSTAT_LABEL(_lck_mtx_lock_spin_lockstat_patch_point)
+       ret
+       /* inherit lock pointer in %rdx above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, %rdx)
 #endif
+       ret
 
-       xorb    %cl,%cl
-       xchgb   %cl,M_ILK
-
-       ENABLE_PREEMPTION(%eax)
-
-#if    ETAP_LOCK_TRACE
-       movl    L_PC,%eax               /* fetch pc */
-       pushl   SWT_LO                  /* push wait time (low) */
-       pushl   SWT_HI                  /* push wait time (high) */
-       pushl   %eax                    /* push pc */
-       pushl   %edx                    /* push mutex address */
-       call    EXT(etap_mutex_hold)    /* collect hold timestamp */
-       addl    $16+12,%esp             /* clean up stack, adjusting for locals */
-#endif /* ETAP_LOCK_TRACE */
-
-       EMARF
-       ret
-
-ml_fail:
-#if    ETAP_LOCK_TRACE
-       cmp     $0,MISSED               /* did we already take a wait timestamp? */
-       jne     ml_block                /* yup. carry-on */
-       pushl   %edx                    /* push mutex address */
-       call    EXT(etap_mutex_miss)    /* get wait timestamp */
-       movl    %eax,SWT_HI             /* set wait time (high word) */
-       movl    %edx,SWT_LO             /* set wait time (low word) */
-       popl    %edx                    /* clean up stack */
-       movl    $1,MISSED               /* mark wait timestamp as taken */
-#endif /* ETAP_LOCK_TRACE */
-
-ml_block:
-       CHECK_MYLOCK(M_THREAD)
-       xorl    %eax,%eax
-       pushl   %eax                    /* no promotion here yet */
-       pushl   %edx                    /* push mutex address */
-       call    EXT(mutex_lock_wait)    /* wait for the lock */
-       addl    $8,%esp
-       movl    L_ARG0,%edx             /* refetch lock pointer */
-       jmp     ml_retry                /* and try again */
-
-ENTRY2(mutex_try,_mutex_try)   
-       FRAME
+Llmls_slow:    
+       test    $M_ILOCKED_MSK, %ecx            /* is the interlock held */
+       jz      Llml_contended                  /* no, must have been the mutex; continue in lck_mtx_lock's contended path */
 
-#if    ETAP_LOCK_TRACE
-       subl    $8,%esp                 /* make room for locals */
-       movl    $0,SWT_HI               /* set wait time to zero (HI) */
-       movl    $0,SWT_LO               /* set wait time to zero (LO) */
-#endif /* ETAP_LOCK_TRACE */
+       cmp     $(MUTEX_DESTROYED), %ecx        /* check to see if its marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex */
+       jne     Llmls_loop                      /* no... must be interlocked */
 
-       movl    L_ARG0,%edx             /* fetch lock pointer */
+       LMTX_ENTER_EXTENDED
 
-       CHECK_MUTEX_TYPE()
-       CHECK_NO_SIMPLELOCKS()
+       mov     M_STATE(%rdx), %ecx             /* state of the mutex reached through the indirection */
+       test    $(M_SPIN_MSK), %ecx
+       jz      Llmls_loop1                     /* not in spin mode: examine the state without counting a miss */
 
-       DISABLE_PREEMPTION(%eax)
+       LMTX_UPDATE_MISS                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+Llmls_loop:
+       PAUSE
+       mov     M_STATE(%rdx), %ecx
+Llmls_loop1:
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx
+       jz      Llmls_try                       /* both clear: attempt the acquire */
+       test    $(M_MLOCKED_MSK), %ecx
+       jnz     Llml_contended                  /* mutex owned by someone else, go contend for it */
+       jmp     Llmls_loop                      /* only the interlock is held: keep spinning */
 
-mt_get_hw:
-       movb    $1,%cl
-       xchgb   %cl,M_ILK
-       testb   %cl,%cl
-       jne             mt_get_hw
+Llmls_busy_disabled:
+       PREEMPTION_ENABLE                       /* balance the PREEMPTION_DISABLE done before the failed cmpxchg */
+       jmp     Llmls_loop
 
-       movb    $1,%cl
-       xchgb   %cl,M_LOCKED
-       testb   %cl,%cl
-       jne             mt_fail
 
-       pushl   %edx
-       call    EXT(mutex_lock_acquire)
-       addl    $4,%esp
-       movl    L_ARG0,%edx
+       
+NONLEAF_ENTRY(lck_mtx_lock)                    /* in: %rdi = lock pointer */
+       mov     %rdi, %rdx              /* fetch lock pointer */
+
+       CHECK_PREEMPTION_LEVEL()
+
+       mov     M_STATE(%rdx), %ecx
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* is the interlock or mutex held */
+       jnz     Llml_slow
+Llml_try:                              /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* new state: interlock and mutex both held */
+
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     Llml_busy_disabled      /* state changed since the snapshot: undo and respin */
 
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of mutex */
 #if    MACH_LDEBUG
-       movl    L_PC,%ecx
-       movl    %ecx,M_PC
-       movl    $ CPD_ACTIVE_THREAD,%ecx
-       movl    %gs:(%ecx),%ecx
-       movl    %ecx,M_THREAD
-       testl   %ecx,%ecx
-       je      1f
-       incl    TH_MUTEX_COUNT(%ecx)
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
 1:
-#endif
-
-       xorb    %cl,%cl
-       xchgb   %cl,M_ILK
+#endif /* MACH_LDEBUG */
 
-       ENABLE_PREEMPTION(%eax)
+       testl   $(M_WAITERS_MSK), M_STATE(%rdx)
+       jz      Llml_finish             /* no waiters recorded: skip lck_mtx_lock_acquire_x86 */
 
-#if    ETAP_LOCK_TRACE
-       movl    L_PC,%eax               /* fetch pc */
-       pushl   SWT_LO                  /* push wait time (low) */
-       pushl   SWT_HI                  /* push wait time (high) */
-       pushl   %eax                    /* push pc */
-       pushl   %edx                    /* push mutex address */
-       call    EXT(etap_mutex_hold)    /* get start hold timestamp */
-       addl    $16,%esp                /* clean up stack, adjusting for locals */
-#endif /* ETAP_LOCK_TRACE */
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
 
-       movl    $1,%eax
+Llml_finish:
+       andl    $(~M_ILOCKED_MSK), M_STATE(%rdx)        /* drop the interlock bit only; M_MLOCKED_MSK stays set */
+       PREEMPTION_ENABLE
+       
+       cmp     %rdx, %rdi              /* is this an extended mutex */
+       jne     2f                      /* yes: use the extended lockstat patch point */
 
-#if    MACH_LDEBUG || ETAP_LOCK_TRACE
-#if    ETAP_LOCK_TRACE
-       addl    $8,%esp                 /* pop stack claimed on entry */
+       leave
+#if    CONFIG_DTRACE
+       LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point)
+       ret
+       /* inherit lock pointer in %rdx above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, %rdx)
 #endif
+       ret
+2:     
+       leave
+#if    CONFIG_DTRACE
+       LOCKSTAT_LABEL(_lck_mtx_lock_ext_lockstat_patch_point)
+       ret
+       /* inherit lock pointer in %rdx above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, %rdx)
 #endif
-
-       EMARF
        ret
 
-mt_fail:
+       
+Llml_slow:
+       test    $M_ILOCKED_MSK, %ecx            /* is the interlock held */
+       jz      Llml_contended                  /* no, must have been the mutex */
+       
+       cmp     $(MUTEX_DESTROYED), %ecx        /* check to see if its marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     Llml_loop                       /* no... must be interlocked */
+
+       LMTX_ENTER_EXTENDED
+
+       mov     M_STATE(%rdx), %ecx             /* state of the mutex reached through the indirection */
+       test    $(M_SPIN_MSK), %ecx
+       jz      Llml_loop1                      /* not in spin mode: examine the state without counting a miss */
+
+       LMTX_UPDATE_MISS                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+Llml_loop:
+       PAUSE
+       mov     M_STATE(%rdx), %ecx
+Llml_loop1:
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx
+       jz      Llml_try                        /* both clear: attempt the acquire */
+       test    $(M_MLOCKED_MSK), %ecx
+       jnz     Llml_contended                  /* mutex owned by someone else, go contend for it */
+       jmp     Llml_loop                       /* only the interlock is held: keep spinning */
+
+Llml_busy_disabled:
+       PREEMPTION_ENABLE                       /* balance the PREEMPTION_DISABLE done before the failed cmpxchg */
+       jmp     Llml_loop
+
+       
+Llml_contended:
+       cmp     %rdx, %rdi              /* is this an extended mutex */
+       je      0f                      /* no: there is no group statistic to update */
+       LMTX_UPDATE_MISS
+0:     
+       LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86)
+
+       test    %rax, %rax
+       jz      Llml_acquired           /* acquired mutex, interlock held and preemption disabled */
+
+       cmp     $1, %rax                /* check for direct wait status */
+       je      2f                      /* result 1: do not count a direct wait */
+       cmp     %rdx, %rdi              /* is this an extended mutex */
+       je      2f
+       LMTX_UPDATE_DIRECT_WAIT
+2:     
+       mov     M_STATE(%rdx), %ecx
+       test    $(M_ILOCKED_MSK), %ecx
+       jnz     6f                      /* interlock busy: pause and re-read the state */
+
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK), %ecx  /* try to take the interlock */
+
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     5f                      /* lost the race: re-enable preemption and retry */
+
+       test    $(M_MLOCKED_MSK), %ecx  /* we've got the interlock and */
+       jnz     3f
+       or      $(M_MLOCKED_MSK), %ecx  /* the mutex is free... grab it directly */
+       mov     %ecx, M_STATE(%rdx)     /* plain store, issued while we hold the interlock */
+       
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of mutex */
 #if    MACH_LDEBUG
-       movl    L_PC,%ecx
-       movl    %ecx,M_PC
-       movl    $ CPD_ACTIVE_THREAD,%ecx
-       movl    %gs:(%ecx),%ecx
-       movl    %ecx,M_THREAD
-       testl   %ecx,%ecx
-       je      1f
-       incl    TH_MUTEX_COUNT(%ecx)
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
 1:
-#endif
+#endif /* MACH_LDEBUG */
 
-       xorb    %cl,%cl
-       xchgb   %cl,M_ILK
+Llml_acquired:
+       testl   $(M_WAITERS_MSK), M_STATE(%rdx)
+       jnz     1f                      /* waiters recorded: call lck_mtx_lock_acquire_x86 */
+       mov     M_OWNER(%rdx), %rax
+       mov     TH_WAS_PROMOTED_ON_WAKEUP(%rax), %eax
+       test    %eax, %eax
+       jz      Llml_finish             /* no waiters and the owner's promoted-on-wakeup field is zero */
+1:     
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+       jmp     Llml_finish
+
+3:                                     /* interlock held, mutex busy */
+       cmp     %rdx, %rdi              /* is this an extended mutex */
+       je      4f
+       LMTX_UPDATE_WAIT
+4:     
+       LMTX_CALLEXT1(lck_mtx_lock_wait_x86)
+       jmp     Llml_contended          /* back from the wait: contend for the mutex again */
+5:     
+       PREEMPTION_ENABLE
+6:
+       PAUSE
+       jmp     2b
+       
 
-       ENABLE_PREEMPTION(%eax)
+NONLEAF_ENTRY(lck_mtx_try_lock_spin_always)    /* in this file both entry points share the code at Llmts_avoid_check */
+       mov     %rdi, %rdx              /* fetch lock pointer */
+       jmp     Llmts_avoid_check
 
-       ENABLE_PREEMPTION(%eax)
+NONLEAF_ENTRY(lck_mtx_try_lock_spin)           /* in: %rdi = lock pointer; out: %rax = 1 on the success path below */
+       mov     %rdi, %rdx              /* fetch lock pointer */
 
-#if    ETAP_LOCK_TRACE
-       movl    L_PC,%eax               /* fetch pc */
-       pushl   SWT_LO                  /* push wait time (low) */
-       pushl   SWT_HI                  /* push wait time (high) */
-       pushl   %eax                    /* push pc */
-       pushl   %edx                    /* push mutex address */
-       call    EXT(etap_mutex_hold)    /* get start hold timestamp */
-       addl    $16,%esp                /* clean up stack, adjusting for locals */
-#endif /* ETAP_LOCK_TRACE */
+Llmts_avoid_check:
+       mov     M_STATE(%rdx), %ecx
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* is the interlock or mutex held */
+       jnz     Llmts_slow
+Llmts_try:                             /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_SPIN_MSK), %rcx     /* NOTE(review): 64-bit form; sibling paths use %ecx. Only the low 32 bits are consumed by the cmpxchg below */
 
-       xorl    %eax,%eax
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     Llmts_busy_disabled     /* state changed since the snapshot: undo and re-examine */
 
-#if    MACH_LDEBUG || ETAP_LOCK_TRACE
-#if    ETAP_LOCK_TRACE
-       addl    $8,%esp                 /* pop stack claimed on entry */
-#endif
-#endif
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of interlock (M_SPIN_MSK was set, not M_MLOCKED_MSK) */
+#if    MACH_LDEBUG
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-       EMARF
+       leave                           /* interlock stays held; no PREEMPTION_ENABLE on this path */
+
+#if    CONFIG_DTRACE
+       mov     $1, %rax                        /* return success */
+       LOCKSTAT_LABEL(_lck_mtx_try_lock_spin_lockstat_patch_point)
+       ret
+       /* inherit lock pointer in %rdx above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, %rdx)
+#endif
+       mov     $1, %rax                        /* return success */
        ret
 
-ENTRY(mutex_unlock)
-       FRAME
-       movl    L_ARG0,%edx             /* fetch lock pointer */
+Llmts_slow:
+       test    $(M_ILOCKED_MSK), %ecx  /* is the interlock held */
+       jz      Llmts_fail                      /* no, must be held as a mutex */
+
+       cmp     $(MUTEX_DESTROYED), %ecx        /* check to see if its marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     Llmts_loop1                     /* no: examine the state already in %ecx */
+
+       LMTX_ENTER_EXTENDED
+Llmts_loop:
+       PAUSE
+       mov     M_STATE(%rdx), %ecx
+Llmts_loop1:
+       test    $(M_MLOCKED_MSK | M_SPIN_MSK), %ecx
+       jnz     Llmts_fail                      /* held as a mutex or in spin mode: the try fails */
+       test    $(M_ILOCKED_MSK), %ecx
+       jz      Llmts_try                       /* interlock now free: attempt the acquire */
+       jmp     Llmts_loop                      /* only the interlock is held: keep polling */
+       
+Llmts_busy_disabled:
+       PREEMPTION_ENABLE                       /* balance the PREEMPTION_DISABLE done before the failed cmpxchg */
+       jmp     Llmts_loop
 
-#if    ETAP_LOCK_TRACE
-       pushl   %edx                    /* push mutex address */
-       call    EXT(etap_mutex_unlock)  /* collect ETAP data */
-       popl    %edx                    /* restore mutex address */
-#endif /* ETAP_LOCK_TRACE */
 
-       CHECK_MUTEX_TYPE()
-       CHECK_THREAD(M_THREAD)
+       
+NONLEAF_ENTRY(lck_mtx_try_lock)        /* try to take the mutex without waiting for a holder: %rax = 1 if acquired, 0 if not */
+       mov     %rdi, %rdx              /* fetch lock pointer */
+
+       mov     M_STATE(%rdx), %ecx     /* snapshot the lock state word */
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* is the interlock or mutex held */
+       jnz     Llmt_slow       /* yes - work out why on the slow path */
+Llmt_try:                              /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* new state: interlock and mutex both held */
+       
+       PREEMPTION_DISABLE              /* matched by PREEMPTION_ENABLE after the interlock is dropped, or at Llmt_busy_disabled */
+       lock
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     Llmt_busy_disabled      /* state changed since the snapshot - back off and retry */
 
-       DISABLE_PREEMPTION(%eax)
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    %rax, %rax
+       jz      1f                      /* value loaded from CPU_ACTIVE_THREAD is zero - skip the count */
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-mu_get_hw:
-       movb    $1,%cl
-       xchgb   %cl,M_ILK
-       testb   %cl,%cl                 /* did we succeed? */
-       jne     mu_get_hw               /* no, try again */
+       test    $(M_WAITERS_MSK), %ecx  /* any waiter bits set in the state we installed? */
+       jz      0f                      /* no - skip the acquire callout */
 
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     mu_wakeup               /* yes, more work to do */
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) /* NOTE(review): external callout made with the interlock still held - semantics not visible here */
+0:
+       andl    $(~M_ILOCKED_MSK), M_STATE(%rdx)        /* drop the interlock; the mutex bit stays set */
+       PREEMPTION_ENABLE
+
+       leave                           /* frame is released here because both exits below use a bare ret */
+#if    CONFIG_DTRACE
+       mov     $1, %rax                        /* return success */
+       /* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */
+       LOCKSTAT_LABEL(_lck_mtx_try_lock_lockstat_patch_point)
+       ret                             /* NOTE(review): presumably patched when the probe is enabled so control reaches LOCKSTAT_RECORD - confirm */
+       /* inherit lock pointer in %rdx from above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, %rdx)
+#endif 
+       mov     $1, %rax                        /* return success */
+       ret
 
-mu_doit:
-#if    MACH_LDEBUG
-       xorl    %eax,%eax
-       movl    %eax,M_THREAD           /* disown thread */
-       movl    $ CPD_ACTIVE_THREAD,%eax
-       movl    %gs:(%eax),%ecx
-       testl   %ecx,%ecx
+Llmt_slow:
+       test    $(M_ILOCKED_MSK), %ecx  /* is the interlock held */
+       jz      Llmt_fail                       /* no, must be held as a mutex */
+
+       cmp     $(MUTEX_DESTROYED), %ecx        /* check to see if it's marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     Llmt_loop               /* no - direct mutex whose interlock is busy: spin */
+
+       LMTX_ENTER_EXTENDED             /* NOTE(review): presumably repoints %rdx at the extended mutex (Llmt_fail compares %rdx with %rdi) - confirm */
+Llmt_loop:                             /* spin while only the interlock is held */
+       PAUSE
+       mov     M_STATE(%rdx), %ecx     /* re-read the state word */
+Llmt_loop1:
+       test    $(M_MLOCKED_MSK | M_SPIN_MSK), %ecx     /* held as a mutex or in spin mode? */
+       jnz     Llmt_fail               /* yes - give up rather than wait */
+       test    $(M_ILOCKED_MSK), %ecx
+       jz      Llmt_try                /* interlock clear and mutex free - retry the fast path */
+       jmp     Llmt_loop
+
+Llmt_busy_disabled:                    /* cmpxchg failed after PREEMPTION_DISABLE: undo it, then spin */
+       PREEMPTION_ENABLE
+       jmp     Llmt_loop
+
+
+Llmt_fail:
+Llmts_fail:                            /* failure exit; the Llmts_* paths elsewhere in this file branch here too */
+       cmp     %rdx, %rdi                      /* is this an extended mutex */
        je      0f
-       decl    TH_MUTEX_COUNT(%ecx)
+       LMTX_UPDATE_MISS                /* reached only when %rdx != %rdi; NOTE(review): presumably a miss statistic - confirm */
 0:
-#endif
+       xor     %rax, %rax              /* return failure */
+       NONLEAF_RET
 
-       xorb    %cl,%cl
-       xchgb   %cl,M_LOCKED            /* unlock the mutex */
 
-       xorb    %cl,%cl
-       xchgb   %cl,M_ILK
 
-       ENABLE_PREEMPTION(%eax)
+NONLEAF_ENTRY(lck_mtx_convert_spin)    /* turn a spin-mode hold (interlock held) into a full mutex hold; no-op if already held as a mutex */
+       mov     %rdi, %rdx                      /* fetch lock pointer */
 
-       EMARF
-       ret
+       mov     M_STATE(%rdx), %ecx
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     0f                              /* no - operate on the lock passed in */
+       mov     M_PTR(%rdx), %rdx               /* If so, take indirection */
+       mov     M_STATE(%rdx), %ecx             /* state of the lock reached through M_PTR */
+0:
+       test    $(M_MLOCKED_MSK), %ecx          /* already owned as a mutex, just return */
+       jnz     2f
+       test    $(M_WAITERS_MSK), %ecx          /* are there any waiters? */
+       jz      1f                              /* no - skip the acquire callout */
+
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+       mov     M_STATE(%rdx), %ecx             /* re-read the state after the external call */
+1:     
+       and     $(~(M_ILOCKED_MSK | M_SPIN_MSK)), %ecx  /* convert from spin version to mutex */
+       or      $(M_MLOCKED_MSK), %ecx
+       mov     %ecx, M_STATE(%rdx)             /* since I own the interlock, I don't need an atomic update */
+
+       PREEMPTION_ENABLE                       /* NOTE(review): presumably balances the disable taken when the spin hold was acquired - confirm */
+2:     
+       NONLEAF_RET
 
-mu_wakeup:
-       xorl    %eax,%eax
-       pushl   %eax                    /* no promotion here yet */
-       pushl   %edx                    /* push mutex address */
-       call    EXT(mutex_unlock_wakeup)/* yes, wake a thread */
-       addl    $8,%esp
-       movl    L_ARG0,%edx             /* refetch lock pointer */
-       jmp     mu_doit
+       
 
-ENTRY(interlock_unlock)
-       FRAME
-       movl    L_ARG0,%edx
+NONLEAF_ENTRY(lck_mtx_unlock)  /* release a mutex held either as a full mutex or in spin mode; handles indirect mutexes */
+       mov     %rdi, %rdx              /* fetch lock pointer */
+Llmu_entry:
+       mov     M_STATE(%rdx), %ecx
+Llmu_prim:
+       cmp     $(MUTEX_IND), %ecx      /* Is this an indirect mutex? */
+       je      Llmu_ext
+
+Llmu_chktype:
+       test    $(M_MLOCKED_MSK), %ecx  /* check for full mutex */
+       jz      Llmu_unlock             /* not a full mutex - NOTE(review): presumably a spin-mode hold, so the interlock is already ours */
+Llmu_mutex:
+       test    $(M_ILOCKED_MSK), %rcx  /* have to wait for interlock to clear */
+       jnz     Llmu_busy
+
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       and     $(~M_MLOCKED_MSK), %ecx /* drop mutex */
+       or      $(M_ILOCKED_MSK), %ecx  /* pick up interlock */
+
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     Llmu_busy_disabled      /* branch on failure to spin loop */
+
+Llmu_unlock:
+       xor     %rax, %rax
+       mov     %rax, M_OWNER(%rdx)     /* clear the owner field */
+       mov     %rcx, %rax              /* keep original state in %ecx for later evaluation */
+       and     $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), %rax /* released state: interlock, spin and promoted bits cleared */
 
-       xorb    %cl,%cl
-       xchgb   %cl,M_ILK
+       test    $(M_WAITERS_MSK), %eax
+       jz      2f                      /* waiter field is zero - nothing to decrement */
+       dec     %eax                    /* decrement waiter count */
+2:     
+       mov     %eax, M_STATE(%rdx)     /* since I own the interlock, I don't need an atomic update */
 
-       ENABLE_PREEMPTION(%eax)
+#if    MACH_LDEBUG
+       /* perform lock statistics after drop to prevent delay */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       test    %rax, %rax
+       jz      1f
+       decl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-       EMARF
+       test    $(M_PROMOTED_MSK | M_WAITERS_MSK), %ecx /* tested on the pre-release state kept in %ecx */
+       jz      3f                      /* neither set - no wakeup callout needed */
+
+       LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, %rcx)  /* %rcx (pre-release state) is handed to the callout */
+3:     
+       PREEMPTION_ENABLE
+
+       cmp     %rdx, %rdi              /* %rdx differs from %rdi only after the Llmu_ext indirection */
+       jne     4f                      /* indirect - use the EXT lockstat exit */
+
+       leave
+#if    CONFIG_DTRACE
+       /* Dtrace: LS_LCK_MTX_UNLOCK_RELEASE */
+       LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point)
+       ret                             /* NOTE(review): presumably patched when the probe is enabled so control reaches LOCKSTAT_RECORD - confirm */
+       /* inherit lock pointer in %rdx from above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, %rdx)
+#endif
        ret
+4:     
+       leave
+#if    CONFIG_DTRACE
+       /* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */
+       LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point)
+       ret                             /* NOTE(review): presumably patched when the probe is enabled so control reaches LOCKSTAT_RECORD - confirm */
+       /* inherit lock pointer in %rdx from above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, %rdx)
+#endif
+       ret
+
+
+Llmu_busy_disabled:                    /* cmpxchg failed after PREEMPTION_DISABLE: undo it before spinning */
+       PREEMPTION_ENABLE
+Llmu_busy:                             /* re-read the state and retry from Llmu_mutex */
+       PAUSE
+       mov     M_STATE(%rdx), %ecx
+       jmp     Llmu_mutex
+
+Llmu_ext:                              /* indirect mutex: continue on the lock reached through M_PTR */
+       mov     M_PTR(%rdx), %rdx
+       mov     M_OWNER(%rdx), %rax
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx
+       CHECK_UNLOCK(%rcx, %rax)        /* NOTE(review): presumably checks that the current thread is the recorded owner - confirm */
+       mov     M_STATE(%rdx), %ecx     /* reload state; %rcx was used as scratch above */
+       jmp     Llmu_chktype
+
 
        
-ENTRY(_disable_preemption)
-#if    MACH_RT
-       _DISABLE_PREEMPTION(%eax)
-#endif /* MACH_RT */
-       ret
+LEAF_ENTRY(lck_mtx_ilk_try_lock)       /* single attempt to take only the interlock bit: %rax = 1 on success, 0 on failure */
+       mov     %rdi, %rdx              /* fetch lock pointer - no indirection here */
 
-ENTRY(_enable_preemption)
-#if    MACH_RT
-#if    MACH_ASSERT
-       movl    $ CPD_PREEMPTION_LEVEL,%eax
-       cmpl    $0,%gs:(%eax)
-       jg      1f
-       pushl   %gs:(%eax)
-       pushl   $2f
-       call    EXT(panic)
-       hlt
-       .data
-2:     String  "_enable_preemption: preemption_level(%d)  < 0!"
-       .text
-1:
-#endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION(%eax)
-#endif /* MACH_RT */
-       ret
+       mov     M_STATE(%rdx), %ecx     /* snapshot the lock state word */
 
-ENTRY(_enable_preemption_no_check)
-#if    MACH_RT
-#if    MACH_ASSERT
-       movl    $ CPD_PREEMPTION_LEVEL,%eax
-       cmpl    $0,%gs:(%eax)
-       jg      1f
-       pushl   $2f
-       call    EXT(panic)
-       hlt
-       .data
-2:     String  "_enable_preemption_no_check: preemption_level <= 0!"
-       .text
-1:
-#endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION_NO_CHECK(%eax)
-#endif /* MACH_RT */
-       ret
+       test    $(M_ILOCKED_MSK), %ecx  /* can't have the interlock yet */
+       jnz     3f                      /* already held - fail without touching preemption */
+
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK), %ecx  /* new state: same bits plus the interlock */
+
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     2f                      /* return failure after re-enabling preemption */
+
+       mov     $1, %rax                /* return success with preemption disabled */
+       LEAF_RET
+2:     
+       PREEMPTION_ENABLE               /* need to re-enable preemption */
+3:     
+       xor     %rax, %rax              /* return failure */
+       LEAF_RET
        
+
+LEAF_ENTRY(lck_mtx_ilk_unlock)         /* clear the interlock bit, then re-enable preemption */
+       mov     %rdi, %rdx              /* fetch lock pointer - no indirection here */
+
+       andl    $(~M_ILOCKED_MSK), M_STATE(%rdx)        /* no lock prefix here; NOTE(review): presumably safe because the caller holds the interlock - confirm */
+
+       PREEMPTION_ENABLE               /* need to re-enable preemption */
+
+       LEAF_RET
+
        
-ENTRY(_mp_disable_preemption)
-#if    MACH_RT && NCPUS > 1
-       _DISABLE_PREEMPTION(%eax)
-#endif /* MACH_RT && NCPUS > 1*/
-       ret
+LEAF_ENTRY(lck_mtx_lock_grab_mutex)    /* single attempt to take interlock + mutex; %rax = 1 (interlock still held, no PREEMPTION_ENABLE on that path) or 0 */
+       mov     %rdi, %rdx              /* fetch lock pointer - no indirection here */
 
-ENTRY(_mp_enable_preemption)
-#if    MACH_RT && NCPUS > 1
-#if    MACH_ASSERT
-       movl    $ CPD_PREEMPTION_LEVEL,%eax
-       cmpl    $0,%gs:(%eax)
-       jg      1f
-       pushl   %gs:(%eax)
-       pushl   $2f
-       call    EXT(panic)
-       hlt
-       .data
-2:     String  "_mp_enable_preemption: preemption_level (%d) <= 0!"
-       .text
-1:
-#endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION(%eax)
-#endif /* MACH_RT && NCPUS > 1 */
-       ret
-
-ENTRY(_mp_enable_preemption_no_check)
-#if    MACH_RT && NCPUS > 1
-#if    MACH_ASSERT
-       movl    $ CPD_PREEMPTION_LEVEL,%eax
-       cmpl    $0,%gs:(%eax)
-       jg      1f
-       pushl   $2f
-       call    EXT(panic)
-       hlt
-       .data
-2:     String  "_mp_enable_preemption_no_check: preemption_level <= 0!"
-       .text
+       mov     M_STATE(%rdx), %ecx     /* snapshot the lock state word */
+
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* can't have the mutex yet */
+       jnz     3f                      /* busy - fail without touching preemption */
+
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* new state: interlock and mutex both held */
+
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     2f                              /* lost the race: re-enable preemption and return failure (no retry) */
+
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
 1:
-#endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION_NO_CHECK(%eax)
-#endif /* MACH_RT && NCPUS > 1 */
-       ret
-       
+#endif /* MACH_LDEBUG */
+
+       mov     $1, %rax                /* return success */
+       LEAF_RET
+2:                                             
+       PREEMPTION_ENABLE               /* undo the disable taken before the failed cmpxchg */
+3:
+       xor     %rax, %rax      /* return failure */
+       LEAF_RET
        
-ENTRY(i_bit_set)
-       movl    S_ARG0,%edx
-       movl    S_ARG1,%eax
-       lock
-       bts     %dl,(%eax)
-       ret
 
-ENTRY(i_bit_clear)
-       movl    S_ARG0,%edx
-       movl    S_ARG1,%eax
-       lock
-       btr     %dl,(%eax)
-       ret
 
-ENTRY(bit_lock)
-       movl    S_ARG0,%ecx
-       movl    S_ARG1,%eax
+LEAF_ENTRY(lck_mtx_lock_mark_destroyed)        /* overwrite the lock state with MUTEX_DESTROYED, spinning until the interlock can be taken */
+       mov     %rdi, %rdx              /* lock pointer; M_PTR is not followed in this routine */
 1:
+       mov     M_STATE(%rdx), %ecx     /* (re)read the state on every pass of the loop */
+       cmp     $(MUTEX_IND), %ecx      /* Is this an indirect mutex? */
+       jne     2f
+
+       movl    $(MUTEX_DESTROYED), M_STATE(%rdx)       /* convert to destroyed state */
+       jmp     3f                      /* indirect case: plain store, no interlock taken */
+2:     
+       test    $(M_ILOCKED_MSK), %rcx  /* have to wait for interlock to clear */
+       jnz     5f
+
+       PREEMPTION_DISABLE
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK), %ecx  /* new state: same bits plus the interlock */
        lock
-       bts     %ecx,(%eax)
-       jb      1b
-       ret
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     4f                      /* branch on failure to spin loop */
+       movl    $(MUTEX_DESTROYED), M_STATE(%rdx)       /* convert to destroyed state */
+       PREEMPTION_ENABLE
+3:
+       LEAF_RET                        /* return with M_ILOCKED set */
+4:
+       PREEMPTION_ENABLE               /* undo the disable taken before the failed cmpxchg */
+5:
+       PAUSE
+       jmp     1b                      /* reload the state and try again */
+
+LEAF_ENTRY(preemption_underflow_panic) /* panic with the preemption-level-underflow message below */
+       FRAME
+       incl    %gs:CPU_PREEMPTION_LEVEL        /* NOTE(review): presumably undoes the decrement that underflowed before panicking - confirm */
+       ALIGN_STACK()
+       LOAD_STRING_ARG0(16f)           /* argument is the message string defined at label 16 */
+       CALL_PANIC()
+       hlt                             /* NOTE(review): reached only if CALL_PANIC returns, presumably never - confirm */
+       .data
+16:    String  "Preemption level underflow, possible cause unlocking an unlocked mutex or spinlock"
+       .text
 
-ENTRY(bit_lock_try)
-       movl    S_ARG0,%ecx
-       movl    S_ARG1,%eax
-       lock
-       bts     %ecx,(%eax)
-       jb      bit_lock_failed
-       ret                     /* %eax better not be null ! */
-bit_lock_failed:
-       xorl    %eax,%eax
-       ret
 
-ENTRY(bit_unlock)
-       movl    S_ARG0,%ecx
-       movl    S_ARG1,%eax
-       lock
-       btr     %ecx,(%eax)
-       ret