]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/i386_lock.s
xnu-2050.22.13.tar.gz
[apple/xnu.git] / osfmk / i386 / i386_lock.s
index b5153c73dbfd25907f0c42464f59ef4f513c727e..6b34073c68bc7d3b9b733bf813d26c0d206d3606 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <i386/eflags.h>
 #include <i386/trap.h>
 #include <config_dtrace.h>
-
+#include <i386/mp.h>
+       
 #include "assym.s"
 
 #define        PAUSE           rep; nop
 
+#include <i386/pal_lock_asm.h>
+
 /*
  *     When performance isn't the only concern, it's
  *     nice to build stack frames...
  */
-#define        BUILD_STACK_FRAMES   (GPROF || \
-                               ((MACH_LDEBUG || ETAP_LOCK_TRACE) && MACH_KDB))
+#define        BUILD_STACK_FRAMES   (GPROF)
 
 #if    BUILD_STACK_FRAMES
 
        ret
 
 
-#define        M_ILK           (%edx)
-#define        M_LOCKED        MUTEX_LOCKED(%edx)
-#define        M_WAITERS       MUTEX_WAITERS(%edx)
-#define        M_PROMOTED_PRI  MUTEX_PROMOTED_PRI(%edx)
-#define M_ITAG         MUTEX_ITAG(%edx)
-#define M_PTR          MUTEX_PTR(%edx)
-#if    MACH_LDEBUG
-#define        M_TYPE          MUTEX_TYPE(%edx)
-#define        M_PC            MUTEX_PC(%edx)
-#define        M_THREAD        MUTEX_THREAD(%edx)
-#endif /* MACH_LDEBUG */
+/* For x86_64, the varargs ABI requires that %al indicate
+ * how many SSE register contain arguments. In our case, 0 */
+#if __i386__
+#define ALIGN_STACK()          subl $8, %esp; andl     $0xFFFFFFF0, %esp ;
+#define LOAD_STRING_ARG0(label)        movl $##label, (%esp) ;
+#define LOAD_ARG1(x)           mov  x, 4(%esp) ;
+#define LOAD_PTR_ARG1(x)       mov  x, 4(%esp) ;
+#define CALL_PANIC()           call EXT(panic) ;
+#else
+#define ALIGN_STACK()          and  $0xFFFFFFFFFFFFFFF0, %rsp ;
+#define LOAD_STRING_ARG0(label)        leaq label(%rip), %rdi ;
+#define LOAD_ARG1(x)           mov x, %esi ;
+#define LOAD_PTR_ARG1(x)       mov x, %rsi ;
+#define CALL_PANIC()           xorb %al,%al ; call EXT(panic) ;
+#endif
 
-#include <i386/mp.h>
-#define        CX(addr,reg)    addr(,reg,4)
+#define        CHECK_UNLOCK(current, owner)                            \
+       cmp     current, owner                          ;       \
+       je      1f                                      ;       \
+       ALIGN_STACK()                                   ;       \
+       LOAD_STRING_ARG0(2f)                            ;       \
+       CALL_PANIC()                                    ;       \
+       hlt                                             ;       \
+       .data                                           ;       \
+2:     String  "Mutex unlock attempted from non-owner thread"; \
+       .text                                           ;       \
+1:
 
 #if    MACH_LDEBUG
 /*
 #define        CHECK_MUTEX_TYPE()                                      \
        cmpl    $ MUTEX_TAG,M_TYPE                      ;       \
        je      1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
+       ALIGN_STACK()                                   ;       \
+       LOAD_STRING_ARG0(2f)                            ;       \
+       CALL_PANIC()                                    ;       \
        hlt                                             ;       \
        .data                                           ;       \
 2:     String  "not a mutex!"                          ;       \
  */
 #if    MACH_RT
 #define CHECK_PREEMPTION_LEVEL()                               \
+       cmpl    $0,%gs:CPU_HIBERNATE                    ;       \
+       jne     1f                                      ;       \
        cmpl    $0,%gs:CPU_PREEMPTION_LEVEL             ;       \
        je      1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
+       ALIGN_STACK()                                   ;       \
+       movl    %gs:CPU_PREEMPTION_LEVEL, %eax          ;       \
+       LOAD_ARG1(%eax)                                 ;       \
+       LOAD_STRING_ARG0(2f)                            ;       \
+       CALL_PANIC()                                    ;       \
        hlt                                             ;       \
        .data                                           ;       \
-2:     String  "preemption_level != 0!"                ;       \
+2:     String  "preemption_level(%d) != 0!"            ;       \
        .text                                           ;       \
 1:
 #else  /* MACH_RT */
 #define        CHECK_PREEMPTION_LEVEL()
 #endif /* MACH_RT */
 
-#define        CHECK_NO_SIMPLELOCKS()                                  \
-       cmpl    $0,%gs:CPU_SIMPLE_LOCK_COUNT            ;       \
-       je      1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
-       hlt                                             ;       \
-       .data                                           ;       \
-2:     String  "simple_locks_held!"                    ;       \
-       .text                                           ;       \
-1:
-
-/* 
- * Verifies return to the correct thread in "unlock" situations.
- */
-#define        CHECK_THREAD(thd)                                       \
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx              ;       \
-       testl   %ecx,%ecx                               ;       \
-       je      1f                                      ;       \
-       cmpl    %ecx,thd                                ;       \
-       je      1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
-       hlt                                             ;       \
-       .data                                           ;       \
-2:     String  "wrong thread!"                         ;       \
-       .text                                           ;       \
-1:
-
-#define        CHECK_MYLOCK(thd)                                       \
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx              ;       \
-       testl   %ecx,%ecx                               ;       \
-       je      1f                                      ;       \
-       cmpl    %ecx,thd                                ;       \
+#define        CHECK_MYLOCK(current, owner)                            \
+       cmp     current, owner                          ;       \
        jne     1f                                      ;       \
-       pushl   $2f                                     ;       \
-       call    EXT(panic)                              ;       \
+       ALIGN_STACK()                                   ;       \
+       LOAD_STRING_ARG0(2f)                            ;       \
+       CALL_PANIC()                                    ;       \
        hlt                                             ;       \
        .data                                           ;       \
-2:     String  "mylock attempt!"                       ;       \
+2:     String  "Attempt to recursively lock a non-recursive lock";     \
        .text                                           ;       \
 1:
 
-#define        METER_SIMPLE_LOCK_LOCK(reg)                             \
-       pushl   reg                                     ;       \
-       call    EXT(meter_simple_lock)                  ;       \
-       popl    reg
-
-#define        METER_SIMPLE_LOCK_UNLOCK(reg)                           \
-       pushl   reg                                     ;       \
-       call    EXT(meter_simple_unlock)                ;       \
-       popl    reg
-
 #else  /* MACH_LDEBUG */
 #define        CHECK_MUTEX_TYPE()
-#define        CHECK_SIMPLE_LOCK_TYPE
-#define        CHECK_THREAD(thd)
 #define CHECK_PREEMPTION_LEVEL()
-#define        CHECK_NO_SIMPLELOCKS()
 #define        CHECK_MYLOCK(thd)
-#define        METER_SIMPLE_LOCK_LOCK(reg)
-#define        METER_SIMPLE_LOCK_UNLOCK(reg)
 #endif /* MACH_LDEBUG */
 
-
 #define PREEMPTION_DISABLE                             \
        incl    %gs:CPU_PREEMPTION_LEVEL
-       
-       
+
+#define        PREEMPTION_LEVEL_DEBUG 1        
+#if    PREEMPTION_LEVEL_DEBUG
+#define        PREEMPTION_ENABLE                               \
+       decl    %gs:CPU_PREEMPTION_LEVEL        ;       \
+       js      17f                             ;       \
+       jnz     19f                             ;       \
+       testl   $AST_URGENT,%gs:CPU_PENDING_AST ;       \
+       jz      19f                             ;       \
+       PUSHF                                   ;       \
+       testl   $EFL_IF, S_PC                   ;       \
+       jz      18f                             ;       \
+       POPF                                    ;       \
+       int     $(T_PREEMPT)                    ;       \
+       jmp     19f                             ;       \
+17:                                                    \
+       call    _preemption_underflow_panic     ;       \
+18:                                                    \
+       POPF                                    ;       \
+19:
+#else
 #define        PREEMPTION_ENABLE                               \
        decl    %gs:CPU_PREEMPTION_LEVEL        ;       \
-       jne     9f                              ;       \
-       pushf                                   ;       \
-       testl   $ EFL_IF,(%esp)                 ;       \
-       je      8f                              ;       \
-       cli                                     ;       \
-       movl    %gs:CPU_PENDING_AST,%eax        ;       \
-       testl   $ AST_URGENT,%eax               ;       \
-       je      8f                              ;       \
-       movl    %gs:CPU_INTERRUPT_LEVEL,%eax    ;       \
-       testl   %eax,%eax                       ;       \
-       jne     8f                              ;       \
-       popf                                    ;       \
+       jnz     19f                             ;       \
+       testl   $AST_URGENT,%gs:CPU_PENDING_AST ;       \
+       jz      19f                             ;       \
+       PUSHF                                   ;       \
+       testl   $EFL_IF, S_PC                   ;       \
+       jz      18f                             ;       \
+       POPF                                    ;       \
        int     $(T_PREEMPT)                    ;       \
-       jmp     9f                              ;       \
-8:                                                     \
-       popf                                    ;       \
-9:     
+       jmp     19f                             ;       \
+18:                                                    \
+       POPF                                    ;       \
+19:
+#endif
 
-       
 
 #if    CONFIG_DTRACE
+
+       .globl  _lockstat_probe
+       .globl  _lockstat_probemap
+
+/*
+ * LOCKSTAT_LABEL creates a dtrace symbol which contains
+ * a pointer into the lock code function body. At that
+ * point is a "ret" instruction that can be patched into
+ * a "nop"
+ */
+
+#if defined(__i386__)
+
 #define        LOCKSTAT_LABEL(lab) \
        .data                           ;\
        .globl  lab                     ;\
        .text                           ;\
        9:
 
-       .globl  _lockstat_probe
-       .globl  _lockstat_probemap
-
 #define        LOCKSTAT_RECORD(id, lck) \
        push    %ebp                                    ;       \
        mov     %esp,%ebp                               ;       \
 9:     leave
        /* ret - left to subsequent code, e.g. return values */
 
-#define        LOCKSTAT_RECORD2(id, lck, arg) \
-       push    %ebp                                    ;       \
-       mov     %esp,%ebp                               ;       \
-       sub     $0x38,%esp      /* size of dtrace_probe args */ ; \
-       movl    _lockstat_probemap + (id * 4),%eax      ;       \
-       test    %eax,%eax                               ;       \
-       je      9f                                      ;       \
-       movl    $0,36(%esp)                             ;       \
-       movl    $0,40(%esp)                             ;       \
-       movl    $0,28(%esp)                             ;       \
-       movl    $0,32(%esp)                             ;       \
-       movl    $0,20(%esp)                             ;       \
-       movl    $0,24(%esp)                             ;       \
-       movl    $0,12(%esp)                             ;       \
-       movl    $0,16(%esp)                             ;       \
-       movl    lck,4(%esp)     /* copy lock pointer to arg 1 */ ; \
-       movl    arg,8(%esp)                             ;       \
-       movl    %eax,(%esp)                             ;       \
-       call    *_lockstat_probe                        ;       \
+#elif defined(__x86_64__)
+#define        LOCKSTAT_LABEL(lab) \
+       .data                                       ;\
+       .globl  lab                                 ;\
+       lab:                                        ;\
+       .quad 9f                                    ;\
+       .text                                       ;\
+       9:
+
+#define LOCKSTAT_RECORD(id, lck) \
+       push    %rbp                                ;       \
+       mov     %rsp,%rbp                           ;       \
+       movl    _lockstat_probemap + (id * 4)(%rip),%eax ;  \
+       test    %eax,%eax                           ;       \
+       je              9f                          ;       \
+       mov             lck, %rsi                   ;       \
+       mov             %rax, %rdi                  ;       \
+       mov             $0, %rdx                    ;       \
+       mov             $0, %rcx                    ;       \
+       mov             $0, %r8                     ;       \
+       mov             $0, %r9                     ;       \
+       call    *_lockstat_probe(%rip)              ;       \
 9:     leave
        /* ret - left to subsequent code, e.g. return values */
+#else
+#error Unsupported architecture
 #endif
+#endif /* CONFIG_DTRACE */
 
+/*
+ * For most routines, the hw_lock_t pointer is loaded into a
+ * register initially, and then either a byte or register-sized
+ * word is loaded/stored to the pointer
+ */
+#if defined(__i386__)
+#define        HW_LOCK_REGISTER        %edx
+#define        LOAD_HW_LOCK_REGISTER mov L_ARG0, HW_LOCK_REGISTER
+#define        HW_LOCK_THREAD_REGISTER %ecx
+#define        LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER
+#define        HW_LOCK_MOV_WORD        movl
+#define        HW_LOCK_EXAM_REGISTER   %eax
+#elif defined(__x86_64__)
+#define        HW_LOCK_REGISTER        %rdi
+#define        LOAD_HW_LOCK_REGISTER
+#define        HW_LOCK_THREAD_REGISTER %rcx
+#define        LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER
+#define        HW_LOCK_MOV_WORD        movq
+#define        HW_LOCK_EXAM_REGISTER   %rax
+#else
+#error Unsupported architecture
+#endif
 
 /*
  *     void hw_lock_init(hw_lock_t)
  *     Initialize a hardware lock.
  */
 LEAF_ENTRY(hw_lock_init)
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-       movl    $0,(%edx)               /* clear the lock */
+       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
+       HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER)         /* clear the lock */
        LEAF_RET
 
 
 /*
- *     void hw_lock_byte_init(uint8_t *)
+ *     void hw_lock_byte_init(volatile uint8_t *)
  *
  *     Initialize a hardware byte lock.
  */
 LEAF_ENTRY(hw_lock_byte_init)
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-       movb    $0,(%edx)               /* clear the lock */
+       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
+       movb $0, (HW_LOCK_REGISTER)             /* clear the lock */
        LEAF_RET
 
 /*
@@ -347,15 +375,15 @@ LEAF_ENTRY(hw_lock_byte_init)
  *     MACH_RT:  also return with preemption disabled.
  */
 LEAF_ENTRY(hw_lock_lock)
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
+       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
+       LOAD_HW_LOCK_THREAD_REGISTER    /* get thread pointer */
+       
        PREEMPTION_DISABLE
 1:
-       movl    (%edx), %eax
-       testl   %eax,%eax               /* lock locked? */
+       mov     (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER
+       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER             /* lock locked? */
        jne     3f                      /* branch if so */
-       lock; cmpxchgl  %ecx,(%edx)     /* try to acquire the HW lock */
+       lock; cmpxchg   HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)      /* try to acquire the HW lock */
        jne     3f
        movl    $1,%eax                 /* In case this was a timeout call */
        LEAF_RET                        /* if yes, then nothing left to do */
@@ -371,14 +399,14 @@ LEAF_ENTRY(hw_lock_lock)
  */
 
 LEAF_ENTRY(hw_lock_byte_lock)
-       movl    L_ARG0,%edx             /* Load lock pointer */
+       LOAD_HW_LOCK_REGISTER           /* Load lock pointer */
        PREEMPTION_DISABLE
        movl    $1, %ecx                /* Set lock value */
 1:
-       movb    (%edx), %al             /* Load byte at address */
+       movb    (HW_LOCK_REGISTER), %al         /* Load byte at address */
        testb   %al,%al                 /* lock locked? */
        jne     3f                      /* branch if so */
-       lock; cmpxchgb  %cl,(%edx)      /* attempt atomic compare exchange */
+       lock; cmpxchg   %cl,(HW_LOCK_REGISTER)  /* attempt atomic compare exchange */
        jne     3f
        LEAF_RET                        /* if yes, then nothing left to do */
 3:
@@ -393,17 +421,19 @@ LEAF_ENTRY(hw_lock_byte_lock)
  */
 LEAF_ENTRY(hw_lock_to)
 1:
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
+       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
+       LOAD_HW_LOCK_THREAD_REGISTER
+
        /*
         * Attempt to grab the lock immediately
         * - fastpath without timeout nonsense.
         */
        PREEMPTION_DISABLE
-       movl    (%edx), %eax
-       testl   %eax,%eax               /* lock locked? */
+
+       mov     (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER
+       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER             /* lock locked? */
        jne     2f                      /* branch if so */
-       lock; cmpxchgl  %ecx,(%edx)     /* try to acquire the HW lock */
+       lock; cmpxchg   HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)      /* try to acquire the HW lock */
        jne     2f                      /* branch on failure */
        movl    $1,%eax
        LEAF_RET
@@ -415,42 +445,74 @@ LEAF_ENTRY(hw_lock_to)
         * and then spin re-checking the lock but pausing
         * every so many (INNER_LOOP_COUNT) spins to check for timeout.
         */
+#if __i386__
        movl    L_ARG1,%ecx             /* fetch timeout */
        push    %edi
        push    %ebx
        mov     %edx,%edi
 
-       rdtsc                           /* read cyclecount into %edx:%eax */
        lfence
+       rdtsc                           /* read cyclecount into %edx:%eax */
        addl    %ecx,%eax               /* fetch and timeout */
        adcl    $0,%edx                 /* add carry */
        mov     %edx,%ecx
        mov     %eax,%ebx               /* %ecx:%ebx is the timeout expiry */
+       mov     %edi, %edx              /* load lock back into %edx */
+#else
+       push    %r9
+       lfence
+       rdtsc                           /* read cyclecount into %edx:%eax */
+       shlq    $32, %rdx
+       orq     %rdx, %rax              /* load 64-bit quantity into %rax */
+       addq    %rax, %rsi              /* %rsi is the timeout expiry */
+#endif
+       
 4:
        /*
         * The inner-loop spin to look for the lock being freed.
         */
-       mov     $(INNER_LOOP_COUNT),%edx
+#if __i386__
+       mov     $(INNER_LOOP_COUNT),%edi
+#else
+       mov     $(INNER_LOOP_COUNT),%r9
+#endif
 5:
        PAUSE                           /* pause for hyper-threading */
-       movl    (%edi),%eax             /* spin checking lock value in cache */
-       testl   %eax,%eax
+       mov     (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER                /* spin checking lock value in cache */
+       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
        je      6f                      /* zero => unlocked, try to grab it */
-       decl    %edx                    /* decrement inner loop count */
+#if __i386__
+       decl    %edi                    /* decrement inner loop count */
+#else
+       decq    %r9                     /* decrement inner loop count */
+#endif
        jnz     5b                      /* time to check for timeout? */
-
+       
        /*
         * Here after spinning INNER_LOOP_COUNT times, check for timeout
         */
-       rdtsc                           /* cyclecount into %edx:%eax */
+#if __i386__
+       mov     %edx,%edi               /* Save %edx */
        lfence
-       cmpl    %ecx,%edx               /* compare high-order 32-bits */
+       rdtsc                           /* cyclecount into %edx:%eax */
+       xchg    %edx,%edi               /* cyclecount into %edi:%eax */
+       cmpl    %ecx,%edi               /* compare high-order 32-bits */
        jb      4b                      /* continue spinning if less, or */
        cmpl    %ebx,%eax               /* compare low-order 32-bits */ 
        jb      4b                      /* continue if less, else bail */
        xor     %eax,%eax               /* with 0 return value */
        pop     %ebx
        pop     %edi
+#else
+       lfence
+       rdtsc                           /* cyclecount into %edx:%eax */
+       shlq    $32, %rdx
+       orq     %rdx, %rax              /* load 64-bit quantity into %rax */
+       cmpq    %rsi, %rax              /* compare to timeout */
+       jb      4b                      /* continue spinning if less, or */
+       xor     %rax,%rax               /* with 0 return value */
+       pop     %r9
+#endif
        LEAF_RET
 
 6:
@@ -458,12 +520,16 @@ LEAF_ENTRY(hw_lock_to)
         * Here to try to grab the lock that now appears to be free
         * after contention.
         */
-       movl    %gs:CPU_ACTIVE_THREAD,%edx
-       lock; cmpxchgl  %edx,(%edi)     /* try to acquire the HW lock */
+       LOAD_HW_LOCK_THREAD_REGISTER
+       lock; cmpxchg   HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)      /* try to acquire the HW lock */
        jne     4b                      /* no - spin again */
        movl    $1,%eax                 /* yes */
+#if __i386__
        pop     %ebx
        pop     %edi
+#else
+       pop     %r9
+#endif
        LEAF_RET
 
 /*
@@ -473,10 +539,11 @@ LEAF_ENTRY(hw_lock_to)
  *     MACH_RT:  release preemption level.
  */
 LEAF_ENTRY(hw_lock_unlock)
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-       movl    $0,(%edx)               /* clear the lock */
+       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
+       HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER)         /* clear the lock */
        PREEMPTION_ENABLE
        LEAF_RET
+
 /*
  *     void hw_lock_byte_unlock(uint8_t *lock_byte)
  *
@@ -485,28 +552,8 @@ LEAF_ENTRY(hw_lock_unlock)
  */
 
 LEAF_ENTRY(hw_lock_byte_unlock)
-       movl    L_ARG0,%edx             /* Load lock pointer */
-       movb    $0,(%edx)               /* Clear the lock byte */
-       PREEMPTION_ENABLE
-       LEAF_RET
-       
-/*
- *     void i386_lock_unlock_with_flush(hw_lock_t)
- *
- *     Unconditionally release lock, followed by a cacheline flush of
- *     the line corresponding to the lock dword. This routine is currently
- *     used with certain locks which are susceptible to lock starvation,
- *     minimizing cache affinity for lock acquisitions. A queued spinlock
- *     or other mechanism that ensures fairness would obviate the need
- *     for this routine, but ideally few or no spinlocks should exhibit
- *     enough contention to require such measures.
- *     MACH_RT:  release preemption level.
- */
-LEAF_ENTRY(i386_lock_unlock_with_flush)
-       movl    L_ARG0,%edx             /* Fetch lock pointer */
-       movl    $0,(%edx)               /* Clear the lock */
-       mfence                          /* Serialize prior stores */
-       clflush (%edx)                  /* Write back and invalidate line */
+       LOAD_HW_LOCK_REGISTER           /* Load lock pointer */
+       movb $0, (HW_LOCK_REGISTER)             /* Clear the lock byte */
        PREEMPTION_ENABLE
        LEAF_RET
 
@@ -515,16 +562,16 @@ LEAF_ENTRY(i386_lock_unlock_with_flush)
  *     MACH_RT:  returns with preemption disabled on success.
  */
 LEAF_ENTRY(hw_lock_try)
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
+       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
+       LOAD_HW_LOCK_THREAD_REGISTER
        PREEMPTION_DISABLE
-       movl    (%edx),%eax
-       testl   %eax,%eax
+
+       mov     (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER
+       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
        jne     1f
-       lock; cmpxchgl  %ecx,(%edx)     /* try to acquire the HW lock */
+       lock; cmpxchg   HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)      /* try to acquire the HW lock */
        jne     1f
-
+       
        movl    $1,%eax                 /* success */
        LEAF_RET
 
@@ -539,63 +586,69 @@ LEAF_ENTRY(hw_lock_try)
  *     N.B.  Racy, of course.
  */
 LEAF_ENTRY(hw_lock_held)
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-
-       movl    (%edx),%eax             /* check lock value */
-       testl   %eax,%eax
+       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
+       mov     (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER                /* check lock value */
+       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
        movl    $1,%ecx
        cmovne  %ecx,%eax               /* 0 => unlocked, 1 => locked */
        LEAF_RET
 
-LEAF_ENTRY(mutex_init)
-       movl    L_ARG0,%edx             /* fetch lock pointer */
-       xorl    %eax,%eax
-       movl    %eax,M_ILK              /* clear interlock */
-       movl    %eax,M_LOCKED           /* clear locked flag */
-       movw    %ax,M_WAITERS           /* init waiter count */
-       movw    %ax,M_PROMOTED_PRI
-
-#if    MACH_LDEBUG
-       movl    $ MUTEX_TAG,M_TYPE      /* set lock type */
-       movl    %eax,M_PC               /* init caller pc */
-       movl    %eax,M_THREAD           /* and owning thread */
-#endif
-
-       LEAF_RET
 
 /*
  * Reader-writer lock fastpaths. These currently exist for the
- * shared lock acquire and release paths (where they reduce overhead
- * considerably)--more can be added as necessary (DRK).
+ * shared lock acquire, the exclusive lock acquire, the shared to
+ * exclusive upgrade and the release paths (where they reduce overhead
+ * considerably) -- these are by far the most frequently used routines
+ *
+ * The following should reflect the layout of the bitfield embedded within
+ * the lck_rw_t structure (see i386/locks.h).
  */
+#define LCK_RW_INTERLOCK       (0x1 << 16)
+
+#define LCK_RW_PRIV_EXCL       (0x1 << 24)
+#define LCK_RW_WANT_UPGRADE    (0x2 << 24)
+#define LCK_RW_WANT_WRITE      (0x4 << 24)
+#define LCK_R_WAITING          (0x8 << 24)
+#define LCK_W_WAITING          (0x10 << 24)
+
+#define LCK_RW_SHARED_MASK     (0xffff)
 
 /*
- * These should reflect the layout of the bitfield embedded within
- * the lck_rw_t structure (see i386/locks.h).
+ * For most routines, the lck_rw_t pointer is loaded into a
+ * register initially, and the flags bitfield loaded into another
+ * register and examined
  */
-#define LCK_RW_INTERLOCK 0x1
-#define LCK_RW_WANT_UPGRADE 0x2
-#define LCK_RW_WANT_WRITE 0x4
-#define LCK_R_WAITING 0x8
-#define LCK_W_WAITING 0x10
-
-#define        RW_LOCK_SHARED_MASK ((LCK_RW_INTERLOCK<<16) |   \
-       ((LCK_RW_WANT_UPGRADE|LCK_RW_WANT_WRITE) << 24))
+#if defined(__i386__)
+#define        LCK_RW_REGISTER %edx
+#define        LOAD_LCK_RW_REGISTER mov S_ARG0, LCK_RW_REGISTER
+#define        LCK_RW_FLAGS_REGISTER   %eax
+#define        LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER
+#elif defined(__x86_64__)
+#define        LCK_RW_REGISTER %rdi
+#define        LOAD_LCK_RW_REGISTER
+#define        LCK_RW_FLAGS_REGISTER   %eax
+#define        LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER
+#else
+#error Unsupported architecture
+#endif
+       
+#define        RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
 /*
- *             void lck_rw_lock_shared(lck_rw_t*)
+ *     void lck_rw_lock_shared(lck_rw_t *)
  *
  */
-
 Entry(lck_rw_lock_shared)
-       movl    S_ARG0, %edx
+       LOAD_LCK_RW_REGISTER
 1:
-       movl    (%edx), %eax            /* Load state bitfield and interlock */
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield and interlock */
        testl   $(RW_LOCK_SHARED_MASK), %eax    /* Eligible for fastpath? */
        jne     3f
-       movl    %eax, %ecx
+
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
        incl    %ecx                            /* Increment reader refcount */
        lock
-       cmpxchgl %ecx, (%edx)                   /* Attempt atomic exchange */
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
        jne     2f
 
 #if    CONFIG_DTRACE
@@ -606,11 +659,12 @@ Entry(lck_rw_lock_shared)
         */
        LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point)
        ret
-       /* Fall thru when patched, counting on lock pointer in %edx  */
-       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, %edx)
+       /*
+       Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER
+       */
+       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
 #endif
        ret
-
 2:
        PAUSE
        jmp     1b
@@ -618,1458 +672,1344 @@ Entry(lck_rw_lock_shared)
        jmp     EXT(lck_rw_lock_shared_gen)
 
 
+       
+#define        RW_TRY_LOCK_SHARED_MASK (LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
 /*
- *             lck_rw_type_t lck_rw_done(lck_rw_t*)
+ *     void lck_rw_try_lock_shared(lck_rw_t *)
  *
  */
-
-.data
-rwl_release_error_str:
-       .asciz  "Releasing non-exclusive RW lock without a reader refcount!"
-.text
-
-#define RW_LOCK_RELEASE_MASK ((LCK_RW_INTERLOCK<<16) | \
-       ((LCK_RW_WANT_UPGRADE|LCK_RW_WANT_WRITE|LCK_R_WAITING|LCK_W_WAITING) << 24))
-Entry(lck_rw_done)
-       movl    S_ARG0, %edx
+Entry(lck_rw_try_lock_shared)
+       LOAD_LCK_RW_REGISTER
 1:
-       movl    (%edx), %eax            /* Load state bitfield and interlock */
-       testl   $(RW_LOCK_RELEASE_MASK), %eax   /* Eligible for fastpath? */
-       jne     3f
-       movl    %eax, %ecx
-       /* Assert refcount */
-       testl   $(0xFFFF), %ecx
-       jne     5f
-       movl    $(rwl_release_error_str), S_ARG0
-       jmp     EXT(panic)
-5:
-       decl    %ecx                    /* Decrement reader count */
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield and interlock */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     2f
+       testl   $(RW_TRY_LOCK_SHARED_MASK), %eax
+       jne     3f                      /* lock is busy */
+
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       incl    %ecx                            /* Increment reader refcount */
        lock
-       cmpxchgl %ecx, (%edx)
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
        jne     2f
-       movl    $(RW_SHARED), %eax      /* Indicate that the lock was shared */
+
 #if    CONFIG_DTRACE
-       /* Dtrace lockstat probe: LS_RW_DONE_RELEASE as reader */
-       LOCKSTAT_LABEL(_lck_rw_done_lockstat_patch_point)
-       ret
+       movl    $1, %eax
        /*
-        * Note: Dtrace's convention is 0 ==> reader, which is
-        * a different absolute value than $(RW_SHARED)
-        * %edx contains the lock address already from the above
+        * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
+        * Implemented by swapping between return and no-op instructions.
+        * See bsd/dev/dtrace/lockstat.c.
         */
-       LOCKSTAT_RECORD2(LS_LCK_RW_DONE_RELEASE, %edx, $0)
-       movl    $(RW_SHARED), %eax      /* Indicate that the lock was shared */
+       LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point)
+       ret
+    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
+    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
 #endif
+       movl    $1, %eax                        /* return TRUE */
        ret
-
 2:
        PAUSE
        jmp     1b
 3:
-       jmp     EXT(lck_rw_done_gen)
-
-
-NONLEAF_ENTRY2(mutex_lock_spin,_mutex_lock_spin)
-
-       movl    B_ARG0,%edx             /* fetch lock pointer */
-       pushf                           /* save interrupt state */
-
-       CHECK_MUTEX_TYPE()
-       CHECK_NO_SIMPLELOCKS()
-       CHECK_PREEMPTION_LEVEL()
+       xorl    %eax, %eax
+       ret
 
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Lmls_ilk_loop           /* no, go spin */
-Lmls_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Lmls_ilk_fail           /* branch on failure to spin loop */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Lml_fail                /* yes, fall back to a normal mutex lock */
-       movl    $(MUTEX_LOCKED_AS_SPIN),M_LOCKED        /* indicate ownership as a spin lock */
        
-#if    MACH_LDEBUG
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-       movl    %ecx,M_THREAD
-       movl    B_PC,%ecx
-       movl    %ecx,M_PC
-#endif
-       PREEMPTION_DISABLE
-       popf                            /* restore interrupt state */
-       leave                           /* return with the interlock held */
-#if    CONFIG_DTRACE
-       LOCKSTAT_LABEL(_mutex_lock_spin_lockstat_patch_point)
+#define        RW_LOCK_EXCLUSIVE_HELD  (LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)
+/*
+ *     int lck_rw_grab_shared(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_grab_shared)
+       LOAD_LCK_RW_REGISTER
+1:
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield and interlock */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     5f
+       testl   $(RW_LOCK_EXCLUSIVE_HELD), %eax 
+       jne     3f
+2:     
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       incl    %ecx                            /* Increment reader refcount */
+       lock
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       jne     4f
+
+       movl    $1, %eax                        /* return success */
        ret
-       /* %edx contains the lock address from above */
-       LOCKSTAT_RECORD(LS_MUTEX_LOCK_SPIN_ACQUIRE, %edx)
-#endif
+3:
+       testl   $(LCK_RW_SHARED_MASK), %eax
+       je      4f
+       testl   $(LCK_RW_PRIV_EXCL), %eax
+       je      2b
+4:
+       xorl    %eax, %eax                      /* return failure */
        ret
-       
-Lmls_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state on stack */
-
-Lmls_ilk_loop:
+5:
        PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Lmls_retry              /* yes, go for it */
-       jmp     Lmls_ilk_loop           /* no, keep spinning */
-
-
-NONLEAF_ENTRY2(mutex_lock,_mutex_lock)
-
-       movl    B_ARG0,%edx             /* fetch lock pointer */
-       pushf                           /* save interrupt state */
-
-       CHECK_MUTEX_TYPE()
-       CHECK_NO_SIMPLELOCKS()
-       CHECK_PREEMPTION_LEVEL()
-
-       movl    M_ILK,%eax              /* is interlock held */
-       testl   %eax,%eax
-       jne     Lml_ilk_loop            /* yes, go do the spin loop */
-Lml_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
+       jmp     1b
 
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Lml_ilk_fail            /* branch on failure to spin loop */
 
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Lml_fail                /* yes, we lose */
-Lml_acquire:
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-       movl    %ecx,M_LOCKED
+       
+#define        RW_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | \
+                               LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
+/*
+ *     void lck_rw_lock_exclusive(lck_rw_t*)
+ *
+ */
+Entry(lck_rw_lock_exclusive)
+       LOAD_LCK_RW_REGISTER
+1:
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and shared count */
+       testl   $(RW_LOCK_EXCLUSIVE_MASK), %eax         /* Eligible for fastpath? */
+       jne     3f                                      /* no, go slow */
 
-#if    MACH_LDEBUG
-       movl    %ecx,M_THREAD
-       movl    B_PC,%ecx
-       movl    %ecx,M_PC
-#endif
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Lml_waiters             /* yes, more work to do */
-Lml_return:
-       xorl    %eax,%eax
-       movl    %eax,M_ILK
+       movl    %eax, %ecx                              /* original value in %eax for cmpxchgl */
+       orl     $(LCK_RW_WANT_WRITE), %ecx
+       lock
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       jne     2f
 
-       popf                            /* restore interrupt state */
-       leave
 #if    CONFIG_DTRACE
-       LOCKSTAT_LABEL(_mutex_lock_lockstat_patch_point)
+       /*
+        * Dtrace lockstat event: LS_LCK_RW_LOCK_EXCL_ACQUIRE
+        * Implemented by swapping between return and no-op instructions.
+        * See bsd/dev/dtrace/lockstat.c.
+        */
+       LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point)
        ret
-       /* %edx still contains the lock pointer */
-       LOCKSTAT_RECORD(LS_MUTEX_LOCK_ACQUIRE, %edx)
+    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
+    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER)
 #endif
        ret
-
-       /*
-        * We got the mutex, but there are waiters.  Update information
-        * on waiters.
-        */
-Lml_waiters:
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx
-       call    EXT(lck_mtx_lock_acquire)
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex address */
-       jmp     Lml_return
-
-Lml_restart:
-Lml_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state on stack */
-
-Lml_ilk_loop:
+2:
        PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Lml_retry               /* yes, go try to grab it */
-       jmp     Lml_ilk_loop            /* no - keep spinning */
-
-Lml_fail:
-       /*
-        * Check if the owner is on another processor and therefore
-        * we should try to spin before blocking.
-        */
-       testl   $(OnProc),ACT_SPF(%ecx)
-       jz      Lml_block
+       jmp     1b
+3:
+       jmp     EXT(lck_rw_lock_exclusive_gen)
 
-       /*
-        * Here if owner is on another processor:
-        *  - release the interlock
-        *  - spin on the holder until release or timeout
-        *  - in either case re-acquire the interlock
-        *  - if released, acquire it
-        *  - otherwise drop thru to block.
-        */
-       xorl    %eax,%eax
-       movl    %eax,M_ILK              /* zero interlock */
-       popf
-       pushf                           /* restore interrupt state */
-
-       push    %edx                    /* lock address */
-       call    EXT(lck_mtx_lock_spinwait)      /* call out to do spinning */
-       addl    $4,%esp
-       movl    B_ARG0,%edx             /* refetch mutex address */
-
-       /* Re-acquire interlock - interrupts currently enabled */
-       movl    M_ILK,%eax              /* is interlock held */
-       testl   %eax,%eax
-       jne     Lml_ilk_reloop          /* yes, go do the spin loop */
-Lml_reget_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Lml_ilk_refail          /* branch on failure to spin loop */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex free? */
-       je      Lml_acquire             /* yes, acquire */
-       
-Lml_block:
-       CHECK_MYLOCK(M_THREAD)
-       pushl   M_LOCKED
-       pushl   %edx                    /* push mutex address */
-       call    EXT(lck_mtx_lock_wait)  /* wait for the lock */
-       addl    $8,%esp                 /* returns with interlock dropped */
-       movl    B_ARG0,%edx             /* refetch mutex address */
-       jmp     Lml_restart             /* and start over */
-
-Lml_ilk_refail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state on stack */
-
-Lml_ilk_reloop:
-       PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Lml_reget_retry         /* yes, go try to grab it */
-       jmp     Lml_ilk_reloop          /* no - keep spinning */
 
        
+#define        RW_TRY_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
+/*
+ *     void lck_rw_try_lock_exclusive(lck_rw_t *)
+ *
+ *             Tries to get a write lock.
+ *
+ *             Returns FALSE if the lock is not held on return.
+ */
+Entry(lck_rw_try_lock_exclusive)
+       LOAD_LCK_RW_REGISTER
+1:
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and shared count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     2f
+       testl   $(RW_TRY_LOCK_EXCLUSIVE_MASK), %eax
+       jne     3f                                      /* can't get it */
 
-NONLEAF_ENTRY2(mutex_try_spin,_mutex_try_spin) 
+       movl    %eax, %ecx                              /* original value in %eax for cmpxchgl */
+       orl     $(LCK_RW_WANT_WRITE), %ecx
+       lock
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       jne     2f
 
-       movl    B_ARG0,%edx             /* fetch lock pointer */
-       pushf                           /* save interrupt state */
+#if    CONFIG_DTRACE
+       movl    $1, %eax
+       /*
+        * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
+        * Implemented by swapping between return and no-op instructions.
+        * See bsd/dev/dtrace/lockstat.c.
+        */
+       LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point)
+       ret
+    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
+    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER)
+#endif
+       movl    $1, %eax                        /* return TRUE */
+       ret
+2:
+       PAUSE
+       jmp     1b
+3:
+       xorl    %eax, %eax                      /* return FALSE */
+       ret     
 
-       CHECK_MUTEX_TYPE()
-       CHECK_NO_SIMPLELOCKS()
 
-       movl    M_ILK,%eax
-       testl   %eax,%eax               /* is the interlock held? */
-       jne     Lmts_ilk_loop           /* yes, go to spin loop */
-Lmts_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
 
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Lmts_ilk_fail           /* branch on failure to spin loop */
+/*
+ *     void lck_rw_lock_shared_to_exclusive(lck_rw_t*)
+ *
+ *     fastpath can be taken if
+ *     the current rw_shared_count == 1
+ *     AND the interlock is clear
+ *     AND RW_WANT_UPGRADE is not set
+ *
+ *     note that RW_WANT_WRITE could be set, but will not
+ *     be indicative of an exclusive hold since we have
+ *     a read count on the lock that we have not yet released
+ *     we can blow by that state since the lck_rw_lock_exclusive
+ *     function will block until rw_shared_count == 0 and 
+ *     RW_WANT_UPGRADE is clear... it does this check behind
+ *     the interlock which we are also checking for
+ *
+ *     to make the transition we must be able to atomically
+ *     set RW_WANT_UPGRADE and get rid of the read count we hold
+ */
+Entry(lck_rw_lock_shared_to_exclusive)
+       LOAD_LCK_RW_REGISTER
+1:
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and shared count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     7f
+       testl   $(LCK_RW_WANT_UPGRADE), %eax
+       jne     2f
 
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Lmt_fail                /* yes, we lose */
-Lmts_acquire:
-       movl    $(MUTEX_LOCKED_AS_SPIN),M_LOCKED        /* indicate ownership as a spin lock */
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       orl     $(LCK_RW_WANT_UPGRADE), %ecx    /* ask for WANT_UPGRADE */
+       decl    %ecx                            /* and shed our read count */
+       lock
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       jne     7f
+                                               /* we now own the WANT_UPGRADE */
+       testl   $(LCK_RW_SHARED_MASK), %ecx     /* check to see if all of the readers are drained */
+       jne     8f                              /* if not, we need to go wait */
 
-#if    MACH_LDEBUG
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-       movl    %ecx,M_THREAD
-       movl    B_PC,%ecx
-       movl    %ecx,M_PC
-#endif
-       PREEMPTION_DISABLE              /* no, return with interlock held */
-       popf                            /* restore interrupt state */
-       movl    $1,%eax
-       leave
 #if    CONFIG_DTRACE
-       LOCKSTAT_LABEL(_mutex_try_spin_lockstat_patch_point)
+       movl    $1, %eax
+       /*
+        * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
+        * Implemented by swapping between return and no-op instructions.
+        * See bsd/dev/dtrace/lockstat.c.
+        */
+       LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point)
        ret
-       /* %edx inherits the lock pointer from above */
-       LOCKSTAT_RECORD(LS_MUTEX_TRY_SPIN_ACQUIRE, %edx)
-       movl    $1,%eax
+    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
+    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, LCK_RW_REGISTER)
 #endif
+       movl    $1, %eax                        /* return success */
        ret
-
-Lmts_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state on stack */
-
-Lmts_ilk_loop:
+       
+2:                                             /* someone else already holds WANT_UPGRADE */
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       decl    %ecx                            /* shed our read count */
+       testl   $(LCK_RW_SHARED_MASK), %ecx
+       jne     3f                              /* we were the last reader */
+       andl    $(~LCK_W_WAITING), %ecx         /* so clear the wait indicator */
+3:     
+       lock
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       jne     7f
+
+#if __i386__
+       pushl   %eax                            /* go check to see if we need to */
+       push    %edx                            /* wakeup anyone */
+       call    EXT(lck_rw_lock_shared_to_exclusive_failure)
+       addl    $8, %esp
+#else
+       mov     %eax, %esi                      /* put old flags as second arg */
+                                               /* lock is alread in %rdi */
+       call    EXT(lck_rw_lock_shared_to_exclusive_failure)
+#endif
+       ret                                     /* and pass the failure return along */ 
+7:
        PAUSE
-       /*
-        * need to do this check outside of the interlock in
-        * case this lock is held as a simple lock which means
-        * we won't be able to take the interlock
-        */
-       movl    M_LOCKED,%eax
-       testl   %eax,%eax               /* is the mutex locked? */
-       jne     Lmt_fail_no_ilk         /* yes, go return failure */
-
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Lmts_retry              /* yes, go try to grab it */
-       jmp     Lmts_ilk_loop           /* keep spinning */
-
+       jmp     1b
+8:
+       jmp     EXT(lck_rw_lock_shared_to_exclusive_success)
 
 
-NONLEAF_ENTRY2(mutex_try,_mutex_try)   
-
-       movl    B_ARG0,%edx             /* fetch lock pointer */
-       pushf                           /* save interrupt state */
-
-       CHECK_MUTEX_TYPE()
-       CHECK_NO_SIMPLELOCKS()
-
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Lmt_ilk_loop            /* yes, go try to grab it */
-Lmt_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Lmt_ilk_fail            /* branch on failure to spin loop */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Lmt_fail                /* yes, we lose */
-Lmt_acquire:
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-       movl    %ecx,M_LOCKED
-
-#if    MACH_LDEBUG
-       movl    %ecx,M_THREAD
-       movl    B_PC,%ecx
-       movl    %ecx,M_PC
+       
+       .cstring
+rwl_release_error_str:
+       .asciz  "Releasing non-exclusive RW lock without a reader refcount!"
+       .text
+       
+/*
+ *     lck_rw_type_t lck_rw_done(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_done)
+       LOAD_LCK_RW_REGISTER
+1:
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and reader count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     7f                              /* wait for interlock to clear */
+
+       movl    %eax, %ecx                      /* keep original value in %eax for cmpxchgl */
+       testl   $(LCK_RW_SHARED_MASK), %ecx     /* if reader count == 0, must be exclusive lock */
+       je      2f
+       decl    %ecx                            /* Decrement reader count */
+       testl   $(LCK_RW_SHARED_MASK), %ecx     /* if reader count has now gone to 0, check for waiters */
+       je      4f
+       jmp     6f
+2:     
+       testl   $(LCK_RW_WANT_UPGRADE), %ecx
+       je      3f
+       andl    $(~LCK_RW_WANT_UPGRADE), %ecx
+       jmp     4f
+3:     
+       testl   $(LCK_RW_WANT_WRITE), %ecx
+       je      8f                              /* lock is not 'owned', go panic */
+       andl    $(~LCK_RW_WANT_WRITE), %ecx
+4:     
+       /*
+        * test the original values to match what
+        * lck_rw_done_gen is going to do to determine
+        * which wakeups need to happen...
+        *
+        * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
+        */
+       testl   $(LCK_W_WAITING), %eax
+       je      5f
+       andl    $(~LCK_W_WAITING), %ecx
+
+       testl   $(LCK_RW_PRIV_EXCL), %eax
+       jne     6f
+5:     
+       andl    $(~LCK_R_WAITING), %ecx
+6:     
+       lock
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       jne     7f
+
+#if __i386__
+       pushl   %eax
+       push    %edx
+       call    EXT(lck_rw_done_gen)
+       addl    $8, %esp
+#else
+       mov     %eax,%esi       /* old flags in %rsi */
+                               /* lock is in %rdi already */
+       call    EXT(lck_rw_done_gen)    
 #endif
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Lmt_waiters             /* yes, more work to do */
-Lmt_return:
-       xorl    %eax,%eax
-       movl    %eax,M_ILK
-       popf                            /* restore interrupt state */
-
-       movl    $1,%eax
-       leave
-#if    CONFIG_DTRACE
-       LOCKSTAT_LABEL(_mutex_try_lockstat_patch_point)
        ret
-       /* inherit the lock pointer in %edx from above */
-       LOCKSTAT_RECORD(LS_MUTEX_TRY_LOCK_ACQUIRE, %edx)
-       movl    $1,%eax
+7:
+       PAUSE
+       jmp     1b
+8:
+       ALIGN_STACK()
+       LOAD_STRING_ARG0(rwl_release_error_str)
+       CALL_PANIC()
+       
+
+       
+/*
+ *     lck_rw_type_t lck_rw_lock_exclusive_to_shared(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_lock_exclusive_to_shared)
+       LOAD_LCK_RW_REGISTER
+1:
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and reader count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     6f                              /* wait for interlock to clear */
+
+       movl    %eax, %ecx                      /* keep original value in %eax for cmpxchgl */
+       incl    %ecx                            /* Increment reader count */
+
+       testl   $(LCK_RW_WANT_UPGRADE), %ecx
+       je      2f
+       andl    $(~LCK_RW_WANT_UPGRADE), %ecx
+       jmp     3f
+2:     
+       andl    $(~LCK_RW_WANT_WRITE), %ecx
+3:     
+       /*
+        * test the original values to match what
+        * lck_rw_lock_exclusive_to_shared_gen is going to do to determine
+        * which wakeups need to happen...
+        *
+        * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
+        */
+       testl   $(LCK_W_WAITING), %eax
+       je      4f
+       testl   $(LCK_RW_PRIV_EXCL), %eax
+       jne     5f
+4:     
+       andl    $(~LCK_R_WAITING), %ecx
+5:     
+       lock
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       jne     6f
+
+#if __i386__
+       pushl   %eax
+       push    %edx
+       call    EXT(lck_rw_lock_exclusive_to_shared_gen)
+       addl    $8, %esp
+#else
+       mov     %eax,%esi
+       call    EXT(lck_rw_lock_exclusive_to_shared_gen)
 #endif
        ret
-
-Lmt_waiters:
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx
-       call    EXT(lck_mtx_lock_acquire)
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex address */
-       jmp     Lmt_return
-
-Lmt_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state on stack */
-
-Lmt_ilk_loop:
+6:
        PAUSE
-       /*
-        * need to do this check outside of the interlock in
-        * case this lock is held as a simple lock which means
-        * we won't be able to take the interlock
-        */
-       movl    M_LOCKED,%eax           /* get lock owner */
-       testl   %eax,%eax               /* is the mutex locked? */
-       jne     Lmt_fail_no_ilk         /* yes, go return failure */
-
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Lmt_retry               /* yes, go try to grab it */
-       jmp     Lmt_ilk_loop            /* no - keep spinning */
-
-Lmt_fail:
-       xorl    %eax,%eax
-       movl    %eax,M_ILK
-
-Lmt_fail_no_ilk:
-       xorl    %eax,%eax
-       popf                            /* restore interrupt state */
-       NONLEAF_RET
-
-
+       jmp     1b
 
-LEAF_ENTRY(mutex_convert_spin)
-       movl    L_ARG0,%edx             /* fetch lock pointer */
 
-       movl    M_LOCKED,%ecx           /* is this the spin variant of the mutex */
-       cmpl    $(MUTEX_LOCKED_AS_SPIN),%ecx
-       jne     Lmcs_exit               /* already owned as a mutex, just return */
 
-       movl    M_ILK,%ecx              /* convert from spin version to mutex */
-       movl    %ecx,M_LOCKED           /* take control of the mutex */
+/*
+ *     int lck_rw_grab_want(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_grab_want)
+       LOAD_LCK_RW_REGISTER
+1:
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and reader count */
+       testl   $(LCK_RW_INTERLOCK), %eax
+       jne     3f                              /* wait for interlock to clear */
+       testl   $(LCK_RW_WANT_WRITE), %eax      /* want_write has been grabbed by someone else */
+       jne     2f                              /* go return failure */
        
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Lmcs_waiters            /* yes, more work to do */
-
-Lmcs_return:
-       xorl    %ecx,%ecx
-       movl    %ecx,M_ILK              /* clear interlock */
-       PREEMPTION_ENABLE
-Lmcs_exit:
-#if    CONFIG_DTRACE
-       LOCKSTAT_LABEL(_mutex_convert_spin_lockstat_patch_point)
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
+       orl     $(LCK_RW_WANT_WRITE), %ecx
+       lock
+       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       jne     2f
+                                               /* we now own want_write */
+       movl    $1, %eax                        /* return success */
        ret
-       /* inherit %edx from above */
-       LOCKSTAT_RECORD(LS_MUTEX_CONVERT_SPIN_ACQUIRE, %edx)
-#endif
+2:
+       xorl    %eax, %eax                      /* return failure */
        ret
+3:
+       PAUSE
+       jmp     1b
 
+       
+#define        RW_LOCK_SHARED_OR_UPGRADE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE)
+/*
+ *     int lck_rw_held_read_or_upgrade(lck_rw_t *)
+ *
+ */
+Entry(lck_rw_held_read_or_upgrade)
+       LOAD_LCK_RW_REGISTER
+       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and reader count */
+       andl    $(RW_LOCK_SHARED_OR_UPGRADE_MASK), %eax
+       ret
 
-Lmcs_waiters:
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx
-       call    EXT(lck_mtx_lock_acquire)
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex address */
-       jmp     Lmcs_return
 
        
+/*
+ * N.B.: On x86, statistics are currently recorded for all indirect mutexes.
+ * Also, only the acquire attempt count (GRP_MTX_STAT_UTIL) is maintained
+ * as a 64-bit quantity (this matches the existing PowerPC implementation,
+ * and the new x86 specific statistics are also maintained as 32-bit
+ * quantities).
+ *
+ *
+ * Enable this preprocessor define to record the first miss alone
+ * By default, we count every miss, hence multiple misses may be
+ * recorded for a single lock acquire attempt via lck_mtx_lock
+ */
+#undef LOG_FIRST_MISS_ALONE    
 
-NONLEAF_ENTRY(mutex_unlock)
-       movl    B_ARG0,%edx             /* fetch lock pointer */
+/*
+ * This preprocessor define controls whether the R-M-W update of the
+ * per-group statistics elements are atomic (LOCK-prefixed)
+ * Enabled by default.
+ */
+#define ATOMIC_STAT_UPDATES 1
 
-       movl    M_LOCKED,%ecx           /* is this the spin variant of the mutex */
-       cmpl    $(MUTEX_LOCKED_AS_SPIN),%ecx
-       jne     Lmu_enter               /* no, go treat like a real mutex */
+#if defined(ATOMIC_STAT_UPDATES)
+#define LOCK_IF_ATOMIC_STAT_UPDATES lock
+#else
+#define LOCK_IF_ATOMIC_STAT_UPDATES
+#endif /* ATOMIC_STAT_UPDATES */
 
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Lmus_wakeup             /* yes, more work to do */
 
-Lmus_drop_ilk: 
-       xorl    %ecx,%ecx
-       movl    %ecx,M_LOCKED           /* yes, clear the spin indicator */
-       movl    %ecx,M_ILK              /* release the interlock */
-       PREEMPTION_ENABLE               /* and re-enable preemption */
-       leave
-#if    CONFIG_DTRACE
-       LOCKSTAT_LABEL(_mutex_unlock_lockstat_patch_point)
-       ret
-       /* inherit lock pointer in %edx from above */
-       LOCKSTAT_RECORD(LS_MUTEX_UNLOCK_RELEASE, %edx)
+/*
+ * For most routines, the lck_mtx_t pointer is loaded into a
+ * register initially, and the owner field checked for indirection.
+ * Eventually the lock owner is loaded into a register and examined.
+ */
+
+#define M_OWNER                MUTEX_OWNER
+#define M_PTR          MUTEX_PTR
+#define M_STATE                MUTEX_STATE     
+       
+#if defined(__i386__)
+
+#define LMTX_ARG0      B_ARG0
+#define LMTX_ARG1      B_ARG1
+#define        LMTX_REG        %edx
+#define LMTX_A_REG     %eax
+#define LMTX_A_REG32   %eax
+#define LMTX_C_REG     %ecx
+#define LMTX_C_REG32   %ecx
+#define LMTX_RET_REG   %eax
+#define LMTX_RET_REG32 %eax
+#define LMTX_LGROUP_REG        %esi
+#define LMTX_SSTATE_REG        %edi    
+#define        LOAD_LMTX_REG(arg)      mov arg, LMTX_REG
+#define LMTX_CHK_EXTENDED      cmp LMTX_REG, LMTX_ARG0
+#define LMTX_ASSERT_OWNED      cmpl $(MUTEX_ASSERT_OWNED), LMTX_ARG1
+
+#define LMTX_ENTER_EXTENDED                                    \
+       mov     M_PTR(LMTX_REG), LMTX_REG               ;       \
+       push    LMTX_LGROUP_REG                         ;       \
+       push    LMTX_SSTATE_REG                         ;       \
+       xor     LMTX_SSTATE_REG, LMTX_SSTATE_REG        ;       \
+       mov     MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG    ;       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       addl    $1, GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG)  ;       \
+       jnc     11f                                     ;       \
+       incl    GRP_MTX_STAT_UTIL+4(LMTX_LGROUP_REG)    ;       \
+11:
+
+#define LMTX_EXIT_EXTENDED             \
+       pop     LMTX_SSTATE_REG ;       \
+       pop     LMTX_LGROUP_REG
+
+
+#define        LMTX_CHK_EXTENDED_EXIT                  \
+       cmp     LMTX_REG, LMTX_ARG0     ;       \
+       je      12f                     ;       \
+       pop     LMTX_SSTATE_REG         ;       \
+       pop     LMTX_LGROUP_REG         ;       \
+12:    
+       
+       
+#if    LOG_FIRST_MISS_ALONE
+#define LMTX_UPDATE_MISS                                       \
+       test    $1, LMTX_SSTATE_REG                     ;       \
+       jnz     11f                                     ;       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)      ;       \
+       or      $1, LMTX_SSTATE_REG                     ;       \
+11:
+#else
+#define LMTX_UPDATE_MISS                                       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)
 #endif
-       ret
 
-Lmus_wakeup:
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx                    /* push mutex address */
-       call    EXT(lck_mtx_unlockspin_wakeup)  /* yes, wake a thread */
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex pointer */
-       jmp     Lmus_drop_ilk
+       
+#if    LOG_FIRST_MISS_ALONE
+#define LMTX_UPDATE_WAIT                                       \
+       test    $2, LMTX_SSTATE_REG                     ;       \
+       jnz     11f                                     ;       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)      ;       \
+       or      $2, LMTX_SSTATE_REG                     ;       \
+11:
+#else
+#define LMTX_UPDATE_WAIT                                       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)
+#endif
 
-Lmu_enter:
-       pushf                           /* save interrupt state */
+       
+/*
+ * Record the "direct wait" statistic, which indicates if a
+ * miss proceeded to block directly without spinning--occurs
+ * if the owner of the mutex isn't running on another processor
+ * at the time of the check.
+ */
+#define LMTX_UPDATE_DIRECT_WAIT                                        \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG)
 
-       CHECK_MUTEX_TYPE()
-       CHECK_THREAD(M_THREAD)
+       
+#define LMTX_CALLEXT1(func_name)       \
+       push    LMTX_REG        ;       \
+       push    LMTX_REG        ;       \
+       call    EXT(func_name)  ;       \
+       add     $4, %esp        ;       \
+       pop     LMTX_REG
+       
+#define LMTX_CALLEXT2(func_name, reg)  \
+       push    LMTX_REG        ;       \
+       push    reg             ;       \
+       push    LMTX_REG        ;       \
+       call    EXT(func_name)  ;       \
+       add     $8, %esp        ;       \
+       pop     LMTX_REG
+       
+#elif defined(__x86_64__)
+
+#define LMTX_ARG0      %rdi
+#define LMTX_ARG1      %rsi
+#define LMTX_REG_ORIG  %rdi
+#define        LMTX_REG        %rdx
+#define LMTX_A_REG     %rax
+#define LMTX_A_REG32   %eax
+#define LMTX_C_REG     %rcx
+#define LMTX_C_REG32   %ecx
+#define LMTX_RET_REG   %rax
+#define LMTX_RET_REG32 %eax
+#define LMTX_LGROUP_REG        %r10
+#define LMTX_SSTATE_REG        %r11    
+#define        LOAD_LMTX_REG(arg)      mov %rdi, %rdx
+#define LMTX_CHK_EXTENDED      cmp LMTX_REG, LMTX_REG_ORIG
+#define LMTX_ASSERT_OWNED      cmp $(MUTEX_ASSERT_OWNED), LMTX_ARG1
+
+#define LMTX_ENTER_EXTENDED                                    \
+       mov     M_PTR(LMTX_REG), LMTX_REG               ;       \
+       xor     LMTX_SSTATE_REG, LMTX_SSTATE_REG        ;       \
+       mov     MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG    ;       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incq    GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG)
+
+#define LMTX_EXIT_EXTENDED
+
+#define        LMTX_CHK_EXTENDED_EXIT
+
+
+#if    LOG_FIRST_MISS_ALONE
+#define LMTX_UPDATE_MISS                                       \
+       test    $1, LMTX_SSTATE_REG                     ;       \
+       jnz     11f                                     ;       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)      ;       \
+       or      $1, LMTX_SSTATE_REG                     ;       \
+11:
+#else
+#define LMTX_UPDATE_MISS                                       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)
+#endif
+       
 
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Lmu_ilk_loop            /* yes, go try to grab it */
-Lmu_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
+#if    LOG_FIRST_MISS_ALONE
+#define LMTX_UPDATE_WAIT                                       \
+       test    $2, LMTX_SSTATE_REG                     ;       \
+       jnz     11f                                     ;       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)      ;       \
+       or      $2, LMTX_SSTATE_REG                     ;       \
+11:
+#else
+#define LMTX_UPDATE_WAIT                                       \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)
+#endif
 
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Lmu_ilk_fail            /* branch on failure to spin loop */
 
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Lmu_wakeup              /* yes, more work to do */
+/*
+ * Record the "direct wait" statistic, which indicates if a
+ * miss proceeded to block directly without spinning--occurs
+ * if the owner of the mutex isn't running on another processor
+ * at the time of the check.
+ */
+#define LMTX_UPDATE_DIRECT_WAIT                                        \
+       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
+       incl    GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG)
 
-Lmu_doit:
-#if    MACH_LDEBUG
-       movl    $0,M_THREAD             /* disown thread */
-#endif
-       xorl    %ecx,%ecx
-       movl    %ecx,M_LOCKED           /* unlock the mutex */
-       movl    %ecx,M_ILK              /* release the interlock */
-       popf                            /* restore interrupt state */
-       leave
-#if    CONFIG_DTRACE
-       LOCKSTAT_LABEL(_mutex_unlock2_lockstat_patch_point)
-       ret
-       /* inherit %edx from above */
-       LOCKSTAT_RECORD(LS_MUTEX_UNLOCK_RELEASE, %edx)
+       
+#define LMTX_CALLEXT1(func_name)               \
+       LMTX_CHK_EXTENDED               ;       \
+       je      12f                     ;       \
+       push    LMTX_LGROUP_REG         ;       \
+       push    LMTX_SSTATE_REG         ;       \
+12:    push    LMTX_REG_ORIG           ;       \
+       push    LMTX_REG                ;       \
+       mov     LMTX_REG, LMTX_ARG0     ;       \
+       call    EXT(func_name)          ;       \
+       pop     LMTX_REG                ;       \
+       pop     LMTX_REG_ORIG           ;       \
+       LMTX_CHK_EXTENDED               ;       \
+       je      12f                     ;       \
+       pop     LMTX_SSTATE_REG         ;       \
+       pop     LMTX_LGROUP_REG         ;       \
+12:
+       
+#define LMTX_CALLEXT2(func_name, reg)          \
+       LMTX_CHK_EXTENDED               ;       \
+       je      12f                     ;       \
+       push    LMTX_LGROUP_REG         ;       \
+       push    LMTX_SSTATE_REG         ;       \
+12:    push    LMTX_REG_ORIG           ;       \
+       push    LMTX_REG                ;       \
+       mov     reg, LMTX_ARG1          ;       \
+       mov     LMTX_REG, LMTX_ARG0     ;       \
+       call    EXT(func_name)          ;       \
+       pop     LMTX_REG                ;       \
+       pop     LMTX_REG_ORIG           ;       \
+       LMTX_CHK_EXTENDED               ;       \
+       je      12f                     ;       \
+       pop     LMTX_SSTATE_REG         ;       \
+       pop     LMTX_LGROUP_REG         ;       \
+12:
+
+#else
+#error Unsupported architecture
 #endif
-       ret
 
-Lmu_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state on stack */
 
-Lmu_ilk_loop:
-       PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Lmu_retry               /* yes, go try to grab it */
-       jmp     Lmu_ilk_loop            /* no - keep spinning */
-
-Lmu_wakeup:
-       pushl   M_LOCKED
-       pushl   %edx                    /* push mutex address */
-       call    EXT(lck_mtx_unlock_wakeup)/* yes, wake a thread */
-       addl    $8,%esp
-       movl    B_ARG0,%edx             /* restore lock pointer */
-       jmp     Lmu_doit
+#define M_WAITERS_MSK          0x0000ffff
+#define M_PRIORITY_MSK         0x00ff0000
+#define M_ILOCKED_MSK          0x01000000
+#define M_MLOCKED_MSK          0x02000000
+#define M_PROMOTED_MSK         0x04000000
+#define M_SPIN_MSK             0x08000000
 
 /*
  *     void lck_mtx_assert(lck_mtx_t* l, unsigned int)
- *     void _mutex_assert(mutex_t, unsigned int)
  *     Takes the address of a lock, and an assertion type as parameters.
  *     The assertion can take one of two forms determine by the type
  *     parameter: either the lock is held by the current thread, and the
  *     type is LCK_MTX_ASSERT_OWNED, or it isn't and the type is
- *     LCK_MTX_ASSERT_NOT_OWNED. Calls panic on assertion failure.
+ *     LCK_MTX_ASSERT_NOTOWNED. Calls panic on assertion failure.
  *     
  */
 
-Entry(lck_mtx_assert)
-Entry(_mutex_assert)
-       movl    S_ARG0,%edx                     /* Load lock address */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx      /* Load current thread */
-
-       cmpl    $(MUTEX_IND),M_ITAG             /* Is this an indirect mutex? */
-       cmove   M_PTR,%edx                      /* If so, take indirection */
-
-       movl    M_LOCKED,%eax                   /* Load lock word */
-       cmpl    $(MUTEX_LOCKED_AS_SPIN),%eax    /* check for spin variant */
-       cmove   M_ILK,%eax                      /* yes, spin lock owner is in the interlock */
-
-       cmpl    $(MUTEX_ASSERT_OWNED),S_ARG1    /* Determine assert type */
+NONLEAF_ENTRY(lck_mtx_assert)
+        LOAD_LMTX_REG(B_ARG0)                          /* Load lock address */
+       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG       /* Load current thread */
+
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       cmp     $(MUTEX_IND), LMTX_C_REG32      /* Is this an indirect mutex? */
+       jne     0f
+       mov     M_PTR(LMTX_REG), LMTX_REG       /* If so, take indirection */
+0:     
+       mov     M_OWNER(LMTX_REG), LMTX_C_REG   /* Load owner */
+       LMTX_ASSERT_OWNED
        jne     2f                              /* Assert ownership? */
-       cmpl    %eax,%ecx                       /* Current thread match? */
+       cmp     LMTX_A_REG, LMTX_C_REG          /* Current thread match? */
        jne     3f                              /* no, go panic */
+       testl   $(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(LMTX_REG)
+       je      3f
 1:                                             /* yes, we own it */
-       ret                                     /* just return */
+       NONLEAF_RET
 2:
-       cmpl    %eax,%ecx                       /* Current thread match? */
+       cmp     LMTX_A_REG, LMTX_C_REG          /* Current thread match? */
        jne     1b                              /* No, return */
-       movl    %edx,S_ARG1                     /* Prep assertion failure */
-       movl    $(mutex_assert_owned_str),S_ARG0
+       ALIGN_STACK()
+       LOAD_PTR_ARG1(LMTX_REG)
+       LOAD_STRING_ARG0(mutex_assert_owned_str)
        jmp     4f
 3:
-       movl    %edx,S_ARG1                     /* Prep assertion failure */
-       movl    $(mutex_assert_not_owned_str),S_ARG0
+       ALIGN_STACK()
+       LOAD_PTR_ARG1(LMTX_REG)
+       LOAD_STRING_ARG0(mutex_assert_not_owned_str)
 4:
-       jmp     EXT(panic)
+       CALL_PANIC()
+
+
+lck_mtx_destroyed:
+       ALIGN_STACK()
+       LOAD_PTR_ARG1(LMTX_REG)
+       LOAD_STRING_ARG0(mutex_interlock_destroyed_str)
+       CALL_PANIC()
+       
 
 .data
 mutex_assert_not_owned_str:
        .asciz  "mutex (%p) not owned\n"
 mutex_assert_owned_str:
        .asciz  "mutex (%p) owned\n"
+mutex_interlock_destroyed_str:
+       .asciz  "trying to interlock destroyed mutex (%p)"
 .text
 
-/* This preprocessor define controls whether the R-M-W update of the
- * per-group statistics elements are atomic (LOCK-prefixed)
- * Enabled by default.
- */
-#define ATOMIC_STAT_UPDATES 1
-
-#if defined(ATOMIC_STAT_UPDATES)
-#define LOCK_IF_ATOMIC_STAT_UPDATES lock
-#else
-#define LOCK_IF_ATOMIC_STAT_UPDATES
-#endif /* ATOMIC_STAT_UPDATES */
 
 
 /*
  * lck_mtx_lock()
  * lck_mtx_try_lock()
- * lck_mutex_unlock()
+ * lck_mtx_unlock()
  * lck_mtx_lock_spin()
+ * lck_mtx_lock_spin_always()
  * lck_mtx_convert_spin()
- *
- * These are variants of mutex_lock(), mutex_try(), mutex_unlock()
- * mutex_lock_spin and mutex_convert_spin without
- * DEBUG checks (which require fields not present in lck_mtx_t's).
  */
-
+NONLEAF_ENTRY(lck_mtx_lock_spin_always)
+       LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
+       jmp     Llmls_avoid_check
+       
 NONLEAF_ENTRY(lck_mtx_lock_spin)
+       LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
 
-       movl    B_ARG0,%edx             /* fetch lock pointer */
-       pushf                           /* save interrupt state */
-
-       CHECK_NO_SIMPLELOCKS()
        CHECK_PREEMPTION_LEVEL()
+Llmls_avoid_check:
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32  /* is the interlock or mutex held */
+       jnz     Llmls_slow
+Llmls_try:                                     /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
 
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Llmls_eval_ilk          /* no, go see if indirect */
-Llmls_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Llmls_ilk_fail          /* branch on failure to spin loop */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Llml_fail               /* yes, fall back to a normal mutex */
-
-Llmls_acquire: 
-       movl    $(MUTEX_LOCKED_AS_SPIN),M_LOCKED        /* indicate ownership as a spin lock */
        PREEMPTION_DISABLE
-       popf                            /* restore interrupt state */
-       NONLEAF_RET                     /* return with the interlock held */
-
-Llmls_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state on stack */
-
-Llmls_ilk_loop:
-       PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Llmls_retry             /* yes - go try to grab it */
-
-       cmpl    $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */
-       jne     Llmls_ilk_loop          /* no - keep spinning  */
-
-       pushl   %edx
-       call    EXT(lck_mtx_interlock_panic)
-       /*
-        * shouldn't return from here, but just in case
-        */
-       popl    %edx
-       jmp     Llmls_ilk_loop
-
-
-Llmls_eval_ilk:
-       cmpl    $(MUTEX_IND),M_ITAG     /* Is this an indirect mutex? */
-       cmove   M_PTR,%edx              /* If so, take indirection */
-       jne     Llmls_ilk_loop          /* If not, go to spin loop */
-
-Llmls_lck_ext:
-       pushl   %esi                    /* Used to hold the lock group ptr */
-       pushl   %edi                    /* Used for stat update records */
-       movl    MUTEX_GRP(%edx),%esi    /* Load lock group */
-       xorl    %edi,%edi               /* Clear stat update records */
-       /* 64-bit increment of acquire attempt statistic (per-group) */
-       LOCK_IF_ATOMIC_STAT_UPDATES
-       addl    $1, GRP_MTX_STAT_UTIL(%esi)
-       jnc     1f
-       incl    GRP_MTX_STAT_UTIL+4(%esi)
-1:
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Llmls_ext_ilk_loop      /* no, go to spin loop */
-Llmls_ext_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl %ecx,M_ILK       /* atomic compare and exchange */
-       jne     Llmls_ext_ilk_fail      /* branch on failure to retry */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Llml_ext_fail           /* yes, we lose */
-
-       popl    %edi
-       popl    %esi
-       jmp     Llmls_acquire
-
-Llmls_ext_ilk_fail:
-       /*
-        * Slow path: call out to do the spinning.
-        */
-       movl    8(%esp),%ecx
-       pushl   %ecx
-       popf                            /* restore interrupt state */
-       
-Llmls_ext_ilk_loop:
-       PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Llmls_ext_retry         /* yes - go try to grab it */
-
-       cmpl    $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */
-       jne     Llmls_ext_ilk_loop              /* no - keep spinning  */
-
-       pushl   %edx
-       call    EXT(lck_mtx_interlock_panic)
-       /*
-        * shouldn't return from here, but just in case
-        */
-       popl    %edx
-       jmp     Llmls_ext_ilk_loop      /* no - keep spinning  */
-
-       
-
-NONLEAF_ENTRY(lck_mtx_lock)
-
-       movl    B_ARG0,%edx             /* fetch lock pointer */
-       pushf                           /* save interrupt state */
-
-       CHECK_NO_SIMPLELOCKS()
-       CHECK_PREEMPTION_LEVEL()
+       lock
+       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       jne     Llmls_busy_disabled
 
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Llml_eval_ilk           /* no, go see if indirect */
-Llml_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Llml_ilk_fail           /* branch on failure to spin loop */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Llml_fail               /* yes, we lose */
-Llml_acquire:
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-       movl    %ecx,M_LOCKED
-
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Lml_waiters             /* yes, more work to do */
-Llml_return:
-       xorl    %eax,%eax
-       movl    %eax,M_ILK
+       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
+       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of interlock */
+#if    MACH_LDEBUG
+       test    LMTX_A_REG, LMTX_A_REG
+       jz      1f
+       incl    TH_MUTEX_COUNT(LMTX_A_REG)      /* lock statistic */
+1:     
+#endif /* MACH_LDEBUG */
 
-       popf                            /* restore interrupt state */
+       LMTX_CHK_EXTENDED_EXIT
+       /* return with the interlock held and preemption disabled */
        leave
 #if    CONFIG_DTRACE
-       LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point)
+       LOCKSTAT_LABEL(_lck_mtx_lock_spin_lockstat_patch_point)
        ret
-       /* inherit lock pointer in %edx above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, %edx)
+       /* inherit lock pointer in LMTX_REG above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, LMTX_REG)
 #endif
        ret
 
-Llml_waiters:
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx
-       call    EXT(lck_mtx_lock_acquire)
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex address */
-       jmp     Llml_return
+Llmls_slow:    
+       test    $M_ILOCKED_MSK, LMTX_C_REG32            /* is the interlock held */
+       jz      Llml_contended                          /* no, must have been the mutex */
 
-Llml_restart:
-Llml_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state on stack */
+       cmp     $(MUTEX_DESTROYED), LMTX_C_REG32        /* check to see if its marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), LMTX_C_REG32              /* Is this an indirect mutex */
+       jne     Llmls_loop                              /* no... must be interlocked */
 
-Llml_ilk_loop:
-       PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Llml_retry              /* yes - go try to grab it */
+       LMTX_ENTER_EXTENDED
 
-       cmpl    $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */
-       jne     Llml_ilk_loop           /* no - keep spinning  */
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       test    $(M_SPIN_MSK), LMTX_C_REG32
+       jz      Llmls_loop1
 
-       pushl   %edx
-       call    EXT(lck_mtx_interlock_panic)
-       /*
-        * shouldn't return from here, but just in case
-        */
-       popl    %edx
-       jmp     Llml_ilk_loop           /* no - keep spinning  */
+       LMTX_UPDATE_MISS                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+Llmls_loop:
+       PAUSE
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+Llmls_loop1:
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
+       jz      Llmls_try
+       test    $(M_MLOCKED_MSK), LMTX_C_REG32
+       jnz     Llml_contended                          /* mutex owned by someone else, go contend for it */
+       jmp     Llmls_loop
+
+Llmls_busy_disabled:
+       PREEMPTION_ENABLE
+       jmp     Llmls_loop
 
-Llml_fail:
-       /*
-        * Check if the owner is on another processor and therefore
-        * we should try to spin before blocking.
-        */
-       testl   $(OnProc),ACT_SPF(%ecx)
-       jz      Llml_block
 
-       /*
-        * Here if owner is on another processor:
-        *  - release the interlock
-        *  - spin on the holder until release or timeout
-        *  - in either case re-acquire the interlock
-        *  - if released, acquire it
-        *  - otherwise drop thru to block.
-        */
-       xorl    %eax,%eax
-       movl    %eax,M_ILK              /* zero interlock */
-       popf
-       pushf                           /* restore interrupt state */
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx
-       call    EXT(lck_mtx_lock_spinwait)
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex address */
-
-       /* Re-acquire interlock */
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Llml_ilk_refail         /* no, go to spin loop */
-Llml_reget_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Llml_ilk_refail         /* branch on failure to retry */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex free? */
-       je      Llml_acquire            /* yes, acquire */
        
-Llml_block:
-       CHECK_MYLOCK(M_THREAD)
-       pushl   %edx                    /* save mutex address */
-       pushl   M_LOCKED
-       pushl   %edx                    /* push mutex address */
-       /*
-        * N.B.: lck_mtx_lock_wait is called here with interrupts disabled
-        * Consider reworking.
-        */
-       call    EXT(lck_mtx_lock_wait)  /* wait for the lock */
-       addl    $8,%esp
-       popl    %edx                    /* restore mutex address */
-       jmp     Llml_restart            /* and start over */
-
-Llml_ilk_refail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state on stack */
-
-Llml_ilk_reloop:
-       PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Llml_reget_retry        /* yes - go try to grab it */
+NONLEAF_ENTRY(lck_mtx_lock)
+       LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
 
-       cmpl    $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */
-       jne     Llml_ilk_reloop         /* no - keep spinning  */
+       CHECK_PREEMPTION_LEVEL()
 
-       pushl   %edx
-       call    EXT(lck_mtx_interlock_panic)
-       /*
-        * shouldn't return from here, but just in case
-        */
-       popl    %edx
-       jmp     Llml_ilk_reloop         /* no - keep spinning  */
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32  /* is the interlock or mutex held */
+       jnz     Llml_slow
+Llml_try:                                      /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
 
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       jne     Llml_busy_disabled
 
-Llml_eval_ilk:
-       cmpl    $(MUTEX_IND),M_ITAG     /* Is this an indirect mutex? */
-       cmove   M_PTR,%edx              /* If so, take indirection */
-       jne     Llml_ilk_loop           /* If not, go to spin loop */
+       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
+       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    LMTX_A_REG, LMTX_A_REG
+       jz      1f
+       incl    TH_MUTEX_COUNT(LMTX_A_REG)      /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-/*
- * Entry into statistics codepath for lck_mtx_lock:
- * EDX: real lock pointer
- * first dword on stack contains flags
- */
+       testl   $(M_WAITERS_MSK), M_STATE(LMTX_REG)
+       jz      Llml_finish
 
-/* Enable this preprocessor define to record the first miss alone
- * By default, we count every miss, hence multiple misses may be
- * recorded for a single lock acquire attempt via lck_mtx_lock
- */
-#undef LOG_FIRST_MISS_ALONE    
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
 
-/*
- * N.B.: On x86, statistics are currently recorded for all indirect mutexes.
- * Also, only the acquire attempt count (GRP_MTX_STAT_UTIL) is maintained
- * as a 64-bit quantity (this matches the existing PowerPC implementation,
- * and the new x86 specific statistics are also maintained as 32-bit
- * quantities).
- */
+Llml_finish:
+       andl    $(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
+       PREEMPTION_ENABLE
        
-Llml_lck_ext:
-       pushl   %esi                    /* Used to hold the lock group ptr */
-       pushl   %edi                    /* Used for stat update records */
-       movl    MUTEX_GRP(%edx),%esi    /* Load lock group */
-       xorl    %edi,%edi               /* Clear stat update records */
-       /* 64-bit increment of acquire attempt statistic (per-group) */
-       LOCK_IF_ATOMIC_STAT_UPDATES
-       addl    $1, GRP_MTX_STAT_UTIL(%esi)
-       jnc     1f
-       incl    GRP_MTX_STAT_UTIL+4(%esi)
-1:
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Llml_ext_ilk_loop       /* no, go to spin loop */
-Llml_ext_get_hw:
-       cli
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl %ecx,M_ILK       /* atomic compare and exchange */
-       jne     Llml_ext_ilk_fail       /* branch on failure to retry */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Llml_ext_fail           /* yes, we lose */
-
-Llml_ext_acquire:
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-       movl    %ecx,M_LOCKED
-
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Llml_ext_waiters        /* yes, more work to do */
-Llml_ext_return:
-       xorl    %eax,%eax
-       movl    %eax,M_ILK
+       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+       jne     2f
 
-       popl    %edi
-       popl    %esi
-       popf                            /* restore interrupt state */
+       leave
+#if    CONFIG_DTRACE
+       LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point)
+       ret
+       /* inherit lock pointer in LMTX_REG above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, LMTX_REG)
+#endif
+       ret
+2:     
+       LMTX_EXIT_EXTENDED
        leave
 #if    CONFIG_DTRACE
        LOCKSTAT_LABEL(_lck_mtx_lock_ext_lockstat_patch_point)
        ret
-       /* inherit lock pointer in %edx above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, %edx)
+       /* inherit lock pointer in LMTX_REG above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, LMTX_REG)
 #endif
        ret
 
-Llml_ext_waiters:
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx
-       call    EXT(lck_mtx_lock_acquire)
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex address */
-       jmp     Llml_ext_return
-
-Llml_ext_restart:
-Llml_ext_ilk_fail:
-       movl    8(%esp),%ecx
-       pushl   %ecx
-       popf                            /* restore interrupt state */
-
-Llml_ext_ilk_loop:
-       PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Llml_ext_get_hw         /* yes - go try to grab it */
+       
+Llml_slow:
+       test    $M_ILOCKED_MSK, LMTX_C_REG32            /* is the interlock held */
+       jz      Llml_contended                          /* no, must have been the mutex */
+       
+       cmp     $(MUTEX_DESTROYED), LMTX_C_REG32        /* check to see if its marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), LMTX_C_REG32              /* Is this an indirect mutex? */
+       jne     Llml_loop                               /* no... must be interlocked */
 
-       cmpl    $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */
-       jne     Llml_ext_ilk_loop       /* no - keep spinning  */
+       LMTX_ENTER_EXTENDED
 
-       pushl   %edx
-       call    EXT(lck_mtx_interlock_panic)
-       /*
-        * shouldn't return from here, but just in case
-        */
-       popl    %edx
-       jmp     Llml_ext_ilk_loop
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       test    $(M_SPIN_MSK), LMTX_C_REG32
+       jz      Llml_loop1
 
+       LMTX_UPDATE_MISS                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+Llml_loop:
+       PAUSE
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+Llml_loop1:
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
+       jz      Llml_try
+       test    $(M_MLOCKED_MSK), LMTX_C_REG32
+       jnz     Llml_contended                          /* mutex owned by someone else, go contend for it */
+       jmp     Llml_loop
+
+Llml_busy_disabled:
+       PREEMPTION_ENABLE
+       jmp     Llml_loop
 
-Llml_ext_fail:
-#ifdef LOG_FIRST_MISS_ALONE
-       testl   $1, %edi
-       jnz     1f
-#endif /* LOG_FIRST_MISS_ALONE */
-       /* Record that a lock acquire attempt missed (per-group statistic) */
-       LOCK_IF_ATOMIC_STAT_UPDATES
-       incl    GRP_MTX_STAT_MISS(%esi)
-#ifdef LOG_FIRST_MISS_ALONE
-       orl     $1, %edi
-#endif /* LOG_FIRST_MISS_ALONE */
-1:
-       /*
-        * Check if the owner is on another processor and therefore
-        * we should try to spin before blocking.
-        */
-       testl   $(OnProc),ACT_SPF(%ecx)
-       jnz     2f
-       /*
-        * Record the "direct wait" statistic, which indicates if a
-        * miss proceeded to block directly without spinning--occurs
-        * if the owner of the mutex isn't running on another processor
-        * at the time of the check.
-        */
-       LOCK_IF_ATOMIC_STAT_UPDATES
-       incl    GRP_MTX_STAT_DIRECT_WAIT(%esi)
-       jmp     Llml_ext_block
-2:
-       /*
-        * Here if owner is on another processor:
-        *  - release the interlock
-        *  - spin on the holder until release or timeout
-        *  - in either case re-acquire the interlock
-        *  - if released, acquire it
-        *  - otherwise drop thru to block.
-        */
-       xorl    %eax,%eax
-       movl    %eax,M_ILK              /* zero interlock */
-
-       pushl   8(%esp)                 /* Make another copy of EFLAGS image */
-       popf                            /* Restore interrupt state */
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx
-       call    EXT(lck_mtx_lock_spinwait)
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex address */
-
-       /* Re-acquire interlock */
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Llml_ext_ilk_refail     /* no, go to spin loop */
-Llml_ext_reget_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl %ecx,M_ILK       /* atomic compare and exchange */
-       jne     Llml_ext_ilk_refail     /* branch on failure to spin loop */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex free? */
-       je      Llml_ext_acquire        /* yes, acquire */
-       
-Llml_ext_block:
-       /* If we wanted to count waits just once per lock acquire, we'd
-        * skip over the stat update here
-        */
-       LOCK_IF_ATOMIC_STAT_UPDATES
-       /* Record that a lock miss proceeded to block */
-       incl    GRP_MTX_STAT_WAIT(%esi) 
-1:
-       CHECK_MYLOCK(M_THREAD)
-       pushl   %edx                    /* save mutex address */
-       pushl   M_LOCKED
-       pushl   %edx                    /* push mutex address */
-       /*
-        * N.B.: lck_mtx_lock_wait is called here with interrupts disabled
-        * Consider reworking.
-        */
-       call    EXT(lck_mtx_lock_wait)  /* wait for the lock */
-       addl    $8,%esp
-       popl    %edx                    /* restore mutex address */
-       jmp     Llml_ext_restart        /* and start over */
-
-Llml_ext_ilk_refail:
-       movl    8(%esp),%ecx
-       pushl   %ecx
-       popf                            /* restore interrupt state */
        
-Llml_ext_ilk_reloop:
-       PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Llml_ext_reget_retry    /* yes - go try to grab it */
+Llml_contended:
+       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+       je      0f
+       LMTX_UPDATE_MISS
+0:     
+       LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86)
+
+       test    LMTX_RET_REG, LMTX_RET_REG
+       jz      Llml_acquired           /* acquired mutex, interlock held and preemption disabled */
+
+       cmp     $1, LMTX_RET_REG        /* check for direct wait status */
+       je      2f
+       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+       je      2f
+       LMTX_UPDATE_DIRECT_WAIT
+2:     
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       test    $(M_ILOCKED_MSK), LMTX_C_REG32
+       jnz     6f
+
+       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK), LMTX_C_REG32  /* try to take the interlock */
 
-       cmpl    $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */
-       jne     Llml_ext_ilk_reloop     /* no - keep spinning  */
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       jne     5f
 
-       pushl   %edx
-       call    EXT(lck_mtx_interlock_panic)
-       /*
-        * shouldn't return from here, but just in case
-        */
-       popl    %edx
-       jmp     Llml_ext_ilk_reloop
+       test    $(M_MLOCKED_MSK), LMTX_C_REG32  /* we've got the interlock and */
+       jnz     3f
+       or      $(M_MLOCKED_MSK), LMTX_C_REG32  /* the mutex is free... grab it directly */
+       mov     LMTX_C_REG32, M_STATE(LMTX_REG)
+       
+       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
+       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    LMTX_A_REG, LMTX_A_REG
+       jz      1f
+       incl    TH_MUTEX_COUNT(LMTX_A_REG)      /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
+Llml_acquired:
+       testl   $(M_WAITERS_MSK), M_STATE(LMTX_REG)
+       jnz     1f
+       mov     M_OWNER(LMTX_REG), LMTX_A_REG
+       mov     TH_WAS_PROMOTED_ON_WAKEUP(LMTX_A_REG), LMTX_A_REG32
+       test    LMTX_A_REG32, LMTX_A_REG32
+       jz      Llml_finish
+1:     
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+       jmp     Llml_finish
+
+3:                                     /* interlock held, mutex busy */
+       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+       je      4f
+       LMTX_UPDATE_WAIT
+4:     
+       LMTX_CALLEXT1(lck_mtx_lock_wait_x86)
+       jmp     Llml_contended
+5:     
+       PREEMPTION_ENABLE
+6:
+       PAUSE
+       jmp     2b
        
 
+       
 NONLEAF_ENTRY(lck_mtx_try_lock_spin)
+       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
 
-       movl    B_ARG0,%edx             /* fetch lock pointer */
-       pushf                           /* save interrupt state */
-
-       CHECK_NO_SIMPLELOCKS()
-       CHECK_PREEMPTION_LEVEL()
-
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Llmts_eval_ilk          /* no, go see if indirect */
-Llmts_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32  /* is the interlock or mutex held */
+       jnz     Llmts_slow
+Llmts_try:                                     /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
 
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Llmts_ilk_fail          /* branch on failure to retry */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Llmt_fail               /* yes, we lose */
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       jne     Llmts_busy_disabled
 
-       movl    $(MUTEX_LOCKED_AS_SPIN),M_LOCKED        /* no, indicate ownership as a spin lock */
-       PREEMPTION_DISABLE              /* and return with interlock held */
+       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
+       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    LMTX_A_REG, LMTX_A_REG
+       jz      1f
+       incl    TH_MUTEX_COUNT(LMTX_A_REG)      /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-       movl    $1,%eax                 /* return success */
-       popf                            /* restore interrupt state */
+       LMTX_CHK_EXTENDED_EXIT
        leave
+
 #if    CONFIG_DTRACE
+       mov     $1, LMTX_RET_REG        /* return success */
        LOCKSTAT_LABEL(_lck_mtx_try_lock_spin_lockstat_patch_point)
        ret
-       /* inherit lock pointer in %edx above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, %edx)
-       movl    $1,%eax                 /* return success */
+       /* inherit lock pointer in LMTX_REG above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, LMTX_REG)
 #endif
+       mov     $1, LMTX_RET_REG        /* return success */
        ret
 
-Llmts_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state */
-       
-Llmts_ilk_loop:
-       PAUSE
-       /*
-        * need to do this check outside of the interlock in
-        * case this lock is held as a simple lock which means
-        * we won't be able to take the interlock
-        */
-       movl    M_LOCKED,%eax           /* get lock owner */
-       testl   %eax,%eax               /* is the mutex locked? */
-       jne     Llmt_fail_no_ilk        /* yes, go return failure */
-
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Llmts_retry             /* yes - go try to grab it */
-
-       cmpl    $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */
-       jne     Llmts_ilk_loop          /* no - keep spinning  */
-
-       pushl   %edx
-       call    EXT(lck_mtx_interlock_panic)
-       /*
-        * shouldn't return from here, but just in case
-        */
-       popl    %edx
-       jmp     Llmts_ilk_loop
+Llmts_slow:
+       test    $(M_ILOCKED_MSK), LMTX_C_REG32  /* is the interlock held */
+       jz      Llmts_fail                      /* no, must be held as a mutex */
 
-Llmts_eval_ilk:
-       cmpl    $(MUTEX_IND),M_ITAG     /* Is this an indirect mutex? */
-       cmove   M_PTR,%edx              /* If so, take indirection */
-       jne     Llmts_ilk_loop          /* If not, go to spin loop */
+       cmp     $(MUTEX_DESTROYED), LMTX_C_REG32        /* check to see if its marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), LMTX_C_REG32      /* Is this an indirect mutex? */
+       jne     Llmts_loop1
 
-       /*
-        * bump counter on indirect lock
-        */
-       pushl   %esi                    /* Used to hold the lock group ptr */
-       movl    MUTEX_GRP(%edx),%esi    /* Load lock group */
-       /* 64-bit increment of acquire attempt statistic (per-group) */
-       LOCK_IF_ATOMIC_STAT_UPDATES
-       addl    $1, GRP_MTX_STAT_UTIL(%esi)
-       jnc     1f
-       incl    GRP_MTX_STAT_UTIL+4(%esi)
-1:
-       popl    %esi
-       jmp     Llmts_ilk_loop
+       LMTX_ENTER_EXTENDED
+Llmts_loop:
+       PAUSE
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+Llmts_loop1:
+       test    $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
+       jnz     Llmts_fail
+       test    $(M_ILOCKED_MSK), LMTX_C_REG32
+       jz      Llmts_try
+       jmp     Llmts_loop
+       
+Llmts_busy_disabled:
+       PREEMPTION_ENABLE
+       jmp     Llmts_loop
 
 
        
 NONLEAF_ENTRY(lck_mtx_try_lock)
+       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
+
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32  /* is the interlock or mutex held */
+       jnz     Llmt_slow       
+Llmt_try:                                      /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
+       
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       jne     Llmt_busy_disabled
 
-       movl    B_ARG0,%edx             /* fetch lock pointer */
-       pushf                           /* save interrupt state */
+       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
+       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    LMTX_A_REG, LMTX_A_REG
+       jz      1f
+       incl    TH_MUTEX_COUNT(LMTX_A_REG)      /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-       CHECK_NO_SIMPLELOCKS()
-       CHECK_PREEMPTION_LEVEL()
+       LMTX_CHK_EXTENDED_EXIT
 
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Llmt_eval_ilk           /* no, go see if indirect */
-Llmt_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Llmt_ilk_fail           /* branch on failure to retry */
-
-       movl    M_LOCKED,%ecx           /* get lock owner */
-       testl   %ecx,%ecx               /* is the mutex locked? */
-       jne     Llmt_fail               /* yes, we lose */
-Llmt_acquire:
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-       movl    %ecx,M_LOCKED
-
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Llmt_waiters            /* yes, more work to do */
-Llmt_return:
-       xorl    %eax,%eax
-       movl    %eax,M_ILK
+       test    $(M_WAITERS_MSK), LMTX_C_REG32
+       jz      0f
 
-       popf                            /* restore interrupt state */
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+0:
+       andl    $(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
+       PREEMPTION_ENABLE
 
-       movl    $1,%eax                 /* return success */
        leave
 #if    CONFIG_DTRACE
+       mov     $1, LMTX_RET_REG                /* return success */
        /* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */
        LOCKSTAT_LABEL(_lck_mtx_try_lock_lockstat_patch_point)
        ret
-       /* inherit lock pointer in %edx from above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, %edx)
-       movl    $1,%eax                 /* return success */
-#endif
+       /* inherit lock pointer in LMTX_REG from above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, LMTX_REG)
+#endif 
+       mov     $1, LMTX_RET_REG                /* return success */
        ret
 
-Llmt_waiters:
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx
-       call    EXT(lck_mtx_lock_acquire)
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex address */
-       jmp     Llmt_return
-
-Llmt_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state */
-       
-Llmt_ilk_loop:
-       PAUSE
-       /*
-        * need to do this check outside of the interlock in
-        * case this lock is held as a simple lock which means
-        * we won't be able to take the interlock
-        */
-       movl    M_LOCKED,%eax           /* get lock owner */
-       testl   %eax,%eax               /* is the mutex locked? */
-       jne     Llmt_fail_no_ilk        /* yes, go return failure */
-
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Llmt_retry              /* yes - go try to grab it */
-
-       cmpl    $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */
-       jne     Llmt_ilk_loop           /* no - keep spinning  */
-
-       pushl   %edx
-       call    EXT(lck_mtx_interlock_panic)
-       /*
-        * shouldn't return from here, but just in case
-        */
-       popl    %edx
-       jmp     Llmt_ilk_loop
-
-Llmt_fail:
-       xorl    %eax,%eax               /* Zero interlock value */
-       movl    %eax,M_ILK
-
-Llmt_fail_no_ilk:
-       popf                            /* restore interrupt state */
-
-       cmpl    %edx,B_ARG0
-       jne     Llmt_fail_indirect
+Llmt_slow:
+       test    $(M_ILOCKED_MSK), LMTX_C_REG32  /* is the interlock held */
+       jz      Llmt_fail                       /* no, must be held as a mutex */
 
-       xorl    %eax,%eax
-       /* Note that we don't record a dtrace event for trying and missing */
-       NONLEAF_RET
+       cmp     $(MUTEX_DESTROYED), LMTX_C_REG32        /* check to see if its marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), LMTX_C_REG32      /* Is this an indirect mutex? */
+       jne     Llmt_loop
 
-Llmt_fail_indirect:    
-       pushl   %esi                    /* Used to hold the lock group ptr */
-       movl    MUTEX_GRP(%edx),%esi    /* Load lock group */
+       LMTX_ENTER_EXTENDED
+Llmt_loop:
+       PAUSE
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+Llmt_loop1:
+       test    $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
+       jnz     Llmt_fail
+       test    $(M_ILOCKED_MSK), LMTX_C_REG32
+       jz      Llmt_try
+       jmp     Llmt_loop
+
+Llmt_busy_disabled:
+       PREEMPTION_ENABLE
+       jmp     Llmt_loop
 
-       /* Record mutex acquire attempt miss statistic */
-       LOCK_IF_ATOMIC_STAT_UPDATES
-       incl    GRP_MTX_STAT_MISS(%esi)
 
-       popl    %esi
-       xorl    %eax,%eax
+Llmt_fail:
+Llmts_fail:
+       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+       je      0f
+       LMTX_UPDATE_MISS
+       LMTX_EXIT_EXTENDED
+0:
+       xor     LMTX_RET_REG, LMTX_RET_REG
        NONLEAF_RET
 
-Llmt_eval_ilk:
-       cmpl    $(MUTEX_IND),M_ITAG     /* Is this an indirect mutex? */
-       cmove   M_PTR,%edx              /* If so, take indirection */
-       jne     Llmt_ilk_loop           /* If not, go to spin loop */
-
-       /*
-        * bump counter for indirect lock
-        */
-       pushl   %esi                    /* Used to hold the lock group ptr */
-       movl    MUTEX_GRP(%edx),%esi    /* Load lock group */
-
-       /* 64-bit increment of acquire attempt statistic (per-group) */
-       LOCK_IF_ATOMIC_STAT_UPDATES
-       addl    $1, GRP_MTX_STAT_UTIL(%esi)
-       jnc     1f
-       incl    GRP_MTX_STAT_UTIL+4(%esi)
-1:
-       pop     %esi
-       jmp     Llmt_ilk_loop
-
-
-
-LEAF_ENTRY(lck_mtx_convert_spin)
-       movl    L_ARG0,%edx             /* fetch lock pointer */
 
-       cmpl    $(MUTEX_IND),M_ITAG     /* Is this an indirect mutex? */
-       cmove   M_PTR,%edx              /* If so, take indirection */
 
-       movl    M_LOCKED,%ecx           /* is this the spin variant of the mutex */
-       cmpl    $(MUTEX_LOCKED_AS_SPIN),%ecx
-       jne     Llmcs_exit              /* already owned as a mutex, just return */
+NONLEAF_ENTRY(lck_mtx_convert_spin)
+       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
 
-       movl    M_ILK,%ecx              /* convert from spin version to mutex */
-       movl    %ecx,M_LOCKED           /* take control of the mutex */
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       cmp     $(MUTEX_IND), LMTX_C_REG32      /* Is this an indirect mutex? */
+       jne     0f
+       mov     M_PTR(LMTX_REG), LMTX_REG       /* If so, take indirection */
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+0:
+       test    $(M_MLOCKED_MSK), LMTX_C_REG32  /* already owned as a mutex, just return */
+       jnz     2f
+       test    $(M_WAITERS_MSK), LMTX_C_REG32  /* are there any waiters? */
+       jz      1f
 
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Llmcs_waiters           /* yes, more work to do */
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+1:     
+       and     $(~(M_ILOCKED_MSK | M_SPIN_MSK)), LMTX_C_REG32  /* convert from spin version to mutex */
+       or      $(M_MLOCKED_MSK), LMTX_C_REG32
+       mov     LMTX_C_REG32, M_STATE(LMTX_REG)         /* since I own the interlock, I don't need an atomic update */
 
-Llmcs_return:
-       xorl    %ecx,%ecx
-       movl    %ecx,M_ILK              /* clear interlock */
        PREEMPTION_ENABLE
-Llmcs_exit:
-       LEAF_RET
+2:     
+       NONLEAF_RET
 
-Llmcs_waiters:
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx
-       call    EXT(lck_mtx_lock_acquire)
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex address */
-       jmp     Llmcs_return
-       
        
 
+#if    defined(__i386__)
+NONLEAF_ENTRY(lck_mtx_unlock)
+       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
+       mov     M_OWNER(LMTX_REG), LMTX_A_REG
+       test    LMTX_A_REG, LMTX_A_REG
+       jnz     Llmu_entry
+       leave
+       ret
+NONLEAF_ENTRY(lck_mtx_unlock_darwin10)
+#else
 NONLEAF_ENTRY(lck_mtx_unlock)
+#endif
+       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
+Llmu_entry:
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+Llmu_prim:
+       cmp     $(MUTEX_IND), LMTX_C_REG32      /* Is this an indirect mutex? */
+       je      Llmu_ext
+
+Llmu_chktype:
+       test    $(M_MLOCKED_MSK), LMTX_C_REG32  /* check for full mutex */
+       jz      Llmu_unlock
+Llmu_mutex:
+       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* have to wait for interlock to clear */
+       jnz     Llmu_busy
+
+       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
+       and     $(~M_MLOCKED_MSK), LMTX_C_REG32 /* drop mutex */
+       or      $(M_ILOCKED_MSK), LMTX_C_REG32  /* pick up interlock */
 
-       movl    B_ARG0,%edx             /* fetch lock pointer */
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       jne     Llmu_busy_disabled              /* branch on failure to spin loop */
 
-       cmpl    $(MUTEX_IND),M_ITAG     /* Is this an indirect mutex? */
-       cmove   M_PTR,%edx              /* If so, take indirection */
+Llmu_unlock:
+       xor     LMTX_A_REG, LMTX_A_REG
+       mov     LMTX_A_REG, M_OWNER(LMTX_REG)
+       mov     LMTX_C_REG, LMTX_A_REG                  /* keep original state in %ecx for later evaluation */
+       and     $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), LMTX_A_REG
 
-       movl    M_LOCKED,%ecx           /* is this the spin variant of the mutex */
-       cmpl    $(MUTEX_LOCKED_AS_SPIN),%ecx
-       jne     Llmu_enter              /* no, go treat like a real mutex */
+       test    $(M_WAITERS_MSK), LMTX_A_REG32
+       jz      2f
+       dec     LMTX_A_REG32                            /* decrement waiter count */
+2:     
+       mov     LMTX_A_REG32, M_STATE(LMTX_REG)         /* since I own the interlock, I don't need an atomic update */
 
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Llmus_wakeup            /* yes, more work to do */
+#if    MACH_LDEBUG
+       /* perform lock statistics after drop to prevent delay */
+       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
+       test    LMTX_A_REG, LMTX_A_REG
+       jz      1f
+       decl    TH_MUTEX_COUNT(LMTX_A_REG)      /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-Llmu_drop_ilk:
-       xorl    %eax,%eax
-       movl    %eax,M_LOCKED           /* clear spin indicator */
-       movl    %eax,M_ILK              /* release the interlock */
+       test    $(M_PROMOTED_MSK | M_WAITERS_MSK), LMTX_C_REG32
+       jz      3f
+
+       LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, LMTX_C_REG)
+3:     
+       PREEMPTION_ENABLE
+
+       LMTX_CHK_EXTENDED
+       jne     4f
 
-       PREEMPTION_ENABLE               /* and re-enable preemption */
        leave
 #if    CONFIG_DTRACE
        /* Dtrace: LS_LCK_MTX_UNLOCK_RELEASE */
        LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point)
        ret
-       /* inherit lock pointer in %edx from above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, %edx)
+       /* inherit lock pointer in LMTX_REG from above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, LMTX_REG)
 #endif
        ret
-       
-Llmus_wakeup:
-       pushl   %edx                    /* save mutex address */
-       pushl   %edx                    /* push mutex address */
-       call    EXT(lck_mtx_unlockspin_wakeup)  /* yes, wake a thread */
-       addl    $4,%esp
-       popl    %edx                    /* restore mutex pointer */
-       jmp     Llmu_drop_ilk
-
-
-Llmu_enter:    
-       pushf                           /* save interrupt state */
-
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       jne     Llmu_ilk_loop           /* no - go to spin loop */
-Llmu_retry:
-       cli                             /* disable interrupts */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx
-
-       /* eax == 0 at this point */
-       lock; cmpxchgl  %ecx,M_ILK      /* atomic compare and exchange */
-       jne     Llmu_ilk_fail           /* branch on failure to spin loop */
-
-       cmpw    $0,M_WAITERS            /* are there any waiters? */
-       jne     Llmu_wakeup             /* yes, more work to do */
-
-Llmu_doit:
-       xorl    %ecx,%ecx
-       movl    %ecx,M_LOCKED           /* unlock the mutex */
-       movl    %ecx,M_ILK              /* clear the interlock */
-
-       popf                            /* restore interrupt state */
+4:     
        leave
 #if    CONFIG_DTRACE
-       LOCKSTAT_LABEL(_lck_mtx_unlock2_lockstat_patch_point)
+       /* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */
+       LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point)
        ret
-       /* inherit lock pointer in %edx above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, %edx)
+       /* inherit lock pointer in LMTX_REG from above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, LMTX_REG)
 #endif
        ret
 
-Llmu_ilk_fail:
-       popf                            /* restore interrupt state */
-       pushf                           /* resave interrupt state */
-       
-Llmu_ilk_loop:
+
+Llmu_busy_disabled:
+       PREEMPTION_ENABLE
+Llmu_busy:
        PAUSE
-       movl    M_ILK,%eax              /* read interlock */
-       testl   %eax,%eax               /* unlocked? */
-       je      Llmu_retry              /* yes - go try to grab it */
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       jmp     Llmu_mutex
 
-       cmpl    $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */
-       jne     Llmu_ilk_loop           /* no - keep spinning  */
+Llmu_ext:
+       mov     M_PTR(LMTX_REG), LMTX_REG
+       mov     M_OWNER(LMTX_REG), LMTX_A_REG
+       mov     %gs:CPU_ACTIVE_THREAD, LMTX_C_REG
+       CHECK_UNLOCK(LMTX_C_REG, LMTX_A_REG)
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       jmp     Llmu_chktype
 
-       pushl   %edx
-       call    EXT(lck_mtx_interlock_panic)
-       /*
-        * shouldn't return from here, but just in case
-        */
-       popl    %edx
-       jmp     Llmu_ilk_loop
 
-Llmu_wakeup:
-       pushl   %edx                    /* save mutex address */
-       pushl   M_LOCKED
-       pushl   %edx                    /* push mutex address */
-       call    EXT(lck_mtx_unlock_wakeup)/* yes, wake a thread */
-       addl    $8,%esp
-       popl    %edx                    /* restore mutex pointer */
-       xorl    %ecx,%ecx
-       movl    %ecx,M_LOCKED           /* unlock the mutex */
+       
+LEAF_ENTRY(lck_mtx_ilk_unlock)
+       LOAD_LMTX_REG(L_ARG0)                   /* fetch lock pointer - no indirection here */
 
-       movl    %ecx,M_ILK
+       andl    $(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
 
-       popf                            /* restore interrupt state */
+       PREEMPTION_ENABLE                       /* need to re-enable preemption */
 
-       leave
-#if    CONFIG_DTRACE
-       /* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */
-       LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point)
-       ret
-       /* inherit lock pointer in %edx from above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, %edx)
-#endif
-       ret
+       LEAF_RET
+       
 
+       
+LEAF_ENTRY(lck_mtx_lock_grab_mutex)
+       LOAD_LMTX_REG(L_ARG0)                   /* fetch lock pointer - no indirection here */
 
-LEAF_ENTRY(lck_mtx_ilk_unlock)
-       movl    L_ARG0,%edx             /* no indirection here */
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
 
-       xorl    %eax,%eax
-       movl    %eax,M_ILK
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32  /* can't have the mutex yet */
+       jnz     3f
 
+       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
+
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       jne     2f                              /* branch on failure to spin loop */
+
+       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
+       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    LMTX_A_REG, LMTX_A_REG
+       jz      1f
+       incl    TH_MUTEX_COUNT(LMTX_A_REG)      /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
+
+       mov     $1, LMTX_RET_REG                /* return success */
+       LEAF_RET
+2:                                             
+       PREEMPTION_ENABLE
+3:
+       xor     LMTX_RET_REG, LMTX_RET_REG      /* return failure */
        LEAF_RET
+       
+
+
+LEAF_ENTRY(lck_mtx_lock_mark_destroyed)
+       LOAD_LMTX_REG(L_ARG0)
+1:
+       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       cmp     $(MUTEX_IND), LMTX_C_REG32      /* Is this an indirect mutex? */
+       jne     2f
+
+       movl    $(MUTEX_DESTROYED), M_STATE(LMTX_REG)   /* convert to destroyed state */
+       jmp     3f
+2:     
+       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* have to wait for interlock to clear */
+       jnz     5f
+
+       PREEMPTION_DISABLE
+       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK), LMTX_C_REG32
+       lock
+       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       jne     4f                              /* branch on failure to spin loop */
+       movl    $(MUTEX_DESTROYED), M_STATE(LMTX_REG)   /* convert to destroyed state */
+       PREEMPTION_ENABLE
+3:
+       LEAF_RET                                /* return with M_ILOCKED set */
+4:
+       PREEMPTION_ENABLE
+5:
+       PAUSE
+       jmp     1b
+
+LEAF_ENTRY(preemption_underflow_panic)
+       FRAME
+       incl    %gs:CPU_PREEMPTION_LEVEL
+       ALIGN_STACK()
+       LOAD_STRING_ARG0(16f)
+       CALL_PANIC()
+       hlt
+       .data
+16:    String  "Preemption level underflow, possible cause unlocking an unlocked mutex or spinlock"
+       .text
 
 
 LEAF_ENTRY(_disable_preemption)
 #if    MACH_RT
-       _DISABLE_PREEMPTION
+       PREEMPTION_DISABLE
 #endif /* MACH_RT */
        LEAF_RET
 
@@ -2078,16 +2018,22 @@ LEAF_ENTRY(_enable_preemption)
 #if    MACH_ASSERT
        cmpl    $0,%gs:CPU_PREEMPTION_LEVEL
        jg      1f
+#if __i386__
        pushl   %gs:CPU_PREEMPTION_LEVEL
-       pushl   $2f
-       call    EXT(panic)
+#else
+       movl    %gs:CPU_PREEMPTION_LEVEL,%esi
+#endif
+       ALIGN_STACK()
+       LOAD_STRING_ARG0(_enable_preemption_less_than_zero)
+       CALL_PANIC()
        hlt
-       .data
-2:     String  "_enable_preemption: preemption_level(%d)  < 0!"
+       .cstring
+_enable_preemption_less_than_zero:
+       .asciz  "_enable_preemption: preemption_level(%d)  < 0!"
        .text
 1:
 #endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION
+       PREEMPTION_ENABLE
 #endif /* MACH_RT */
        LEAF_RET
 
@@ -2096,11 +2042,13 @@ LEAF_ENTRY(_enable_preemption_no_check)
 #if    MACH_ASSERT
        cmpl    $0,%gs:CPU_PREEMPTION_LEVEL
        jg      1f
-       pushl   $2f
-       call    EXT(panic)
+       ALIGN_STACK()
+       LOAD_STRING_ARG0(_enable_preemption_no_check_less_than_zero)
+       CALL_PANIC()
        hlt
-       .data
-2:     String  "_enable_preemption_no_check: preemption_level <= 0!"
+       .cstring
+_enable_preemption_no_check_less_than_zero:
+       .asciz  "_enable_preemption_no_check: preemption_level <= 0!"
        .text
 1:
 #endif /* MACH_ASSERT */
@@ -2111,7 +2059,7 @@ LEAF_ENTRY(_enable_preemption_no_check)
        
 LEAF_ENTRY(_mp_disable_preemption)
 #if    MACH_RT
-       _DISABLE_PREEMPTION
+       PREEMPTION_DISABLE
 #endif /* MACH_RT */
        LEAF_RET
 
@@ -2120,16 +2068,22 @@ LEAF_ENTRY(_mp_enable_preemption)
 #if    MACH_ASSERT
        cmpl    $0,%gs:CPU_PREEMPTION_LEVEL
        jg      1f
+#if __i386__
        pushl   %gs:CPU_PREEMPTION_LEVEL
-       pushl   $2f
-       call    EXT(panic)
+#else
+       movl    %gs:CPU_PREEMPTION_LEVEL,%esi
+#endif
+       ALIGN_PANIC()
+       LOAD_STRING_ARG0(_mp_enable_preemption_less_than_zero)
+       CALL_PANIC()
        hlt
-       .data
-2:     String  "_mp_enable_preemption: preemption_level (%d) <= 0!"
+       .cstring
+_mp_enable_preemption_less_than_zero:
+       .asciz "_mp_enable_preemption: preemption_level (%d) <= 0!"
        .text
 1:
 #endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION
+       PREEMPTION_ENABLE
 #endif /* MACH_RT */
        LEAF_RET
 
@@ -2138,11 +2092,13 @@ LEAF_ENTRY(_mp_enable_preemption_no_check)
 #if    MACH_ASSERT
        cmpl    $0,%gs:CPU_PREEMPTION_LEVEL
        jg      1f
-       pushl   $2f
-       call    EXT(panic)
+       ALIGN_STACK()
+       LOAD_STRING_ARG0(_mp_enable_preemption_no_check_less_than_zero)
+       CALL_PANIC()
        hlt
-       .data
-2:     String  "_mp_enable_preemption_no_check: preemption_level <= 0!"
+       .cstring
+_mp_enable_preemption_no_check_less_than_zero:
+       .asciz "_mp_enable_preemption_no_check: preemption_level <= 0!"
        .text
 1:
 #endif /* MACH_ASSERT */
@@ -2150,6 +2106,7 @@ LEAF_ENTRY(_mp_enable_preemption_no_check)
 #endif /* MACH_RT */
        LEAF_RET
        
+#if __i386__
        
 LEAF_ENTRY(i_bit_set)
        movl    L_ARG0,%edx
@@ -2259,5 +2216,134 @@ LEAF_ENTRY(hw_atomic_and_noret)
        movl    L_ARG0, %ecx            /* Load address of operand */
        movl    L_ARG1, %edx            /* Load mask */
        lock
-       andl    %edx, (%ecx)            /* Atomic OR */
+       andl    %edx, (%ecx)            /* Atomic AND */
+       LEAF_RET
+
+#else /* !__i386__ */
+
+LEAF_ENTRY(i_bit_set)
+       lock
+       bts     %edi,(%rsi)
+       LEAF_RET
+
+LEAF_ENTRY(i_bit_clear)
+       lock
+       btr     %edi,(%rsi)
+       LEAF_RET
+
+
+LEAF_ENTRY(bit_lock)
+1:
+       lock
+       bts     %edi,(%rsi)
+       jb      1b
+       LEAF_RET
+
+
+LEAF_ENTRY(bit_lock_try)
+       lock
+       bts     %edi,(%rsi)
+       jb      bit_lock_failed
+       movl    $1, %eax
        LEAF_RET
+bit_lock_failed:
+       xorl    %eax,%eax
+       LEAF_RET
+
+LEAF_ENTRY(bit_unlock)
+       lock
+       btr     %edi,(%rsi)
+       LEAF_RET
+
+       
+/*
+ * Atomic primitives, prototyped in kern/simple_lock.h
+ */
+LEAF_ENTRY(hw_atomic_add)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
+       movl    %esi, %eax              /* Load addend */
+       lock    xaddl %eax, (%rdi)              /* Atomic exchange and add */
+       addl    %esi, %eax              /* Calculate result */
+       LEAF_RET
+
+LEAF_ENTRY(hw_atomic_sub)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
+       negl    %esi
+       movl    %esi, %eax
+       lock    xaddl %eax, (%rdi)              /* Atomic exchange and add */
+       addl    %esi, %eax              /* Calculate result */
+       LEAF_RET
+
+LEAF_ENTRY(hw_atomic_or)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
+       movl    (%rdi), %eax
+1:
+       movl    %esi, %edx              /* Load mask */
+       orl     %eax, %edx
+       lock    cmpxchgl %edx, (%rdi)   /* Atomic CAS */
+       jne     1b
+       movl    %edx, %eax              /* Result */
+       LEAF_RET
+/*
+ * A variant of hw_atomic_or which doesn't return a value.
+ * The implementation is thus comparatively more efficient.
+ */
+
+LEAF_ENTRY(hw_atomic_or_noret)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
+       lock
+       orl     %esi, (%rdi)            /* Atomic OR */
+       LEAF_RET
+
+
+LEAF_ENTRY(hw_atomic_and)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
+       movl    (%rdi), %eax
+1:
+       movl    %esi, %edx              /* Load mask */
+       andl    %eax, %edx
+       lock    cmpxchgl %edx, (%rdi)   /* Atomic CAS */
+       jne     1b
+       movl    %edx, %eax              /* Result */
+       LEAF_RET
+/*
+ * A variant of hw_atomic_and which doesn't return a value.
+ * The implementation is thus comparatively more efficient.
+ */
+
+LEAF_ENTRY(hw_atomic_and_noret)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
+       lock    andl    %esi, (%rdi)            /* Atomic OR */
+       LEAF_RET
+
+#endif /* !__i386 __ */