]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/i386_lock.s
xnu-3789.70.16.tar.gz
[apple/xnu.git] / osfmk / i386 / i386_lock.s
index 267b4b0db4a33aae8f168dcbba2d695a79d5879a..f54e040a1a78037a95a7da3630c55894fe5a0c27 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -36,7 +36,6 @@
  */
 
 #include <mach_rt.h>
-#include <platforms.h>
 #include <mach_ldebug.h>
 #include <i386/asm.h>
 #include <i386/eflags.h>
 
 #define        PAUSE           rep; nop
 
-
-#define PUSHF pushf
-#define POPF  popf
-#define CLI   cli
-
-
-/*
- *     When performance isn't the only concern, it's
- *     nice to build stack frames...
- */
-#define        BUILD_STACK_FRAMES   (GPROF || \
-                               ((MACH_LDEBUG) && MACH_KDB))
-
-#if    BUILD_STACK_FRAMES
-
-/* Stack-frame-relative: */
-#define        L_PC            B_PC
-#define        L_ARG0          B_ARG0
-#define        L_ARG1          B_ARG1
-
-#define LEAF_ENTRY(name)       \
-       Entry(name);            \
-       FRAME;                  \
-       MCOUNT
-
-#define LEAF_ENTRY2(n1,n2)     \
-       Entry(n1);              \
-       Entry(n2);              \
-       FRAME;                  \
-       MCOUNT
-
-#define LEAF_RET               \
-       EMARF;                  \
-       ret
-
-#else  /* BUILD_STACK_FRAMES */
-
-/* Stack-pointer-relative: */
-#define        L_PC            S_PC
-#define        L_ARG0          S_ARG0
-#define        L_ARG1          S_ARG1
+#include <i386/pal_lock_asm.h>
 
 #define LEAF_ENTRY(name)       \
        Entry(name)
 #define LEAF_RET               \
        ret
 
-#endif /* BUILD_STACK_FRAMES */
-
-
 /* Non-leaf routines always have a stack frame: */
 
 #define NONLEAF_ENTRY(name)    \
        Entry(name);            \
-       FRAME;                  \
-       MCOUNT
+       FRAME
 
 #define NONLEAF_ENTRY2(n1,n2)  \
        Entry(n1);              \
        Entry(n2);              \
-       FRAME;                  \
-       MCOUNT
+       FRAME
 
 #define NONLEAF_RET            \
        EMARF;                  \
 
 /* For x86_64, the varargs ABI requires that %al indicate
  * how many SSE registers contain arguments. In our case, 0 */
-#if __i386__
-#define LOAD_STRING_ARG0(label)        pushl $##label ;
-#define LOAD_ARG1(x)           pushl x ;
-#define CALL_PANIC()           call EXT(panic) ;
-#else
+#define ALIGN_STACK()          and  $0xFFFFFFFFFFFFFFF0, %rsp ;
 #define LOAD_STRING_ARG0(label)        leaq label(%rip), %rdi ;
-#define LOAD_ARG1(x)           movq x, %rsi ;
+#define LOAD_ARG1(x)           mov x, %esi ;
+#define LOAD_PTR_ARG1(x)       mov x, %rsi ;
 #define CALL_PANIC()           xorb %al,%al ; call EXT(panic) ;
-#endif
 
 #define        CHECK_UNLOCK(current, owner)                            \
        cmp     current, owner                          ;       \
        je      1f                                      ;       \
+       ALIGN_STACK()                                   ;       \
        LOAD_STRING_ARG0(2f)                            ;       \
        CALL_PANIC()                                    ;       \
        hlt                                             ;       \
 #define        CHECK_MUTEX_TYPE()                                      \
        cmpl    $ MUTEX_TAG,M_TYPE                      ;       \
        je      1f                                      ;       \
+       ALIGN_STACK()                                   ;       \
        LOAD_STRING_ARG0(2f)                            ;       \
        CALL_PANIC()                                    ;       \
        hlt                                             ;       \
        jne     1f                                      ;       \
        cmpl    $0,%gs:CPU_PREEMPTION_LEVEL             ;       \
        je      1f                                      ;       \
-       LOAD_ARG1(%gs:CPU_PREEMPTION_LEVEL)             ;       \
+       ALIGN_STACK()                                   ;       \
+       movl    %gs:CPU_PREEMPTION_LEVEL, %eax          ;       \
+       LOAD_ARG1(%eax)                                 ;       \
        LOAD_STRING_ARG0(2f)                            ;       \
        CALL_PANIC()                                    ;       \
        hlt                                             ;       \
 #define        CHECK_MYLOCK(current, owner)                            \
        cmp     current, owner                          ;       \
        jne     1f                                      ;       \
+       ALIGN_STACK()                                   ;       \
        LOAD_STRING_ARG0(2f)                            ;       \
        CALL_PANIC()                                    ;       \
        hlt                                             ;       \
 #define        CHECK_MYLOCK(thd)
 #endif /* MACH_LDEBUG */
 
-
 #define PREEMPTION_DISABLE                             \
-       incl    %gs:CPU_PREEMPTION_LEVEL                
-       
-       
+       incl    %gs:CPU_PREEMPTION_LEVEL
+
+#define        PREEMPTION_LEVEL_DEBUG 1        
+#if    PREEMPTION_LEVEL_DEBUG
+#define        PREEMPTION_ENABLE                               \
+       decl    %gs:CPU_PREEMPTION_LEVEL        ;       \
+       js      17f                             ;       \
+       jnz     19f                             ;       \
+       testl   $AST_URGENT,%gs:CPU_PENDING_AST ;       \
+       jz      19f                             ;       \
+       PUSHF                                   ;       \
+       testl   $EFL_IF, S_PC                   ;       \
+       jz      18f                             ;       \
+       POPF                                    ;       \
+       int     $(T_PREEMPT)                    ;       \
+       jmp     19f                             ;       \
+17:                                                    \
+       call    _preemption_underflow_panic     ;       \
+18:                                                    \
+       POPF                                    ;       \
+19:
+#else
 #define        PREEMPTION_ENABLE                               \
        decl    %gs:CPU_PREEMPTION_LEVEL        ;       \
-       jne     9f                              ;       \
+       jnz     19f                             ;       \
+       testl   $AST_URGENT,%gs:CPU_PENDING_AST ;       \
+       jz      19f                             ;       \
        PUSHF                                   ;       \
-       testl   $ EFL_IF,S_PC                   ;       \
-       je      8f                              ;       \
-       CLI                                     ;       \
-       movl    %gs:CPU_PENDING_AST,%eax        ;       \
-       testl   $ AST_URGENT,%eax               ;       \
-       je      8f                              ;       \
-       movl    %gs:CPU_INTERRUPT_LEVEL,%eax    ;       \
-       testl   %eax,%eax                       ;       \
-       jne     8f                              ;       \
+       testl   $EFL_IF, S_PC                   ;       \
+       jz      18f                             ;       \
        POPF                                    ;       \
        int     $(T_PREEMPT)                    ;       \
-       jmp     9f                              ;       \
-8:                                                     \
+       jmp     19f                             ;       \
+18:                                                    \
        POPF                                    ;       \
-9:     
+19:
+#endif
 
-       
 
 #if    CONFIG_DTRACE
 
  * a "nop"
  */
 
-#if defined(__i386__)
-
-#define        LOCKSTAT_LABEL(lab) \
-       .data                           ;\
-       .globl  lab                     ;\
-       lab:                            ;\
-       .long 9f                        ;\
-       .text                           ;\
-       9:
-
-#define        LOCKSTAT_RECORD(id, lck) \
-       push    %ebp                                    ;       \
-       mov     %esp,%ebp                               ;       \
-       sub     $0x38,%esp      /* size of dtrace_probe args */ ; \
-       movl    _lockstat_probemap + (id * 4),%eax      ;       \
-       test    %eax,%eax                               ;       \
-       je      9f                                      ;       \
-       movl    $0,36(%esp)                             ;       \
-       movl    $0,40(%esp)                             ;       \
-       movl    $0,28(%esp)                             ;       \
-       movl    $0,32(%esp)                             ;       \
-       movl    $0,20(%esp)                             ;       \
-       movl    $0,24(%esp)                             ;       \
-       movl    $0,12(%esp)                             ;       \
-       movl    $0,16(%esp)                             ;       \
-       movl    lck,4(%esp)     /* copy lock pointer to arg 1 */ ; \
-       movl    $0,8(%esp)                              ;       \
-       movl    %eax,(%esp)                             ;       \
-       call    *_lockstat_probe                        ;       \
-9:     leave
-       /* ret - left to subsequent code, e.g. return values */
-
-#elif defined(__x86_64__)
 #define        LOCKSTAT_LABEL(lab) \
        .data                                       ;\
        .globl  lab                                 ;\
        call    *_lockstat_probe(%rip)              ;       \
 9:     leave
        /* ret - left to subsequent code, e.g. return values */
-#else
-#error Unsupported architecture
-#endif
+
 #endif /* CONFIG_DTRACE */
 
 /*
  * register initially, and then either a byte or register-sized
  * word is loaded/stored to the pointer
  */
-#if defined(__i386__)
-#define        HW_LOCK_REGISTER        %edx
-#define        LOAD_HW_LOCK_REGISTER mov L_ARG0, HW_LOCK_REGISTER
-#define        HW_LOCK_THREAD_REGISTER %ecx
-#define        LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER
-#define        HW_LOCK_MOV_WORD        movl
-#define        HW_LOCK_EXAM_REGISTER   %eax
-#elif defined(__x86_64__)
-#define        HW_LOCK_REGISTER        %rdi
-#define        LOAD_HW_LOCK_REGISTER
-#define        HW_LOCK_THREAD_REGISTER %rcx
-#define        LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER
-#define        HW_LOCK_MOV_WORD        movq
-#define        HW_LOCK_EXAM_REGISTER   %rax
-#else
-#error Unsupported architecture
-#endif
-
-/*
- *     void hw_lock_init(hw_lock_t)
- *
- *     Initialize a hardware lock.
- */
-LEAF_ENTRY(hw_lock_init)
-       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
-       HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER)         /* clear the lock */
-       LEAF_RET
-
 
 /*
- *     void hw_lock_byte_init(uint8_t *)
+ *     void hw_lock_byte_init(volatile uint8_t *)
  *
  *     Initialize a hardware byte lock.
  */
 LEAF_ENTRY(hw_lock_byte_init)
-       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
-       movb $0, (HW_LOCK_REGISTER)             /* clear the lock */
+       movb    $0, (%rdi)              /* clear the lock */
        LEAF_RET
 
-/*
- *     void hw_lock_lock(hw_lock_t)
- *
- *     Acquire lock, spinning until it becomes available.
- *     MACH_RT:  also return with preemption disabled.
- */
-LEAF_ENTRY(hw_lock_lock)
-       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
-       LOAD_HW_LOCK_THREAD_REGISTER    /* get thread pointer */
-       
-       PREEMPTION_DISABLE
-1:
-       mov     (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER
-       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER             /* lock locked? */
-       jne     3f                      /* branch if so */
-       lock; cmpxchg   HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)      /* try to acquire the HW lock */
-       jne     3f
-       movl    $1,%eax                 /* In case this was a timeout call */
-       LEAF_RET                        /* if yes, then nothing left to do */
-3:
-       PAUSE                           /* pause for hyper-threading */
-       jmp     1b                      /* try again */
-
 /*
  *     void    hw_lock_byte_lock(uint8_t *lock_byte)
  *
@@ -382,155 +262,19 @@ LEAF_ENTRY(hw_lock_lock)
  */
 
 LEAF_ENTRY(hw_lock_byte_lock)
-       LOAD_HW_LOCK_REGISTER           /* Load lock pointer */
        PREEMPTION_DISABLE
        movl    $1, %ecx                /* Set lock value */
 1:
-       movb    (HW_LOCK_REGISTER), %al         /* Load byte at address */
+       movb    (%rdi), %al             /* Load byte at address */
        testb   %al,%al                 /* lock locked? */
        jne     3f                      /* branch if so */
-       lock; cmpxchg   %cl,(HW_LOCK_REGISTER)  /* attempt atomic compare exchange */
+       lock; cmpxchg %cl,(%rdi)        /* attempt atomic compare exchange */
        jne     3f
        LEAF_RET                        /* if yes, then nothing left to do */
 3:
        PAUSE                           /* pause for hyper-threading */
        jmp     1b                      /* try again */
 
-/*
- *     unsigned int hw_lock_to(hw_lock_t, unsigned int)
- *
- *     Acquire lock, spinning until it becomes available or timeout.
- *     MACH_RT:  also return with preemption disabled.
- */
-LEAF_ENTRY(hw_lock_to)
-1:
-       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
-       LOAD_HW_LOCK_THREAD_REGISTER
-
-       /*
-        * Attempt to grab the lock immediately
-        * - fastpath without timeout nonsense.
-        */
-       PREEMPTION_DISABLE
-
-       mov     (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER
-       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER             /* lock locked? */
-       jne     2f                      /* branch if so */
-       lock; cmpxchg   HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)      /* try to acquire the HW lock */
-       jne     2f                      /* branch on failure */
-       movl    $1,%eax
-       LEAF_RET
-
-2:
-#define        INNER_LOOP_COUNT        1000
-       /*
-        * Failed to get the lock so set the timeout
-        * and then spin re-checking the lock but pausing
-        * every so many (INNER_LOOP_COUNT) spins to check for timeout.
-        */
-#if __i386__
-       movl    L_ARG1,%ecx             /* fetch timeout */
-       push    %edi
-       push    %ebx
-       mov     %edx,%edi
-
-       lfence
-       rdtsc                           /* read cyclecount into %edx:%eax */
-       lfence
-       addl    %ecx,%eax               /* fetch and timeout */
-       adcl    $0,%edx                 /* add carry */
-       mov     %edx,%ecx
-       mov     %eax,%ebx               /* %ecx:%ebx is the timeout expiry */
-       mov     %edi, %edx              /* load lock back into %edx */
-#else
-       push    %r9
-       lfence
-       rdtsc                           /* read cyclecount into %edx:%eax */
-       lfence
-       shlq    $32, %rdx
-       orq     %rdx, %rax              /* load 64-bit quantity into %rax */
-       addq    %rax, %rsi              /* %rsi is the timeout expiry */
-#endif
-       
-4:
-       /*
-        * The inner-loop spin to look for the lock being freed.
-        */
-#if __i386__
-       mov     $(INNER_LOOP_COUNT),%edi
-#else
-       mov     $(INNER_LOOP_COUNT),%r9
-#endif
-5:
-       PAUSE                           /* pause for hyper-threading */
-       mov     (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER                /* spin checking lock value in cache */
-       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
-       je      6f                      /* zero => unlocked, try to grab it */
-#if __i386__
-       decl    %edi                    /* decrement inner loop count */
-#else
-       decq    %r9                     /* decrement inner loop count */
-#endif
-       jnz     5b                      /* time to check for timeout? */
-       
-       /*
-        * Here after spinning INNER_LOOP_COUNT times, check for timeout
-        */
-#if __i386__
-       mov     %edx,%edi               /* Save %edx */
-       lfence
-       rdtsc                           /* cyclecount into %edx:%eax */
-       lfence
-       xchg    %edx,%edi               /* cyclecount into %edi:%eax */
-       cmpl    %ecx,%edi               /* compare high-order 32-bits */
-       jb      4b                      /* continue spinning if less, or */
-       cmpl    %ebx,%eax               /* compare low-order 32-bits */ 
-       jb      4b                      /* continue if less, else bail */
-       xor     %eax,%eax               /* with 0 return value */
-       pop     %ebx
-       pop     %edi
-#else
-       lfence
-       rdtsc                           /* cyclecount into %edx:%eax */
-       lfence
-       shlq    $32, %rdx
-       orq     %rdx, %rax              /* load 64-bit quantity into %rax */
-       cmpq    %rsi, %rax              /* compare to timeout */
-       jb      4b                      /* continue spinning if less, or */
-       xor     %rax,%rax               /* with 0 return value */
-       pop     %r9
-#endif
-       LEAF_RET
-
-6:
-       /*
-        * Here to try to grab the lock that now appears to be free
-        * after contention.
-        */
-       LOAD_HW_LOCK_THREAD_REGISTER
-       lock; cmpxchg   HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)      /* try to acquire the HW lock */
-       jne     4b                      /* no - spin again */
-       movl    $1,%eax                 /* yes */
-#if __i386__
-       pop     %ebx
-       pop     %edi
-#else
-       pop     %r9
-#endif
-       LEAF_RET
-
-/*
- *     void hw_lock_unlock(hw_lock_t)
- *
- *     Unconditionally release lock.
- *     MACH_RT:  release preemption level.
- */
-LEAF_ENTRY(hw_lock_unlock)
-       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
-       HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER)         /* clear the lock */
-       PREEMPTION_ENABLE
-       LEAF_RET
-
 /*
  *     void hw_lock_byte_unlock(uint8_t *lock_byte)
  *
@@ -539,48 +283,10 @@ LEAF_ENTRY(hw_lock_unlock)
  */
 
 LEAF_ENTRY(hw_lock_byte_unlock)
-       LOAD_HW_LOCK_REGISTER           /* Load lock pointer */
-       movb $0, (HW_LOCK_REGISTER)             /* Clear the lock byte */
+       movb $0, (%rdi)         /* Clear the lock byte */
        PREEMPTION_ENABLE
        LEAF_RET
 
-/*
- *     unsigned int hw_lock_try(hw_lock_t)
- *     MACH_RT:  returns with preemption disabled on success.
- */
-LEAF_ENTRY(hw_lock_try)
-       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
-       LOAD_HW_LOCK_THREAD_REGISTER
-       PREEMPTION_DISABLE
-
-       mov     (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER
-       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
-       jne     1f
-       lock; cmpxchg   HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER)      /* try to acquire the HW lock */
-       jne     1f
-       
-       movl    $1,%eax                 /* success */
-       LEAF_RET
-
-1:
-       PREEMPTION_ENABLE               /* failure:  release preemption... */
-       xorl    %eax,%eax               /* ...and return failure */
-       LEAF_RET
-
-/*
- *     unsigned int hw_lock_held(hw_lock_t)
- *     MACH_RT:  doesn't change preemption state.
- *     N.B.  Racy, of course.
- */
-LEAF_ENTRY(hw_lock_held)
-       LOAD_HW_LOCK_REGISTER           /* fetch lock pointer */
-       mov     (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER                /* check lock value */
-       test    HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER
-       movl    $1,%ecx
-       cmovne  %ecx,%eax               /* 0 => unlocked, 1 => locked */
-       LEAF_RET
-
-
 /*
  * Reader-writer lock fastpaths. These currently exist for the
  * shared lock acquire, the exclusive lock acquire, the shared to
@@ -606,36 +312,23 @@ LEAF_ENTRY(hw_lock_held)
  * register and examined
  */
  
-#if defined(__i386__)
-#define        LCK_RW_REGISTER %edx
-#define        LOAD_LCK_RW_REGISTER mov S_ARG0, LCK_RW_REGISTER
-#define        LCK_RW_FLAGS_REGISTER   %eax
-#define        LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER
-#elif defined(__x86_64__)
-#define        LCK_RW_REGISTER %rdi
-#define        LOAD_LCK_RW_REGISTER
-#define        LCK_RW_FLAGS_REGISTER   %eax
-#define        LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER
-#else
-#error Unsupported architecture
-#endif
-       
 #define        RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE)
 /*
  *     void lck_rw_lock_shared(lck_rw_t *)
  *
  */
 Entry(lck_rw_lock_shared)
-       LOAD_LCK_RW_REGISTER
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* Load thread pointer */
+       incl    TH_RWLOCK_COUNT(%rcx)           /* Increment count before atomic CAS */
 1:
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield and interlock */
+       mov     (%rdi), %eax            /* Load state bitfield and interlock */
        testl   $(RW_LOCK_SHARED_MASK), %eax    /* Eligible for fastpath? */
        jne     3f
 
        movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
        incl    %ecx                            /* Increment reader refcount */
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
        jne     2f
 
 #if    CONFIG_DTRACE
@@ -646,8 +339,10 @@ Entry(lck_rw_lock_shared)
         */
        LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point)
        ret
-    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+       /*
+       Fall thru when patched, counting on lock pointer in %rdi
+       */
+       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, %rdi)
 #endif
        ret
 2:
@@ -664,9 +359,8 @@ Entry(lck_rw_lock_shared)
  *
  */
 Entry(lck_rw_try_lock_shared)
-       LOAD_LCK_RW_REGISTER
 1:
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield and interlock */
+       mov     (%rdi), %eax            /* Load state bitfield and interlock */
        testl   $(LCK_RW_INTERLOCK), %eax
        jne     2f
        testl   $(RW_TRY_LOCK_SHARED_MASK), %eax
@@ -675,9 +369,13 @@ Entry(lck_rw_try_lock_shared)
        movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
        incl    %ecx                            /* Increment reader refcount */
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
        jne     2f
 
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* Load thread pointer */
+       incl    TH_RWLOCK_COUNT(%rcx)           /* Increment count on success. */
+       /* There is a 3-instruction window after the cmpxchg in which preemption may not notice rwlock_count */
+
 #if    CONFIG_DTRACE
        movl    $1, %eax
        /*
@@ -687,8 +385,8 @@ Entry(lck_rw_try_lock_shared)
         */
        LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point)
        ret
-    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+       /* Fall thru when patched, counting on lock pointer in %rdi  */
+       LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, %rdi)
 #endif
        movl    $1, %eax                        /* return TRUE */
        ret
@@ -706,21 +404,20 @@ Entry(lck_rw_try_lock_shared)
  *
  */
 Entry(lck_rw_grab_shared)
-       LOAD_LCK_RW_REGISTER
 1:
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield and interlock */
+       mov     (%rdi), %eax            /* Load state bitfield and interlock */
        testl   $(LCK_RW_INTERLOCK), %eax
        jne     5f
        testl   $(RW_LOCK_EXCLUSIVE_HELD), %eax 
        jne     3f
 2:     
-       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
-       incl    %ecx                            /* Increment reader refcount */
+       movl    %eax, %ecx              /* original value in %eax for cmpxchgl */
+       incl    %ecx                    /* Increment reader refcount */
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)           /* Attempt atomic exchange */
        jne     4f
 
-       movl    $1, %eax                        /* return success */
+       movl    $1, %eax                /* return success */
        ret
 3:
        testl   $(LCK_RW_SHARED_MASK), %eax
@@ -728,7 +425,7 @@ Entry(lck_rw_grab_shared)
        testl   $(LCK_RW_PRIV_EXCL), %eax
        je      2b
 4:
-       xorl    %eax, %eax                      /* return failure */
+       xorl    %eax, %eax              /* return failure */
        ret
 5:
        PAUSE
@@ -743,16 +440,17 @@ Entry(lck_rw_grab_shared)
  *
  */
 Entry(lck_rw_lock_exclusive)
-       LOAD_LCK_RW_REGISTER
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* Load thread pointer */
+       incl    TH_RWLOCK_COUNT(%rcx)           /* Increment count before atomic CAS */
 1:
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and shared count */
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and shared count */
        testl   $(RW_LOCK_EXCLUSIVE_MASK), %eax         /* Eligible for fastpath? */
        jne     3f                                      /* no, go slow */
 
        movl    %eax, %ecx                              /* original value in %eax for cmpxchgl */
        orl     $(LCK_RW_WANT_WRITE), %ecx
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
        jne     2f
 
 #if    CONFIG_DTRACE
@@ -763,8 +461,8 @@ Entry(lck_rw_lock_exclusive)
         */
        LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point)
        ret
-    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+       /* Fall thru when patched, counting on lock pointer in %rdi  */
+       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, %rdi)
 #endif
        ret
 2:
@@ -784,20 +482,23 @@ Entry(lck_rw_lock_exclusive)
  *             Returns FALSE if the lock is not held on return.
  */
 Entry(lck_rw_try_lock_exclusive)
-       LOAD_LCK_RW_REGISTER
 1:
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and shared count */
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and shared count */
        testl   $(LCK_RW_INTERLOCK), %eax
        jne     2f
        testl   $(RW_TRY_LOCK_EXCLUSIVE_MASK), %eax
-       jne     3f                                      /* can't get it */
+       jne     3f                              /* can't get it */
 
-       movl    %eax, %ecx                              /* original value in %eax for cmpxchgl */
+       movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
        orl     $(LCK_RW_WANT_WRITE), %ecx
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
        jne     2f
 
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* Load thread pointer */
+       incl    TH_RWLOCK_COUNT(%rcx)           /* Increment count on success. */
+       /* There is a 3-instruction window after the cmpxchg in which preemption may not notice rwlock_count */
+
 #if    CONFIG_DTRACE
        movl    $1, %eax
        /*
@@ -807,8 +508,8 @@ Entry(lck_rw_try_lock_exclusive)
         */
        LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point)
        ret
-    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+       /* Fall thru when patched, counting on lock pointer in %rdi  */
+       LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, %rdi)
 #endif
        movl    $1, %eax                        /* return TRUE */
        ret
@@ -841,9 +542,8 @@ Entry(lck_rw_try_lock_exclusive)
  *     set RW_WANT_UPGRADE and get rid of the read count we hold
  */
 Entry(lck_rw_lock_shared_to_exclusive)
-       LOAD_LCK_RW_REGISTER
 1:
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and shared count */
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and shared count */
        testl   $(LCK_RW_INTERLOCK), %eax
        jne     7f
        testl   $(LCK_RW_WANT_UPGRADE), %eax
@@ -853,7 +553,7 @@ Entry(lck_rw_lock_shared_to_exclusive)
        orl     $(LCK_RW_WANT_UPGRADE), %ecx    /* ask for WANT_UPGRADE */
        decl    %ecx                            /* and shed our read count */
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
        jne     7f
                                                /* we now own the WANT_UPGRADE */
        testl   $(LCK_RW_SHARED_MASK), %ecx     /* check to see if all of the readers are drained */
@@ -868,8 +568,8 @@ Entry(lck_rw_lock_shared_to_exclusive)
         */
        LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point)
        ret
-    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+    /* Fall thru when patched, counting on lock pointer in %rdi  */
+    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, %rdi)
 #endif
        movl    $1, %eax                        /* return success */
        ret
@@ -882,19 +582,12 @@ Entry(lck_rw_lock_shared_to_exclusive)
        andl    $(~LCK_W_WAITING), %ecx         /* so clear the wait indicator */
 3:     
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
        jne     7f
 
-#if __i386__
-       pushl   %eax                            /* go check to see if we need to */
-       push    %edx                            /* wakeup anyone */
-       call    EXT(lck_rw_lock_shared_to_exclusive_failure)
-       addl    $8, %esp
-#else
        mov     %eax, %esi                      /* put old flags as second arg */
                                                /* lock is already in %rdi */
        call    EXT(lck_rw_lock_shared_to_exclusive_failure)
-#endif
        ret                                     /* and pass the failure return along */ 
 7:
        PAUSE
@@ -914,9 +607,8 @@ rwl_release_error_str:
  *
  */
 Entry(lck_rw_done)
-       LOAD_LCK_RW_REGISTER
 1:
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and reader count */
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and reader count */
        testl   $(LCK_RW_INTERLOCK), %eax
        jne     7f                              /* wait for interlock to clear */
 
@@ -954,24 +646,18 @@ Entry(lck_rw_done)
        andl    $(~LCK_R_WAITING), %ecx
 6:     
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
        jne     7f
 
-#if __i386__
-       pushl   %eax
-       push    %edx
-       call    EXT(lck_rw_done_gen)
-       addl    $8, %esp
-#else
        mov     %eax,%esi       /* old flags in %rsi */
                                /* lock is in %rdi already */
        call    EXT(lck_rw_done_gen)    
-#endif
        ret
 7:
        PAUSE
        jmp     1b
 8:
+       ALIGN_STACK()
        LOAD_STRING_ARG0(rwl_release_error_str)
        CALL_PANIC()
        
@@ -982,9 +668,8 @@ Entry(lck_rw_done)
  *
  */
 Entry(lck_rw_lock_exclusive_to_shared)
-       LOAD_LCK_RW_REGISTER
 1:
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and reader count */
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and reader count */
        testl   $(LCK_RW_INTERLOCK), %eax
        jne     6f                              /* wait for interlock to clear */
 
@@ -1013,18 +698,11 @@ Entry(lck_rw_lock_exclusive_to_shared)
        andl    $(~LCK_R_WAITING), %ecx
 5:     
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
        jne     6f
 
-#if __i386__
-       pushl   %eax
-       push    %edx
-       call    EXT(lck_rw_lock_exclusive_to_shared_gen)
-       addl    $8, %esp
-#else
        mov     %eax,%esi
        call    EXT(lck_rw_lock_exclusive_to_shared_gen)
-#endif
        ret
 6:
        PAUSE
@@ -1037,9 +715,8 @@ Entry(lck_rw_lock_exclusive_to_shared)
  *
  */
 Entry(lck_rw_grab_want)
-       LOAD_LCK_RW_REGISTER
 1:
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and reader count */
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and reader count */
        testl   $(LCK_RW_INTERLOCK), %eax
        jne     3f                              /* wait for interlock to clear */
        testl   $(LCK_RW_WANT_WRITE), %eax      /* want_write has been grabbed by someone else */
@@ -1048,7 +725,7 @@ Entry(lck_rw_grab_want)
        movl    %eax, %ecx                      /* original value in %eax for cmpxchgl */
        orl     $(LCK_RW_WANT_WRITE), %ecx
        lock
-       cmpxchgl %ecx, (LCK_RW_REGISTER)                        /* Attempt atomic exchange */
+       cmpxchgl %ecx, (%rdi)                   /* Attempt atomic exchange */
        jne     2f
                                                /* we now own want_write */
        movl    $1, %eax                        /* return success */
@@ -1067,8 +744,7 @@ Entry(lck_rw_grab_want)
  *
  */
 Entry(lck_rw_held_read_or_upgrade)
-       LOAD_LCK_RW_REGISTER
-       LOAD_LCK_RW_FLAGS_REGISTER              /* Load state bitfield, interlock and reader count */
+       mov     (%rdi), %eax            /* Load state bitfield, interlock and reader count */
        andl    $(RW_LOCK_SHARED_OR_UPGRADE_MASK), %eax /* keep only the bits selected by the mask */
        ret                             /* result in %eax: nonzero iff a masked bit was set */
 
@@ -1112,165 +788,42 @@ Entry(lck_rw_held_read_or_upgrade)
 #define M_PTR          MUTEX_PTR
 #define M_STATE                MUTEX_STATE     
        
-#if defined(__i386__)
-
-#define LMTX_ARG0      B_ARG0
-#define LMTX_ARG1      B_ARG1
-#define        LMTX_REG        %edx
-#define LMTX_A_REG     %eax
-#define LMTX_A_REG32   %eax
-#define LMTX_C_REG     %ecx
-#define LMTX_C_REG32   %ecx
-#define LMTX_D_REG     %edx
-#define LMTX_RET_REG   %eax
-#define LMTX_LGROUP_REG        %esi
-#define LMTX_SSTATE_REG        %edi    
-#define        LOAD_LMTX_REG(arg)      mov arg, LMTX_REG
-#define LOAD_REG_ARG0(reg)     push reg
-#define LOAD_REG_ARG1(reg)     push reg
-#define LMTX_CHK_EXTENDED      cmp LMTX_REG, LMTX_ARG0
-#define LMTX_ASSERT_OWNED      cmpl $(MUTEX_ASSERT_OWNED), LMTX_ARG1
-
-#define LMTX_ENTER_EXTENDED                                    \
-       mov     M_PTR(LMTX_REG), LMTX_REG               ;       \
-       push    LMTX_LGROUP_REG                         ;       \
-       push    LMTX_SSTATE_REG                         ;       \
-       xor     LMTX_SSTATE_REG, LMTX_SSTATE_REG        ;       \
-       mov     MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG    ;       \
-       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       addl    $1, GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG)  ;       \
-       jnc     11f                                     ;       \
-       incl    GRP_MTX_STAT_UTIL+4(LMTX_LGROUP_REG)    ;       \
-11:
-
-#define LMTX_EXIT_EXTENDED             \
-       pop     LMTX_SSTATE_REG ;       \
-       pop     LMTX_LGROUP_REG
-
-
-#define        LMTX_CHK_EXTENDED_EXIT                  \
-       cmp     LMTX_REG, LMTX_ARG0     ;       \
-       je      12f                     ;       \
-       pop     LMTX_SSTATE_REG         ;       \
-       pop     LMTX_LGROUP_REG         ;       \
-12:    
-       
-       
-#if    LOG_FIRST_MISS_ALONE
-#define LMTX_UPDATE_MISS                                       \
-       test    $1, LMTX_SSTATE_REG                     ;       \
-       jnz     11f                                     ;       \
-       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)      ;       \
-       or      $1, LMTX_SSTATE_REG                     ;       \
-11:
-#else
-#define LMTX_UPDATE_MISS                                       \
-       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)
-#endif
-
-       
-#if    LOG_FIRST_MISS_ALONE
-#define LMTX_UPDATE_WAIT                                       \
-       test    $2, LMTX_SSTATE_REG                     ;       \
-       jnz     11f                                     ;       \
-       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)      ;       \
-       or      $2, LMTX_SSTATE_REG                     ;       \
-11:
-#else
-#define LMTX_UPDATE_WAIT                                       \
-       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)
-#endif
-
-       
-/*
- * Record the "direct wait" statistic, which indicates if a
- * miss proceeded to block directly without spinning--occurs
- * if the owner of the mutex isn't running on another processor
- * at the time of the check.
- */
-#define LMTX_UPDATE_DIRECT_WAIT                                        \
-       LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG)
-
-       
-#define LMTX_CALLEXT1(func_name)       \
-       push    LMTX_REG        ;       \
-       push    LMTX_REG        ;       \
-       call    EXT(func_name)  ;       \
-       add     $4, %esp        ;       \
-       pop     LMTX_REG
-       
-#define LMTX_CALLEXT2(func_name, reg)  \
-       push    LMTX_REG        ;       \
-       push    reg             ;       \
-       push    LMTX_REG        ;       \
-       call    EXT(func_name)  ;       \
-       add     $8, %esp        ;       \
-       pop     LMTX_REG
-       
-#elif defined(__x86_64__)
-
-#define LMTX_ARG0      %rdi
-#define LMTX_ARG1      %rsi
-#define LMTX_REG_ORIG  %rdi
-#define        LMTX_REG        %rdx
-#define LMTX_A_REG     %rax
-#define LMTX_A_REG32   %eax
-#define LMTX_C_REG     %rcx
-#define LMTX_C_REG32   %ecx
-#define LMTX_D_REG     %rdx
-#define LMTX_RET_REG   %rax
-#define LMTX_LGROUP_REG        %r10
-#define LMTX_SSTATE_REG        %r11    
-#define        LOAD_LMTX_REG(arg)      mov %rdi, %rdx
-#define LOAD_REG_ARG0(reg)     mov reg, %rdi
-#define LOAD_REG_ARG1(reg)     mov reg, %rsi
-#define LMTX_CHK_EXTENDED      cmp LMTX_REG, LMTX_REG_ORIG
-#define LMTX_ASSERT_OWNED      cmp $(MUTEX_ASSERT_OWNED), LMTX_ARG1
 
 /*
  * LMTX_ENTER_EXTENDED
  * Follow the indirection of an indirect mutex: %rdx is replaced by
  * M_PTR(%rdx), %r11 is cleared (it holds the flag bits tested by
  * LMTX_UPDATE_MISS / LMTX_UPDATE_WAIT), %r10 receives MUTEX_GRP(%rdx)
  * and that group's GRP_MTX_STAT_UTIL counter is incremented.
  * %rdi is not written, so callers can later tell the two cases apart
  * by comparing %rdx with %rdi.
  */
 #define LMTX_ENTER_EXTENDED                                    \
-       mov     M_PTR(LMTX_REG), LMTX_REG               ;       \
-       xor     LMTX_SSTATE_REG, LMTX_SSTATE_REG        ;       \
-       mov     MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG    ;       \
+       mov     M_PTR(%rdx), %rdx                       ;       \
+       xor     %r11, %r11                              ;       \
+       mov     MUTEX_GRP(%rdx), %r10                   ;       \
        LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incq    GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG)
-
-#define LMTX_EXIT_EXTENDED
-
-#define        LMTX_CHK_EXTENDED_EXIT
+       incq    GRP_MTX_STAT_UTIL(%r10)
 
 
 /*
  * LMTX_UPDATE_MISS
  * Increment GRP_MTX_STAT_MISS in the group addressed by %r10.
  * With LOG_FIRST_MISS_ALONE, bit 0 ($1) of %r11 remembers that the
  * increment already happened, so it is done at most once for as long
  * as that bit stays set; without it, every expansion increments.
  */
 #if    LOG_FIRST_MISS_ALONE
 #define LMTX_UPDATE_MISS                                       \
-       test    $1, LMTX_SSTATE_REG                     ;       \
+       test    $1, %r11                                ;       \
        jnz     11f                                     ;       \
        LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)      ;       \
-       or      $1, LMTX_SSTATE_REG                     ;       \
+       incl    GRP_MTX_STAT_MISS(%r10)                 ;       \
+       or      $1, %r11                                ;       \
 11:
 #else
 #define LMTX_UPDATE_MISS                                       \
        LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_MISS(LMTX_LGROUP_REG)
+       incl    GRP_MTX_STAT_MISS(%r10)
 #endif
        
 
 /*
  * LMTX_UPDATE_WAIT
  * Increment GRP_MTX_STAT_WAIT in the group addressed by %r10.
  * With LOG_FIRST_MISS_ALONE, bit 1 ($2) of %r11 remembers that the
  * increment already happened, so it is done at most once for as long
  * as that bit stays set; without it, every expansion increments.
  */
 #if    LOG_FIRST_MISS_ALONE
 #define LMTX_UPDATE_WAIT                                       \
-       test    $2, LMTX_SSTATE_REG                     ;       \
+       test    $2, %r11                                ;       \
        jnz     11f                                     ;       \
        LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)      ;       \
-       or      $2, LMTX_SSTATE_REG                     ;       \
+       incl    GRP_MTX_STAT_WAIT(%r10)                 ;       \
+       or      $2, %r11                                ;       \
 11:
 #else
 #define LMTX_UPDATE_WAIT                                       \
        LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG)
+       incl    GRP_MTX_STAT_WAIT(%r10)
 #endif
 
 
@@ -1282,47 +835,43 @@ Entry(lck_rw_held_read_or_upgrade)
  */
 #define LMTX_UPDATE_DIRECT_WAIT                                        \
        LOCK_IF_ATOMIC_STAT_UPDATES                     ;       \
-       incl    GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG)
+       incl    GRP_MTX_STAT_DIRECT_WAIT(%r10)  /* %r10: group pointer, as loaded by LMTX_ENTER_EXTENDED */
 
        
 /*
  * LMTX_CALLEXT1(func_name)
  * Call func_name with %rdx (the lock being operated on) as its first
  * argument, preserving %rdi and %rdx across the call.  When %rdx differs
  * from %rdi (the extended-mutex case) %r10 and %r11 are saved and
  * restored as well.  Either two or four quadwords are pushed, so the
  * stack alignment at the call matches the alignment at the point of
  * expansion.  The callee's return value in %rax is left untouched;
  * flags are clobbered by the compares.
  */
 #define LMTX_CALLEXT1(func_name)               \
-       LMTX_CHK_EXTENDED               ;       \
+       cmp     %rdx, %rdi              ;       \
        je      12f                     ;       \
-       push    LMTX_LGROUP_REG         ;       \
-       push    LMTX_SSTATE_REG         ;       \
-12:    push    LMTX_REG_ORIG           ;       \
-       push    LMTX_REG                ;       \
-       mov     LMTX_REG, LMTX_ARG0     ;       \
+       push    %r10                    ;       \
+       push    %r11                    ;       \
+12:    push    %rdi                    ;       \
+       push    %rdx                    ;       \
+       mov     %rdx, %rdi              ;       \
        call    EXT(func_name)          ;       \
-       pop     LMTX_REG                ;       \
-       pop     LMTX_REG_ORIG           ;       \
-       LMTX_CHK_EXTENDED               ;       \
+       pop     %rdx                    ;       \
+       pop     %rdi                    ;       \
+       cmp     %rdx, %rdi              ;       \
        je      12f                     ;       \
-       pop     LMTX_SSTATE_REG         ;       \
-       pop     LMTX_LGROUP_REG         ;       \
+       pop     %r11                    ;       \
+       pop     %r10                    ;       \
 12:
        
 /*
  * LMTX_CALLEXT2(func_name, reg)
  * Two-argument variant of LMTX_CALLEXT1: `reg` is copied to %rsi (second
  * argument) and %rdx to %rdi (first argument) before the call.  %rdi and
  * %rdx are preserved across the call, and %r10/%r11 as well when %rdx
  * differs from %rdi.  `reg` is read before %rdi is overwritten.
  */
 #define LMTX_CALLEXT2(func_name, reg)          \
-       LMTX_CHK_EXTENDED               ;       \
+       cmp     %rdx, %rdi              ;       \
        je      12f                     ;       \
-       push    LMTX_LGROUP_REG         ;       \
-       push    LMTX_SSTATE_REG         ;       \
-12:    push    LMTX_REG_ORIG           ;       \
-       push    LMTX_REG                ;       \
-       mov     reg, LMTX_ARG1          ;       \
-       mov     LMTX_REG, LMTX_ARG0     ;       \
+       push    %r10                    ;       \
+       push    %r11                    ;       \
+12:    push    %rdi                    ;       \
+       push    %rdx                    ;       \
+       mov     reg, %rsi               ;       \
+       mov     %rdx, %rdi              ;       \
        call    EXT(func_name)          ;       \
-       pop     LMTX_REG                ;       \
-       pop     LMTX_REG_ORIG           ;       \
-       LMTX_CHK_EXTENDED               ;       \
+       pop     %rdx                    ;       \
+       pop     %rdi                    ;       \
+       cmp     %rdx, %rdi              ;       \
        je      12f                     ;       \
-       pop     LMTX_SSTATE_REG         ;       \
-       pop     LMTX_LGROUP_REG         ;       \
+       pop     %r11                    ;       \
+       pop     %r10                    ;       \
 12:
-       
-#else
-#error Unsupported architecture
-#endif
 
 
 #define M_WAITERS_MSK          0x0000ffff
@@ -1332,8 +881,6 @@ Entry(lck_rw_held_read_or_upgrade)
 #define M_PROMOTED_MSK         0x04000000
 #define M_SPIN_MSK             0x08000000
 
-       
-
 /*
  *     void lck_mtx_assert(lck_mtx_t* l, unsigned int)
  *     Takes the address of a lock, and an assertion type as parameters.
@@ -1345,37 +892,41 @@ Entry(lck_rw_held_read_or_upgrade)
  */
 
 NONLEAF_ENTRY(lck_mtx_assert)
-        LOAD_LMTX_REG(B_ARG0)                          /* Load lock address */
-       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG       /* Load current thread */
-
-       mov     M_OWNER(LMTX_REG), LMTX_C_REG
-       cmp     $(MUTEX_IND), LMTX_C_REG        /* Is this an indirect mutex? */
-       cmove   M_PTR(LMTX_REG), LMTX_REG       /* If so, take indirection */
+        mov    %rdi, %rdx                      /* Load lock address */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax     /* Load current thread */
 
-       mov     M_OWNER(LMTX_REG), LMTX_C_REG   /* Load owner */
-       LMTX_ASSERT_OWNED
+       mov     M_STATE(%rdx), %ecx
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     0f
+       mov     M_PTR(%rdx), %rdx               /* If so, take indirection */
+0:     
+       mov     M_OWNER(%rdx), %rcx             /* Load owner */
+       cmp     $(MUTEX_ASSERT_OWNED), %rsi
        jne     2f                              /* Assert ownership? */
-       cmp     LMTX_A_REG, LMTX_C_REG          /* Current thread match? */
+       cmp     %rax, %rcx                      /* Current thread match? */
        jne     3f                              /* no, go panic */
-       testl   $(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(LMTX_REG)
+       testl   $(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(%rdx)
        je      3f
 1:                                             /* yes, we own it */
        NONLEAF_RET
 2:
-       cmp     LMTX_A_REG, LMTX_C_REG          /* Current thread match? */
+       cmp     %rax, %rcx                      /* Current thread match? */
        jne     1b                              /* No, return */
-       LOAD_REG_ARG1(LMTX_REG)
+       ALIGN_STACK()
+       LOAD_PTR_ARG1(%rdx)
        LOAD_STRING_ARG0(mutex_assert_owned_str)
        jmp     4f
 3:
-       LOAD_REG_ARG1(LMTX_REG)
+       ALIGN_STACK()
+       LOAD_PTR_ARG1(%rdx)
        LOAD_STRING_ARG0(mutex_assert_not_owned_str)
 4:
        CALL_PANIC()
 
 
 lck_mtx_destroyed:                     /* panic path, reached by `je lck_mtx_destroyed` after a compare against MUTEX_DESTROYED */
-       LOAD_REG_ARG1(LMTX_REG)
+       ALIGN_STACK()
+       LOAD_PTR_ARG1(%rdx)             /* NOTE(review): macro defined elsewhere; presumably passes the lock pointer in %rdx as argument 1 - confirm */
        LOAD_STRING_ARG0(mutex_interlock_destroyed_str)
        CALL_PANIC()
        
@@ -1396,399 +947,465 @@ mutex_interlock_destroyed_str:
  * lck_mtx_try_lock()
  * lck_mtx_unlock()
  * lck_mtx_lock_spin()
+ * lck_mtx_lock_spin_always()
+ * lck_mtx_try_lock_spin()
+ * lck_mtx_try_lock_spin_always()
  * lck_mtx_convert_spin()
  */
+NONLEAF_ENTRY(lck_mtx_lock_spin_always)       /* joins lck_mtx_lock_spin after its CHECK_PREEMPTION_LEVEL() */
+       mov     %rdi, %rdx              /* fetch lock pointer */
+       jmp     Llmls_avoid_check
        
 NONLEAF_ENTRY(lck_mtx_lock_spin)
-       LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
+       mov     %rdi, %rdx              /* fetch lock pointer */
 
        CHECK_PREEMPTION_LEVEL()
+Llmls_avoid_check:
+       mov     M_STATE(%rdx), %ecx     /* snapshot the state word */
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* is the interlock or mutex held */
+       jnz     Llmls_slow
+Llmls_try:                             /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_SPIN_MSK), %ecx     /* desired state: interlock bit plus spin bit */
 
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* is the interlock held */
-       je      Llmls_enter                     /* no - can't be INDIRECT or DESTROYED */
-
-       mov     M_OWNER(LMTX_REG), LMTX_A_REG
-       cmp     $(MUTEX_DESTROYED), LMTX_A_REG  /* check to see if its marked destroyed */
-       je      lck_mtx_destroyed
-       cmp     $(MUTEX_IND), LMTX_A_REG        /* Is this an indirect mutex */
-       jne     Llmls_loop
-
-       LMTX_ENTER_EXTENDED
-
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       test    $(M_SPIN_MSK), LMTX_C_REG
-       je      Llmls_loop
-
-       LMTX_UPDATE_MISS
-Llmls_loop:
-       PAUSE
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* is the interlock held */
-       jne     Llmls_loop
-Llmls_enter:
-       test    $(M_MLOCKED_MSK), LMTX_C_REG    /* is the mutex locked */
-       jne     Llml_contended                  /* fall back to normal mutex handling */
-
-       PUSHF                                   /* save interrupt state */
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       or      $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
-       CLI                                     /* disable interrupts */
+       PREEMPTION_DISABLE
        lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
-       jne     1f
-
-       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
-       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of interlock */
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     Llmls_busy_disabled     /* state changed under us: re-enable preemption and spin */
 
-       PREEMPTION_DISABLE
-       POPF                            /* restore interrupt state */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of interlock */
+#if    MACH_LDEBUG
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:     
+#endif /* MACH_LDEBUG */
 
-       LMTX_CHK_EXTENDED_EXIT
        /* return with the interlock held and preemption disabled */
        leave
 #if    CONFIG_DTRACE
        LOCKSTAT_LABEL(_lck_mtx_lock_spin_lockstat_patch_point)
        ret
-       /* inherit lock pointer in LMTX_REG above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, LMTX_REG)
+       /* inherit lock pointer in %rdx above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, %rdx)
 #endif
        ret
 
-1:     
-       POPF                            /* restore interrupt state */
-       jmp     Llmls_loop
+Llmls_slow:    
+       test    $M_ILOCKED_MSK, %ecx            /* is the interlock held */
+       jz      Llml_contended                  /* no, must have been the mutex */
 
+       cmp     $(MUTEX_DESTROYED), %ecx        /* check to see if its marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex */
+       jne     Llmls_loop                      /* no... must be interlocked */
 
-       
-NONLEAF_ENTRY(lck_mtx_lock)
-       LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
+       LMTX_ENTER_EXTENDED             /* %rdx = M_PTR(%rdx), %r10 = group, %r11 = 0 */
 
-       CHECK_PREEMPTION_LEVEL()
+       mov     M_STATE(%rdx), %ecx     /* state word of the lock reached through the indirection */
+       test    $(M_SPIN_MSK), %ecx
+       jz      Llmls_loop1
 
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* is the interlock held */
-       je      Llml_enter                      /* no - can't be INDIRECT or DESTROYED */
+       LMTX_UPDATE_MISS                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+Llmls_loop:
+       PAUSE
+       mov     M_STATE(%rdx), %ecx
+Llmls_loop1:
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx
+       jz      Llmls_try               /* both bits clear: retry the cmpxchg path */
+       test    $(M_MLOCKED_MSK), %ecx
+       jnz     Llml_contended                  /* mutex owned by someone else, go contend for it */
+       jmp     Llmls_loop              /* only the interlock bit is set: keep spinning */
 
-       mov     M_OWNER(LMTX_REG), LMTX_A_REG
-       cmp     $(MUTEX_DESTROYED), LMTX_A_REG  /* check to see if its marked destroyed */
-       je      lck_mtx_destroyed
-       cmp     $(MUTEX_IND), LMTX_A_REG        /* Is this an indirect mutex? */
-       jne     Llml_loop
+Llmls_busy_disabled:
+       PREEMPTION_ENABLE               /* undo the PREEMPTION_DISABLE issued before the failed cmpxchg */
+       jmp     Llmls_loop
 
-       LMTX_ENTER_EXTENDED
 
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       test    $(M_SPIN_MSK), LMTX_C_REG
-       je      Llml_loop
+       
+NONLEAF_ENTRY(lck_mtx_lock)
+       mov     %rdi, %rdx              /* fetch lock pointer */
 
-       LMTX_UPDATE_MISS
-Llml_loop:
-       PAUSE
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       CHECK_PREEMPTION_LEVEL()
 
-       test    $(M_ILOCKED_MSK), LMTX_C_REG
-       jne     Llml_loop
-Llml_enter:
-       test    $(M_MLOCKED_MSK), LMTX_C_REG
-       jne     Llml_contended                  /* mutex owned by someone else, go contend for it */
+       mov     M_STATE(%rdx), %ecx     /* snapshot the state word */
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* is the interlock or mutex held */
+       jnz     Llml_slow
+Llml_try:                              /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* desired state: interlock and mutex bits taken together */
 
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       or      $(M_MLOCKED_MSK), LMTX_C_REG
+       PREEMPTION_DISABLE
        lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
-       jne     Llml_loop
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     Llml_busy_disabled      /* state changed under us: re-enable preemption and spin */
 
-       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
-       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-Llml_acquired:
-       testl   $(M_WAITERS_MSK), M_STATE(LMTX_REG)
-       je      1f
+       testl   $(M_WAITERS_MSK), M_STATE(%rdx)
+       jz      Llml_finish             /* no waiter bits set: skip the call-out */
 
        LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
-1:     
-       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+
+Llml_finish:
+       andl    $(~M_ILOCKED_MSK), M_STATE(%rdx)        /* clear the interlock bit; the other state bits are kept */
+       PREEMPTION_ENABLE
+       
+       cmp     %rdx, %rdi              /* is this an extended mutex */
        jne     2f
 
        leave
 #if    CONFIG_DTRACE
        LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point)
        ret
-       /* inherit lock pointer in LMTX_REG above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, LMTX_REG)
+       /* inherit lock pointer in %rdx above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, %rdx)
 #endif
        ret
 2:     
-       LMTX_EXIT_EXTENDED
        leave
 #if    CONFIG_DTRACE
        LOCKSTAT_LABEL(_lck_mtx_lock_ext_lockstat_patch_point)
        ret
-       /* inherit lock pointer in LMTX_REG above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, LMTX_REG)
+       /* inherit lock pointer in %rdx above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, %rdx)
 #endif
        ret
+
        
+Llml_slow:
+       test    $M_ILOCKED_MSK, %ecx            /* is the interlock held */
+       jz      Llml_contended                  /* no, must have been the mutex */
+       
+       cmp     $(MUTEX_DESTROYED), %ecx        /* check to see if its marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     Llml_loop                       /* no... must be interlocked */
 
+       LMTX_ENTER_EXTENDED             /* %rdx = M_PTR(%rdx), %r10 = group, %r11 = 0 */
+
+       mov     M_STATE(%rdx), %ecx     /* state word of the lock reached through the indirection */
+       test    $(M_SPIN_MSK), %ecx
+       jz      Llml_loop1
+
+       LMTX_UPDATE_MISS                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+Llml_loop:
+       PAUSE
+       mov     M_STATE(%rdx), %ecx
+Llml_loop1:
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx
+       jz      Llml_try                /* both bits clear: retry the cmpxchg path */
+       test    $(M_MLOCKED_MSK), %ecx
+       jnz     Llml_contended                  /* mutex owned by someone else, go contend for it */
+       jmp     Llml_loop               /* only the interlock bit is set: keep spinning */
+
+Llml_busy_disabled:
+       PREEMPTION_ENABLE               /* undo the PREEMPTION_DISABLE issued before the failed cmpxchg */
+       jmp     Llml_loop
+
+       
 Llml_contended:
-       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+       cmp     %rdx, %rdi              /* is this an extended mutex */
        je      0f
        LMTX_UPDATE_MISS
 0:     
        LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86)
 
-       test    LMTX_RET_REG, LMTX_RET_REG
-       je      Llml_acquired           /* acquired mutex */
-       cmp     $1, LMTX_RET_REG        /* check for direct wait status */
+       test    %rax, %rax
+       jz      Llml_acquired           /* acquired mutex, interlock held and preemption disabled */
+
+       cmp     $1, %rax                /* check for direct wait status */
        je      2f
-       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+       cmp     %rdx, %rdi              /* is this an extended mutex */
        je      2f
        LMTX_UPDATE_DIRECT_WAIT
 2:     
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       test    $(M_ILOCKED_MSK), LMTX_C_REG
-       jne     6f
+       mov     M_STATE(%rdx), %ecx
+       test    $(M_ILOCKED_MSK), %ecx
+       jnz     6f                      /* interlock bit set: pause and re-read */
+
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK), %ecx  /* try to take the interlock */
 
-       PUSHF                                   /* save state of interrupt mask */
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       or      $(M_ILOCKED_MSK), LMTX_C_REG    /* try to take the interlock */
-       CLI                                     /* disable interrupts */
+       PREEMPTION_DISABLE
        lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
        jne     5f
 
-       test    $(M_MLOCKED_MSK), LMTX_C_REG    /* we've got the interlock and */
-       jne     3f
-       or      $(M_MLOCKED_MSK), LMTX_C_REG    /* the mutex is free... grab it directly */
-       and     $(~M_ILOCKED_MSK), LMTX_C_REG
+       test    $(M_MLOCKED_MSK), %ecx  /* we've got the interlock and */
+       jnz     3f
+       or      $(M_MLOCKED_MSK), %ecx  /* the mutex is free... grab it directly */
+       mov     %ecx, M_STATE(%rdx)     /* store the mutex bit; %ecx still carries the interlock bit */
        
-       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
-       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
-       mov     LMTX_C_REG32, M_STATE(LMTX_REG) /* now drop the interlock */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-       POPF                            /* restore interrupt state */
-       jmp     Llml_acquired
-3:                                     /* interlock held, mutex busy */
-       PREEMPTION_DISABLE
-       POPF                            /* restore interrupt state */
+Llml_acquired:
+       testl   $(M_WAITERS_MSK), M_STATE(%rdx)
+       jnz     1f                      /* waiter bits set: always call out */
+       mov     M_OWNER(%rdx), %rax
+       mov     TH_WAS_PROMOTED_ON_WAKEUP(%rax), %eax
+       test    %eax, %eax
+       jz      Llml_finish             /* owner's TH_WAS_PROMOTED_ON_WAKEUP field is zero: skip the call-out */
+1:     
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+       jmp     Llml_finish
 
-       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+3:                                     /* interlock held, mutex busy */
+       cmp     %rdx, %rdi              /* is this an extended mutex */
        je      4f
        LMTX_UPDATE_WAIT
 4:     
        LMTX_CALLEXT1(lck_mtx_lock_wait_x86)
        jmp     Llml_contended
 5:     
-       POPF                            /* restore interrupt state */
+       PREEMPTION_ENABLE               /* cmpxchg failed: undo the PREEMPTION_DISABLE above */
 6:
        PAUSE
        jmp     2b
        
 
-       
-NONLEAF_ENTRY(lck_mtx_try_lock_spin)
-       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
+NONLEAF_ENTRY(lck_mtx_try_lock_spin_always)
+       mov     %rdi, %rdx              /* fetch lock pointer */
+       jmp     Llmts_avoid_check
 
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* is the interlock held */
-       je      Llmts_enter                     /* no - can't be INDIRECT or DESTROYED */
+NONLEAF_ENTRY(lck_mtx_try_lock_spin)
+       mov     %rdi, %rdx              /* fetch lock pointer */
 
-       mov     M_OWNER(LMTX_REG), LMTX_A_REG
-       cmp     $(MUTEX_DESTROYED), LMTX_A_REG  /* check to see if its marked destroyed */
-       je      lck_mtx_destroyed
-       cmp     $(MUTEX_IND), LMTX_A_REG        /* Is this an indirect mutex? */
-       jne     Llmts_enter
+Llmts_avoid_check:
+       mov     M_STATE(%rdx), %ecx
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* is the interlock or mutex held */
+       jnz     Llmts_slow
+Llmts_try:                             /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_SPIN_MSK), %rcx
 
-       LMTX_ENTER_EXTENDED
-Llmts_loop:
-       PAUSE
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-Llmts_enter:
-       test    $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
-       jne     Llmts_fail
-       test    $(M_ILOCKED_MSK), LMTX_C_REG
-       jne     Llmts_loop
-
-       PUSHF                                   /* save interrupt state */
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       or      $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
-       CLI                                     /* disable interrupts */
+       PREEMPTION_DISABLE
        lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
-       jne     3f
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     Llmts_busy_disabled
 
-       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
-       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
-
-       PREEMPTION_DISABLE
-       POPF                            /* restore interrupt state */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       mov     %rax, M_OWNER(%rdx)     /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-       LMTX_CHK_EXTENDED_EXIT
        leave
 
 #if    CONFIG_DTRACE
-       mov     $1, LMTX_RET_REG        /* return success */
+       mov     $1, %rax                        /* return success */
        LOCKSTAT_LABEL(_lck_mtx_try_lock_spin_lockstat_patch_point)
        ret
-       /* inherit lock pointer in LMTX_REG above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, LMTX_REG)
+       /* inherit lock pointer in %rdx above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, %rdx)
 #endif
-       mov     $1, LMTX_RET_REG        /* return success */
+       mov     $1, %rax                        /* return success */
        ret
-3:     
-       POPF                            /* restore interrupt state */
-       jmp     Llmts_loop
 
-       
-       
-NONLEAF_ENTRY(lck_mtx_try_lock)
-       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
+Llmts_slow:
+       test    $(M_ILOCKED_MSK), %ecx  /* is the interlock held */
+       jz      Llmts_fail                      /* no, must be held as a mutex */
 
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* is the interlock held */
-       je      Llmt_enter                      /* no - can't be INDIRECT or DESTROYED */
-
-       mov     M_OWNER(LMTX_REG), LMTX_A_REG
-       cmp     $(MUTEX_DESTROYED), LMTX_A_REG  /* check to see if its marked destroyed */
+       cmp     $(MUTEX_DESTROYED), %ecx        /* check to see if its marked destroyed */
        je      lck_mtx_destroyed
-       cmp     $(MUTEX_IND), LMTX_A_REG        /* Is this an indirect mutex? */
-       jne     Llmt_enter
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     Llmts_loop1
 
        LMTX_ENTER_EXTENDED
-Llmt_loop:
+Llmts_loop:
        PAUSE
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-Llmt_enter:
-       test    $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
-       jne     Llmt_fail
-       test    $(M_ILOCKED_MSK), LMTX_C_REG
-       jne     Llmt_loop
+       mov     M_STATE(%rdx), %ecx
+Llmts_loop1:
+       test    $(M_MLOCKED_MSK | M_SPIN_MSK), %ecx
+       jnz     Llmts_fail
+       test    $(M_ILOCKED_MSK), %ecx
+       jz      Llmts_try
+       jmp     Llmts_loop
+       
+Llmts_busy_disabled:
+       PREEMPTION_ENABLE
+       jmp     Llmts_loop
+
 
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       or      $(M_MLOCKED_MSK), LMTX_C_REG
+       
+NONLEAF_ENTRY(lck_mtx_try_lock)        /* one-shot mutex acquire: returns 1 in %rax on success, 0 on failure */
+       mov     %rdi, %rdx              /* fetch lock pointer (first argument) into %rdx */
+
+       mov     M_STATE(%rdx), %ecx     /* %ecx = snapshot of the lock state word */
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* is the interlock or mutex held */
+       jnz     Llmt_slow               /* yes - sort out why on the slow path */
+Llmt_try:                              /* no - can't be INDIRECT, DESTROYED or locked */
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* new state: interlock and mutex both held */
+       
+       PREEMPTION_DISABLE              /* stays disabled until the interlock is dropped below */
        lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
-       jne     Llmt_loop
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange: store %ecx only if the state still equals %eax */
+       jne     Llmt_busy_disabled      /* state changed under us - re-enable preemption and retry */
 
-       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
-       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax     /* %rax = current thread */
+       mov     %rax, M_OWNER(%rdx)     /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    %rax, %rax              /* skip the statistic when there is no active thread */
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-       LMTX_CHK_EXTENDED_EXIT
+       test    $(M_WAITERS_MSK), %ecx  /* did the state we installed carry any waiters? */
+       jz      0f                      /* no - skip the acquire callout */
 
-       test    $(M_WAITERS_MSK), LMTX_C_REG
-       je      2f
        LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
-2:
-       leave
+0:
+       andl    $(~M_ILOCKED_MSK), M_STATE(%rdx)        /* drop the interlock; M_MLOCKED_MSK stays set */
+       PREEMPTION_ENABLE               /* pairs with PREEMPTION_DISABLE before the cmpxchg */
 
+       leave
 #if    CONFIG_DTRACE
-       mov     $1, LMTX_RET_REG                /* return success */
+       mov     $1, %rax                        /* return success */
        /* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */
        LOCKSTAT_LABEL(_lck_mtx_try_lock_lockstat_patch_point)
        ret
-       /* inherit lock pointer in LMTX_REG from above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, LMTX_REG)
+       /* inherit lock pointer in %rdx from above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, %rdx)
 #endif 
-       mov     $1, LMTX_RET_REG                /* return success */
+       mov     $1, %rax                        /* return success */
        ret
 
+Llmt_slow:
+       test    $(M_ILOCKED_MSK), %ecx  /* is the interlock held */
+       jz      Llmt_fail                       /* no, must be held as a mutex */
+
+       cmp     $(MUTEX_DESTROYED), %ecx        /* check to see if it's marked destroyed */
+       je      lck_mtx_destroyed
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     Llmt_loop               /* no - the interlock is only busy, spin until it clears */
+
+       LMTX_ENTER_EXTENDED             /* NOTE(review): presumably redirects %rdx to the extended lock - confirm in the macro */
+Llmt_loop:
+       PAUSE
+       mov     M_STATE(%rdx), %ecx     /* re-read the lock state */
+Llmt_loop1:
+       test    $(M_MLOCKED_MSK | M_SPIN_MSK), %ecx     /* held as a mutex or in spin mode? */
+       jnz     Llmt_fail               /* yes - the try fails */
+       test    $(M_ILOCKED_MSK), %ecx  /* interlock still busy? */
+       jz      Llmt_try                /* no - attempt the cmpxchg */
+       jmp     Llmt_loop               /* yes - keep spinning */
+
+Llmt_busy_disabled:
+       PREEMPTION_ENABLE               /* cmpxchg lost the race - undo PREEMPTION_DISABLE */
+       jmp     Llmt_loop
+
 
 Llmt_fail:
 Llmts_fail:
-       LMTX_CHK_EXTENDED               /* is this an extended mutex */
+       cmp     %rdx, %rdi                      /* is this an extended mutex (%rdx no longer the caller's pointer) */
        je      0f
        LMTX_UPDATE_MISS
-       LMTX_EXIT_EXTENDED
 0:
-       xor     LMTX_RET_REG, LMTX_RET_REG
+       xor     %rax, %rax                      /* return failure (0) */
        NONLEAF_RET
 
 
 
 NONLEAF_ENTRY(lck_mtx_convert_spin)
-       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
+       mov     %rdi, %rdx                      /* fetch lock pointer (first argument); routine turns a spin-held lock into a full mutex */
 
-       mov     M_OWNER(LMTX_REG), LMTX_A_REG
-       cmp     $(MUTEX_IND), LMTX_A_REG        /* Is this an indirect mutex? */
-       cmove   M_PTR(LMTX_REG), LMTX_REG       /* If so, take indirection */
+       mov     M_STATE(%rdx), %ecx             /* %ecx = current lock state */
+       cmp     $(MUTEX_IND), %ecx              /* Is this an indirect mutex? */
+       jne     0f
+       mov     M_PTR(%rdx), %rdx               /* If so, take indirection */
+       mov     M_STATE(%rdx), %ecx             /* and load the state of the lock it points at */
+0:
+       test    $(M_MLOCKED_MSK), %ecx          /* already owned as a mutex, just return */
+       jnz     2f
+       test    $(M_WAITERS_MSK), %ecx          /* are there any waiters? */
+       jz      1f                              /* no - skip the acquire callout */
 
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       test    $(M_MLOCKED_MSK), LMTX_C_REG    /* already owned as a mutex, just return */
-       jne     2f
+       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+       mov     M_STATE(%rdx), %ecx             /* reload the state after the callout (presumably it may have changed - confirm) */
 1:     
-       and     $(~(M_ILOCKED_MSK | M_SPIN_MSK)), LMTX_C_REG    /* convert from spin version to mutex */
-       or      $(M_MLOCKED_MSK), LMTX_C_REG
-       mov     LMTX_C_REG32, M_STATE(LMTX_REG)         /* since I own the interlock, I don't need an atomic update */
+       and     $(~(M_ILOCKED_MSK | M_SPIN_MSK)), %ecx  /* convert from spin version to mutex */
+       or      $(M_MLOCKED_MSK), %ecx
+       mov     %ecx, M_STATE(%rdx)             /* since I own the interlock, I don't need an atomic update */
 
-       PREEMPTION_ENABLE                       /* only %eax is consumed */
-
-       test    $(M_WAITERS_MSK), LMTX_C_REG    /* are there any waiters? */
-       je      2f
-
-       LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+       PREEMPTION_ENABLE                       /* the store above released the interlock, so preemption can come back */
 2:     
        NONLEAF_RET
 
+       
 
-#if    defined(__i386__)
 NONLEAF_ENTRY(lck_mtx_unlock)
-       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
-       mov     M_OWNER(LMTX_REG), LMTX_A_REG
-       test    LMTX_A_REG, LMTX_A_REG
-       jnz     Llmu_prim
-       leave
-       ret
-NONLEAF_ENTRY(lck_mtx_unlock_darwin10)
-#else
-NONLEAF_ENTRY(lck_mtx_unlock)
-#endif
-       LOAD_LMTX_REG(B_ARG0)                   /* fetch lock pointer */
-       mov     M_OWNER(LMTX_REG), LMTX_A_REG
+       mov     %rdi, %rdx              /* fetch lock pointer (first argument) into %rdx */
+Llmu_entry:
+       mov     M_STATE(%rdx), %ecx     /* %ecx = current lock state */
 Llmu_prim:
-       cmp     $(MUTEX_IND), LMTX_A_REG        /* Is this an indirect mutex? */
+       cmp     $(MUTEX_IND), %ecx      /* Is this an indirect mutex? */
        je      Llmu_ext
-0:     
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       test    $(M_MLOCKED_MSK), LMTX_C_REG    /* check for full mutex */
-       jne     1f
-
-       xor     LMTX_A_REG, LMTX_A_REG
-       mov     LMTX_A_REG, M_OWNER(LMTX_REG)
-       mov     LMTX_C_REG, LMTX_A_REG                  /* keep original state in %ecx for later evaluation */
-       and     $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), LMTX_A_REG
-       mov     LMTX_A_REG32, M_STATE(LMTX_REG)         /* since I own the interlock, I don't need an atomic update */
-
-       PREEMPTION_ENABLE                       /* need to re-enable preemption - clobbers eax */
-       jmp     2f
-1:     
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* have to wait for interlock to clear */
-       jne     7f
 
-       PUSHF                                   /* save interrupt state */
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       and     $(~M_MLOCKED_MSK), LMTX_C_REG   /* drop mutex */
-       or      $(M_ILOCKED_MSK), LMTX_C_REG    /* pick up interlock */
-       CLI
+Llmu_chktype:
+       test    $(M_MLOCKED_MSK), %ecx  /* check for full mutex */
+       jz      Llmu_unlock             /* no - presumably held in spin mode with the interlock already ours */
+Llmu_mutex:
+       test    $(M_ILOCKED_MSK), %rcx  /* have to wait for interlock to clear */
+       jnz     Llmu_busy               /* busy - spin until it clears */
+
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       and     $(~M_MLOCKED_MSK), %ecx /* drop mutex */
+       or      $(M_ILOCKED_MSK), %ecx  /* pick up interlock */
+
+       PREEMPTION_DISABLE              /* re-enabled at 3: once the unlock is complete */
        lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
-       jne     6f                              /* branch on failure to spin loop */
-
-       xor     LMTX_A_REG, LMTX_A_REG
-       mov     LMTX_A_REG, M_OWNER(LMTX_REG)
-       mov     LMTX_C_REG, LMTX_A_REG                  /* keep original state in %ecx for later evaluation */
-       and     $(~(M_ILOCKED_MSK | M_PROMOTED_MSK)), LMTX_A_REG
-       mov     LMTX_A_REG32, M_STATE(LMTX_REG)         /* since I own the interlock, I don't need an atomic update */
-       POPF                                            /* restore interrupt state */
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     Llmu_busy_disabled      /* branch on failure to spin loop */
+
+Llmu_unlock:
+       xor     %rax, %rax
+       mov     %rax, M_OWNER(%rdx)     /* clear the owner field */
+       mov     %rcx, %rax              /* keep original state in %ecx for later evaluation */
+       and     $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), %rax        /* new state: interlock, spin and promoted bits cleared */
+
+       test    $(M_WAITERS_MSK), %eax  /* any waiters recorded? */
+       jz      2f                      /* no - leave the count alone */
+       dec     %eax                    /* decrement waiter count */
 2:     
-       test    $(M_PROMOTED_MSK | M_WAITERS_MSK), LMTX_C_REG
-       je      3f
-       and     $(M_PROMOTED_MSK), LMTX_C_REG
+       mov     %eax, M_STATE(%rdx)     /* since I own the interlock, I don't need an atomic update */
 
-       LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, LMTX_C_REG)
+#if    MACH_LDEBUG
+       /* perform lock statistics after drop to prevent delay */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax
+       test    %rax, %rax
+       jz      1f
+       decl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
+
+       test    $(M_PROMOTED_MSK | M_WAITERS_MSK), %ecx /* check the pre-store state still held in %ecx */
+       jz      3f                      /* neither set - skip the wakeup callout */
+
+       LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, %rcx)
 3:     
-       LMTX_CHK_EXTENDED
+       PREEMPTION_ENABLE               /* unlock complete - drop the preemption disable held with the interlock */
+
+       cmp     %rdx, %rdi              /* %rdx differs from the caller's pointer when Llmu_ext followed an indirection */
        jne     4f
 
        leave
@@ -1796,8 +1413,8 @@ Llmu_prim:
        /* Dtrace: LS_LCK_MTX_UNLOCK_RELEASE */
        LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point)
        ret
-       /* inherit lock pointer in LMTX_REG from above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, LMTX_REG)
+       /* inherit lock pointer in %rdx from above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, %rdx)
 #endif
        ret
 4:     
@@ -1806,483 +1423,137 @@ Llmu_prim:
        /* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */
        LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point)
        ret
-       /* inherit lock pointer in LMTX_REG from above */
-       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, LMTX_REG)
+       /* inherit lock pointer in %rdx from above */
+       LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, %rdx)
 #endif
        ret
-6:
-       POPF                            /* restore interrupt state */
-7:
-       PAUSE
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-       jmp     1b
-Llmu_ext:
-       mov     M_PTR(LMTX_REG), LMTX_REG
-       mov     M_OWNER(LMTX_REG), LMTX_A_REG
-       mov     %gs:CPU_ACTIVE_THREAD, LMTX_C_REG
-       CHECK_UNLOCK(LMTX_C_REG, LMTX_A_REG)
-       jmp 0b
 
 
-LEAF_ENTRY(lck_mtx_lock_decr_waiter)
-       LOAD_LMTX_REG(L_ARG0)                   /* fetch lock pointer - no indirection here */
-1:     
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+Llmu_busy_disabled:
+       PREEMPTION_ENABLE               /* cmpxchg lost the race - undo PREEMPTION_DISABLE */
+Llmu_busy:
+       PAUSE
+       mov     M_STATE(%rdx), %ecx     /* re-read the state and try again */
+       jmp     Llmu_mutex
 
-       test    $(M_WAITERS_MSK), LMTX_C_REG
-       je      2f
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* have to wait for interlock to clear */
-       jne     3f
+Llmu_ext:
+       mov     M_PTR(%rdx), %rdx       /* indirect mutex: switch %rdx to the lock it points at */
+       mov     M_OWNER(%rdx), %rax     /* %rax = recorded owner */
+       mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* %rcx = current thread */
+       CHECK_UNLOCK(%rcx, %rax)
+       mov     M_STATE(%rdx), %ecx     /* load that lock's state (overwrites the thread pointer in %rcx) */
+       jmp     Llmu_chktype
 
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       dec     LMTX_C_REG                      /* decrement waiter count */
-       lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
-       jne     3f                              /* branch on failure to spin loop */
 
-       mov     $1, LMTX_RET_REG
-       LEAF_RET
-2:     
-       xor     LMTX_RET_REG, LMTX_RET_REG
-       LEAF_RET
-3:     
-       PAUSE
-       jmp     1b
        
+LEAF_ENTRY(lck_mtx_ilk_try_lock)       /* one attempt at the interlock: %rax = 1 if taken (preemption left disabled), 0 if not */
+       mov     %rdi, %rdx              /* fetch lock pointer - no indirection here */
 
-       
-LEAF_ENTRY(lck_mtx_lock_get_pri)
-       LOAD_LMTX_REG(L_ARG0)                   /* fetch lock pointer - no indirection here */
-1:     
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       mov     M_STATE(%rdx), %ecx     /* %ecx = current lock state */
 
-       test    $(M_WAITERS_MSK), LMTX_C_REG
-       jne     2f
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* have to wait for interlock to clear */
-       jne     3f
+       test    $(M_ILOCKED_MSK), %ecx  /* can't have the interlock yet */
+       jnz     3f                      /* already held - fail without touching the preemption level */
+
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK), %ecx  /* new state: same bits plus the interlock */
 
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       and     $(~M_PRIORITY_MSK), LMTX_C_REG  /* no waiters, reset mutex priority to 0 */
+       PREEMPTION_DISABLE              /* disabled before the cmpxchg; stays disabled on success */
        lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
-       jne     3f                              /* branch on failure to spin loop */
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     2f                      /* return failure after re-enabling preemption */
 
-       xor     LMTX_RET_REG, LMTX_RET_REG      /* return mutex priority == 0 */
+       mov     $1, %rax                /* return success with preemption disabled */
        LEAF_RET
 2:     
-       mov     LMTX_C_REG, LMTX_RET_REG
-       and     $(M_PRIORITY_MSK), LMTX_RET_REG
-       shr     $16, LMTX_RET_REG               /* return current mutex priority */
-       LEAF_RET
+       PREEMPTION_ENABLE               /* need to re-enable preemption */
 3:     
-       PAUSE
-       jmp     1b
-       
+       xor     %rax, %rax              /* return failure */
+       LEAF_RET
        
 
-
 LEAF_ENTRY(lck_mtx_ilk_unlock)
-       LOAD_LMTX_REG(L_ARG0)                   /* fetch lock pointer - no indirection here */
+       mov     %rdi, %rdx              /* fetch lock pointer - no indirection here */
 
-       andl    $(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
+       andl    $(~M_ILOCKED_MSK), M_STATE(%rdx)        /* release the interlock: clear only its bit in the state word */
 
-       PREEMPTION_ENABLE                       /* need to re-enable preemption */
+       PREEMPTION_ENABLE               /* need to re-enable preemption */
 
        LEAF_RET
-       
 
        
 LEAF_ENTRY(lck_mtx_lock_grab_mutex)
-       LOAD_LMTX_REG(L_ARG0)                   /* fetch lock pointer - no indirection here */
+       mov     %rdi, %rdx              /* fetch lock pointer - no indirection here */
 
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
+       mov     M_STATE(%rdx), %ecx     /* %ecx = current lock state */
 
-       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG    /* can't have the mutex yet */
-       jne     2f
+       test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* can't have the mutex yet */
+       jnz     3f                      /* fail without touching the preemption level */
+
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx  /* new state: interlock and mutex both held */
 
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       or      $(M_MLOCKED_MSK), LMTX_C_REG
+       PREEMPTION_DISABLE              /* on success we return with preemption still disabled and the interlock held */
        lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange; on mismatch the branch below returns failure */
        jne     2f                              /* branch on failure to spin loop */
 
-       mov     %gs:CPU_ACTIVE_THREAD, LMTX_A_REG
-       mov     LMTX_A_REG, M_OWNER(LMTX_REG)   /* record owner of mutex */
+       mov     %gs:CPU_ACTIVE_THREAD, %rax     /* %rax = current thread */
+       mov     %rax, M_OWNER(%rdx)     /* record owner of mutex */
+#if    MACH_LDEBUG
+       test    %rax, %rax
+       jz      1f
+       incl    TH_MUTEX_COUNT(%rax)    /* lock statistic */
+1:
+#endif /* MACH_LDEBUG */
 
-       mov     $1, LMTX_RET_REG                /* return success */
+       mov     $1, %rax                /* return success */
        LEAF_RET
 2:                                             
-       xor     LMTX_RET_REG, LMTX_RET_REG      /* return failure */
-       LEAF_RET
-       
-
-
-LEAF_ENTRY(lck_mtx_lock_mark_promoted)
-       LOAD_LMTX_REG(L_ARG0)                   /* fetch lock pointer - no indirection here */
-1:     
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-
-       test    $(M_PROMOTED_MSK), LMTX_C_REG
-       jne     3f
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* have to wait for interlock to clear */
-       jne     2f
-
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       or      $(M_PROMOTED_MSK), LMTX_C_REG
-       lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
-       jne     2f                              /* branch on failure to spin loop */
-
-       mov     $1, LMTX_RET_REG
-       LEAF_RET
-2:     
-       PAUSE
-       jmp     1b
+       PREEMPTION_ENABLE               /* cmpxchg failed - undo PREEMPTION_DISABLE, then fall into the failure return (no retry) */
 3:
-       xor     LMTX_RET_REG, LMTX_RET_REG
+       xor     %rax, %rax      /* return failure */
        LEAF_RET
+       
 
 
-       
 LEAF_ENTRY(lck_mtx_lock_mark_destroyed)
-       LOAD_LMTX_REG(L_ARG0)
+       mov     %rdi, %rdx              /* fetch lock pointer (first argument) into %rdx */
 1:
-       mov     M_OWNER(LMTX_REG), LMTX_A_REG
-
-       cmp     $(MUTEX_DESTROYED), LMTX_A_REG  /* check to see if its marked destroyed */
-       je      3f
-       cmp     $(MUTEX_IND), LMTX_A_REG        /* Is this an indirect mutex? */
+       mov     M_STATE(%rdx), %ecx     /* %ecx = current lock state (re-read on every retry) */
+       cmp     $(MUTEX_IND), %ecx      /* Is this an indirect mutex? */
        jne     2f
 
-       movl    $(MUTEX_DESTROYED), M_OWNER(LMTX_REG)   /* convert to destroyed state */
+       movl    $(MUTEX_DESTROYED), M_STATE(%rdx)       /* convert to destroyed state (plain store, interlock not taken on this path) */
        jmp     3f
 2:     
-       mov     M_STATE(LMTX_REG), LMTX_C_REG32
-
-       test    $(M_ILOCKED_MSK), LMTX_C_REG    /* have to wait for interlock to clear */
-       jne     5f
+       test    $(M_ILOCKED_MSK), %rcx  /* have to wait for interlock to clear */
+       jnz     5f                      /* busy - pause and retry from label 1 */
 
-       PUSHF                                   /* save interrupt state */
-       mov     LMTX_C_REG, LMTX_A_REG          /* eax contains snapshot for cmpxchgl */
-       or      $(M_ILOCKED_MSK), LMTX_C_REG
-       CLI
+       PREEMPTION_DISABLE              /* held only while the interlock is taken and the state is rewritten */
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK), %ecx  /* new state: same bits plus the interlock */
        lock
-       cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */
-       jne     4f                              /* branch on failure to spin loop */
-       movl    $(MUTEX_DESTROYED), M_OWNER(LMTX_REG)   /* convert to destroyed state */
-       POPF                                    /* restore interrupt state */
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     4f                      /* branch on failure to spin loop */
+       movl    $(MUTEX_DESTROYED), M_STATE(%rdx)       /* convert to destroyed state */
+       PREEMPTION_ENABLE               /* pairs with PREEMPTION_DISABLE above */
 3:
-       LEAF_RET                                /* return with M_ILOCKED set */
+       LEAF_RET                        /* return with M_ILOCKED set */
 4:
-       POPF                                    /* restore interrupt state */
+       PREEMPTION_ENABLE               /* cmpxchg lost the race - undo PREEMPTION_DISABLE before spinning */
 5:
        PAUSE
        jmp     1b
 
-       
-       
-LEAF_ENTRY(_disable_preemption)
-#if    MACH_RT
-       _DISABLE_PREEMPTION
-#endif /* MACH_RT */
-       LEAF_RET
-
-LEAF_ENTRY(_enable_preemption)
-#if    MACH_RT
-#if    MACH_ASSERT
-       cmpl    $0,%gs:CPU_PREEMPTION_LEVEL
-       jg      1f
-#if __i386__
-       pushl   %gs:CPU_PREEMPTION_LEVEL
-#else
-       movl    %gs:CPU_PREEMPTION_LEVEL,%esi
-#endif
-       LOAD_STRING_ARG0(_enable_preemption_less_than_zero)
-       CALL_PANIC()
-       hlt
-       .cstring
-_enable_preemption_less_than_zero:
-       .asciz  "_enable_preemption: preemption_level(%d)  < 0!"
-       .text
-1:
-#endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION
-#endif /* MACH_RT */
-       LEAF_RET
-
-LEAF_ENTRY(_enable_preemption_no_check)
-#if    MACH_RT
-#if    MACH_ASSERT
-       cmpl    $0,%gs:CPU_PREEMPTION_LEVEL
-       jg      1f
-       LOAD_STRING_ARG0(_enable_preemption_no_check_less_than_zero)
-       CALL_PANIC()
-       hlt
-       .cstring
-_enable_preemption_no_check_less_than_zero:
-       .asciz  "_enable_preemption_no_check: preemption_level <= 0!"
-       .text
-1:
-#endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION_NO_CHECK
-#endif /* MACH_RT */
-       LEAF_RET
-       
-       
-LEAF_ENTRY(_mp_disable_preemption)
-#if    MACH_RT
-       _DISABLE_PREEMPTION
-#endif /* MACH_RT */
-       LEAF_RET
-
-LEAF_ENTRY(_mp_enable_preemption)
-#if    MACH_RT
-#if    MACH_ASSERT
-       cmpl    $0,%gs:CPU_PREEMPTION_LEVEL
-       jg      1f
-#if __i386__
-       pushl   %gs:CPU_PREEMPTION_LEVEL
-#else
-       movl    %gs:CPU_PREEMPTION_LEVEL,%esi
-#endif
-       LOAD_STRING_ARG0(_mp_enable_preemption_less_than_zero)
+LEAF_ENTRY(preemption_underflow_panic) /* reports a preemption-level underflow via panic; control is not expected to come back (hlt follows the call) */
+       FRAME
+       incl    %gs:CPU_PREEMPTION_LEVEL        /* bump the per-cpu level by one - presumably undoing the decrement that underflowed; confirm against PREEMPTION_ENABLE */
+       ALIGN_STACK()
+       LOAD_STRING_ARG0(16f)           /* argument 0 = the message at local label 16 below */
        CALL_PANIC()
        hlt
-       .cstring
-_mp_enable_preemption_less_than_zero:
-       .asciz "_mp_enable_preemption: preemption_level (%d) <= 0!"
+       .data                           /* the message text is emitted into the data section */
+16:    String  "Preemption level underflow, possible cause unlocking an unlocked mutex or spinlock"
+       .text                           /* switch back to the code section */
-1:
-#endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION
-#endif /* MACH_RT */
-       LEAF_RET
-
-LEAF_ENTRY(_mp_enable_preemption_no_check)
-#if    MACH_RT
-#if    MACH_ASSERT
-       cmpl    $0,%gs:CPU_PREEMPTION_LEVEL
-       jg      1f
-       LOAD_STRING_ARG0(_mp_enable_preemption_no_check_less_than_zero)
-       CALL_PANIC()
-       hlt
-       .cstring
-_mp_enable_preemption_no_check_less_than_zero:
-       .asciz "_mp_enable_preemption_no_check: preemption_level <= 0!"
-       .text
-1:
-#endif /* MACH_ASSERT */
-       _ENABLE_PREEMPTION_NO_CHECK
-#endif /* MACH_RT */
-       LEAF_RET
-       
-#if __i386__
-       
-LEAF_ENTRY(i_bit_set)
-       movl    L_ARG0,%edx
-       movl    L_ARG1,%eax
-       lock
-       bts     %edx,(%eax)
-       LEAF_RET
-
-LEAF_ENTRY(i_bit_clear)
-       movl    L_ARG0,%edx
-       movl    L_ARG1,%eax
-       lock
-       btr     %edx,(%eax)
-       LEAF_RET
-
-
-LEAF_ENTRY(bit_lock)
-       movl    L_ARG0,%ecx
-       movl    L_ARG1,%eax
-1:
-       lock
-       bts     %ecx,(%eax)
-       jb      1b
-       LEAF_RET
-
-
-LEAF_ENTRY(bit_lock_try)
-       movl    L_ARG0,%ecx
-       movl    L_ARG1,%eax
-       lock
-       bts     %ecx,(%eax)
-       jb      bit_lock_failed
-       LEAF_RET                /* %eax better not be null ! */
-bit_lock_failed:
-       xorl    %eax,%eax
-       LEAF_RET
-
-LEAF_ENTRY(bit_unlock)
-       movl    L_ARG0,%ecx
-       movl    L_ARG1,%eax
-       lock
-       btr     %ecx,(%eax)
-       LEAF_RET
 
-/*
- * Atomic primitives, prototyped in kern/simple_lock.h
- */
-LEAF_ENTRY(hw_atomic_add)
-       movl    L_ARG0, %ecx            /* Load address of operand */
-       movl    L_ARG1, %eax            /* Load addend */
-       movl    %eax, %edx
-       lock
-       xaddl   %eax, (%ecx)            /* Atomic exchange and add */
-       addl    %edx, %eax              /* Calculate result */
-       LEAF_RET
-
-LEAF_ENTRY(hw_atomic_sub)
-       movl    L_ARG0, %ecx            /* Load address of operand */
-       movl    L_ARG1, %eax            /* Load subtrahend */
-       negl    %eax
-       movl    %eax, %edx
-       lock
-       xaddl   %eax, (%ecx)            /* Atomic exchange and add */
-       addl    %edx, %eax              /* Calculate result */
-       LEAF_RET
-
-LEAF_ENTRY(hw_atomic_or)
-       movl    L_ARG0, %ecx            /* Load address of operand */
-       movl    (%ecx), %eax
-1:
-       movl    L_ARG1, %edx            /* Load mask */
-       orl     %eax, %edx
-       lock
-       cmpxchgl        %edx, (%ecx)    /* Atomic CAS */
-       jne     1b
-       movl    %edx, %eax              /* Result */
-       LEAF_RET
-/*
- * A variant of hw_atomic_or which doesn't return a value.
- * The implementation is thus comparatively more efficient.
- */
-
-LEAF_ENTRY(hw_atomic_or_noret)
-       movl    L_ARG0, %ecx            /* Load address of operand */
-       movl    L_ARG1, %edx            /* Load mask */
-       lock
-       orl     %edx, (%ecx)            /* Atomic OR */
-       LEAF_RET
-
-LEAF_ENTRY(hw_atomic_and)
-       movl    L_ARG0, %ecx            /* Load address of operand */
-       movl    (%ecx), %eax
-1:
-       movl    L_ARG1, %edx            /* Load mask */
-       andl    %eax, %edx
-       lock
-       cmpxchgl        %edx, (%ecx)    /* Atomic CAS */
-       jne     1b
-       movl    %edx, %eax              /* Result */
-       LEAF_RET
-/*
- * A variant of hw_atomic_and which doesn't return a value.
- * The implementation is thus comparatively more efficient.
- */
-
-LEAF_ENTRY(hw_atomic_and_noret)
-       movl    L_ARG0, %ecx            /* Load address of operand */
-       movl    L_ARG1, %edx            /* Load mask */
-       lock
-       andl    %edx, (%ecx)            /* Atomic AND */
-       LEAF_RET
-
-#else /* !__i386__ */
-
-LEAF_ENTRY(i_bit_set)
-       lock
-       bts     %edi,(%rsi)
-       LEAF_RET
-
-LEAF_ENTRY(i_bit_clear)
-       lock
-       btr     %edi,(%rsi)
-       LEAF_RET
-
-
-LEAF_ENTRY(bit_lock)
-1:
-       lock
-       bts     %edi,(%rsi)
-       jb      1b
-       LEAF_RET
-
-
-LEAF_ENTRY(bit_lock_try)
-       lock
-       bts     %edi,(%rsi)
-       jb      bit_lock_failed
-       movl    $1, %eax
-       LEAF_RET
-bit_lock_failed:
-       xorl    %eax,%eax
-       LEAF_RET
-
-LEAF_ENTRY(bit_unlock)
-       lock
-       btr     %edi,(%rsi)
-       LEAF_RET
-
-       
-/*
- * Atomic primitives, prototyped in kern/simple_lock.h
- */
-LEAF_ENTRY(hw_atomic_add)
-       movl    %esi, %eax              /* Load addend */
-       lock
-       xaddl   %eax, (%rdi)            /* Atomic exchange and add */
-       addl    %esi, %eax              /* Calculate result */
-       LEAF_RET
-
-LEAF_ENTRY(hw_atomic_sub)
-       negl    %esi
-       movl    %esi, %eax
-       lock
-       xaddl   %eax, (%rdi)            /* Atomic exchange and add */
-       addl    %esi, %eax              /* Calculate result */
-       LEAF_RET
-
-LEAF_ENTRY(hw_atomic_or)
-       movl    (%rdi), %eax
-1:
-       movl    %esi, %edx              /* Load mask */
-       orl     %eax, %edx
-       lock
-       cmpxchgl        %edx, (%rdi)    /* Atomic CAS */
-       jne     1b
-       movl    %edx, %eax              /* Result */
-       LEAF_RET
-/*
- * A variant of hw_atomic_or which doesn't return a value.
- * The implementation is thus comparatively more efficient.
- */
-
-LEAF_ENTRY(hw_atomic_or_noret)
-       lock
-       orl     %esi, (%rdi)            /* Atomic OR */
-       LEAF_RET
-
-
-LEAF_ENTRY(hw_atomic_and)
-       movl    (%rdi), %eax
-1:
-       movl    %esi, %edx              /* Load mask */
-       andl    %eax, %edx
-       lock
-       cmpxchgl        %edx, (%rdi)    /* Atomic CAS */
-       jne     1b
-       movl    %edx, %eax              /* Result */
-       LEAF_RET
-/*
- * A variant of hw_atomic_and which doesn't return a value.
- * The implementation is thus comparatively more efficient.
- */
-
-LEAF_ENTRY(hw_atomic_and_noret)
-       lock
-       andl    %esi, (%rdi)            /* Atomic OR */
-       LEAF_RET
 
-#endif /* !__i386 __ */