X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/b0d623f7f2ae71ed96e60569f61f9a9a27016e80..4d15aeb193b2c68f1d38666c317f8d3734f5f083:/osfmk/i386/i386_lock.s

diff --git a/osfmk/i386/i386_lock.s b/osfmk/i386/i386_lock.s
index 267b4b0db..f54e040a1 100644
--- a/osfmk/i386/i386_lock.s
+++ b/osfmk/i386/i386_lock.s
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -36,7 +36,6 @@
 */

#include <mach_rt.h>
-#include <mach_kdb.h>
#include <mach_ldebug.h>
#include <i386/asm.h>
#include <i386/eflags.h>
@@ -48,47 +47,7 @@

#define	PAUSE		rep; nop
-
-#define	PUSHF		pushf
-#define	POPF		popf
-#define	CLI		cli
-
-
-/*
- * When performance isn't the only concern, it's
- * nice to build stack frames...
- */
-#define	BUILD_STACK_FRAMES	(GPROF || \
-				((MACH_LDEBUG) && MACH_KDB))
-
-#if	BUILD_STACK_FRAMES
-
-/* Stack-frame-relative: */
-#define	L_PC		B_PC
-#define	L_ARG0		B_ARG0
-#define	L_ARG1		B_ARG1
-
-#define	LEAF_ENTRY(name)	\
-	Entry(name);		\
-	FRAME;			\
-	MCOUNT
-
-#define	LEAF_ENTRY2(n1,n2)	\
-	Entry(n1);		\
-	Entry(n2);		\
-	FRAME;			\
-	MCOUNT
-
-#define	LEAF_RET		\
-	EMARF;			\
-	ret
-
-#else	/* BUILD_STACK_FRAMES */
-
-/* Stack-pointer-relative: */
-#define	L_PC		S_PC
-#define	L_ARG0		S_ARG0
-#define	L_ARG1		S_ARG1
+#include <i386/pal_lock_asm.h>

#define	LEAF_ENTRY(name)	\
	Entry(name)
@@ -100,21 +59,16 @@
#define	LEAF_RET		\
	ret

-#endif	/* BUILD_STACK_FRAMES */
-
-
/* Non-leaf routines always have a stack frame: */

#define	NONLEAF_ENTRY(name)	\
	Entry(name);		\
-	FRAME;			\
-	MCOUNT
+	FRAME

#define	NONLEAF_ENTRY2(n1,n2)	\
	Entry(n1);		\
	Entry(n2);		\
-	FRAME;			\
-	MCOUNT
+	FRAME

#define	NONLEAF_RET		\
	EMARF;			\
	ret

@@ -123,19 +77,16 @@
/* For x86_64, the varargs ABI requires that %al indicate
 * how many SSE registers contain arguments.
In our case, 0 */ -#if __i386__ -#define LOAD_STRING_ARG0(label) pushl $##label ; -#define LOAD_ARG1(x) pushl x ; -#define CALL_PANIC() call EXT(panic) ; -#else +#define ALIGN_STACK() and $0xFFFFFFFFFFFFFFF0, %rsp ; #define LOAD_STRING_ARG0(label) leaq label(%rip), %rdi ; -#define LOAD_ARG1(x) movq x, %rsi ; +#define LOAD_ARG1(x) mov x, %esi ; +#define LOAD_PTR_ARG1(x) mov x, %rsi ; #define CALL_PANIC() xorb %al,%al ; call EXT(panic) ; -#endif #define CHECK_UNLOCK(current, owner) \ cmp current, owner ; \ je 1f ; \ + ALIGN_STACK() ; \ LOAD_STRING_ARG0(2f) ; \ CALL_PANIC() ; \ hlt ; \ @@ -157,6 +108,7 @@ #define CHECK_MUTEX_TYPE() \ cmpl $ MUTEX_TAG,M_TYPE ; \ je 1f ; \ + ALIGN_STACK() ; \ LOAD_STRING_ARG0(2f) ; \ CALL_PANIC() ; \ hlt ; \ @@ -177,7 +129,9 @@ jne 1f ; \ cmpl $0,%gs:CPU_PREEMPTION_LEVEL ; \ je 1f ; \ - LOAD_ARG1(%gs:CPU_PREEMPTION_LEVEL) ; \ + ALIGN_STACK() ; \ + movl %gs:CPU_PREEMPTION_LEVEL, %eax ; \ + LOAD_ARG1(%eax) ; \ LOAD_STRING_ARG0(2f) ; \ CALL_PANIC() ; \ hlt ; \ @@ -192,6 +146,7 @@ #define CHECK_MYLOCK(current, owner) \ cmp current, owner ; \ jne 1f ; \ + ALIGN_STACK() ; \ LOAD_STRING_ARG0(2f) ; \ CALL_PANIC() ; \ hlt ; \ @@ -206,32 +161,45 @@ #define CHECK_MYLOCK(thd) #endif /* MACH_LDEBUG */ - #define PREEMPTION_DISABLE \ - incl %gs:CPU_PREEMPTION_LEVEL - - + incl %gs:CPU_PREEMPTION_LEVEL + +#define PREEMPTION_LEVEL_DEBUG 1 +#if PREEMPTION_LEVEL_DEBUG +#define PREEMPTION_ENABLE \ + decl %gs:CPU_PREEMPTION_LEVEL ; \ + js 17f ; \ + jnz 19f ; \ + testl $AST_URGENT,%gs:CPU_PENDING_AST ; \ + jz 19f ; \ + PUSHF ; \ + testl $EFL_IF, S_PC ; \ + jz 18f ; \ + POPF ; \ + int $(T_PREEMPT) ; \ + jmp 19f ; \ +17: \ + call _preemption_underflow_panic ; \ +18: \ + POPF ; \ +19: +#else #define PREEMPTION_ENABLE \ decl %gs:CPU_PREEMPTION_LEVEL ; \ - jne 9f ; \ + jnz 19f ; \ + testl $AST_URGENT,%gs:CPU_PENDING_AST ; \ + jz 19f ; \ PUSHF ; \ - testl $ EFL_IF,S_PC ; \ - je 8f ; \ - CLI ; \ - movl %gs:CPU_PENDING_AST,%eax ; \ - testl $ AST_URGENT,%eax ; \ - je 8f ; \ - movl %gs:CPU_INTERRUPT_LEVEL,%eax ; \ - testl %eax,%eax ; \ - jne 8f ; \ + testl $EFL_IF, S_PC ; \ + jz 18f ; \ POPF ; \ int $(T_PREEMPT) ; \ - jmp 9f ; \ -8: \ + jmp 19f ; \ +18: \ POPF ; \ -9: +19: +#endif - #if CONFIG_DTRACE @@ -245,39 +213,6 @@ * a "nop" */ -#if defined(__i386__) - -#define LOCKSTAT_LABEL(lab) \ - .data ;\ - .globl lab ;\ - lab: ;\ - .long 9f ;\ - .text ;\ - 9: - -#define LOCKSTAT_RECORD(id, lck) \ - push %ebp ; \ - mov %esp,%ebp ; \ - sub $0x38,%esp /* size of dtrace_probe args */ ; \ - movl _lockstat_probemap + (id * 4),%eax ; \ - test %eax,%eax ; \ - je 9f ; \ - movl $0,36(%esp) ; \ - movl $0,40(%esp) ; \ - movl $0,28(%esp) ; \ - movl $0,32(%esp) ; \ - movl $0,20(%esp) ; \ - movl $0,24(%esp) ; \ - movl $0,12(%esp) ; \ - movl $0,16(%esp) ; \ - movl lck,4(%esp) /* copy lock pointer to arg 1 */ ; \ - movl $0,8(%esp) ; \ - movl %eax,(%esp) ; \ - call *_lockstat_probe ; \ -9: leave - /* ret - left to subsequent code, e.g. return values */ - -#elif defined(__x86_64__) #define LOCKSTAT_LABEL(lab) \ .data ;\ .globl lab ;\ @@ -301,9 +236,7 @@ call *_lockstat_probe(%rip) ; \ 9: leave /* ret - left to subsequent code, e.g. 
return values */ -#else -#error Unsupported architecture -#endif + #endif /* CONFIG_DTRACE */ /* @@ -311,69 +244,16 @@ * register initially, and then either a byte or register-sized * word is loaded/stored to the pointer */ - -#if defined(__i386__) -#define HW_LOCK_REGISTER %edx -#define LOAD_HW_LOCK_REGISTER mov L_ARG0, HW_LOCK_REGISTER -#define HW_LOCK_THREAD_REGISTER %ecx -#define LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER -#define HW_LOCK_MOV_WORD movl -#define HW_LOCK_EXAM_REGISTER %eax -#elif defined(__x86_64__) -#define HW_LOCK_REGISTER %rdi -#define LOAD_HW_LOCK_REGISTER -#define HW_LOCK_THREAD_REGISTER %rcx -#define LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER -#define HW_LOCK_MOV_WORD movq -#define HW_LOCK_EXAM_REGISTER %rax -#else -#error Unsupported architecture -#endif - -/* - * void hw_lock_init(hw_lock_t) - * - * Initialize a hardware lock. - */ -LEAF_ENTRY(hw_lock_init) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER) /* clear the lock */ - LEAF_RET - /* - * void hw_lock_byte_init(uint8_t *) + * void hw_lock_byte_init(volatile uint8_t *) * * Initialize a hardware byte lock. */ LEAF_ENTRY(hw_lock_byte_init) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - movb $0, (HW_LOCK_REGISTER) /* clear the lock */ + movb $0, (%rdi) /* clear the lock */ LEAF_RET -/* - * void hw_lock_lock(hw_lock_t) - * - * Acquire lock, spinning until it becomes available. - * MACH_RT: also return with preemption disabled. - */ -LEAF_ENTRY(hw_lock_lock) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - LOAD_HW_LOCK_THREAD_REGISTER /* get thread pointer */ - - PREEMPTION_DISABLE -1: - mov (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER /* lock locked? */ - jne 3f /* branch if so */ - lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ - jne 3f - movl $1,%eax /* In case this was a timeout call */ - LEAF_RET /* if yes, then nothing left to do */ -3: - PAUSE /* pause for hyper-threading */ - jmp 1b /* try again */ - /* * void hw_lock_byte_lock(uint8_t *lock_byte) * @@ -382,155 +262,19 @@ LEAF_ENTRY(hw_lock_lock) */ LEAF_ENTRY(hw_lock_byte_lock) - LOAD_HW_LOCK_REGISTER /* Load lock pointer */ PREEMPTION_DISABLE movl $1, %ecx /* Set lock value */ 1: - movb (HW_LOCK_REGISTER), %al /* Load byte at address */ + movb (%rdi), %al /* Load byte at address */ testb %al,%al /* lock locked? */ jne 3f /* branch if so */ - lock; cmpxchg %cl,(HW_LOCK_REGISTER) /* attempt atomic compare exchange */ + lock; cmpxchg %cl,(%rdi) /* attempt atomic compare exchange */ jne 3f LEAF_RET /* if yes, then nothing left to do */ 3: PAUSE /* pause for hyper-threading */ jmp 1b /* try again */ -/* - * unsigned int hw_lock_to(hw_lock_t, unsigned int) - * - * Acquire lock, spinning until it becomes available or timeout. - * MACH_RT: also return with preemption disabled. - */ -LEAF_ENTRY(hw_lock_to) -1: - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - LOAD_HW_LOCK_THREAD_REGISTER - - /* - * Attempt to grab the lock immediately - * - fastpath without timeout nonsense. - */ - PREEMPTION_DISABLE - - mov (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER /* lock locked? 
*/ - jne 2f /* branch if so */ - lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ - jne 2f /* branch on failure */ - movl $1,%eax - LEAF_RET - -2: -#define INNER_LOOP_COUNT 1000 - /* - * Failed to get the lock so set the timeout - * and then spin re-checking the lock but pausing - * every so many (INNER_LOOP_COUNT) spins to check for timeout. - */ -#if __i386__ - movl L_ARG1,%ecx /* fetch timeout */ - push %edi - push %ebx - mov %edx,%edi - - lfence - rdtsc /* read cyclecount into %edx:%eax */ - lfence - addl %ecx,%eax /* fetch and timeout */ - adcl $0,%edx /* add carry */ - mov %edx,%ecx - mov %eax,%ebx /* %ecx:%ebx is the timeout expiry */ - mov %edi, %edx /* load lock back into %edx */ -#else - push %r9 - lfence - rdtsc /* read cyclecount into %edx:%eax */ - lfence - shlq $32, %rdx - orq %rdx, %rax /* load 64-bit quantity into %rax */ - addq %rax, %rsi /* %rsi is the timeout expiry */ -#endif - -4: - /* - * The inner-loop spin to look for the lock being freed. - */ -#if __i386__ - mov $(INNER_LOOP_COUNT),%edi -#else - mov $(INNER_LOOP_COUNT),%r9 -#endif -5: - PAUSE /* pause for hyper-threading */ - mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER /* spin checking lock value in cache */ - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER - je 6f /* zero => unlocked, try to grab it */ -#if __i386__ - decl %edi /* decrement inner loop count */ -#else - decq %r9 /* decrement inner loop count */ -#endif - jnz 5b /* time to check for timeout? */ - - /* - * Here after spinning INNER_LOOP_COUNT times, check for timeout - */ -#if __i386__ - mov %edx,%edi /* Save %edx */ - lfence - rdtsc /* cyclecount into %edx:%eax */ - lfence - xchg %edx,%edi /* cyclecount into %edi:%eax */ - cmpl %ecx,%edi /* compare high-order 32-bits */ - jb 4b /* continue spinning if less, or */ - cmpl %ebx,%eax /* compare low-order 32-bits */ - jb 4b /* continue if less, else bail */ - xor %eax,%eax /* with 0 return value */ - pop %ebx - pop %edi -#else - lfence - rdtsc /* cyclecount into %edx:%eax */ - lfence - shlq $32, %rdx - orq %rdx, %rax /* load 64-bit quantity into %rax */ - cmpq %rsi, %rax /* compare to timeout */ - jb 4b /* continue spinning if less, or */ - xor %rax,%rax /* with 0 return value */ - pop %r9 -#endif - LEAF_RET - -6: - /* - * Here to try to grab the lock that now appears to be free - * after contention. - */ - LOAD_HW_LOCK_THREAD_REGISTER - lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ - jne 4b /* no - spin again */ - movl $1,%eax /* yes */ -#if __i386__ - pop %ebx - pop %edi -#else - pop %r9 -#endif - LEAF_RET - -/* - * void hw_lock_unlock(hw_lock_t) - * - * Unconditionally release lock. - * MACH_RT: release preemption level. - */ -LEAF_ENTRY(hw_lock_unlock) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER) /* clear the lock */ - PREEMPTION_ENABLE - LEAF_RET - /* * void hw_lock_byte_unlock(uint8_t *lock_byte) * @@ -539,48 +283,10 @@ LEAF_ENTRY(hw_lock_unlock) */ LEAF_ENTRY(hw_lock_byte_unlock) - LOAD_HW_LOCK_REGISTER /* Load lock pointer */ - movb $0, (HW_LOCK_REGISTER) /* Clear the lock byte */ + movb $0, (%rdi) /* Clear the lock byte */ PREEMPTION_ENABLE LEAF_RET -/* - * unsigned int hw_lock_try(hw_lock_t) - * MACH_RT: returns with preemption disabled on success. 
- */ -LEAF_ENTRY(hw_lock_try) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - LOAD_HW_LOCK_THREAD_REGISTER - PREEMPTION_DISABLE - - mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER - jne 1f - lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ - jne 1f - - movl $1,%eax /* success */ - LEAF_RET - -1: - PREEMPTION_ENABLE /* failure: release preemption... */ - xorl %eax,%eax /* ...and return failure */ - LEAF_RET - -/* - * unsigned int hw_lock_held(hw_lock_t) - * MACH_RT: doesn't change preemption state. - * N.B. Racy, of course. - */ -LEAF_ENTRY(hw_lock_held) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER /* check lock value */ - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER - movl $1,%ecx - cmovne %ecx,%eax /* 0 => unlocked, 1 => locked */ - LEAF_RET - - /* * Reader-writer lock fastpaths. These currently exist for the * shared lock acquire, the exclusive lock acquire, the shared to @@ -606,36 +312,23 @@ LEAF_ENTRY(hw_lock_held) * register and examined */ -#if defined(__i386__) -#define LCK_RW_REGISTER %edx -#define LOAD_LCK_RW_REGISTER mov S_ARG0, LCK_RW_REGISTER -#define LCK_RW_FLAGS_REGISTER %eax -#define LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER -#elif defined(__x86_64__) -#define LCK_RW_REGISTER %rdi -#define LOAD_LCK_RW_REGISTER -#define LCK_RW_FLAGS_REGISTER %eax -#define LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER -#else -#error Unsupported architecture -#endif - #define RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE) /* * void lck_rw_lock_shared(lck_rw_t *) * */ Entry(lck_rw_lock_shared) - LOAD_LCK_RW_REGISTER + mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ + incl TH_RWLOCK_COUNT(%rcx) /* Increment count before atomic CAS */ 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield and interlock */ + mov (%rdi), %eax /* Load state bitfield and interlock */ testl $(RW_LOCK_SHARED_MASK), %eax /* Eligible for fastpath? */ jne 3f movl %eax, %ecx /* original value in %eax for cmpxchgl */ incl %ecx /* Increment reader refcount */ lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f #if CONFIG_DTRACE @@ -646,8 +339,10 @@ Entry(lck_rw_lock_shared) */ LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) + /* + Fall thru when patched, counting on lock pointer in %rdi + */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, %rdi) #endif ret 2: @@ -664,9 +359,8 @@ Entry(lck_rw_lock_shared) * */ Entry(lck_rw_try_lock_shared) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield and interlock */ + mov (%rdi), %eax /* Load state bitfield and interlock */ testl $(LCK_RW_INTERLOCK), %eax jne 2f testl $(RW_TRY_LOCK_SHARED_MASK), %eax @@ -675,9 +369,13 @@ Entry(lck_rw_try_lock_shared) movl %eax, %ecx /* original value in %eax for cmpxchgl */ incl %ecx /* Increment reader refcount */ lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f + mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ + incl TH_RWLOCK_COUNT(%rcx) /* Increment count on success. 
*/ + /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */ + #if CONFIG_DTRACE movl $1, %eax /* @@ -687,8 +385,8 @@ Entry(lck_rw_try_lock_shared) */ LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in %rdi */ + LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, %rdi) #endif movl $1, %eax /* return TRUE */ ret @@ -706,21 +404,20 @@ Entry(lck_rw_try_lock_shared) * */ Entry(lck_rw_grab_shared) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield and interlock */ + mov (%rdi), %eax /* Load state bitfield and interlock */ testl $(LCK_RW_INTERLOCK), %eax jne 5f testl $(RW_LOCK_EXCLUSIVE_HELD), %eax jne 3f 2: - movl %eax, %ecx /* original value in %eax for cmpxchgl */ - incl %ecx /* Increment reader refcount */ + movl %eax, %ecx /* original value in %eax for cmpxchgl */ + incl %ecx /* Increment reader refcount */ lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 4f - movl $1, %eax /* return success */ + movl $1, %eax /* return success */ ret 3: testl $(LCK_RW_SHARED_MASK), %eax @@ -728,7 +425,7 @@ Entry(lck_rw_grab_shared) testl $(LCK_RW_PRIV_EXCL), %eax je 2b 4: - xorl %eax, %eax /* return failure */ + xorl %eax, %eax /* return failure */ ret 5: PAUSE @@ -743,16 +440,17 @@ Entry(lck_rw_grab_shared) * */ Entry(lck_rw_lock_exclusive) - LOAD_LCK_RW_REGISTER + mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ + incl TH_RWLOCK_COUNT(%rcx) /* Increment count before atomic CAS */ 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ + mov (%rdi), %eax /* Load state bitfield, interlock and shared count */ testl $(RW_LOCK_EXCLUSIVE_MASK), %eax /* Eligible for fastpath? */ jne 3f /* no, go slow */ movl %eax, %ecx /* original value in %eax for cmpxchgl */ orl $(LCK_RW_WANT_WRITE), %ecx lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f #if CONFIG_DTRACE @@ -763,8 +461,8 @@ Entry(lck_rw_lock_exclusive) */ LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in %rdi */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, %rdi) #endif ret 2: @@ -784,20 +482,23 @@ Entry(lck_rw_lock_exclusive) * Returns FALSE if the lock is not held on return. */ Entry(lck_rw_try_lock_exclusive) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ + mov (%rdi), %eax /* Load state bitfield, interlock and shared count */ testl $(LCK_RW_INTERLOCK), %eax jne 2f testl $(RW_TRY_LOCK_EXCLUSIVE_MASK), %eax - jne 3f /* can't get it */ + jne 3f /* can't get it */ - movl %eax, %ecx /* original value in %eax for cmpxchgl */ + movl %eax, %ecx /* original value in %eax for cmpxchgl */ orl $(LCK_RW_WANT_WRITE), %ecx lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f + mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ + incl TH_RWLOCK_COUNT(%rcx) /* Increment count on success. 
*/ + /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */ + #if CONFIG_DTRACE movl $1, %eax /* @@ -807,8 +508,8 @@ Entry(lck_rw_try_lock_exclusive) */ LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in %rdi */ + LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, %rdi) #endif movl $1, %eax /* return TRUE */ ret @@ -841,9 +542,8 @@ Entry(lck_rw_try_lock_exclusive) * set RW_WANT_UPGRADE and get rid of the read count we hold */ Entry(lck_rw_lock_shared_to_exclusive) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ + mov (%rdi), %eax /* Load state bitfield, interlock and shared count */ testl $(LCK_RW_INTERLOCK), %eax jne 7f testl $(LCK_RW_WANT_UPGRADE), %eax @@ -853,7 +553,7 @@ Entry(lck_rw_lock_shared_to_exclusive) orl $(LCK_RW_WANT_UPGRADE), %ecx /* ask for WANT_UPGRADE */ decl %ecx /* and shed our read count */ lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 7f /* we now own the WANT_UPGRADE */ testl $(LCK_RW_SHARED_MASK), %ecx /* check to see if all of the readers are drained */ @@ -868,8 +568,8 @@ Entry(lck_rw_lock_shared_to_exclusive) */ LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in %rdi */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, %rdi) #endif movl $1, %eax /* return success */ ret @@ -882,19 +582,12 @@ Entry(lck_rw_lock_shared_to_exclusive) andl $(~LCK_W_WAITING), %ecx /* so clear the wait indicator */ 3: lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 7f -#if __i386__ - pushl %eax /* go check to see if we need to */ - push %edx /* wakeup anyone */ - call EXT(lck_rw_lock_shared_to_exclusive_failure) - addl $8, %esp -#else mov %eax, %esi /* put old flags as second arg */ /* lock is alread in %rdi */ call EXT(lck_rw_lock_shared_to_exclusive_failure) -#endif ret /* and pass the failure return along */ 7: PAUSE @@ -914,9 +607,8 @@ rwl_release_error_str: * */ Entry(lck_rw_done) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + mov (%rdi), %eax /* Load state bitfield, interlock and reader count */ testl $(LCK_RW_INTERLOCK), %eax jne 7f /* wait for interlock to clear */ @@ -954,24 +646,18 @@ Entry(lck_rw_done) andl $(~LCK_R_WAITING), %ecx 6: lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 7f -#if __i386__ - pushl %eax - push %edx - call EXT(lck_rw_done_gen) - addl $8, %esp -#else mov %eax,%esi /* old flags in %rsi */ /* lock is in %rdi already */ call EXT(lck_rw_done_gen) -#endif ret 7: PAUSE jmp 1b 8: + ALIGN_STACK() LOAD_STRING_ARG0(rwl_release_error_str) CALL_PANIC() @@ -982,9 +668,8 @@ Entry(lck_rw_done) * */ Entry(lck_rw_lock_exclusive_to_shared) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + mov (%rdi), %eax /* Load state bitfield, interlock and reader count */ testl 
$(LCK_RW_INTERLOCK), %eax jne 6f /* wait for interlock to clear */ @@ -1013,18 +698,11 @@ Entry(lck_rw_lock_exclusive_to_shared) andl $(~LCK_R_WAITING), %ecx 5: lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 6f -#if __i386__ - pushl %eax - push %edx - call EXT(lck_rw_lock_exclusive_to_shared_gen) - addl $8, %esp -#else mov %eax,%esi call EXT(lck_rw_lock_exclusive_to_shared_gen) -#endif ret 6: PAUSE @@ -1037,9 +715,8 @@ Entry(lck_rw_lock_exclusive_to_shared) * */ Entry(lck_rw_grab_want) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + mov (%rdi), %eax /* Load state bitfield, interlock and reader count */ testl $(LCK_RW_INTERLOCK), %eax jne 3f /* wait for interlock to clear */ testl $(LCK_RW_WANT_WRITE), %eax /* want_write has been grabbed by someone else */ @@ -1048,7 +725,7 @@ Entry(lck_rw_grab_want) movl %eax, %ecx /* original value in %eax for cmpxchgl */ orl $(LCK_RW_WANT_WRITE), %ecx lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f /* we now own want_write */ movl $1, %eax /* return success */ @@ -1067,8 +744,7 @@ Entry(lck_rw_grab_want) * */ Entry(lck_rw_held_read_or_upgrade) - LOAD_LCK_RW_REGISTER - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + mov (%rdi), %eax andl $(RW_LOCK_SHARED_OR_UPGRADE_MASK), %eax ret @@ -1112,165 +788,42 @@ Entry(lck_rw_held_read_or_upgrade) #define M_PTR MUTEX_PTR #define M_STATE MUTEX_STATE -#if defined(__i386__) - -#define LMTX_ARG0 B_ARG0 -#define LMTX_ARG1 B_ARG1 -#define LMTX_REG %edx -#define LMTX_A_REG %eax -#define LMTX_A_REG32 %eax -#define LMTX_C_REG %ecx -#define LMTX_C_REG32 %ecx -#define LMTX_D_REG %edx -#define LMTX_RET_REG %eax -#define LMTX_LGROUP_REG %esi -#define LMTX_SSTATE_REG %edi -#define LOAD_LMTX_REG(arg) mov arg, LMTX_REG -#define LOAD_REG_ARG0(reg) push reg -#define LOAD_REG_ARG1(reg) push reg -#define LMTX_CHK_EXTENDED cmp LMTX_REG, LMTX_ARG0 -#define LMTX_ASSERT_OWNED cmpl $(MUTEX_ASSERT_OWNED), LMTX_ARG1 - -#define LMTX_ENTER_EXTENDED \ - mov M_PTR(LMTX_REG), LMTX_REG ; \ - push LMTX_LGROUP_REG ; \ - push LMTX_SSTATE_REG ; \ - xor LMTX_SSTATE_REG, LMTX_SSTATE_REG ; \ - mov MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG ; \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - addl $1, GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG) ; \ - jnc 11f ; \ - incl GRP_MTX_STAT_UTIL+4(LMTX_LGROUP_REG) ; \ -11: - -#define LMTX_EXIT_EXTENDED \ - pop LMTX_SSTATE_REG ; \ - pop LMTX_LGROUP_REG - - -#define LMTX_CHK_EXTENDED_EXIT \ - cmp LMTX_REG, LMTX_ARG0 ; \ - je 12f ; \ - pop LMTX_SSTATE_REG ; \ - pop LMTX_LGROUP_REG ; \ -12: - - -#if LOG_FIRST_MISS_ALONE -#define LMTX_UPDATE_MISS \ - test $1, LMTX_SSTATE_REG ; \ - jnz 11f ; \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) ; \ - or $1, LMTX_SSTATE_REG ; \ -11: -#else -#define LMTX_UPDATE_MISS \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) -#endif - - -#if LOG_FIRST_MISS_ALONE -#define LMTX_UPDATE_WAIT \ - test $2, LMTX_SSTATE_REG ; \ - jnz 11f ; \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) ; \ - or $2, LMTX_SSTATE_REG ; \ -11: -#else -#define LMTX_UPDATE_WAIT \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) -#endif - - -/* - * Record the "direct wait" statistic, which indicates if a - * miss proceeded to block directly without spinning--occurs - * if 
the owner of the mutex isn't running on another processor - * at the time of the check. - */ -#define LMTX_UPDATE_DIRECT_WAIT \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG) - - -#define LMTX_CALLEXT1(func_name) \ - push LMTX_REG ; \ - push LMTX_REG ; \ - call EXT(func_name) ; \ - add $4, %esp ; \ - pop LMTX_REG - -#define LMTX_CALLEXT2(func_name, reg) \ - push LMTX_REG ; \ - push reg ; \ - push LMTX_REG ; \ - call EXT(func_name) ; \ - add $8, %esp ; \ - pop LMTX_REG - -#elif defined(__x86_64__) - -#define LMTX_ARG0 %rdi -#define LMTX_ARG1 %rsi -#define LMTX_REG_ORIG %rdi -#define LMTX_REG %rdx -#define LMTX_A_REG %rax -#define LMTX_A_REG32 %eax -#define LMTX_C_REG %rcx -#define LMTX_C_REG32 %ecx -#define LMTX_D_REG %rdx -#define LMTX_RET_REG %rax -#define LMTX_LGROUP_REG %r10 -#define LMTX_SSTATE_REG %r11 -#define LOAD_LMTX_REG(arg) mov %rdi, %rdx -#define LOAD_REG_ARG0(reg) mov reg, %rdi -#define LOAD_REG_ARG1(reg) mov reg, %rsi -#define LMTX_CHK_EXTENDED cmp LMTX_REG, LMTX_REG_ORIG -#define LMTX_ASSERT_OWNED cmp $(MUTEX_ASSERT_OWNED), LMTX_ARG1 #define LMTX_ENTER_EXTENDED \ - mov M_PTR(LMTX_REG), LMTX_REG ; \ - xor LMTX_SSTATE_REG, LMTX_SSTATE_REG ; \ - mov MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG ; \ + mov M_PTR(%rdx), %rdx ; \ + xor %r11, %r11 ; \ + mov MUTEX_GRP(%rdx), %r10 ; \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incq GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG) - -#define LMTX_EXIT_EXTENDED - -#define LMTX_CHK_EXTENDED_EXIT + incq GRP_MTX_STAT_UTIL(%r10) #if LOG_FIRST_MISS_ALONE #define LMTX_UPDATE_MISS \ - test $1, LMTX_SSTATE_REG ; \ + test $1, %r11 ; \ jnz 11f ; \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) ; \ - or $1, LMTX_SSTATE_REG ; \ + incl GRP_MTX_STAT_MISS(%r10) ; \ + or $1, %r11 ; \ 11: #else #define LMTX_UPDATE_MISS \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) + incl GRP_MTX_STAT_MISS(%r10) #endif #if LOG_FIRST_MISS_ALONE #define LMTX_UPDATE_WAIT \ - test $2, LMTX_SSTATE_REG ; \ + test $2, %r11 ; \ jnz 11f ; \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) ; \ - or $2, LMTX_SSTATE_REG ; \ + incl GRP_MTX_STAT_WAIT(%r10) ; \ + or $2, %r11 ; \ 11: #else #define LMTX_UPDATE_WAIT \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) + incl GRP_MTX_STAT_WAIT(%r10) #endif @@ -1282,47 +835,43 @@ Entry(lck_rw_held_read_or_upgrade) */ #define LMTX_UPDATE_DIRECT_WAIT \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG) + incl GRP_MTX_STAT_DIRECT_WAIT(%r10) #define LMTX_CALLEXT1(func_name) \ - LMTX_CHK_EXTENDED ; \ + cmp %rdx, %rdi ; \ je 12f ; \ - push LMTX_LGROUP_REG ; \ - push LMTX_SSTATE_REG ; \ -12: push LMTX_REG_ORIG ; \ - push LMTX_REG ; \ - mov LMTX_REG, LMTX_ARG0 ; \ + push %r10 ; \ + push %r11 ; \ +12: push %rdi ; \ + push %rdx ; \ + mov %rdx, %rdi ; \ call EXT(func_name) ; \ - pop LMTX_REG ; \ - pop LMTX_REG_ORIG ; \ - LMTX_CHK_EXTENDED ; \ + pop %rdx ; \ + pop %rdi ; \ + cmp %rdx, %rdi ; \ je 12f ; \ - pop LMTX_SSTATE_REG ; \ - pop LMTX_LGROUP_REG ; \ + pop %r11 ; \ + pop %r10 ; \ 12: #define LMTX_CALLEXT2(func_name, reg) \ - LMTX_CHK_EXTENDED ; \ + cmp %rdx, %rdi ; \ je 12f ; \ - push LMTX_LGROUP_REG ; \ - push LMTX_SSTATE_REG ; \ -12: push LMTX_REG_ORIG ; \ - push LMTX_REG ; \ - mov reg, LMTX_ARG1 ; \ - mov LMTX_REG, LMTX_ARG0 ; \ + push %r10 ; \ + push %r11 ; \ +12: push %rdi ; \ + push %rdx ; \ + mov reg, %rsi ; \ + mov %rdx, %rdi ; \ call EXT(func_name) ; \ - pop LMTX_REG ; \ - pop LMTX_REG_ORIG ; \ - 
LMTX_CHK_EXTENDED ; \ + pop %rdx ; \ + pop %rdi ; \ + cmp %rdx, %rdi ; \ je 12f ; \ - pop LMTX_SSTATE_REG ; \ - pop LMTX_LGROUP_REG ; \ + pop %r11 ; \ + pop %r10 ; \ 12: - -#else -#error Unsupported architecture -#endif #define M_WAITERS_MSK 0x0000ffff @@ -1332,8 +881,6 @@ Entry(lck_rw_held_read_or_upgrade) #define M_PROMOTED_MSK 0x04000000 #define M_SPIN_MSK 0x08000000 - - /* * void lck_mtx_assert(lck_mtx_t* l, unsigned int) * Takes the address of a lock, and an assertion type as parameters. @@ -1345,37 +892,41 @@ Entry(lck_rw_held_read_or_upgrade) */ NONLEAF_ENTRY(lck_mtx_assert) - LOAD_LMTX_REG(B_ARG0) /* Load lock address */ - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG /* Load current thread */ - - mov M_OWNER(LMTX_REG), LMTX_C_REG - cmp $(MUTEX_IND), LMTX_C_REG /* Is this an indirect mutex? */ - cmove M_PTR(LMTX_REG), LMTX_REG /* If so, take indirection */ + mov %rdi, %rdx /* Load lock address */ + mov %gs:CPU_ACTIVE_THREAD, %rax /* Load current thread */ - mov M_OWNER(LMTX_REG), LMTX_C_REG /* Load owner */ - LMTX_ASSERT_OWNED + mov M_STATE(%rdx), %ecx + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? */ + jne 0f + mov M_PTR(%rdx), %rdx /* If so, take indirection */ +0: + mov M_OWNER(%rdx), %rcx /* Load owner */ + cmp $(MUTEX_ASSERT_OWNED), %rsi jne 2f /* Assert ownership? */ - cmp LMTX_A_REG, LMTX_C_REG /* Current thread match? */ + cmp %rax, %rcx /* Current thread match? */ jne 3f /* no, go panic */ - testl $(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(LMTX_REG) + testl $(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(%rdx) je 3f 1: /* yes, we own it */ NONLEAF_RET 2: - cmp LMTX_A_REG, LMTX_C_REG /* Current thread match? */ + cmp %rax, %rcx /* Current thread match? */ jne 1b /* No, return */ - LOAD_REG_ARG1(LMTX_REG) + ALIGN_STACK() + LOAD_PTR_ARG1(%rdx) LOAD_STRING_ARG0(mutex_assert_owned_str) jmp 4f 3: - LOAD_REG_ARG1(LMTX_REG) + ALIGN_STACK() + LOAD_PTR_ARG1(%rdx) LOAD_STRING_ARG0(mutex_assert_not_owned_str) 4: CALL_PANIC() lck_mtx_destroyed: - LOAD_REG_ARG1(LMTX_REG) + ALIGN_STACK() + LOAD_PTR_ARG1(%rdx) LOAD_STRING_ARG0(mutex_interlock_destroyed_str) CALL_PANIC() @@ -1396,399 +947,465 @@ mutex_interlock_destroyed_str: * lck_mtx_try_lock() * lck_mtx_unlock() * lck_mtx_lock_spin() + * lck_mtx_lock_spin_always() + * lck_mtx_try_lock_spin() + * lck_mtx_try_lock_spin_always() * lck_mtx_convert_spin() */ +NONLEAF_ENTRY(lck_mtx_lock_spin_always) + mov %rdi, %rdx /* fetch lock pointer */ + jmp Llmls_avoid_check NONLEAF_ENTRY(lck_mtx_lock_spin) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ CHECK_PREEMPTION_LEVEL() +Llmls_avoid_check: + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* is the interlock or mutex held */ + jnz Llmls_slow +Llmls_try: /* no - can't be INDIRECT, DESTROYED or locked */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_SPIN_MSK), %ecx - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ - je Llmls_enter /* no - can't be INDIRECT or DESTROYED */ - - mov M_OWNER(LMTX_REG), LMTX_A_REG - cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ - je lck_mtx_destroyed - cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex */ - jne Llmls_loop - - LMTX_ENTER_EXTENDED - - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_SPIN_MSK), LMTX_C_REG - je Llmls_loop - - LMTX_UPDATE_MISS -Llmls_loop: - PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 - - test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ - jne 
Llmls_loop -Llmls_enter: - test $(M_MLOCKED_MSK), LMTX_C_REG /* is the mutex locked */ - jne Llml_contended /* fall back to normal mutex handling */ - - PUSHF /* save interrupt state */ - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG - CLI /* disable interrupts */ + PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne 1f - - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of interlock */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ + jne Llmls_busy_disabled - PREEMPTION_DISABLE - POPF /* restore interrupt state */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of interlock */ +#if MACH_LDEBUG + test %rax, %rax + jz 1f + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ +1: +#endif /* MACH_LDEBUG */ - LMTX_CHK_EXTENDED_EXIT /* return with the interlock held and preemption disabled */ leave #if CONFIG_DTRACE LOCKSTAT_LABEL(_lck_mtx_lock_spin_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG above */ - LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx above */ + LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, %rdx) #endif ret -1: - POPF /* restore interrupt state */ - jmp Llmls_loop +Llmls_slow: + test $M_ILOCKED_MSK, %ecx /* is the interlock held */ + jz Llml_contended /* no, must have been the mutex */ + cmp $(MUTEX_DESTROYED), %ecx /* check to see if its marked destroyed */ + je lck_mtx_destroyed + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex */ + jne Llmls_loop /* no... must be interlocked */ - -NONLEAF_ENTRY(lck_mtx_lock) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + LMTX_ENTER_EXTENDED - CHECK_PREEMPTION_LEVEL() + mov M_STATE(%rdx), %ecx + test $(M_SPIN_MSK), %ecx + jz Llmls_loop1 - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ - je Llml_enter /* no - can't be INDIRECT or DESTROYED */ + LMTX_UPDATE_MISS /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */ +Llmls_loop: + PAUSE + mov M_STATE(%rdx), %ecx +Llmls_loop1: + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx + jz Llmls_try + test $(M_MLOCKED_MSK), %ecx + jnz Llml_contended /* mutex owned by someone else, go contend for it */ + jmp Llmls_loop - mov M_OWNER(LMTX_REG), LMTX_A_REG - cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ - je lck_mtx_destroyed - cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? 
*/ - jne Llml_loop +Llmls_busy_disabled: + PREEMPTION_ENABLE + jmp Llmls_loop - LMTX_ENTER_EXTENDED - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_SPIN_MSK), LMTX_C_REG - je Llml_loop + +NONLEAF_ENTRY(lck_mtx_lock) + mov %rdi, %rdx /* fetch lock pointer */ - LMTX_UPDATE_MISS -Llml_loop: - PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 + CHECK_PREEMPTION_LEVEL() - test $(M_ILOCKED_MSK), LMTX_C_REG - jne Llml_loop -Llml_enter: - test $(M_MLOCKED_MSK), LMTX_C_REG - jne Llml_contended /* mutex owned by someone else, go contend for it */ + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* is the interlock or mutex held */ + jnz Llml_slow +Llml_try: /* no - can't be INDIRECT, DESTROYED or locked */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_MLOCKED_MSK), LMTX_C_REG + PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne Llml_loop + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ + jne Llml_busy_disabled - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ +#if MACH_LDEBUG + test %rax, %rax + jz 1f + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ +1: +#endif /* MACH_LDEBUG */ -Llml_acquired: - testl $(M_WAITERS_MSK), M_STATE(LMTX_REG) - je 1f + testl $(M_WAITERS_MSK), M_STATE(%rdx) + jz Llml_finish LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) -1: - LMTX_CHK_EXTENDED /* is this an extended mutex */ + +Llml_finish: + andl $(~M_ILOCKED_MSK), M_STATE(%rdx) + PREEMPTION_ENABLE + + cmp %rdx, %rdi /* is this an extended mutex */ jne 2f leave #if CONFIG_DTRACE LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG above */ - LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx above */ + LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, %rdx) #endif ret 2: - LMTX_EXIT_EXTENDED leave #if CONFIG_DTRACE LOCKSTAT_LABEL(_lck_mtx_lock_ext_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG above */ - LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx above */ + LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, %rdx) #endif ret + +Llml_slow: + test $M_ILOCKED_MSK, %ecx /* is the interlock held */ + jz Llml_contended /* no, must have been the mutex */ + + cmp $(MUTEX_DESTROYED), %ecx /* check to see if its marked destroyed */ + je lck_mtx_destroyed + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? */ + jne Llml_loop /* no... 
must be interlocked */ + LMTX_ENTER_EXTENDED + + mov M_STATE(%rdx), %ecx + test $(M_SPIN_MSK), %ecx + jz Llml_loop1 + + LMTX_UPDATE_MISS /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */ +Llml_loop: + PAUSE + mov M_STATE(%rdx), %ecx +Llml_loop1: + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx + jz Llml_try + test $(M_MLOCKED_MSK), %ecx + jnz Llml_contended /* mutex owned by someone else, go contend for it */ + jmp Llml_loop + +Llml_busy_disabled: + PREEMPTION_ENABLE + jmp Llml_loop + + Llml_contended: - LMTX_CHK_EXTENDED /* is this an extended mutex */ + cmp %rdx, %rdi /* is this an extended mutex */ je 0f LMTX_UPDATE_MISS 0: LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86) - test LMTX_RET_REG, LMTX_RET_REG - je Llml_acquired /* acquired mutex */ - cmp $1, LMTX_RET_REG /* check for direct wait status */ + test %rax, %rax + jz Llml_acquired /* acquired mutex, interlock held and preemption disabled */ + + cmp $1, %rax /* check for direct wait status */ je 2f - LMTX_CHK_EXTENDED /* is this an extended mutex */ + cmp %rdx, %rdi /* is this an extended mutex */ je 2f LMTX_UPDATE_DIRECT_WAIT 2: - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK), LMTX_C_REG - jne 6f + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK), %ecx + jnz 6f + + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK), %ecx /* try to take the interlock */ - PUSHF /* save state of interrupt mask */ - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK), LMTX_C_REG /* try to take the interlock */ - CLI /* disable interrupts */ + PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ jne 5f - test $(M_MLOCKED_MSK), LMTX_C_REG /* we've got the interlock and */ - jne 3f - or $(M_MLOCKED_MSK), LMTX_C_REG /* the mutex is free... grab it directly */ - and $(~M_ILOCKED_MSK), LMTX_C_REG + test $(M_MLOCKED_MSK), %ecx /* we've got the interlock and */ + jnz 3f + or $(M_MLOCKED_MSK), %ecx /* the mutex is free... 
grab it directly */ + mov %ecx, M_STATE(%rdx) - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ - mov LMTX_C_REG32, M_STATE(LMTX_REG) /* now drop the interlock */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ +#if MACH_LDEBUG + test %rax, %rax + jz 1f + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ +1: +#endif /* MACH_LDEBUG */ - POPF /* restore interrupt state */ - jmp Llml_acquired -3: /* interlock held, mutex busy */ - PREEMPTION_DISABLE - POPF /* restore interrupt state */ +Llml_acquired: + testl $(M_WAITERS_MSK), M_STATE(%rdx) + jnz 1f + mov M_OWNER(%rdx), %rax + mov TH_WAS_PROMOTED_ON_WAKEUP(%rax), %eax + test %eax, %eax + jz Llml_finish +1: + LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) + jmp Llml_finish - LMTX_CHK_EXTENDED /* is this an extended mutex */ +3: /* interlock held, mutex busy */ + cmp %rdx, %rdi /* is this an extended mutex */ je 4f LMTX_UPDATE_WAIT 4: LMTX_CALLEXT1(lck_mtx_lock_wait_x86) jmp Llml_contended 5: - POPF /* restore interrupt state */ + PREEMPTION_ENABLE 6: PAUSE jmp 2b - -NONLEAF_ENTRY(lck_mtx_try_lock_spin) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ +NONLEAF_ENTRY(lck_mtx_try_lock_spin_always) + mov %rdi, %rdx /* fetch lock pointer */ + jmp Llmts_avoid_check - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ - je Llmts_enter /* no - can't be INDIRECT or DESTROYED */ +NONLEAF_ENTRY(lck_mtx_try_lock_spin) + mov %rdi, %rdx /* fetch lock pointer */ - mov M_OWNER(LMTX_REG), LMTX_A_REG - cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ - je lck_mtx_destroyed - cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ - jne Llmts_enter +Llmts_avoid_check: + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* is the interlock or mutex held */ + jnz Llmts_slow +Llmts_try: /* no - can't be INDIRECT, DESTROYED or locked */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_SPIN_MSK), %rcx - LMTX_ENTER_EXTENDED -Llmts_loop: - PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 -Llmts_enter: - test $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG - jne Llmts_fail - test $(M_ILOCKED_MSK), LMTX_C_REG - jne Llmts_loop - - PUSHF /* save interrupt state */ - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG - CLI /* disable interrupts */ + PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne 3f + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ + jne Llmts_busy_disabled - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ - - PREEMPTION_DISABLE - POPF /* restore interrupt state */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ +#if MACH_LDEBUG + test %rax, %rax + jz 1f + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ +1: +#endif /* MACH_LDEBUG */ - LMTX_CHK_EXTENDED_EXIT leave #if CONFIG_DTRACE - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ LOCKSTAT_LABEL(_lck_mtx_try_lock_spin_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG above */ - LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx above */ + LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, %rdx) #endif - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ ret -3: - 
POPF /* restore interrupt state */ - jmp Llmts_loop - - -NONLEAF_ENTRY(lck_mtx_try_lock) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ +Llmts_slow: + test $(M_ILOCKED_MSK), %ecx /* is the interlock held */ + jz Llmts_fail /* no, must be held as a mutex */ - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ - je Llmt_enter /* no - can't be INDIRECT or DESTROYED */ - - mov M_OWNER(LMTX_REG), LMTX_A_REG - cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ + cmp $(MUTEX_DESTROYED), %ecx /* check to see if its marked destroyed */ je lck_mtx_destroyed - cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ - jne Llmt_enter + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? */ + jne Llmts_loop1 LMTX_ENTER_EXTENDED -Llmt_loop: +Llmts_loop: PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 -Llmt_enter: - test $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG - jne Llmt_fail - test $(M_ILOCKED_MSK), LMTX_C_REG - jne Llmt_loop + mov M_STATE(%rdx), %ecx +Llmts_loop1: + test $(M_MLOCKED_MSK | M_SPIN_MSK), %ecx + jnz Llmts_fail + test $(M_ILOCKED_MSK), %ecx + jz Llmts_try + jmp Llmts_loop + +Llmts_busy_disabled: + PREEMPTION_ENABLE + jmp Llmts_loop + - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_MLOCKED_MSK), LMTX_C_REG + +NONLEAF_ENTRY(lck_mtx_try_lock) + mov %rdi, %rdx /* fetch lock pointer */ + + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* is the interlock or mutex held */ + jnz Llmt_slow +Llmt_try: /* no - can't be INDIRECT, DESTROYED or locked */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx + + PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne Llmt_loop + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ + jne Llmt_busy_disabled - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ +#if MACH_LDEBUG + test %rax, %rax + jz 1f + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ +1: +#endif /* MACH_LDEBUG */ - LMTX_CHK_EXTENDED_EXIT + test $(M_WAITERS_MSK), %ecx + jz 0f - test $(M_WAITERS_MSK), LMTX_C_REG - je 2f LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) -2: - leave +0: + andl $(~M_ILOCKED_MSK), M_STATE(%rdx) + PREEMPTION_ENABLE + leave #if CONFIG_DTRACE - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ /* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */ LOCKSTAT_LABEL(_lck_mtx_try_lock_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG from above */ - LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx from above */ + LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, %rdx) #endif - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ ret +Llmt_slow: + test $(M_ILOCKED_MSK), %ecx /* is the interlock held */ + jz Llmt_fail /* no, must be held as a mutex */ + + cmp $(MUTEX_DESTROYED), %ecx /* check to see if its marked destroyed */ + je lck_mtx_destroyed + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? 
*/ + jne Llmt_loop + + LMTX_ENTER_EXTENDED +Llmt_loop: + PAUSE + mov M_STATE(%rdx), %ecx +Llmt_loop1: + test $(M_MLOCKED_MSK | M_SPIN_MSK), %ecx + jnz Llmt_fail + test $(M_ILOCKED_MSK), %ecx + jz Llmt_try + jmp Llmt_loop + +Llmt_busy_disabled: + PREEMPTION_ENABLE + jmp Llmt_loop + Llmt_fail: Llmts_fail: - LMTX_CHK_EXTENDED /* is this an extended mutex */ + cmp %rdx, %rdi /* is this an extended mutex */ je 0f LMTX_UPDATE_MISS - LMTX_EXIT_EXTENDED 0: - xor LMTX_RET_REG, LMTX_RET_REG + xor %rax, %rax NONLEAF_RET NONLEAF_ENTRY(lck_mtx_convert_spin) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ - mov M_OWNER(LMTX_REG), LMTX_A_REG - cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ - cmove M_PTR(LMTX_REG), LMTX_REG /* If so, take indirection */ + mov M_STATE(%rdx), %ecx + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? */ + jne 0f + mov M_PTR(%rdx), %rdx /* If so, take indirection */ + mov M_STATE(%rdx), %ecx +0: + test $(M_MLOCKED_MSK), %ecx /* already owned as a mutex, just return */ + jnz 2f + test $(M_WAITERS_MSK), %ecx /* are there any waiters? */ + jz 1f - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_MLOCKED_MSK), LMTX_C_REG /* already owned as a mutex, just return */ - jne 2f + LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) + mov M_STATE(%rdx), %ecx 1: - and $(~(M_ILOCKED_MSK | M_SPIN_MSK)), LMTX_C_REG /* convert from spin version to mutex */ - or $(M_MLOCKED_MSK), LMTX_C_REG - mov LMTX_C_REG32, M_STATE(LMTX_REG) /* since I own the interlock, I don't need an atomic update */ + and $(~(M_ILOCKED_MSK | M_SPIN_MSK)), %ecx /* convert from spin version to mutex */ + or $(M_MLOCKED_MSK), %ecx + mov %ecx, M_STATE(%rdx) /* since I own the interlock, I don't need an atomic update */ - PREEMPTION_ENABLE /* only %eax is consumed */ - - test $(M_WAITERS_MSK), LMTX_C_REG /* are there any waiters? */ - je 2f - - LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) + PREEMPTION_ENABLE 2: NONLEAF_RET + -#if defined(__i386__) NONLEAF_ENTRY(lck_mtx_unlock) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ - mov M_OWNER(LMTX_REG), LMTX_A_REG - test LMTX_A_REG, LMTX_A_REG - jnz Llmu_prim - leave - ret -NONLEAF_ENTRY(lck_mtx_unlock_darwin10) -#else -NONLEAF_ENTRY(lck_mtx_unlock) -#endif - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ - mov M_OWNER(LMTX_REG), LMTX_A_REG + mov %rdi, %rdx /* fetch lock pointer */ +Llmu_entry: + mov M_STATE(%rdx), %ecx Llmu_prim: - cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? 
*/ je Llmu_ext -0: - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_MLOCKED_MSK), LMTX_C_REG /* check for full mutex */ - jne 1f - - xor LMTX_A_REG, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) - mov LMTX_C_REG, LMTX_A_REG /* keep original state in %ecx for later evaluation */ - and $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), LMTX_A_REG - mov LMTX_A_REG32, M_STATE(LMTX_REG) /* since I own the interlock, I don't need an atomic update */ - - PREEMPTION_ENABLE /* need to re-enable preemption - clobbers eax */ - jmp 2f -1: - test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ - jne 7f - PUSHF /* save interrupt state */ - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - and $(~M_MLOCKED_MSK), LMTX_C_REG /* drop mutex */ - or $(M_ILOCKED_MSK), LMTX_C_REG /* pick up interlock */ - CLI +Llmu_chktype: + test $(M_MLOCKED_MSK), %ecx /* check for full mutex */ + jz Llmu_unlock +Llmu_mutex: + test $(M_ILOCKED_MSK), %rcx /* have to wait for interlock to clear */ + jnz Llmu_busy + + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + and $(~M_MLOCKED_MSK), %ecx /* drop mutex */ + or $(M_ILOCKED_MSK), %ecx /* pick up interlock */ + + PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne 6f /* branch on failure to spin loop */ - - xor LMTX_A_REG, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) - mov LMTX_C_REG, LMTX_A_REG /* keep original state in %ecx for later evaluation */ - and $(~(M_ILOCKED_MSK | M_PROMOTED_MSK)), LMTX_A_REG - mov LMTX_A_REG32, M_STATE(LMTX_REG) /* since I own the interlock, I don't need an atomic update */ - POPF /* restore interrupt state */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ + jne Llmu_busy_disabled /* branch on failure to spin loop */ + +Llmu_unlock: + xor %rax, %rax + mov %rax, M_OWNER(%rdx) + mov %rcx, %rax /* keep original state in %ecx for later evaluation */ + and $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), %rax + + test $(M_WAITERS_MSK), %eax + jz 2f + dec %eax /* decrement waiter count */ 2: - test $(M_PROMOTED_MSK | M_WAITERS_MSK), LMTX_C_REG - je 3f - and $(M_PROMOTED_MSK), LMTX_C_REG + mov %eax, M_STATE(%rdx) /* since I own the interlock, I don't need an atomic update */ - LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, LMTX_C_REG) +#if MACH_LDEBUG + /* perform lock statistics after drop to prevent delay */ + mov %gs:CPU_ACTIVE_THREAD, %rax + test %rax, %rax + jz 1f + decl TH_MUTEX_COUNT(%rax) /* lock statistic */ +1: +#endif /* MACH_LDEBUG */ + + test $(M_PROMOTED_MSK | M_WAITERS_MSK), %ecx + jz 3f + + LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, %rcx) 3: - LMTX_CHK_EXTENDED + PREEMPTION_ENABLE + + cmp %rdx, %rdi jne 4f leave @@ -1796,8 +1413,8 @@ Llmu_prim: /* Dtrace: LS_LCK_MTX_UNLOCK_RELEASE */ LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG from above */ - LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, LMTX_REG) + /* inherit lock pointer in %rdx from above */ + LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, %rdx) #endif ret 4: @@ -1806,483 +1423,137 @@ Llmu_prim: /* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */ LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG from above */ - LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, LMTX_REG) + /* inherit lock pointer in %rdx from above */ + LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, %rdx) #endif ret -6: - POPF /* restore interrupt state */ -7: - PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 - jmp 1b 
-Llmu_ext: - mov M_PTR(LMTX_REG), LMTX_REG - mov M_OWNER(LMTX_REG), LMTX_A_REG - mov %gs:CPU_ACTIVE_THREAD, LMTX_C_REG - CHECK_UNLOCK(LMTX_C_REG, LMTX_A_REG) - jmp 0b -LEAF_ENTRY(lck_mtx_lock_decr_waiter) - LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ -1: - mov M_STATE(LMTX_REG), LMTX_C_REG32 +Llmu_busy_disabled: + PREEMPTION_ENABLE +Llmu_busy: + PAUSE + mov M_STATE(%rdx), %ecx + jmp Llmu_mutex - test $(M_WAITERS_MSK), LMTX_C_REG - je 2f - test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ - jne 3f +Llmu_ext: + mov M_PTR(%rdx), %rdx + mov M_OWNER(%rdx), %rax + mov %gs:CPU_ACTIVE_THREAD, %rcx + CHECK_UNLOCK(%rcx, %rax) + mov M_STATE(%rdx), %ecx + jmp Llmu_chktype - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - dec LMTX_C_REG /* decrement waiter count */ - lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne 3f /* branch on failure to spin loop */ - mov $1, LMTX_RET_REG - LEAF_RET -2: - xor LMTX_RET_REG, LMTX_RET_REG - LEAF_RET -3: - PAUSE - jmp 1b +LEAF_ENTRY(lck_mtx_ilk_try_lock) + mov %rdi, %rdx /* fetch lock pointer - no indirection here */ - -LEAF_ENTRY(lck_mtx_lock_get_pri) - LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ -1: - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx - test $(M_WAITERS_MSK), LMTX_C_REG - jne 2f - test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ - jne 3f + test $(M_ILOCKED_MSK), %ecx /* can't have the interlock yet */ + jnz 3f + + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK), %ecx - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - and $(~M_PRIORITY_MSK), LMTX_C_REG /* no waiters, reset mutex priority to 0 */ + PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne 3f /* branch on failure to spin loop */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ + jne 2f /* return failure after re-enabling preemption */ - xor LMTX_RET_REG, LMTX_RET_REG /* return mutex priority == 0 */ + mov $1, %rax /* return success with preemption disabled */ LEAF_RET 2: - mov LMTX_C_REG, LMTX_RET_REG - and $(M_PRIORITY_MSK), LMTX_RET_REG - shr $16, LMTX_RET_REG /* return current mutex priority */ - LEAF_RET + PREEMPTION_ENABLE /* need to re-enable preemption */ 3: - PAUSE - jmp 1b - + xor %rax, %rax /* return failure */ + LEAF_RET - LEAF_ENTRY(lck_mtx_ilk_unlock) - LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ + mov %rdi, %rdx /* fetch lock pointer - no indirection here */ - andl $(~M_ILOCKED_MSK), M_STATE(LMTX_REG) + andl $(~M_ILOCKED_MSK), M_STATE(%rdx) - PREEMPTION_ENABLE /* need to re-enable preemption */ + PREEMPTION_ENABLE /* need to re-enable preemption */ LEAF_RET - LEAF_ENTRY(lck_mtx_lock_grab_mutex) - LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ + mov %rdi, %rdx /* fetch lock pointer - no indirection here */ - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx - test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG /* can't have the mutex yet */ - jne 2f + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* can't have the mutex yet */ + jnz 3f + + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_MLOCKED_MSK), LMTX_C_REG + PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange 
*/ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ jne 2f /* branch on failure to spin loop */ - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ +#if MACH_LDEBUG + test %rax, %rax + jz 1f + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ +1: +#endif /* MACH_LDEBUG */ - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ LEAF_RET 2: - xor LMTX_RET_REG, LMTX_RET_REG /* return failure */ - LEAF_RET - - - -LEAF_ENTRY(lck_mtx_lock_mark_promoted) - LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ -1: - mov M_STATE(LMTX_REG), LMTX_C_REG32 - - test $(M_PROMOTED_MSK), LMTX_C_REG - jne 3f - test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ - jne 2f - - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_PROMOTED_MSK), LMTX_C_REG - lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne 2f /* branch on failure to spin loop */ - - mov $1, LMTX_RET_REG - LEAF_RET -2: - PAUSE - jmp 1b + PREEMPTION_ENABLE 3: - xor LMTX_RET_REG, LMTX_RET_REG + xor %rax, %rax /* return failure */ LEAF_RET + - LEAF_ENTRY(lck_mtx_lock_mark_destroyed) - LOAD_LMTX_REG(L_ARG0) + mov %rdi, %rdx 1: - mov M_OWNER(LMTX_REG), LMTX_A_REG - - cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ - je 3f - cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ + mov M_STATE(%rdx), %ecx + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? */ jne 2f - movl $(MUTEX_DESTROYED), M_OWNER(LMTX_REG) /* convert to destroyed state */ + movl $(MUTEX_DESTROYED), M_STATE(%rdx) /* convert to destroyed state */ jmp 3f 2: - mov M_STATE(LMTX_REG), LMTX_C_REG32 - - test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ - jne 5f + test $(M_ILOCKED_MSK), %rcx /* have to wait for interlock to clear */ + jnz 5f - PUSHF /* save interrupt state */ - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK), LMTX_C_REG - CLI + PREEMPTION_DISABLE + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK), %ecx lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne 4f /* branch on failure to spin loop */ - movl $(MUTEX_DESTROYED), M_OWNER(LMTX_REG) /* convert to destroyed state */ - POPF /* restore interrupt state */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ + jne 4f /* branch on failure to spin loop */ + movl $(MUTEX_DESTROYED), M_STATE(%rdx) /* convert to destroyed state */ + PREEMPTION_ENABLE 3: - LEAF_RET /* return with M_ILOCKED set */ + LEAF_RET /* return with M_ILOCKED set */ 4: - POPF /* restore interrupt state */ + PREEMPTION_ENABLE 5: PAUSE jmp 1b - - -LEAF_ENTRY(_disable_preemption) -#if MACH_RT - _DISABLE_PREEMPTION -#endif /* MACH_RT */ - LEAF_RET - -LEAF_ENTRY(_enable_preemption) -#if MACH_RT -#if MACH_ASSERT - cmpl $0,%gs:CPU_PREEMPTION_LEVEL - jg 1f -#if __i386__ - pushl %gs:CPU_PREEMPTION_LEVEL -#else - movl %gs:CPU_PREEMPTION_LEVEL,%esi -#endif - LOAD_STRING_ARG0(_enable_preemption_less_than_zero) - CALL_PANIC() - hlt - .cstring -_enable_preemption_less_than_zero: - .asciz "_enable_preemption: preemption_level(%d) < 0!" 
-
-
-LEAF_ENTRY(_disable_preemption)
-#if MACH_RT
- _DISABLE_PREEMPTION
-#endif /* MACH_RT */
- LEAF_RET
-
-LEAF_ENTRY(_enable_preemption)
-#if MACH_RT
-#if MACH_ASSERT
- cmpl $0,%gs:CPU_PREEMPTION_LEVEL
- jg 1f
-#if __i386__
- pushl %gs:CPU_PREEMPTION_LEVEL
-#else
- movl %gs:CPU_PREEMPTION_LEVEL,%esi
-#endif
- LOAD_STRING_ARG0(_enable_preemption_less_than_zero)
- CALL_PANIC()
- hlt
- .cstring
-_enable_preemption_less_than_zero:
- .asciz "_enable_preemption: preemption_level(%d) < 0!"
- .text
-1:
-#endif /* MACH_ASSERT */
- _ENABLE_PREEMPTION
-#endif /* MACH_RT */
- LEAF_RET
-
-LEAF_ENTRY(_enable_preemption_no_check)
-#if MACH_RT
-#if MACH_ASSERT
- cmpl $0,%gs:CPU_PREEMPTION_LEVEL
- jg 1f
- LOAD_STRING_ARG0(_enable_preemption_no_check_less_than_zero)
- CALL_PANIC()
- hlt
- .cstring
-_enable_preemption_no_check_less_than_zero:
- .asciz "_enable_preemption_no_check: preemption_level <= 0!"
- .text
-1:
-#endif /* MACH_ASSERT */
- _ENABLE_PREEMPTION_NO_CHECK
-#endif /* MACH_RT */
- LEAF_RET
-
-
-LEAF_ENTRY(_mp_disable_preemption)
-#if MACH_RT
- _DISABLE_PREEMPTION
-#endif /* MACH_RT */
- LEAF_RET
-
-LEAF_ENTRY(_mp_enable_preemption)
-#if MACH_RT
-#if MACH_ASSERT
- cmpl $0,%gs:CPU_PREEMPTION_LEVEL
- jg 1f
-#if __i386__
- pushl %gs:CPU_PREEMPTION_LEVEL
-#else
- movl %gs:CPU_PREEMPTION_LEVEL,%esi
-#endif
- LOAD_STRING_ARG0(_mp_enable_preemption_less_than_zero)
+LEAF_ENTRY(preemption_underflow_panic)
+ FRAME
+ incl %gs:CPU_PREEMPTION_LEVEL
+ ALIGN_STACK()
+ LOAD_STRING_ARG0(16f)
 CALL_PANIC()
 hlt
- .cstring
-_mp_enable_preemption_less_than_zero:
- .asciz "_mp_enable_preemption: preemption_level (%d) <= 0!"
+ .data
+16: String "Preemption level underflow, possible cause unlocking an unlocked mutex or spinlock"
 .text
-1:
-#endif /* MACH_ASSERT */
- _ENABLE_PREEMPTION
-#endif /* MACH_RT */
- LEAF_RET
-
-LEAF_ENTRY(_mp_enable_preemption_no_check)
-#if MACH_RT
-#if MACH_ASSERT
- cmpl $0,%gs:CPU_PREEMPTION_LEVEL
- jg 1f
- LOAD_STRING_ARG0(_mp_enable_preemption_no_check_less_than_zero)
- CALL_PANIC()
- hlt
- .cstring
-_mp_enable_preemption_no_check_less_than_zero:
- .asciz "_mp_enable_preemption_no_check: preemption_level <= 0!"
- .text
-1:
-#endif /* MACH_ASSERT */
- _ENABLE_PREEMPTION_NO_CHECK
-#endif /* MACH_RT */
- LEAF_RET
-
-#if __i386__
-
-LEAF_ENTRY(i_bit_set)
- movl L_ARG0,%edx
- movl L_ARG1,%eax
- lock
- bts %edx,(%eax)
- LEAF_RET
-
-LEAF_ENTRY(i_bit_clear)
- movl L_ARG0,%edx
- movl L_ARG1,%eax
- lock
- btr %edx,(%eax)
- LEAF_RET
-
-
-LEAF_ENTRY(bit_lock)
- movl L_ARG0,%ecx
- movl L_ARG1,%eax
-1:
- lock
- bts %ecx,(%eax)
- jb 1b
- LEAF_RET
-
-
-LEAF_ENTRY(bit_lock_try)
- movl L_ARG0,%ecx
- movl L_ARG1,%eax
- lock
- bts %ecx,(%eax)
- jb bit_lock_failed
- LEAF_RET /* %eax better not be null ! */
-bit_lock_failed:
- xorl %eax,%eax
- LEAF_RET
-
-LEAF_ENTRY(bit_unlock)
- movl L_ARG0,%ecx
- movl L_ARG1,%eax
- lock
- btr %ecx,(%eax)
- LEAF_RET
-/*
- * Atomic primitives, prototyped in kern/simple_lock.h
- */
-LEAF_ENTRY(hw_atomic_add)
- movl L_ARG0, %ecx /* Load address of operand */
- movl L_ARG1, %eax /* Load addend */
- movl %eax, %edx
- lock
- xaddl %eax, (%ecx) /* Atomic exchange and add */
- addl %edx, %eax /* Calculate result */
- LEAF_RET
-
-LEAF_ENTRY(hw_atomic_sub)
- movl L_ARG0, %ecx /* Load address of operand */
- movl L_ARG1, %eax /* Load subtrahend */
- negl %eax
- movl %eax, %edx
- lock
- xaddl %eax, (%ecx) /* Atomic exchange and add */
- addl %edx, %eax /* Calculate result */
- LEAF_RET
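The removed hw_atomic_add/hw_atomic_sub return the post-operation value: lock xaddl leaves the old contents in %eax, and the saved (negated, for the sub case) addend is added back to compute the result. In portable terms that contract is add-and-fetch. A sketch under that reading; the _sketch names are illustrative, not the kernel's prototypes.

#include <stdint.h>

/* Contract of hw_atomic_add: atomically add delt and return the new value.
 * xaddl hands back the old value; re-adding the addend gives add-and-fetch. */
static uint32_t
hw_atomic_add_sketch(volatile uint32_t *dest, uint32_t delt)
{
        return __sync_add_and_fetch(dest, delt);
}

/* hw_atomic_sub is the same operation with the addend negated first. */
static uint32_t
hw_atomic_sub_sketch(volatile uint32_t *dest, uint32_t delt)
{
        return __sync_sub_and_fetch(dest, delt);
}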
-LEAF_ENTRY(hw_atomic_or)
- movl L_ARG0, %ecx /* Load address of operand */
- movl (%ecx), %eax
-1:
- movl L_ARG1, %edx /* Load mask */
- orl %eax, %edx
- lock
- cmpxchgl %edx, (%ecx) /* Atomic CAS */
- jne 1b
- movl %edx, %eax /* Result */
- LEAF_RET
-/*
- * A variant of hw_atomic_or which doesn't return a value.
- * The implementation is thus comparatively more efficient.
- */
-
-LEAF_ENTRY(hw_atomic_or_noret)
- movl L_ARG0, %ecx /* Load address of operand */
- movl L_ARG1, %edx /* Load mask */
- lock
- orl %edx, (%ecx) /* Atomic OR */
- LEAF_RET
-
-LEAF_ENTRY(hw_atomic_and)
- movl L_ARG0, %ecx /* Load address of operand */
- movl (%ecx), %eax
-1:
- movl L_ARG1, %edx /* Load mask */
- andl %eax, %edx
- lock
- cmpxchgl %edx, (%ecx) /* Atomic CAS */
- jne 1b
- movl %edx, %eax /* Result */
- LEAF_RET
-/*
- * A variant of hw_atomic_and which doesn't return a value.
- * The implementation is thus comparatively more efficient.
- */
-
-LEAF_ENTRY(hw_atomic_and_noret)
- movl L_ARG0, %ecx /* Load address of operand */
- movl L_ARG1, %edx /* Load mask */
- lock
- andl %edx, (%ecx) /* Atomic AND */
- LEAF_RET
-
-#else /* !__i386__ */
-
-LEAF_ENTRY(i_bit_set)
- lock
- bts %edi,(%rsi)
- LEAF_RET
-
-LEAF_ENTRY(i_bit_clear)
- lock
- btr %edi,(%rsi)
- LEAF_RET
-
-
-LEAF_ENTRY(bit_lock)
-1:
- lock
- bts %edi,(%rsi)
- jb 1b
- LEAF_RET
-
-
-LEAF_ENTRY(bit_lock_try)
- lock
- bts %edi,(%rsi)
- jb bit_lock_failed
- movl $1, %eax
- LEAF_RET
-bit_lock_failed:
- xorl %eax,%eax
- LEAF_RET
-
-LEAF_ENTRY(bit_unlock)
- lock
- btr %edi,(%rsi)
- LEAF_RET
-
-
-/*
- * Atomic primitives, prototyped in kern/simple_lock.h
- */
-LEAF_ENTRY(hw_atomic_add)
- movl %esi, %eax /* Load addend */
- lock
- xaddl %eax, (%rdi) /* Atomic exchange and add */
- addl %esi, %eax /* Calculate result */
- LEAF_RET
-
-LEAF_ENTRY(hw_atomic_sub)
- negl %esi
- movl %esi, %eax
- lock
- xaddl %eax, (%rdi) /* Atomic exchange and add */
- addl %esi, %eax /* Calculate result */
- LEAF_RET
-
-LEAF_ENTRY(hw_atomic_or)
- movl (%rdi), %eax
-1:
- movl %esi, %edx /* Load mask */
- orl %eax, %edx
- lock
- cmpxchgl %edx, (%rdi) /* Atomic CAS */
- jne 1b
- movl %edx, %eax /* Result */
- LEAF_RET
-/*
- * A variant of hw_atomic_or which doesn't return a value.
- * The implementation is thus comparatively more efficient.
- */
-
-LEAF_ENTRY(hw_atomic_or_noret)
- lock
- orl %esi, (%rdi) /* Atomic OR */
- LEAF_RET
-
-
-LEAF_ENTRY(hw_atomic_and)
- movl (%rdi), %eax
-1:
- movl %esi, %edx /* Load mask */
- andl %eax, %edx
- lock
- cmpxchgl %edx, (%rdi) /* Atomic CAS */
- jne 1b
- movl %edx, %eax /* Result */
- LEAF_RET
-/*
- * A variant of hw_atomic_and which doesn't return a value.
- * The implementation is thus comparatively more efficient.
- */
-
-LEAF_ENTRY(hw_atomic_and_noret)
- lock
- andl %esi, (%rdi) /* Atomic AND */
- LEAF_RET
-#endif /* !__i386__ */
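hw_atomic_or and hw_atomic_and cannot use xadd, so they loop: read the word, fold in the mask, and attempt cmpxchgl; on failure cmpxchgl has already refreshed %eax with the current memory contents, so the loop simply recomputes and retries. The _noret variants need no loop at all, since a single lock orl/andl suffices when the caller discards the result, which is why the comments call them comparatively more efficient. A C sketch of both shapes follows; the _sketch names are illustrative, and the and variants are identical with | replaced by &.

#include <stdint.h>

/* Value-returning form: CAS loop mirroring hw_atomic_or's cmpxchgl retry. */
static uint32_t
hw_atomic_or_sketch(volatile uint32_t *dest, uint32_t mask)
{
        uint32_t oldv = *dest;

        for (;;) {
                uint32_t newv = oldv | mask;
                uint32_t seen = __sync_val_compare_and_swap(dest, oldv, newv);

                if (seen == oldv)
                        return newv;    /* CAS won: return the value just stored */
                oldv = seen;            /* CAS lost: retry with the refreshed value */
        }
}

/* No-result form: one lock-prefixed read-modify-write, as in hw_atomic_or_noret. */
static void
hw_atomic_or_noret_sketch(volatile uint32_t *dest, uint32_t mask)
{
        __sync_or_and_fetch(dest, mask);
}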