+/*
+ * lck_mtx_unlock — release a mutex (fast path).
+ *
+ * Syntax/ABI: AT&T x86-64.
+ * In:    %rdi = pointer to the lock
+ * Uses:  %rdx = lock pointer, %ecx = state-word snapshot / new state,
+ *        %eax = cmpxchg comparand
+ *
+ * M_STATE/M_OWNER field offsets, the M_*_MSK state bits, MUTEX_IND and
+ * PREEMPTION_DISABLE are defined elsewhere.  The slow paths Llmu_ext
+ * (indirect mutex), Llmu_busy and Llmu_busy_disabled (interlock
+ * contention), and the function epilogue lie outside this hunk.
+ */
+NONLEAF_ENTRY(lck_mtx_unlock)
+ mov %rdi, %rdx /* fetch lock pointer */
+Llmu_entry:
+ /* snapshot the 32-bit state word; movl zero-extends, so the upper
+  * 32 bits of %rcx are 0 from here on */
+ mov M_STATE(%rdx), %ecx
+Llmu_prim:
+ cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? */
+ je Llmu_ext
+
+Llmu_chktype:
+ test $(M_MLOCKED_MSK), %ecx /* check for full mutex */
+ /* M_MLOCKED clear: presumably the spin variant, already holding the
+  * interlock — skip straight to the release.  TODO(review): confirm */
+ jz Llmu_unlock
+Llmu_mutex:
+ /* NOTE(review): testq on %rcx here vs testl on %ecx above — harmless
+  * (upper half of %rcx is zero after the movl at Llmu_entry) but the
+  * operand width is inconsistent with the rest of the routine */
+ test $(M_ILOCKED_MSK), %rcx /* have to wait for interlock to clear */
+ jnz Llmu_busy
+
+ /* build cmpxchg operands: %eax = current snapshot (comparand),
+  * %ecx = desired state with the mutex bit dropped and the
+  * interlock bit raised */
+ mov %rcx, %rax /* eax contains snapshot for cmpxchgl */
+ and $(~M_MLOCKED_MSK), %ecx /* drop mutex */
+ or $(M_ILOCKED_MSK), %ecx /* pick up interlock */
+
+ /* presumably keeps this thread on-CPU while the interlock is held;
+  * macro defined elsewhere */
+ PREEMPTION_DISABLE
+ lock
+ /* if M_STATE still equals %eax, store %ecx and set ZF; otherwise
+  * %eax is reloaded with the current state and ZF is clear */
+ cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */
+ jne Llmu_busy_disabled /* branch on failure to spin loop */
+
+Llmu_unlock:
+ /* interlock now held: clear the owner field */
+ xor %rax, %rax
+ mov %rax, M_OWNER(%rdx)
+ mov %rcx, %rax /* keep original state in %ecx for later evaluation */
+ /* compute the released state: drop interlock, spin and promoted bits */
+ and $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), %rax
+
+ test $(M_WAITERS_MSK), %eax
+ jz 2f
+ /* NOTE(review): bare dec on the whole state word — assumes the waiter
+  * count occupies the low-order bits; confirm against the lock layout */
+ dec %eax /* decrement waiter count */
+2:
+ mov %eax, M_STATE(%rdx) /* since I own the interlock, I don't need an atomic update */