2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Locking primitives implementation
64 #include <mach_ldebug.h>
66 #include <kern/lock.h>
67 #include <kern/locks.h>
68 #include <kern/kalloc.h>
69 #include <kern/misc_protos.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/cpu_data.h>
73 #include <kern/cpu_number.h>
74 #include <kern/sched_prim.h>
76 #include <kern/debug.h>
79 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
80 #include <machine/machine_cpu.h>
83 #include <sys/kdebug.h>
84 #include <mach/branch_predicates.h>
87 * We need only enough declarations from the BSD-side to be able to
88 * test if our probe is active, and to call __dtrace_probe(). Setting
89 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
92 #define NEED_DTRACE_DEFS
93 #include <../bsd/sys/lockstat.h>
/*
 * Trace sub-codes for read/write lock activity. In this file the
 * *_SPIN_CODE and *_WAIT_CODE values are combined with DBG_MACH_LOCKS
 * through MACHDBG_CODE() and emitted with KERNEL_DEBUG() around the
 * spin and block phases of the rw-lock slow paths.
 */
96 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
97 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
98 #define LCK_RW_LCK_SHARED_CODE 0x102
99 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
100 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
101 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
/* Spin/wait phases: exclusive (writer and reader drain), shared, and shared-to-exclusive. */
103 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
104 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
105 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
106 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
107 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
108 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
109 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
110 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
113 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
115 unsigned int LcksOpts
=0;
121 * Perform simple lock checks.
123 int uslock_check
= 1;
124 int max_lock_loops
= 100000000;
125 decl_simple_lock_data(extern , printf_lock
)
126 decl_simple_lock_data(extern , panic_lock
)
127 #endif /* USLOCK_DEBUG */
131 * We often want to know the addresses of the callers
132 * of the various lock routines. However, this information
133 * is only used for debugging and statistics.
136 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
137 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
139 #define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
140 #define DECL_PC(pc) pc_t pc;
141 #else /* ANY_LOCK_DEBUG */
145 * Eliminate lint complaints about unused local pc variables.
147 #define OBTAIN_PC(pc) ++pc
149 #define OBTAIN_PC(pc)
151 #endif /* USLOCK_DEBUG */
155 * Portable lock package implementation of usimple_locks.
159 #define USLDBG(stmt) stmt
160 void usld_lock_init(usimple_lock_t
, unsigned short);
161 void usld_lock_pre(usimple_lock_t
, pc_t
);
162 void usld_lock_post(usimple_lock_t
, pc_t
);
163 void usld_unlock(usimple_lock_t
, pc_t
);
164 void usld_lock_try_pre(usimple_lock_t
, pc_t
);
165 void usld_lock_try_post(usimple_lock_t
, pc_t
);
166 int usld_lock_common_checks(usimple_lock_t
, char *);
167 #else /* USLOCK_DEBUG */
169 #endif /* USLOCK_DEBUG */
172 extern int lck_rw_grab_want(lck_rw_t
*lck
);
173 extern int lck_rw_grab_shared(lck_rw_t
*lck
);
174 extern int lck_rw_held_read_or_upgrade(lck_rw_t
*lck
);
178 * Forward definitions
181 void lck_rw_lock_shared_gen(
184 void lck_rw_lock_exclusive_gen(
187 boolean_t
lck_rw_lock_shared_to_exclusive_success(
190 boolean_t
lck_rw_lock_shared_to_exclusive_failure(
192 int prior_lock_state
);
194 void lck_rw_lock_exclusive_to_shared_gen(
196 int prior_lock_state
);
198 lck_rw_type_t
lck_rw_done_gen(
200 int prior_lock_state
);
203 * Routine: lck_spin_alloc_init
212 if ((lck
= (lck_spin_t
*)kalloc(sizeof(lck_spin_t
))) != 0)
213 lck_spin_init(lck
, grp
, attr
);
219 * Routine: lck_spin_free
226 lck_spin_destroy(lck
, grp
);
227 kfree(lck
, sizeof(lck_spin_t
));
231 * Routine: lck_spin_init
237 __unused lck_attr_t
*attr
)
239 usimple_lock_init((usimple_lock_t
) lck
, 0);
240 lck_grp_reference(grp
);
241 lck_grp_lckcnt_incr(grp
, LCK_TYPE_SPIN
);
245 * Routine: lck_spin_destroy
252 if (lck
->interlock
== LCK_SPIN_TAG_DESTROYED
)
254 lck
->interlock
= LCK_SPIN_TAG_DESTROYED
;
255 lck_grp_lckcnt_decr(grp
, LCK_TYPE_SPIN
);
256 lck_grp_deallocate(grp
);
261 * Routine: lck_spin_lock
267 usimple_lock((usimple_lock_t
) lck
);
271 * Routine: lck_spin_unlock
277 usimple_unlock((usimple_lock_t
) lck
);
282 * Routine: lck_spin_try_lock
288 return((boolean_t
)usimple_lock_try((usimple_lock_t
) lck
));
292 * Initialize a usimple_lock.
294 * No change in preemption state.
299 __unused
unsigned short tag
)
301 #ifndef MACHINE_SIMPLE_LOCK
302 USLDBG(usld_lock_init(l
, tag
));
303 hw_lock_init(&l
->interlock
);
305 simple_lock_init((simple_lock_t
)l
,tag
);
309 volatile uint32_t spinlock_owner_cpu
= ~0;
310 volatile usimple_lock_t spinlock_timed_out
;
312 static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr
) {
316 for (i
= 0; i
< real_ncpus
; i
++) {
317 if ((uintptr_t)cpu_data_ptr
[i
]->cpu_active_thread
== thread_addr
) {
318 spinlock_owner_cpu
= i
;
319 if ((uint32_t) cpu_number() == i
)
321 cpu_datap(i
)->cpu_NMI_acknowledged
= FALSE
;
322 cpu_NMI_interrupt(i
);
323 deadline
= mach_absolute_time() + (LockTimeOut
* 2);
324 while (mach_absolute_time() < deadline
&& cpu_datap(i
)->cpu_NMI_acknowledged
== FALSE
)
330 return spinlock_owner_cpu
;
334 * Acquire a usimple_lock.
336 * Returns with preemption disabled. Note
337 * that the hw_lock routines are responsible for
338 * maintaining preemption state.
344 #ifndef MACHINE_SIMPLE_LOCK
348 USLDBG(usld_lock_pre(l
, pc
));
350 if(__improbable(hw_lock_to(&l
->interlock
, LockTimeOutTSC
) == 0)) {
351 boolean_t uslock_acquired
= FALSE
;
352 while (machine_timeout_suspended()) {
354 if ((uslock_acquired
= hw_lock_to(&l
->interlock
, LockTimeOutTSC
)))
358 if (uslock_acquired
== FALSE
) {
360 uintptr_t lowner
= (uintptr_t)l
->interlock
.lock_data
;
361 spinlock_timed_out
= l
;
362 lock_cpu
= spinlock_timeout_NMI(lowner
);
363 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l
, lowner
, current_thread(), lock_cpu
, (uintptr_t)l
->interlock
.lock_data
);
366 USLDBG(usld_lock_post(l
, pc
));
368 simple_lock((simple_lock_t
)l
);
374 * Release a usimple_lock.
376 * Returns with preemption enabled. Note
377 * that the hw_lock routines are responsible for
378 * maintaining preemption state.
384 #ifndef MACHINE_SIMPLE_LOCK
388 USLDBG(usld_unlock(l
, pc
));
389 hw_lock_unlock(&l
->interlock
);
391 simple_unlock_rwmb((simple_lock_t
)l
);
397 * Conditionally acquire a usimple_lock.
399 * On success, returns with preemption disabled.
400 * On failure, returns with preemption in the same state
401 * as when first invoked. Note that the hw_lock routines
402 * are responsible for maintaining preemption state.
404 * XXX No stats are gathered on a miss; I preserved this
405 * behavior from the original assembly-language code, but
406 * doesn't it make sense to log misses? XXX
412 #ifndef MACHINE_SIMPLE_LOCK
413 unsigned int success
;
417 USLDBG(usld_lock_try_pre(l
, pc
));
418 if ((success
= hw_lock_try(&l
->interlock
))) {
419 USLDBG(usld_lock_try_post(l
, pc
));
423 return(simple_lock_try((simple_lock_t
)l
));
429 * States of a usimple_lock. The default when initializing
430 * a usimple_lock is setting it up for debug checking.
432 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
433 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
434 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
435 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
436 #define USLOCK_CHECKING(l) (uslock_check && \
437 ((l)->debug.state & USLOCK_CHECKED))
440 * Trace activities of a particularly interesting lock.
442 void usl_trace(usimple_lock_t
, int, pc_t
, const char *);
446 * Initialize the debugging information contained
452 __unused
unsigned short tag
)
454 if (l
== USIMPLE_LOCK_NULL
)
455 panic("lock initialization: null lock pointer");
456 l
->lock_type
= USLOCK_TAG
;
457 l
->debug
.state
= uslock_check
? USLOCK_INITIALIZED
: 0;
458 l
->debug
.lock_cpu
= l
->debug
.unlock_cpu
= 0;
459 l
->debug
.lock_pc
= l
->debug
.unlock_pc
= INVALID_PC
;
460 l
->debug
.lock_thread
= l
->debug
.unlock_thread
= INVALID_THREAD
;
461 l
->debug
.duration
[0] = l
->debug
.duration
[1] = 0;
462 l
->debug
.unlock_cpu
= l
->debug
.unlock_cpu
= 0;
463 l
->debug
.unlock_pc
= l
->debug
.unlock_pc
= INVALID_PC
;
464 l
->debug
.unlock_thread
= l
->debug
.unlock_thread
= INVALID_THREAD
;
469 * These checks apply to all usimple_locks, not just
470 * those with USLOCK_CHECKED turned on.
473 usld_lock_common_checks(
477 if (l
== USIMPLE_LOCK_NULL
)
478 panic("%s: null lock pointer", caller
);
479 if (l
->lock_type
!= USLOCK_TAG
)
480 panic("%s: %p is not a usimple lock, 0x%x", caller
, l
, l
->lock_type
);
481 if (!(l
->debug
.state
& USLOCK_INIT
))
482 panic("%s: %p is not an initialized lock, 0x%x", caller
, l
, l
->debug
.state
);
483 return USLOCK_CHECKING(l
);
488 * Debug checks on a usimple_lock just before attempting
497 char caller
[] = "usimple_lock";
500 if (!usld_lock_common_checks(l
, caller
))
504 * Note that we have a weird case where we are getting a lock when we are
505 * in the process of putting the system to sleep. We are running with no
506 * current threads, therefore we can't tell if we are trying to retake a lock
507 * we have or someone on the other processor has it. Therefore we just
508 * ignore this test if the locking thread is 0.
511 if ((l
->debug
.state
& USLOCK_TAKEN
) && l
->debug
.lock_thread
&&
512 l
->debug
.lock_thread
== (void *) current_thread()) {
513 printf("%s: lock %p already locked (at %p) by",
514 caller
, l
, l
->debug
.lock_pc
);
515 printf(" current thread %p (new attempt at pc %p)\n",
516 l
->debug
.lock_thread
, pc
);
519 mp_disable_preemption();
520 usl_trace(l
, cpu_number(), pc
, caller
);
521 mp_enable_preemption();
526 * Debug checks on a usimple_lock just after acquiring it.
528 * Pre-emption has been disabled at this point,
529 * so we are safe in using cpu_number.
537 char caller
[] = "successful usimple_lock";
540 if (!usld_lock_common_checks(l
, caller
))
543 if (!((l
->debug
.state
& ~USLOCK_TAKEN
) == USLOCK_INITIALIZED
))
544 panic("%s: lock %p became uninitialized",
546 if ((l
->debug
.state
& USLOCK_TAKEN
))
547 panic("%s: lock 0x%p became TAKEN by someone else",
550 mycpu
= cpu_number();
551 l
->debug
.lock_thread
= (void *)current_thread();
552 l
->debug
.state
|= USLOCK_TAKEN
;
553 l
->debug
.lock_pc
= pc
;
554 l
->debug
.lock_cpu
= mycpu
;
556 usl_trace(l
, mycpu
, pc
, caller
);
561 * Debug checks on a usimple_lock just before
562 * releasing it. Note that the caller has not
563 * yet released the hardware lock.
565 * Preemption is still disabled, so there's
566 * no problem using cpu_number.
574 char caller
[] = "usimple_unlock";
577 if (!usld_lock_common_checks(l
, caller
))
580 mycpu
= cpu_number();
582 if (!(l
->debug
.state
& USLOCK_TAKEN
))
583 panic("%s: lock 0x%p hasn't been taken",
585 if (l
->debug
.lock_thread
!= (void *) current_thread())
586 panic("%s: unlocking lock 0x%p, owned by thread %p",
587 caller
, l
, l
->debug
.lock_thread
);
588 if (l
->debug
.lock_cpu
!= mycpu
) {
589 printf("%s: unlocking lock 0x%p on cpu 0x%x",
591 printf(" (acquired on cpu 0x%x)\n", l
->debug
.lock_cpu
);
594 usl_trace(l
, mycpu
, pc
, caller
);
596 l
->debug
.unlock_thread
= l
->debug
.lock_thread
;
597 l
->debug
.lock_thread
= INVALID_PC
;
598 l
->debug
.state
&= ~USLOCK_TAKEN
;
599 l
->debug
.unlock_pc
= pc
;
600 l
->debug
.unlock_cpu
= mycpu
;
605 * Debug checks on a usimple_lock just before
606 * attempting to acquire it.
608 * Preemption isn't guaranteed to be disabled.
615 char caller
[] = "usimple_lock_try";
617 if (!usld_lock_common_checks(l
, caller
))
619 mp_disable_preemption();
620 usl_trace(l
, cpu_number(), pc
, caller
);
621 mp_enable_preemption();
626 * Debug checks on a usimple_lock just after
627 * successfully attempting to acquire it.
629 * Preemption has been disabled by the
630 * lock acquisition attempt, so it's safe
639 char caller
[] = "successful usimple_lock_try";
641 if (!usld_lock_common_checks(l
, caller
))
644 if (!((l
->debug
.state
& ~USLOCK_TAKEN
) == USLOCK_INITIALIZED
))
645 panic("%s: lock 0x%p became uninitialized",
647 if ((l
->debug
.state
& USLOCK_TAKEN
))
648 panic("%s: lock 0x%p became TAKEN by someone else",
651 mycpu
= cpu_number();
652 l
->debug
.lock_thread
= (void *) current_thread();
653 l
->debug
.state
|= USLOCK_TAKEN
;
654 l
->debug
.lock_pc
= pc
;
655 l
->debug
.lock_cpu
= mycpu
;
657 usl_trace(l
, mycpu
, pc
, caller
);
662 * For very special cases, set traced_lock to point to a
663 * specific lock of interest. The result is a series of
664 * XPRs showing lock operations on that lock. The lock_seq
665 * value is used to show the order of those operations.
667 usimple_lock_t traced_lock
;
668 unsigned int lock_seq
;
675 const char * op_name
)
677 if (traced_lock
== l
) {
679 "seq %d, cpu %d, %s @ %x\n",
680 (uintptr_t) lock_seq
, (uintptr_t) mycpu
,
681 (uintptr_t) op_name
, (uintptr_t) pc
, 0);
687 #endif /* USLOCK_DEBUG */
690 * Routine: lock_alloc
692 * Allocate a lock for external users who cannot
693 * hard-code the structure definition into their
695 * For now just use kalloc, but a zone is probably
706 if ((l
= (lock_t
*)kalloc(sizeof(lock_t
))) != 0)
707 lock_init(l
, can_sleep
, tag
, tag1
);
714 * Free a lock allocated for external users.
715 * For now just use kfree, but a zone is probably
722 kfree(l
, sizeof(lock_t
));
729 * Initialize a lock; required before use.
730 * Note that clients declare the "struct lock"
731 * variables and then initialize them, rather
732 * than getting a new one from this module.
738 __unused
unsigned short tag
,
739 __unused
unsigned short tag1
)
741 hw_lock_byte_init(&l
->lck_rw_interlock
);
742 l
->lck_rw_want_write
= FALSE
;
743 l
->lck_rw_want_upgrade
= FALSE
;
744 l
->lck_rw_shared_count
= 0;
745 l
->lck_rw_can_sleep
= can_sleep
;
747 l
->lck_rw_priv_excl
= 1;
748 l
->lck_r_waiting
= l
->lck_w_waiting
= 0;
753 * Sleep locks. These use the same data structure and algorithm
754 * as the spin locks, but the process sleeps while it is waiting
755 * for the lock. These work on uniprocessor systems.
758 #define DECREMENTER_TIMEOUT 1000000
764 lck_rw_lock_exclusive(l
);
771 (void) lck_rw_done(l
);
778 lck_rw_lock_shared(l
);
783 * Routine: lock_read_to_write
785 * Improves a read-only lock to one with
786 * write permission. If another reader has
787 * already requested an upgrade to a write lock,
788 * no lock is held upon return.
790 * Returns FALSE if the upgrade *failed*.
797 return lck_rw_lock_shared_to_exclusive(l
);
804 lck_rw_lock_exclusive_to_shared(l
);
810 * Routine: lck_rw_alloc_init
818 if ((lck
= (lck_rw_t
*)kalloc(sizeof(lck_rw_t
))) != 0) {
819 bzero(lck
, sizeof(lck_rw_t
));
820 lck_rw_init(lck
, grp
, attr
);
827 * Routine: lck_rw_free
833 lck_rw_destroy(lck
, grp
);
834 kfree(lck
, sizeof(lck_rw_t
));
838 * Routine: lck_rw_init
846 lck_attr_t
*lck_attr
= (attr
!= LCK_ATTR_NULL
) ?
847 attr
: &LockDefaultLckAttr
;
849 hw_lock_byte_init(&lck
->lck_rw_interlock
);
850 lck
->lck_rw_want_write
= FALSE
;
851 lck
->lck_rw_want_upgrade
= FALSE
;
852 lck
->lck_rw_shared_count
= 0;
853 lck
->lck_rw_can_sleep
= TRUE
;
854 lck
->lck_r_waiting
= lck
->lck_w_waiting
= 0;
856 lck
->lck_rw_priv_excl
= ((lck_attr
->lck_attr_val
&
857 LCK_ATTR_RW_SHARED_PRIORITY
) == 0);
859 lck_grp_reference(grp
);
860 lck_grp_lckcnt_incr(grp
, LCK_TYPE_RW
);
864 * Routine: lck_rw_destroy
871 if (lck
->lck_rw_tag
== LCK_RW_TAG_DESTROYED
)
873 lck
->lck_rw_tag
= LCK_RW_TAG_DESTROYED
;
874 lck_grp_lckcnt_decr(grp
, LCK_TYPE_RW
);
875 lck_grp_deallocate(grp
);
880 * Sleep locks. These use the same data structure and algorithm
881 * as the spin locks, but the process sleeps while it is waiting
882 * for the lock. These work on uniprocessor systems.
885 #define DECREMENTER_TIMEOUT 1000000
/*
 * Wait-event identifiers for a lck_rw_t. Each is the address of a field
 * inside the lock itself (lck_rw_tag for readers, lck_rw_pad8 for
 * writers), so the two classes of waiter get distinct events. They are
 * passed to assert_wait() when a thread blocks and to thread_wakeup()
 * when the lock is released.
 */
887 #define RW_LOCK_READER_EVENT(x) \
888 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))
/* Writer counterpart of RW_LOCK_READER_EVENT, keyed on the lck_rw_pad8 field. */
890 #define RW_LOCK_WRITER_EVENT(x) \
891 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
894 * We disable interrupts while holding the RW interlock to prevent an
895 * interrupt from exacerbating hold time.
896 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
899 lck_interlock_lock(lck_rw_t
*lck
)
903 istate
= ml_set_interrupts_enabled(FALSE
);
904 hw_lock_byte_lock(&lck
->lck_rw_interlock
);
910 lck_interlock_unlock(lck_rw_t
*lck
, boolean_t istate
)
912 hw_lock_byte_unlock(&lck
->lck_rw_interlock
);
913 ml_set_interrupts_enabled(istate
);
917 * This inline is used when busy-waiting for an rw lock.
918 * If interrupts were disabled when the lock primitive was called,
919 * we poll the IPI handler for pending tlb flushes.
920 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
923 lck_rw_lock_pause(boolean_t interrupts_enabled
)
925 if (!interrupts_enabled
)
926 handle_pending_TLB_flushes();
932 * compute the deadline to spin against when
933 * waiting for a change of state on a lck_rw_t
935 static inline uint64_t
936 lck_rw_deadline_for_spin(lck_rw_t
*lck
)
938 if (lck
->lck_rw_can_sleep
) {
939 if (lck
->lck_r_waiting
|| lck
->lck_w_waiting
|| lck
->lck_rw_shared_count
> machine_info
.max_cpus
) {
941 * there are already threads waiting on this lock... this
942 * implies that they have spun beyond their deadlines waiting for
943 * the desired state to show up so we will not bother spinning at this time...
945 * the current number of threads sharing this lock exceeds our capacity to run them
946 * concurrently and since all states we're going to spin for require the rw_shared_count
947 * to be at 0, we'll not bother spinning since the latency for this to happen is
950 return (mach_absolute_time());
952 return (mach_absolute_time() + MutexSpin
);
954 return (mach_absolute_time() + (100000LL * 1000000000LL));
959 * Routine: lck_rw_lock_exclusive
962 lck_rw_lock_exclusive_gen(
965 uint64_t deadline
= 0;
969 wait_result_t res
= 0;
970 boolean_t istate
= -1;
973 boolean_t dtrace_ls_initialized
= FALSE
;
974 boolean_t dtrace_rwl_excl_spin
, dtrace_rwl_excl_block
, dtrace_ls_enabled
= FALSE
;
975 uint64_t wait_interval
= 0;
976 int readers_at_sleep
= 0;
980 * Try to acquire the lck_rw_want_write bit.
982 while ( !lck_rw_grab_want(lck
)) {
985 if (dtrace_ls_initialized
== FALSE
) {
986 dtrace_ls_initialized
= TRUE
;
987 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
988 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
989 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
990 if (dtrace_ls_enabled
) {
992 * Either sleeping or spinning is happening,
993 * start a timing of our delay interval now.
995 readers_at_sleep
= lck
->lck_rw_shared_count
;
996 wait_interval
= mach_absolute_time();
1001 istate
= ml_get_interrupts_enabled();
1003 deadline
= lck_rw_deadline_for_spin(lck
);
1005 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_START
, (int)lck
, 0, 0, 0, 0);
1007 while (((gotlock
= lck_rw_grab_want(lck
)) == 0) && mach_absolute_time() < deadline
)
1008 lck_rw_lock_pause(istate
);
1010 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_END
, (int)lck
, 0, 0, gotlock
, 0);
1015 * if we get here, the deadline has expired w/o us
1016 * being able to grab the lock exclusively
1017 * check to see if we're allowed to do a thread_block
1019 if (lck
->lck_rw_can_sleep
) {
1021 istate
= lck_interlock_lock(lck
);
1023 if (lck
->lck_rw_want_write
) {
1025 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_START
, (int)lck
, 0, 0, 0, 0);
1027 lck
->lck_w_waiting
= TRUE
;
1029 res
= assert_wait(RW_LOCK_WRITER_EVENT(lck
), THREAD_UNINT
);
1030 lck_interlock_unlock(lck
, istate
);
1032 if (res
== THREAD_WAITING
) {
1033 res
= thread_block(THREAD_CONTINUE_NULL
);
1036 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_END
, (int)lck
, res
, slept
, 0, 0);
1038 lck
->lck_rw_want_write
= TRUE
;
1039 lck_interlock_unlock(lck
, istate
);
1045 * Wait for readers (and upgrades) to finish...
1046 * the test for these conditions must be done simultaneously with
1047 * a check of the interlock not being held since
1048 * the rw_shared_count will drop to 0 first and then want_upgrade
1049 * will be set to 1 in the shared_to_exclusive scenario... those
1050 * adjustments are done behind the interlock and represent an
1051 * atomic change in state and must be considered as such
1052 * however, once we see the read count at 0, the want_upgrade not set
1053 * and the interlock not held, we are safe to proceed
1055 while (lck_rw_held_read_or_upgrade(lck
)) {
1059 * Either sleeping or spinning is happening, start
1060 * a timing of our delay interval now. If dtrace_ls_enabled
1061 * stays FALSE we don't have accurate data so we cannot later
1062 * decide to record a dtrace spin or sleep event.
1064 if (dtrace_ls_initialized
== FALSE
) {
1065 dtrace_ls_initialized
= TRUE
;
1066 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1067 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1068 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1069 if (dtrace_ls_enabled
) {
1071 * Either sleeping or spinning is happening,
1072 * start a timing of our delay interval now.
1074 readers_at_sleep
= lck
->lck_rw_shared_count
;
1075 wait_interval
= mach_absolute_time();
1080 istate
= ml_get_interrupts_enabled();
1082 deadline
= lck_rw_deadline_for_spin(lck
);
1084 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_START
, (int)lck
, 0, 0, 0, 0);
1086 while ((lockheld
= lck_rw_held_read_or_upgrade(lck
)) && mach_absolute_time() < deadline
)
1087 lck_rw_lock_pause(istate
);
1089 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_END
, (int)lck
, 0, 0, lockheld
, 0);
1094 * if we get here, the deadline has expired w/o us
1095 * being able to grab the lock exclusively
1096 * check to see if we're allowed to do a thread_block
1098 if (lck
->lck_rw_can_sleep
) {
1100 istate
= lck_interlock_lock(lck
);
1102 if (lck
->lck_rw_shared_count
!= 0 || lck
->lck_rw_want_upgrade
) {
1103 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_START
, (int)lck
, 0, 0, 0, 0);
1105 lck
->lck_w_waiting
= TRUE
;
1107 res
= assert_wait(RW_LOCK_WRITER_EVENT(lck
), THREAD_UNINT
);
1108 lck_interlock_unlock(lck
, istate
);
1110 if (res
== THREAD_WAITING
) {
1111 res
= thread_block(THREAD_CONTINUE_NULL
);
1114 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_END
, (int)lck
, res
, slept
, 0, 0);
1116 lck_interlock_unlock(lck
, istate
);
1118 * must own the lock now, since we checked for
1119 * readers or upgrade owner behind the interlock
1120 * no need for a call to 'lck_rw_held_read_or_upgrade'
1129 * Decide what latencies we suffered that are Dtrace events.
1130 * If we have set wait_interval, then we either spun or slept.
1131 * At least we get out from under the interlock before we record
1132 * which is the best we can do here to minimize the impact
1134 * If dtrace_ls_enabled is FALSE, then dtrace was not enabled when we
1135 * started sleeping/spinning so we don't record this event.
1137 if (dtrace_ls_enabled
== TRUE
) {
1139 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN
, lck
,
1140 mach_absolute_time() - wait_interval
, 1);
1143 * For the blocking case, we also record whether the lock was held
1144 * for read or write when we blocked, and how many readers there were.
1145 * Notice that above we recorded this before we dropped
1146 * the interlock so the count is accurate.
1148 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK
, lck
,
1149 mach_absolute_time() - wait_interval
, 1,
1150 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1153 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lck
, 1);
1159 * Routine: lck_rw_done_gen
1161 * called from the assembly language wrapper...
1162 * prior_lock_state is the value in the 1st
1163 * word of the lock at the time of a successful
1164 * atomic compare and exchange with the new value...
1165 * it represents the state of the lock before we
1166 * decremented the rw_shared_count or cleared either
1167 * rw_want_upgrade or rw_want_write and
1168 * the lck_x_waiting bits... since the wrapper
1169 * routine has already changed the state atomically,
1170 * we just need to decide if we should
1171 * wake up anyone and what value to return... we do
1172 * this by examining the state of the lock before
1178 int prior_lock_state
)
1181 lck_rw_type_t lock_type
;
1184 * prior_lock_state is a snapshot of the 1st word of the
1185 * lock in question... we'll fake up a pointer to it
1186 * and carefully not access anything beyond what's defined
1187 * in the first word of a lck_rw_t
1189 fake_lck
= (lck_rw_t
*)&prior_lock_state
;
1191 if (fake_lck
->lck_rw_shared_count
<= 1) {
1192 if (fake_lck
->lck_w_waiting
)
1193 thread_wakeup(RW_LOCK_WRITER_EVENT(lck
));
1195 if (!(fake_lck
->lck_rw_priv_excl
&& fake_lck
->lck_w_waiting
) && fake_lck
->lck_r_waiting
)
1196 thread_wakeup(RW_LOCK_READER_EVENT(lck
));
1198 if (fake_lck
->lck_rw_shared_count
)
1199 lock_type
= LCK_RW_TYPE_SHARED
;
1201 lock_type
= LCK_RW_TYPE_EXCLUSIVE
;
1204 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE
, lck
, lock_type
== LCK_RW_TYPE_SHARED
? 0 : 1);
1212 * Routine: lck_rw_unlock
1217 lck_rw_type_t lck_rw_type
)
1219 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1220 lck_rw_unlock_shared(lck
);
1221 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1222 lck_rw_unlock_exclusive(lck
);
1224 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type
);
1229 * Routine: lck_rw_unlock_shared
1232 lck_rw_unlock_shared(
1237 ret
= lck_rw_done(lck
);
1239 if (ret
!= LCK_RW_TYPE_SHARED
)
1240 panic("lck_rw_unlock(): lock held in mode: %d\n", ret
);
1245 * Routine: lck_rw_unlock_exclusive
1248 lck_rw_unlock_exclusive(
1253 ret
= lck_rw_done(lck
);
1255 if (ret
!= LCK_RW_TYPE_EXCLUSIVE
)
1256 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret
);
1261 * Routine: lck_rw_lock
1266 lck_rw_type_t lck_rw_type
)
1268 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1269 lck_rw_lock_shared(lck
);
1270 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1271 lck_rw_lock_exclusive(lck
);
1273 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type
);
1278 * Routine: lck_rw_lock_shared_gen
1280 * assembly fast path code has determined that this lock
1281 * is held exclusively... this is where we spin/block
1282 * until we can acquire the lock in the shared mode
1285 lck_rw_lock_shared_gen(
1288 uint64_t deadline
= 0;
1291 wait_result_t res
= 0;
1292 boolean_t istate
= -1;
1295 uint64_t wait_interval
= 0;
1296 int readers_at_sleep
= 0;
1297 boolean_t dtrace_ls_initialized
= FALSE
;
1298 boolean_t dtrace_rwl_shared_spin
, dtrace_rwl_shared_block
, dtrace_ls_enabled
= FALSE
;
1301 while ( !lck_rw_grab_shared(lck
)) {
1304 if (dtrace_ls_initialized
== FALSE
) {
1305 dtrace_ls_initialized
= TRUE
;
1306 dtrace_rwl_shared_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_SPIN
] != 0);
1307 dtrace_rwl_shared_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_BLOCK
] != 0);
1308 dtrace_ls_enabled
= dtrace_rwl_shared_spin
|| dtrace_rwl_shared_block
;
1309 if (dtrace_ls_enabled
) {
1311 * Either sleeping or spinning is happening,
1312 * start a timing of our delay interval now.
1314 readers_at_sleep
= lck
->lck_rw_shared_count
;
1315 wait_interval
= mach_absolute_time();
1320 istate
= ml_get_interrupts_enabled();
1322 deadline
= lck_rw_deadline_for_spin(lck
);
1324 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_START
,
1325 (int)lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, 0, 0);
1327 while (((gotlock
= lck_rw_grab_shared(lck
)) == 0) && mach_absolute_time() < deadline
)
1328 lck_rw_lock_pause(istate
);
1330 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_END
,
1331 (int)lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, gotlock
, 0);
1336 * if we get here, the deadline has expired w/o us
1337 * being able to grab the lock for read
1338 * check to see if we're allowed to do a thread_block
1340 if (lck
->lck_rw_can_sleep
) {
1342 istate
= lck_interlock_lock(lck
);
1344 if ((lck
->lck_rw_want_write
|| lck
->lck_rw_want_upgrade
) &&
1345 ((lck
->lck_rw_shared_count
== 0) || lck
->lck_rw_priv_excl
)) {
1347 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_START
,
1348 (int)lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, 0, 0);
1350 lck
->lck_r_waiting
= TRUE
;
1352 res
= assert_wait(RW_LOCK_READER_EVENT(lck
), THREAD_UNINT
);
1353 lck_interlock_unlock(lck
, istate
);
1355 if (res
== THREAD_WAITING
) {
1356 res
= thread_block(THREAD_CONTINUE_NULL
);
1359 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_END
,
1360 (int)lck
, res
, slept
, 0, 0);
1362 lck
->lck_rw_shared_count
++;
1363 lck_interlock_unlock(lck
, istate
);
1370 if (dtrace_ls_enabled
== TRUE
) {
1372 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN
, lck
, mach_absolute_time() - wait_interval
, 0);
1374 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK
, lck
,
1375 mach_absolute_time() - wait_interval
, 0,
1376 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1379 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lck
, 0);
1385 * Routine: lck_rw_lock_shared_to_exclusive_failure
1387 * assembly fast path code has already dropped our read
1388 * count and determined that someone else owns 'lck_rw_want_upgrade'
1389 * if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
1390 * all we need to do here is determine if a wakeup is needed
1393 lck_rw_lock_shared_to_exclusive_failure(
1395 int prior_lock_state
)
1400 * prior_lock state is a snapshot of the 1st word of the
1401 * lock in question... we'll fake up a pointer to it
1402 * and carefully not access anything beyond whats defined
1403 * in the first word of a lck_rw_t
1405 fake_lck
= (lck_rw_t
*)&prior_lock_state
;
1407 if (fake_lck
->lck_w_waiting
&& fake_lck
->lck_rw_shared_count
== 1) {
1409 * Someone else has requested upgrade.
1410 * Since we've released the read lock, wake
1411 * him up if he's blocked waiting
1413 thread_wakeup(RW_LOCK_WRITER_EVENT(lck
));
1415 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_CODE
) | DBG_FUNC_NONE
,
1416 (int)lck
, lck
->lck_rw_shared_count
, lck
->lck_rw_want_upgrade
, 0, 0);
1423 * Routine: lck_rw_lock_shared_to_exclusive_failure
1425 * assembly fast path code has already dropped our read
1426 * count and successfully acquired 'lck_rw_want_upgrade'
1427 * we just need to wait for the rest of the readers to drain
1428 * and then we can return as the exclusive holder of this lock
1431 lck_rw_lock_shared_to_exclusive_success(
1434 uint64_t deadline
= 0;
1436 int still_shared
= 0;
1438 boolean_t istate
= -1;
1441 uint64_t wait_interval
= 0;
1442 int readers_at_sleep
= 0;
1443 boolean_t dtrace_ls_initialized
= FALSE
;
1444 boolean_t dtrace_rwl_shared_to_excl_spin
, dtrace_rwl_shared_to_excl_block
, dtrace_ls_enabled
= FALSE
;
1447 while (lck
->lck_rw_shared_count
!= 0) {
1450 if (dtrace_ls_initialized
== FALSE
) {
1451 dtrace_ls_initialized
= TRUE
;
1452 dtrace_rwl_shared_to_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
] != 0);
1453 dtrace_rwl_shared_to_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
] != 0);
1454 dtrace_ls_enabled
= dtrace_rwl_shared_to_excl_spin
|| dtrace_rwl_shared_to_excl_block
;
1455 if (dtrace_ls_enabled
) {
1457 * Either sleeping or spinning is happening,
1458 * start a timing of our delay interval now.
1460 readers_at_sleep
= lck
->lck_rw_shared_count
;
1461 wait_interval
= mach_absolute_time();
1466 istate
= ml_get_interrupts_enabled();
1468 deadline
= lck_rw_deadline_for_spin(lck
);
1470 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_START
,
1471 (int)lck
, lck
->lck_rw_shared_count
, 0, 0, 0);
1473 while ((still_shared
= lck
->lck_rw_shared_count
) && mach_absolute_time() < deadline
)
1474 lck_rw_lock_pause(istate
);
1476 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_END
,
1477 (int)lck
, lck
->lck_rw_shared_count
, 0, 0, 0);
1482 * if we get here, the deadline has expired w/o
1483 * the rw_shared_count having drained to 0
1484 * check to see if we're allowed to do a thread_block
1486 if (lck
->lck_rw_can_sleep
) {
1488 istate
= lck_interlock_lock(lck
);
1490 if (lck
->lck_rw_shared_count
!= 0) {
1491 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_START
,
1492 (int)lck
, lck
->lck_rw_shared_count
, 0, 0, 0);
1494 lck
->lck_w_waiting
= TRUE
;
1496 res
= assert_wait(RW_LOCK_WRITER_EVENT(lck
), THREAD_UNINT
);
1497 lck_interlock_unlock(lck
, istate
);
1499 if (res
== THREAD_WAITING
) {
1500 res
= thread_block(THREAD_CONTINUE_NULL
);
1503 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_END
,
1504 (int)lck
, res
, slept
, 0, 0);
1506 lck_interlock_unlock(lck
, istate
);
1513 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1515 if (dtrace_ls_enabled
== TRUE
) {
1517 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
, lck
, mach_absolute_time() - wait_interval
, 0);
1519 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
, lck
,
1520 mach_absolute_time() - wait_interval
, 1,
1521 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1524 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lck
, 1);
1531 * Routine: lck_rw_lock_exclusive_to_shared
1533 * assembly fast path has already dropped
1534 * our exclusive state and bumped lck_rw_shared_count
1535 * all we need to do here is determine if anyone
1536 * needs to be awakened.
1539 lck_rw_lock_exclusive_to_shared_gen(
1541 int prior_lock_state
)
1546 * prior_lock state is a snapshot of the 1st word of the
1547 * lock in question... we'll fake up a pointer to it
1548 * and carefully not access anything beyond whats defined
1549 * in the first word of a lck_rw_t
1551 fake_lck
= (lck_rw_t
*)&prior_lock_state
;
1553 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_START
,
1554 (int)lck
, fake_lck
->lck_rw_want_write
, fake_lck
->lck_rw_want_upgrade
, 0, 0);
1557 * don't wake up anyone waiting to take the lock exclusively
1558 * since we hold a read count... when the read count drops to 0,
1559 * the writers will be woken.
1561 * wake up any waiting readers if we don't have any writers waiting,
1562 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1564 if (!(fake_lck
->lck_rw_priv_excl
&& fake_lck
->lck_w_waiting
) && fake_lck
->lck_r_waiting
)
1565 thread_wakeup(RW_LOCK_READER_EVENT(lck
));
1567 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_END
,
1568 (int)lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, lck
->lck_rw_shared_count
, 0);
1571 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE
, lck
, 0);
1577 * Routine: lck_rw_try_lock
1582 lck_rw_type_t lck_rw_type
)
1584 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1585 return(lck_rw_try_lock_shared(lck
));
1586 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1587 return(lck_rw_try_lock_exclusive(lck
));
1589 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type
);
1600 case LCK_RW_ASSERT_SHARED
:
1601 if (lck
->lck_rw_shared_count
!= 0) {
1605 case LCK_RW_ASSERT_EXCLUSIVE
:
1606 if ((lck
->lck_rw_want_write
||
1607 lck
->lck_rw_want_upgrade
) &&
1608 lck
->lck_rw_shared_count
== 0) {
1612 case LCK_RW_ASSERT_HELD
:
1613 if (lck
->lck_rw_want_write
||
1614 lck
->lck_rw_want_upgrade
||
1615 lck
->lck_rw_shared_count
!= 0) {
1623 panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck
, type
, *(uint32_t *)lck
);
1627 extern zone_t lck_mtx_zone
;
1630 * Routine: lck_mtx_alloc_init
1639 if ((lck
= (lck_mtx_t
*)zalloc(lck_mtx_zone
)) != 0)
1640 lck_mtx_init(lck
, grp
, attr
);
1642 if ((lck
= (lck_mtx_t
*)kalloc(sizeof(lck_mtx_t
))) != 0)
1643 lck_mtx_init(lck
, grp
, attr
);
1649 * Routine: lck_mtx_free
1656 lck_mtx_destroy(lck
, grp
);
1658 zfree(lck_mtx_zone
, lck
);
1660 kfree(lck
, sizeof(lck_mtx_t
));
1665 * Routine: lck_mtx_ext_init
1673 bzero((void *)lck
, sizeof(lck_mtx_ext_t
));
1675 if ((attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
1676 lck
->lck_mtx_deb
.type
= MUTEX_TAG
;
1677 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_DEBUG
;
1680 lck
->lck_mtx_grp
= grp
;
1682 if (grp
->lck_grp_attr
& LCK_GRP_ATTR_STAT
)
1683 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_STAT
;
1685 lck
->lck_mtx
.lck_mtx_is_ext
= 1;
1686 #if defined(__x86_64__)
1687 lck
->lck_mtx
.lck_mtx_sw
.lck_mtxd
.lck_mtxd_pad32
= 0xFFFFFFFF;
1692 * Routine: lck_mtx_init
1700 lck_mtx_ext_t
*lck_ext
;
1701 lck_attr_t
*lck_attr
;
1703 if (attr
!= LCK_ATTR_NULL
)
1706 lck_attr
= &LockDefaultLckAttr
;
1708 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
1709 if ((lck_ext
= (lck_mtx_ext_t
*)kalloc(sizeof(lck_mtx_ext_t
))) != 0) {
1710 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
1711 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
1712 lck
->lck_mtx_ptr
= lck_ext
;
1715 lck
->lck_mtx_owner
= 0;
1716 lck
->lck_mtx_state
= 0;
1718 #if defined(__x86_64__)
1719 lck
->lck_mtx_sw
.lck_mtxd
.lck_mtxd_pad32
= 0xFFFFFFFF;
1721 lck_grp_reference(grp
);
1722 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
1726 * Routine: lck_mtx_init_ext
1731 lck_mtx_ext_t
*lck_ext
,
1735 lck_attr_t
*lck_attr
;
1737 if (attr
!= LCK_ATTR_NULL
)
1740 lck_attr
= &LockDefaultLckAttr
;
1742 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
1743 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
1744 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
1745 lck
->lck_mtx_ptr
= lck_ext
;
1747 lck
->lck_mtx_owner
= 0;
1748 lck
->lck_mtx_state
= 0;
1750 #if defined(__x86_64__)
1751 lck
->lck_mtx_sw
.lck_mtxd
.lck_mtxd_pad32
= 0xFFFFFFFF;
1754 lck_grp_reference(grp
);
1755 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
1759 * Routine: lck_mtx_destroy
1766 boolean_t lck_is_indirect
;
1768 if (lck
->lck_mtx_tag
== LCK_MTX_TAG_DESTROYED
)
1770 lck_is_indirect
= (lck
->lck_mtx_tag
== LCK_MTX_TAG_INDIRECT
);
1772 lck_mtx_lock_mark_destroyed(lck
);
1774 if (lck_is_indirect
)
1775 kfree(lck
->lck_mtx_ptr
, sizeof(lck_mtx_ext_t
));
1776 lck_grp_lckcnt_decr(grp
, LCK_TYPE_MTX
);
1777 lck_grp_deallocate(grp
);
/*
 * Trace sub-codes passed to MACHDBG_CODE(DBG_MACH_LOCKS, ...) by the
 * mutex wait / wakeup / spin / acquire / demote paths.
 */
#define LCK_MTX_LCK_WAIT_CODE		0x20
#define LCK_MTX_LCK_WAKEUP_CODE		0x21
#define LCK_MTX_LCK_SPIN_CODE		0x22
#define LCK_MTX_LCK_ACQUIRE_CODE	0x23
#define LCK_MTX_LCK_DEMOTE_CODE		0x24
1790 * Routine: lck_mtx_unlock_wakeup_x86
1792 * Invoked on unlock when there is
1793 * contention (i.e. the assembly routine sees that
1794 * that mutex->lck_mtx_waiters != 0 or
1795 * that mutex->lck_mtx_promoted != 0...
1797 * neither the mutex or interlock is held
1800 lck_mtx_unlock_wakeup_x86 (
1802 int prior_lock_state
)
1807 * prior_lock state is a snapshot of the 2nd word of the
1808 * lock in question... we'll fake up a lock with the bits
1809 * copied into place and carefully not access anything
1810 * beyond whats defined in the second word of a lck_mtx_t
1812 fake_lck
.lck_mtx_state
= prior_lock_state
;
1814 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAKEUP_CODE
) | DBG_FUNC_START
,
1815 mutex
, fake_lck
.lck_mtx_promoted
, fake_lck
.lck_mtx_waiters
, fake_lck
.lck_mtx_pri
, 0);
1817 if (__probable(fake_lck
.lck_mtx_waiters
)) {
1819 if (fake_lck
.lck_mtx_waiters
> 1)
1820 thread_wakeup_one_with_pri((event_t
)(((unsigned int*)mutex
)+(sizeof(lck_mtx_t
)-1)/sizeof(unsigned int)), fake_lck
.lck_mtx_pri
);
1822 thread_wakeup_one((event_t
)(((unsigned int*)mutex
)+(sizeof(lck_mtx_t
)-1)/sizeof(unsigned int)));
1825 if (__improbable(fake_lck
.lck_mtx_promoted
)) {
1826 thread_t thread
= current_thread();
1829 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_DEMOTE_CODE
) | DBG_FUNC_NONE
,
1830 thread_tid(thread
), thread
->promotions
, thread
->sched_flags
& TH_SFLAG_PROMOTED
, 0, 0);
1832 if (thread
->promotions
> 0) {
1833 spl_t s
= splsched();
1835 thread_lock(thread
);
1837 if (--thread
->promotions
== 0 && (thread
->sched_flags
& TH_SFLAG_PROMOTED
)) {
1839 thread
->sched_flags
&= ~TH_SFLAG_PROMOTED
;
1841 if (thread
->sched_flags
& TH_SFLAG_DEPRESSED_MASK
) {
1842 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_DEMOTE
) | DBG_FUNC_NONE
,
1843 thread
->sched_pri
, DEPRESSPRI
, 0, mutex
, 0);
1845 set_sched_pri(thread
, DEPRESSPRI
);
1848 if (thread
->priority
< thread
->sched_pri
) {
1849 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_DEMOTE
) | DBG_FUNC_NONE
,
1850 thread
->sched_pri
, thread
->priority
, 0, mutex
, 0);
1852 SCHED(compute_priority
)(thread
, FALSE
);
1856 thread_unlock(thread
);
1860 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAKEUP_CODE
) | DBG_FUNC_END
,
1861 mutex
, 0, mutex
->lck_mtx_waiters
, 0, 0);
1866 * Routine: lck_mtx_lock_acquire_x86
1868 * Invoked on acquiring the mutex when there is
1869 * contention (i.e. the assembly routine sees that
1870 * that mutex->lck_mtx_waiters != 0 or
1871 * thread->was_promoted_on_wakeup != 0)...
1873 * mutex is owned... interlock is held... preemption is disabled
1876 lck_mtx_lock_acquire_x86(
1883 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_ACQUIRE_CODE
) | DBG_FUNC_START
,
1884 mutex
, thread
->was_promoted_on_wakeup
, mutex
->lck_mtx_waiters
, mutex
->lck_mtx_pri
, 0);
1886 if (mutex
->lck_mtx_waiters
)
1887 priority
= mutex
->lck_mtx_pri
;
1891 thread
= (thread_t
)mutex
->lck_mtx_owner
; /* faster then current_thread() */
1893 if (thread
->sched_pri
< priority
|| thread
->was_promoted_on_wakeup
) {
1895 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_PROMOTE
) | DBG_FUNC_NONE
,
1896 thread
->sched_pri
, priority
, thread
->was_promoted_on_wakeup
, mutex
, 0);
1899 thread_lock(thread
);
1901 if (thread
->sched_pri
< priority
)
1902 set_sched_pri(thread
, priority
);
1904 if (mutex
->lck_mtx_promoted
== 0) {
1905 mutex
->lck_mtx_promoted
= 1;
1907 thread
->promotions
++;
1908 thread
->sched_flags
|= TH_SFLAG_PROMOTED
;
1910 thread
->was_promoted_on_wakeup
= 0;
1912 thread_unlock(thread
);
1915 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_ACQUIRE_CODE
) | DBG_FUNC_END
,
1916 mutex
, 0, mutex
->lck_mtx_waiters
, 0, 0);
1922 * Routine: lck_mtx_lock_spinwait_x86
1924 * Invoked trying to acquire a mutex when there is contention but
1925 * the holder is running on another processor. We spin for up to a maximum
1926 * time waiting for the lock to be released.
1928 * Called with the interlock unlocked.
1929 * returns 0 if mutex acquired
1930 * returns 1 if we spun
1931 * returns 2 if we didn't spin due to the holder not running
1934 lck_mtx_lock_spinwait_x86(
1943 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_SPIN_CODE
) | DBG_FUNC_START
,
1944 mutex
, mutex
->lck_mtx_owner
, mutex
->lck_mtx_waiters
, 0, 0);
1946 deadline
= mach_absolute_time() + MutexSpin
;
1950 * - mutex is locked, and
1951 * - its locked as a spin lock, and
1952 * - owner is running on another processor, and
1953 * - owner (processor) is not idling, and
1954 * - we haven't spun for long enough.
1957 if (__probable(lck_mtx_lock_grab_mutex(mutex
))) {
1961 if ((holder
= (thread_t
) mutex
->lck_mtx_owner
) != NULL
) {
1963 if ( !(holder
->machine
.specFlags
& OnProc
) ||
1964 (holder
->state
& TH_IDLE
)) {
1974 } while (mach_absolute_time() < deadline
);
1979 * We've already kept a count via deadline of how long we spun.
1980 * If dtrace is active, then we compute backwards to decide how
1983 * Note that we record a different probe id depending on whether
1984 * this is a direct or indirect mutex. This allows us to
1985 * penalize only lock groups that have debug/stats enabled
1986 * with dtrace processing if desired.
1988 if (__probable(mutex
->lck_mtx_is_ext
== 0)) {
1989 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN
, mutex
,
1990 mach_absolute_time() - (deadline
- MutexSpin
));
1992 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN
, mutex
,
1993 mach_absolute_time() - (deadline
- MutexSpin
));
1995 /* The lockstat acquire event is recorded by the assembly code beneath us. */
1998 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_SPIN_CODE
) | DBG_FUNC_END
,
1999 mutex
, mutex
->lck_mtx_owner
, mutex
->lck_mtx_waiters
, retval
, 0);
2007 * Routine: lck_mtx_lock_wait_x86
2009 * Invoked in order to wait on contention.
2011 * Called with the interlock locked and
2012 * preemption disabled...
2013 * returns it unlocked and with preemption enabled
2016 lck_mtx_lock_wait_x86 (
2019 thread_t self
= current_thread();
2024 uint64_t sleep_start
= 0;
2026 if (lockstat_probemap
[LS_LCK_MTX_LOCK_BLOCK
] || lockstat_probemap
[LS_LCK_MTX_EXT_LOCK_BLOCK
]) {
2027 sleep_start
= mach_absolute_time();
2030 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAIT_CODE
) | DBG_FUNC_START
,
2031 mutex
, mutex
->lck_mtx_owner
, mutex
->lck_mtx_waiters
, mutex
->lck_mtx_pri
, 0);
2033 priority
= self
->sched_pri
;
2035 if (priority
< self
->priority
)
2036 priority
= self
->priority
;
2037 if (priority
< BASEPRI_DEFAULT
)
2038 priority
= BASEPRI_DEFAULT
;
2040 if (mutex
->lck_mtx_waiters
== 0 || priority
> mutex
->lck_mtx_pri
)
2041 mutex
->lck_mtx_pri
= priority
;
2042 mutex
->lck_mtx_waiters
++;
2044 if ( (holder
= (thread_t
)mutex
->lck_mtx_owner
) &&
2045 holder
->sched_pri
< mutex
->lck_mtx_pri
) {
2048 thread_lock(holder
);
2050 if (holder
->sched_pri
< mutex
->lck_mtx_pri
) {
2051 KERNEL_DEBUG_CONSTANT(
2052 MACHDBG_CODE(DBG_MACH_SCHED
, MACH_PROMOTE
) | DBG_FUNC_NONE
,
2053 holder
->sched_pri
, priority
, thread_tid(holder
), mutex
, 0);
2055 set_sched_pri(holder
, priority
);
2057 if (mutex
->lck_mtx_promoted
== 0) {
2058 holder
->promotions
++;
2059 holder
->sched_flags
|= TH_SFLAG_PROMOTED
;
2061 mutex
->lck_mtx_promoted
= 1;
2064 thread_unlock(holder
);
2067 assert_wait((event_t
)(((unsigned int*)mutex
)+((sizeof(lck_mtx_t
)-1)/sizeof(unsigned int))), THREAD_UNINT
);
2069 lck_mtx_ilk_unlock(mutex
);
2071 thread_block(THREAD_CONTINUE_NULL
);
2073 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAIT_CODE
) | DBG_FUNC_END
,
2074 mutex
, mutex
->lck_mtx_owner
, mutex
->lck_mtx_waiters
, mutex
->lck_mtx_pri
, 0);
2078 * Record the Dtrace lockstat probe for blocking, block time
2079 * measured from when we were entered.
2082 if (mutex
->lck_mtx_is_ext
== 0) {
2083 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK
, mutex
,
2084 mach_absolute_time() - sleep_start
);
2086 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK
, mutex
,
2087 mach_absolute_time() - sleep_start
);