/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Locking primitives implementation
 */
#include <mach_ldebug.h>

#include <kern/lock.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/cpu_data.h>
#include <kern/cpu_number.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>

#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_print.h>

#include <i386/machine_routines.h> /* machine_timeout_suspended() */
#include <machine/machine_cpu.h>

#include <sys/kdebug.h>
/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe().  Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>
#define	LCK_RW_LCK_EXCLUSIVE_CODE	0x100
#define	LCK_RW_LCK_EXCLUSIVE1_CODE	0x101
#define	LCK_RW_LCK_SHARED_CODE		0x102
#define	LCK_RW_LCK_SH_TO_EX_CODE	0x103
#define	LCK_RW_LCK_SH_TO_EX1_CODE	0x104
#define	LCK_RW_LCK_EX_TO_SH_CODE	0x105

#define	LCK_RW_LCK_EX_WRITER_SPIN_CODE	0x106
#define	LCK_RW_LCK_EX_WRITER_WAIT_CODE	0x107
#define	LCK_RW_LCK_EX_READER_SPIN_CODE	0x108
#define	LCK_RW_LCK_EX_READER_WAIT_CODE	0x109
#define	LCK_RW_LCK_SHARED_SPIN_CODE	0x110
#define	LCK_RW_LCK_SHARED_WAIT_CODE	0x111
#define	LCK_RW_LCK_SH_TO_EX_SPIN_CODE	0x112
#define	LCK_RW_LCK_SH_TO_EX_WAIT_CODE	0x113
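/*
 * Illustrative sketch (not part of the original source): these codes are
 * combined with MACHDBG_CODE() and a DBG_FUNC_* qualifier to form the kdebug
 * tracepoints emitted throughout this file.
 */
#if 0	/* illustration only */
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
		     (int)lck, 0, 0, 0, 0);
	/* ... spin for the lock ... */
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
		     (int)lck, 0, 0, 0, 0);
#endif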
#define	ANY_LOCK_DEBUG	(USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

unsigned int LcksOpts = 0;

#if	MACH_KDB
void	db_print_simple_lock(
			simple_lock_t	addr);
#endif	/* MACH_KDB */

#if	USLOCK_DEBUG
/*
 *	Perform simple lock checks.
 */
int	uslock_check = 1;
int	max_lock_loops	= 100000000;
decl_simple_lock_data(extern , printf_lock)
decl_simple_lock_data(extern , panic_lock)
#endif	/* USLOCK_DEBUG */
/*
 *	We often want to know the addresses of the callers
 *	of the various lock routines.  However, this information
 *	is only used for debugging and statistics.
 */
#define	INVALID_PC	((void *) VM_MAX_KERNEL_ADDRESS)
#define	INVALID_THREAD	((void *) VM_MAX_KERNEL_ADDRESS)

#if	ANY_LOCK_DEBUG
#define	OBTAIN_PC(pc)	((pc) = GET_RETURN_PC())
#define DECL_PC(pc)	pc_t pc;
#else	/* ANY_LOCK_DEBUG */
#define DECL_PC(pc)
#ifdef	lint
/*
 *	Eliminate lint complaints about unused local pc variables.
 */
#define	OBTAIN_PC(pc)	++pc
#else	/* lint */
#define	OBTAIN_PC(pc)
#endif	/* lint */
#endif	/* USLOCK_DEBUG */
/*
 *	Portable lock package implementation of usimple_locks.
 */

#if	USLOCK_DEBUG
#define	USLDBG(stmt)	stmt
void	usld_lock_init(usimple_lock_t, unsigned short);
void	usld_lock_pre(usimple_lock_t, pc_t);
void	usld_lock_post(usimple_lock_t, pc_t);
void	usld_unlock(usimple_lock_t, pc_t);
void	usld_lock_try_pre(usimple_lock_t, pc_t);
void	usld_lock_try_post(usimple_lock_t, pc_t);
int	usld_lock_common_checks(usimple_lock_t, char *);
#else	/* USLOCK_DEBUG */
#define	USLDBG(stmt)
#endif	/* USLOCK_DEBUG */
extern int lck_rw_grab_want(lck_rw_t *lck);
extern int lck_rw_grab_shared(lck_rw_t *lck);
extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);

/*
 * Forward definitions
 */

void lck_rw_lock_shared_gen(
	lck_rw_t	*lck);

void lck_rw_lock_exclusive_gen(
	lck_rw_t	*lck);

boolean_t lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t	*lck);

boolean_t lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t	*lck,
	int		prior_lock_state);

void lck_rw_lock_exclusive_to_shared_gen(
	lck_rw_t	*lck,
	int		prior_lock_state);

lck_rw_type_t lck_rw_done_gen(
	lck_rw_t	*lck,
	int		prior_lock_state);
/*
 *      Routine:        lck_spin_alloc_init
 */
lck_spin_t *
lck_spin_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_spin_t	*lck;

	if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
		lck_spin_init(lck, grp, attr);

	return(lck);
}

/*
 *      Routine:        lck_spin_free
 */
void
lck_spin_free(
	lck_spin_t	*lck,
	lck_grp_t	*grp)
{
	lck_spin_destroy(lck, grp);
	kfree(lck, sizeof(lck_spin_t));
}

/*
 *      Routine:        lck_spin_init
 */
void
lck_spin_init(
	lck_spin_t	*lck,
	lck_grp_t	*grp,
	__unused lck_attr_t	*attr)
{
	usimple_lock_init((usimple_lock_t) lck, 0);
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
}

/*
 *      Routine:        lck_spin_destroy
 */
void
lck_spin_destroy(
	lck_spin_t	*lck,
	lck_grp_t	*grp)
{
	if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
		return;
	lck->interlock = LCK_SPIN_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
	lck_grp_deallocate(grp);
}

/*
 *      Routine:        lck_spin_lock
 */
void
lck_spin_lock(
	lck_spin_t	*lck)
{
	usimple_lock((usimple_lock_t) lck);
}

/*
 *      Routine:        lck_spin_unlock
 */
void
lck_spin_unlock(
	lck_spin_t	*lck)
{
	usimple_unlock((usimple_lock_t) lck);
}

/*
 *      Routine:        lck_spin_try_lock
 */
boolean_t
lck_spin_try_lock(
	lck_spin_t	*lck)
{
	return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
}
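/*
 * Illustrative sketch (not part of the original source): typical client usage
 * of the lck_spin_* interface defined above.  The group and lock names are
 * hypothetical.
 */
#if 0	/* illustration only */
static lck_grp_t	*example_grp;
static lck_spin_t	*example_lock;

static void
example_spinlock_usage(void)
{
	example_grp  = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
	example_lock = lck_spin_alloc_init(example_grp, LCK_ATTR_NULL);

	lck_spin_lock(example_lock);
	/* ... short critical section; preemption is disabled here ... */
	lck_spin_unlock(example_lock);

	lck_spin_free(example_lock, example_grp);
	lck_grp_free(example_grp);
}
#endif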
/*
 *	Initialize a usimple_lock.
 *
 *	No change in preemption state.
 */
void
usimple_lock_init(
	usimple_lock_t	l,
	__unused unsigned short	tag)
{
#ifndef	MACHINE_SIMPLE_LOCK
	USLDBG(usld_lock_init(l, tag));
	hw_lock_init(&l->interlock);
#else
	simple_lock_init((simple_lock_t)l, tag);
#endif
}
volatile uint32_t spinlock_owner_cpu = ~0;
volatile usimple_lock_t spinlock_timed_out;

static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
	uint64_t	deadline;
	uint32_t	i;

	for (i = 0; i < real_ncpus; i++) {
		if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
			spinlock_owner_cpu = i;
			if ((uint32_t)cpu_number() == i)
				break;
			cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
			cpu_NMI_interrupt(i);
			deadline = mach_absolute_time() + (LockTimeOut * 2);
			while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
				cpu_pause();
			break;
		}
	}

	return spinlock_owner_cpu;
}
/*
 *	Acquire a usimple_lock.
 *
 *	Returns with preemption disabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
usimple_lock(
	usimple_lock_t	l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_pre(l, pc));
	/* Try to get the lock with a timeout */
	if (!hw_lock_to(&l->interlock, LockTimeOutTSC)) {
		boolean_t uslock_acquired = FALSE;

		while (machine_timeout_suspended()) {
			if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
				break;
		}
		if (uslock_acquired == FALSE) {
			uint32_t lock_cpu;

			spinlock_timed_out = l;
			lock_cpu = spinlock_timeout_NMI((uintptr_t)l->interlock.lock_data);
			panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x", l, (uintptr_t)l->interlock.lock_data, current_thread(), lock_cpu);
		}
	}
	USLDBG(usld_lock_post(l, pc));
#else
	simple_lock((simple_lock_t)l);
#endif
}
/*
 *	Release a usimple_lock.
 *
 *	Returns with preemption enabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
usimple_unlock(
	usimple_lock_t	l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_unlock(l, pc));
	hw_lock_unlock(&l->interlock);
#else
	simple_unlock_rwmb((simple_lock_t)l);
#endif
}
/*
 *	Conditionally acquire a usimple_lock.
 *
 *	On success, returns with preemption disabled.
 *	On failure, returns with preemption in the same state
 *	as when first invoked.  Note that the hw_lock routines
 *	are responsible for maintaining preemption state.
 *
 *	XXX No stats are gathered on a miss; I preserved this
 *	behavior from the original assembly-language code, but
 *	doesn't it make sense to log misses?  XXX
 */
unsigned int
usimple_lock_try(
	usimple_lock_t	l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	unsigned int	success;
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_try_pre(l, pc));
	if ((success = hw_lock_try(&l->interlock))) {
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
#else
	return(simple_lock_try((simple_lock_t)l));
#endif
}
#if	USLOCK_DEBUG
/*
 *	States of a usimple_lock.  The default when initializing
 *	a usimple_lock is setting it up for debug checking.
 */
#define	USLOCK_CHECKED		0x0001		/* lock is being checked */
#define	USLOCK_TAKEN		0x0002		/* lock has been taken */
#define	USLOCK_INIT		0xBAA0		/* lock has been initialized */
#define	USLOCK_INITIALIZED	(USLOCK_INIT|USLOCK_CHECKED)
#define	USLOCK_CHECKING(l)	(uslock_check &&	\
				 ((l)->debug.state & USLOCK_CHECKED))

/*
 *	Trace activities of a particularly interesting lock.
 */
void	usl_trace(usimple_lock_t, int, pc_t, const char *);
/*
 *	Initialize the debugging information contained
 *	in a usimple_lock.
 */
void
usld_lock_init(
	usimple_lock_t	l,
	__unused unsigned short	tag)
{
	if (l == USIMPLE_LOCK_NULL)
		panic("lock initialization: null lock pointer");
	l->lock_type = USLOCK_TAG;
	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
	l->debug.duration[0] = l->debug.duration[1] = 0;
	l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
	l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
	l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
}
/*
 *	These checks apply to all usimple_locks, not just
 *	those with USLOCK_CHECKED turned on.
 */
int
usld_lock_common_checks(
	usimple_lock_t	l,
	char		*caller)
{
	if (l == USIMPLE_LOCK_NULL)
		panic("%s:  null lock pointer", caller);
	if (l->lock_type != USLOCK_TAG)
		panic("%s:  0x%p is not a usimple lock", caller, l);
	if (!(l->debug.state & USLOCK_INIT))
		panic("%s:  %p is not an initialized lock",
		      caller, l);
	return USLOCK_CHECKING(l);
}
/*
 *	Debug checks on a usimple_lock just before attempting
 *	to acquire it.
 */
void
usld_lock_pre(
	usimple_lock_t	l,
	pc_t		pc)
{
	char	caller[] = "usimple_lock";

	if (!usld_lock_common_checks(l, caller))
		return;

/*
 *	Note that we have a weird case where we are getting a lock when we are
 *	in the process of putting the system to sleep. We are running with no
 *	current threads, therefore we can't tell if we are trying to retake a lock
 *	we have or someone on the other processor has it.  Therefore we just
 *	ignore this test if the locking thread is 0.
 */

	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
	    l->debug.lock_thread == (void *) current_thread()) {
		printf("%s:  lock %p already locked (at %p) by",
		      caller, l, l->debug.lock_pc);
		printf(" current thread %p (new attempt at pc %p)\n",
		       l->debug.lock_thread, pc);
		panic("%s", caller);
	}
	mp_disable_preemption();
	usl_trace(l, cpu_number(), pc, caller);
	mp_enable_preemption();
}
/*
 *	Debug checks on a usimple_lock just after acquiring it.
 *
 *	Pre-emption has been disabled at this point,
 *	so we are safe in using cpu_number.
 */
void
usld_lock_post(
	usimple_lock_t	l,
	pc_t		pc)
{
	int	mycpu;
	char	caller[] = "successful usimple_lock";

	if (!usld_lock_common_checks(l, caller))
		return;

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
		panic("%s:  lock %p became uninitialized",
		      caller, l);
	if ((l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%p became TAKEN by someone else",
		      caller, l);

	mycpu = cpu_number();
	l->debug.lock_thread = (void *)current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = mycpu;

	usl_trace(l, mycpu, pc, caller);
}
/*
 *	Debug checks on a usimple_lock just before
 *	releasing it.  Note that the caller has not
 *	yet released the hardware lock.
 *
 *	Preemption is still disabled, so there's
 *	no problem using cpu_number.
 */
void
usld_unlock(
	usimple_lock_t	l,
	pc_t		pc)
{
	int	mycpu;
	char	caller[] = "usimple_unlock";

	if (!usld_lock_common_checks(l, caller))
		return;

	mycpu = cpu_number();

	if (!(l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%p hasn't been taken",
		      caller, l);
	if (l->debug.lock_thread != (void *) current_thread())
		panic("%s:  unlocking lock 0x%p, owned by thread %p",
		      caller, l, l->debug.lock_thread);
	if (l->debug.lock_cpu != mycpu) {
		printf("%s:  unlocking lock 0x%p on cpu 0x%x",
		       caller, l, mycpu);
		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
		panic("%s", caller);
	}
	usl_trace(l, mycpu, pc, caller);

	l->debug.unlock_thread = l->debug.lock_thread;
	l->debug.lock_thread = INVALID_PC;
	l->debug.state &= ~USLOCK_TAKEN;
	l->debug.unlock_pc = pc;
	l->debug.unlock_cpu = mycpu;
}
/*
 *	Debug checks on a usimple_lock just before
 *	attempting to acquire it.
 *
 *	Preemption isn't guaranteed to be disabled.
 */
void
usld_lock_try_pre(
	usimple_lock_t	l,
	pc_t		pc)
{
	char	caller[] = "usimple_lock_try";

	if (!usld_lock_common_checks(l, caller))
		return;
	mp_disable_preemption();
	usl_trace(l, cpu_number(), pc, caller);
	mp_enable_preemption();
}
/*
 *	Debug checks on a usimple_lock just after
 *	successfully attempting to acquire it.
 *
 *	Preemption has been disabled by the
 *	lock acquisition attempt, so it's safe
 *	to use cpu_number.
 */
void
usld_lock_try_post(
	usimple_lock_t	l,
	pc_t		pc)
{
	int	mycpu;
	char	caller[] = "successful usimple_lock_try";

	if (!usld_lock_common_checks(l, caller))
		return;

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
		panic("%s:  lock 0x%p became uninitialized",
		      caller, l);
	if ((l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%p became TAKEN by someone else",
		      caller, l);

	mycpu = cpu_number();
	l->debug.lock_thread = (void *) current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = mycpu;

	usl_trace(l, mycpu, pc, caller);
}
/*
 *	For very special cases, set traced_lock to point to a
 *	specific lock of interest.  The result is a series of
 *	XPRs showing lock operations on that lock.  The lock_seq
 *	value is used to show the order of those operations.
 */
usimple_lock_t		traced_lock;
unsigned int		lock_seq;

void
usl_trace(
	usimple_lock_t	l,
	int		mycpu,
	pc_t		pc,
	const char *	op_name)
{
	if (traced_lock == l) {
		XPR(XPR_SLOCK,
		    "seq %d, cpu %d, %s @ %x\n",
		    (uintptr_t) lock_seq, (uintptr_t) mycpu,
		    (uintptr_t) op_name, (uintptr_t) pc, 0);
		lock_seq++;
	}
}

#endif	/* USLOCK_DEBUG */
/*
 *	Routine:	lock_alloc
 *	Function:
 *		Allocate a lock for external users who cannot
 *		hard-code the structure definition into their
 *		objects.
 *		For now just use kalloc, but a zone is probably
 *		warranted.
 */
lock_t *
lock_alloc(
	boolean_t	can_sleep,
	unsigned short	tag,
	unsigned short	tag1)
{
	lock_t		*l;

	if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
		lock_init(l, can_sleep, tag, tag1);
	return(l);
}

/*
 *	Routine:	lock_free
 *	Function:
 *		Free a lock allocated for external users.
 *		For now just use kfree, but a zone is probably
 *		warranted.
 */
void
lock_free(
	lock_t		*l)
{
	kfree(l, sizeof(lock_t));
}

/*
 *	Routine:	lock_init
 *	Function:
 *		Initialize a lock; required before use.
 *		Note that clients declare the "struct lock"
 *		variables and then initialize them, rather
 *		than getting a new one from this module.
 */
void
lock_init(
	lock_t		*l,
	boolean_t	can_sleep,
	__unused unsigned short	tag,
	__unused unsigned short	tag1)
{
	hw_lock_byte_init(&l->lck_rw_interlock);
	l->lck_rw_want_write = FALSE;
	l->lck_rw_want_upgrade = FALSE;
	l->lck_rw_shared_count = 0;
	l->lck_rw_can_sleep = can_sleep;
	l->lck_rw_priv_excl = 1;
	l->lck_r_waiting = l->lck_w_waiting = 0;
}
/*
 *	Sleep locks.  These use the same data structure and algorithm
 *	as the spin locks, but the process sleeps while it is waiting
 *	for the lock.  These work on uniprocessor systems.
 */

#define DECREMENTER_TIMEOUT 1000000

void
lock_write(
	lock_t	*l)
{
	lck_rw_lock_exclusive(l);
}

void
lock_done(
	lock_t	*l)
{
	(void) lck_rw_done(l);
}

void
lock_read(
	lock_t	*l)
{
	lck_rw_lock_shared(l);
}

/*
 *	Routine:	lock_read_to_write
 *	Function:
 *		Improves a read-only lock to one with
 *		write permission.  If another reader has
 *		already requested an upgrade to a write lock,
 *		no lock is held upon return.
 *
 *		Returns FALSE if the upgrade *failed*.
 */
boolean_t
lock_read_to_write(
	lock_t	*l)
{
	return lck_rw_lock_shared_to_exclusive(l);
}

void
lock_write_to_read(
	lock_t	*l)
{
	lck_rw_lock_exclusive_to_shared(l);
}
/*
 *      Routine:        lck_rw_alloc_init
 */
lck_rw_t *
lck_rw_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_rw_t	*lck;

	if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
		bzero(lck, sizeof(lck_rw_t));
		lck_rw_init(lck, grp, attr);
	}

	return(lck);
}

/*
 *      Routine:        lck_rw_free
 */
void
lck_rw_free(
	lck_rw_t	*lck,
	lck_grp_t	*grp)
{
	lck_rw_destroy(lck, grp);
	kfree(lck, sizeof(lck_rw_t));
}

/*
 *      Routine:        lck_rw_init
 */
void
lck_rw_init(
	lck_rw_t	*lck,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_attr_t	*lck_attr = (attr != LCK_ATTR_NULL) ?
					attr : &LockDefaultLckAttr;

	hw_lock_byte_init(&lck->lck_rw_interlock);
	lck->lck_rw_want_write = FALSE;
	lck->lck_rw_want_upgrade = FALSE;
	lck->lck_rw_shared_count = 0;
	lck->lck_rw_can_sleep = TRUE;
	lck->lck_r_waiting = lck->lck_w_waiting = 0;
	lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
				LCK_ATTR_RW_SHARED_PRIORITY) == 0);

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
}
/*
 *      Routine:        lck_rw_destroy
 */
void
lck_rw_destroy(
	lck_rw_t	*lck,
	lck_grp_t	*grp)
{
	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
		return;
	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
	lck_grp_deallocate(grp);
}

/*
 *	Sleep locks.  These use the same data structure and algorithm
 *	as the spin locks, but the process sleeps while it is waiting
 *	for the lock.  These work on uniprocessor systems.
 */

#define DECREMENTER_TIMEOUT 1000000

#define RW_LOCK_READER_EVENT(x)		\
		((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))

#define RW_LOCK_WRITER_EVENT(x)		\
		((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
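/*
 * Illustrative sketch (not part of the original source): readers and writers
 * sleep on two distinct event addresses derived from fields of the lock, so a
 * wakeup can be targeted at just one class of waiter.
 */
#if 0	/* illustration only */
	/* writer path: note the wait, then sleep on the writer event */
	lck->lck_w_waiting = TRUE;
	res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);

	/* release path: wake only the waiting writers */
	if (fake_lck->lck_w_waiting)
		thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
#endif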
/*
 * We need to disable interrupts while holding the mutex interlock
 * to prevent an IPI intervening.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
	boolean_t	istate;

	istate = ml_set_interrupts_enabled(FALSE);
	hw_lock_byte_lock(&lck->lck_rw_interlock);

	return istate;
}

static void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
	hw_lock_byte_unlock(&lck->lck_rw_interlock);
	ml_set_interrupts_enabled(istate);
}
/*
 * This inline is used when busy-waiting for an rw lock.
 * If interrupts were disabled when the lock primitive was called,
 * we poll the IPI handler for pending tlb flushes.
 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
 */
static inline void
lck_rw_lock_pause(boolean_t interrupts_enabled)
{
	if (!interrupts_enabled)
		handle_pending_TLB_flushes();
	cpu_pause();
}
/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
	if (lck->lck_rw_can_sleep) {
		if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
			/*
			 * there are already threads waiting on this lock... this
			 * implies that they have spun beyond their deadlines waiting for
			 * the desired state to show up so we will not bother spinning at this time...
			 *   or
			 * the current number of threads sharing this lock exceeds our capacity to run them
			 * concurrently and since all states we're going to spin for require the rw_shared_count
			 * to be at 0, we'll not bother spinning since the latency for this to happen is
			 * unpredictable...
			 */
			return (mach_absolute_time());
		}
		return (mach_absolute_time() + MutexSpin);
	} else
		return (mach_absolute_time() + (100000LL * 1000000000LL));
}
/*
 *      Routine:        lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive_gen(
	lck_rw_t	*lck)
{
	uint64_t	deadline = 0;
	int		slept = 0;
	int		gotlock = 0;
	int		lockheld = 0;
	wait_result_t	res = 0;
	boolean_t	istate = -1;

	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;

	/*
	 *	Try to acquire the lck_rw_want_write bit.
	 */
	while ( !lck_rw_grab_want(lck)) {

		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

		while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_want_write) {

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
			} else {
				lck->lck_rw_want_write = TRUE;
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}
	/*
	 * Wait for readers (and upgrades) to finish...
	 * the test for these conditions must be done simultaneously with
	 * a check of the interlock not being held since
	 * the rw_shared_count will drop to 0 first and then want_upgrade
	 * will be set to 1 in the shared_to_exclusive scenario... those
	 * adjustments are done behind the interlock and represent an
	 * atomic change in state and must be considered as such
	 * however, once we see the read count at 0, the want_upgrade not set
	 * and the interlock not held, we are safe to proceed
	 */
	while (lck_rw_held_read_or_upgrade(lck)) {

		/*
		 * Either sleeping or spinning is happening, start
		 * a timing of our delay interval now.  If we set it
		 * to -1 we don't have accurate data so we cannot later
		 * decide to record a dtrace spin or sleep event.
		 */
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

		while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);

		if ( !lockheld)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lck, istate);
				/*
				 * must own the lock now, since we checked for
				 * readers or upgrade owner behind the interlock
				 * no need for a call to 'lck_rw_held_read_or_upgrade'
				 */
				break;
			}
		}
	}

	/*
	 * Decide what latencies we suffered that are Dtrace events.
	 * If we have set wait_interval, then we either spun or slept.
	 * At least we get out from under the interlock before we record
	 * which is the best we can do here to minimize the impact
	 * on the caller.
	 * If we have set wait_interval to -1, then dtrace was not enabled when we
	 * started sleeping/spinning so we don't record this event.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
			    mach_absolute_time() - wait_interval, 1);
		} else {
			/*
			 * For the blocking case, we also record if when we blocked
			 * it was held for read or write, and how many readers.
			 * Notice that above we recorded this before we dropped
			 * the interlock so the count is accurate.
			 */
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
}
/*
 *      Routine:        lck_rw_done_gen
 *
 *	called from the assembly language wrapper...
 *	prior_lock_state is the value in the 1st
 *	word of the lock at the time of a successful
 *	atomic compare and exchange with the new value...
 *	it represents the state of the lock before we
 *	decremented the rw_shared_count or cleared either
 *	rw_want_upgrade or rw_want_write and
 *	the lck_x_waiting bits... since the wrapper
 *	routine has already changed the state atomically,
 *	we just need to decide if we should
 *	wake up anyone and what value to return... we do
 *	this by examining the state of the lock before
 *	we changed it
 */
lck_rw_type_t
lck_rw_done_gen(
	lck_rw_t	*lck,
	int		prior_lock_state)
{
	lck_rw_t	*fake_lck;
	lck_rw_type_t	lock_type;

	/*
	 * prior_lock state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond whats defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck = (lck_rw_t *)&prior_lock_state;

	if (fake_lck->lck_rw_shared_count <= 1) {
		if (fake_lck->lck_w_waiting)
			thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

		if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
			thread_wakeup(RW_LOCK_READER_EVENT(lck));
	}
	if (fake_lck->lck_rw_shared_count)
		lock_type = LCK_RW_TYPE_SHARED;
	else
		lock_type = LCK_RW_TYPE_EXCLUSIVE;

	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);

	return(lock_type);
}
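/*
 * Illustrative sketch (not part of the original source): the 'fake_lck' trick
 * used here and below simply reinterprets the saved first word of the lock as
 * a lck_rw_t so its bitfields can be examined by name after the atomic update.
 */
#if 0	/* illustration only */
	int		prior_lock_state;	/* snapshot returned by the atomic exchange */
	lck_rw_t	*fake_lck = (lck_rw_t *)&prior_lock_state;

	if (fake_lck->lck_w_waiting)		/* a writer was waiting at release time */
		thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
#endif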
/*
 *	Routine:	lck_rw_unlock
 */
void
lck_rw_unlock(
	lck_rw_t	*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		lck_rw_unlock_shared(lck);
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		lck_rw_unlock_exclusive(lck);
	else
		panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
}

/*
 *	Routine:	lck_rw_unlock_shared
 */
void
lck_rw_unlock_shared(
	lck_rw_t	*lck)
{
	lck_rw_type_t	ret;

	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_SHARED)
		panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
}

/*
 *	Routine:	lck_rw_unlock_exclusive
 */
void
lck_rw_unlock_exclusive(
	lck_rw_t	*lck)
{
	lck_rw_type_t	ret;

	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_EXCLUSIVE)
		panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
}

/*
 *	Routine:	lck_rw_lock
 */
void
lck_rw_lock(
	lck_rw_t	*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		lck_rw_lock_shared(lck);
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		lck_rw_lock_exclusive(lck);
	else
		panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
}
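/*
 * Illustrative sketch (not part of the original source): typical client usage
 * of the lck_rw_* interface.  The group name below is hypothetical.
 */
#if 0	/* illustration only */
static void
example_rwlock_usage(void)
{
	lck_grp_t	*grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
	lck_rw_t	*rwl = lck_rw_alloc_init(grp, LCK_ATTR_NULL);

	lck_rw_lock(rwl, LCK_RW_TYPE_SHARED);	/* or lck_rw_lock_shared(rwl) */
	/* ... readers may run concurrently here ... */
	lck_rw_unlock(rwl, LCK_RW_TYPE_SHARED);

	lck_rw_lock_exclusive(rwl);
	/* ... exclusive access ... */
	lck_rw_done(rwl);			/* releases whichever mode is held */

	lck_rw_free(rwl, grp);
	lck_grp_free(grp);
}
#endif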
/*
 *	Routine:	lck_rw_lock_shared_gen
 *	Function:
 *		assembly fast path code has determined that this lock
 *		is held exclusively... this is where we spin/block
 *		until we can acquire the lock in the shared mode
 */
void
lck_rw_lock_shared_gen(
	lck_rw_t	*lck)
{
	uint64_t	deadline = 0;
	int		gotlock = 0;
	int		slept = 0;
	wait_result_t	res = 0;
	boolean_t	istate = -1;

	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;

	while ( !lck_rw_grab_shared(lck)) {

		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
			dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

		while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock for read
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
			    ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
					     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

				lck->lck_r_waiting = TRUE;

				res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
					     (int)lck, res, slept, 0, 0);
			} else {
				lck->lck_rw_shared_count++;
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}

	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 0,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
}
/*
 *	Routine:	lck_rw_lock_shared_to_exclusive_failure
 *	Function:
 *		assembly fast path code has already dropped our read
 *		count and determined that someone else owns 'lck_rw_want_upgrade'
 *		if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
 *		all we need to do here is determine if a wakeup is needed
 */
boolean_t
lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t	*lck,
	int		prior_lock_state)
{
	lck_rw_t	*fake_lck;

	/*
	 * prior_lock state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond whats defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck = (lck_rw_t *)&prior_lock_state;

	if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
		/*
		 *	Someone else has requested upgrade.
		 *	Since we've released the read lock, wake
		 *	him up if he's blocked waiting
		 */
		thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
		     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

	return (FALSE);
}
/*
 *	Routine:	lck_rw_lock_shared_to_exclusive_success
 *	Function:
 *		assembly fast path code has already dropped our read
 *		count and successfully acquired 'lck_rw_want_upgrade'
 *		we just need to wait for the rest of the readers to drain
 *		and then we can return as the exclusive holder of this lock
 */
boolean_t
lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t	*lck)
{
	uint64_t	deadline = 0;
	int		slept = 0;
	int		still_shared = 0;
	wait_result_t	res;
	boolean_t	istate = -1;

	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;

	while (lck->lck_rw_shared_count != 0) {

		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
			dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);

		while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);

		if ( !still_shared)
			break;
		/*
		 * if we get here, the deadline has expired w/o
		 * the rw_shared_count having drained to 0
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_shared_count != 0) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
					     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
					     (int)lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}
	/*
	 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);

	return (TRUE);
}
/*
 *      Routine:	lck_rw_lock_exclusive_to_shared
 *	Function:
 *		assembly fast path has already dropped
 *		our exclusive state and bumped lck_rw_shared_count
 *		all we need to do here is determine if anyone
 *		needs to be awakened.
 */
void
lck_rw_lock_exclusive_to_shared_gen(
	lck_rw_t	*lck,
	int		prior_lock_state)
{
	lck_rw_t	*fake_lck;

	/*
	 * prior_lock state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond whats defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck = (lck_rw_t *)&prior_lock_state;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
		     (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);

	/*
	 * don't wake up anyone waiting to take the lock exclusively
	 * since we hold a read count... when the read count drops to 0,
	 * the writers will be woken.
	 *
	 * wake up any waiting readers if we don't have any writers waiting,
	 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
	 */
	if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
		thread_wakeup(RW_LOCK_READER_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
		     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
}
/*
 *      Routine:        lck_rw_try_lock
 */
boolean_t
lck_rw_try_lock(
	lck_rw_t	*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		return(lck_rw_try_lock_shared(lck));
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		return(lck_rw_try_lock_exclusive(lck));
	else
		panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
	return(FALSE);
}
void
lck_rw_assert(
	lck_rw_t	*lck,
	unsigned int	type)
{
	switch (type) {
	case LCK_RW_ASSERT_SHARED:
		if (lck->lck_rw_shared_count != 0) {
			return;
		}
		break;
	case LCK_RW_ASSERT_EXCLUSIVE:
		if ((lck->lck_rw_want_write ||
		     lck->lck_rw_want_upgrade) &&
		    lck->lck_rw_shared_count == 0) {
			return;
		}
		break;
	case LCK_RW_ASSERT_HELD:
		if (lck->lck_rw_want_write ||
		    lck->lck_rw_want_upgrade ||
		    lck->lck_rw_shared_count != 0) {
			return;
		}
		break;
	default:
		break;
	}

	panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
}
/*
 *      Routine:        lck_mtx_alloc_init
 */
lck_mtx_t *
lck_mtx_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_mtx_t	*lck;

	if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
		lck_mtx_init(lck, grp, attr);

	return(lck);
}

/*
 *      Routine:        lck_mtx_free
 */
void
lck_mtx_free(
	lck_mtx_t	*lck,
	lck_grp_t	*grp)
{
	lck_mtx_destroy(lck, grp);
	kfree(lck, sizeof(lck_mtx_t));
}
/*
 *      Routine:        lck_mtx_ext_init
 */
void
lck_mtx_ext_init(
	lck_mtx_ext_t	*lck,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	bzero((void *)lck, sizeof(lck_mtx_ext_t));

	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		lck->lck_mtx_deb.type = MUTEX_TAG;
		lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
	}

	lck->lck_mtx_grp = grp;

	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
		lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;

	lck->lck_mtx.lck_mtx_ptr = (void *)LCK_MTX_PTR_EXTENDED;
}
/*
 *      Routine:        lck_mtx_init
 */
void
lck_mtx_init(
	lck_mtx_t	*lck,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_mtx_ext_t	*lck_ext;
	lck_attr_t	*lck_attr;

	if (attr != LCK_ATTR_NULL)
		lck_attr = attr;
	else
		lck_attr = &LockDefaultLckAttr;

	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
			lck_mtx_ext_init(lck_ext, grp, lck_attr);
			lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
			lck->lck_mtx_ptr = lck_ext;
			lck->lck_mtx_ilocked = 1;
		}
	} else {
		lck->lck_mtx_owner = 0;
		lck->lck_mtx_ptr = 0;
		lck->lck_mtx_waiters = 0;
		lck->lck_mtx_pri = 0;
		lck->lck_mtx_ilocked = 0;
		lck->lck_mtx_mlocked = 0;
		lck->lck_mtx_promoted = 0;
		lck->lck_mtx_spin = 0;
	}
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}
/*
 *      Routine:        lck_mtx_init_ext
 */
void
lck_mtx_init_ext(
	lck_mtx_t	*lck,
	lck_mtx_ext_t	*lck_ext,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_attr_t	*lck_attr;

	if (attr != LCK_ATTR_NULL)
		lck_attr = attr;
	else
		lck_attr = &LockDefaultLckAttr;

	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		lck_mtx_ext_init(lck_ext, grp, lck_attr);
		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
		lck->lck_mtx_ptr = lck_ext;
		lck->lck_mtx_ilocked = 1;
	} else {
		lck->lck_mtx_owner = 0;
		lck->lck_mtx_ptr = 0;
		lck->lck_mtx_waiters = 0;
		lck->lck_mtx_pri = 0;
		lck->lck_mtx_ilocked = 0;
		lck->lck_mtx_mlocked = 0;
		lck->lck_mtx_promoted = 0;
		lck->lck_mtx_spin = 0;
	}
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}
/*
 *      Routine:        lck_mtx_destroy
 */
void
lck_mtx_destroy(
	lck_mtx_t	*lck,
	lck_grp_t	*grp)
{
	boolean_t lck_is_indirect;

	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
		return;
	lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);

	lck_mtx_lock_mark_destroyed(lck);

	if (lck_is_indirect)
		kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
	lck_grp_deallocate(grp);
}


#define	LCK_MTX_LCK_WAIT_CODE		0x20
#define	LCK_MTX_LCK_WAKEUP_CODE		0x21
#define	LCK_MTX_LCK_SPIN_CODE		0x22
#define	LCK_MTX_LCK_ACQUIRE_CODE	0x23
#define LCK_MTX_LCK_DEMOTE_CODE		0x24
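/*
 * Illustrative sketch (not part of the original source): typical client usage
 * of the lck_mtx_* interface initialized above.  The group name below is
 * hypothetical.
 */
#if 0	/* illustration only */
static void
example_mutex_usage(void)
{
	lck_grp_t	*grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
	lck_mtx_t	*mtx = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);

	lck_mtx_lock(mtx);
	/* ... the holder may block; waiters may promote its priority ... */
	lck_mtx_unlock(mtx);

	lck_mtx_free(mtx, grp);
	lck_grp_free(grp);
}
#endif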
/*
 * Routine: 	lck_mtx_unlock_wakeup_x86
 *
 * Invoked on unlock when there is contention.
 */
void
lck_mtx_unlock_wakeup_x86 (
	lck_mtx_t	*mutex,
	int		owner_was_promoted)
{

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START, (int)mutex, owner_was_promoted, mutex->lck_mtx_waiters, 0, 0);

	if (lck_mtx_lock_decr_waiter(mutex))
		thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

	if (owner_was_promoted) {
		thread_t	thread = current_thread();

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), thread->promotions,
			     thread->sched_mode & TH_MODE_PROMOTED, 0, 0);

		if (thread->promotions > 0) {
			spl_t	s = splsched();

			thread_lock(thread);

			if (--thread->promotions == 0 && (thread->sched_mode & TH_MODE_PROMOTED)) {

				thread->sched_mode &= ~TH_MODE_PROMOTED;

				if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
					KERNEL_DEBUG_CONSTANT(
						MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
						thread->sched_pri, DEPRESSPRI, 0, mutex, 0);

					set_sched_pri(thread, DEPRESSPRI);
				} else {
					if (thread->priority < thread->sched_pri) {
						KERNEL_DEBUG_CONSTANT(
							MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
							thread->sched_pri, thread->priority, 0, mutex, 0);

						compute_priority(thread, FALSE);
					}
				}
			}
			thread_unlock(thread);
			splx(s);
		}
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
}
/*
 * Routine: 	lck_mtx_lock_acquire_x86
 *
 * Invoked on acquiring the mutex when there is contention...
 * mutex is owned... interlock is not held
 */
void
lck_mtx_lock_acquire_x86(
	lck_mtx_t	*mutex)
{
	thread_t	thread = current_thread();
	integer_t	priority;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);

	priority = lck_mtx_lock_get_pri(mutex);

	if (thread->sched_pri < priority) {

		if (lck_mtx_lock_mark_promoted(mutex)) {
			spl_t	s = splsched();

			thread_lock(thread);

			if (thread->sched_pri < priority) {

				KERNEL_DEBUG_CONSTANT(
					MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
					thread->sched_pri, priority, 0, mutex, 0);

				set_sched_pri(thread, priority);
			}
			thread->promotions++;
			thread->sched_mode |= TH_MODE_PROMOTED;

			thread_unlock(thread);
			splx(s);
		}
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
}
/*
 * Routine: 	lck_mtx_lock_spinwait_x86
 *
 * Invoked trying to acquire a mutex when there is contention but
 * the holder is running on another processor. We spin for up to a maximum
 * time waiting for the lock to be released.
 *
 * Called with the interlock unlocked.
 */
int
lck_mtx_lock_spinwait_x86(
	lck_mtx_t	*mutex)
{
	thread_t	holder;
	uint64_t	deadline;
	int		retval = 1;

	KERNEL_DEBUG(
		MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
		(int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);

	deadline = mach_absolute_time() + MutexSpin;

	/*
	 * Spin while:
	 *   - mutex is locked, and
	 *   - its locked as a spin lock, and
	 *   - owner is running on another processor, and
	 *   - owner (processor) is not idling, and
	 *   - we haven't spun for long enough.
	 */
	do {
		if (lck_mtx_lock_grab_mutex(mutex)) {
			retval = 0;
			break;
		}
		if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {

			if ( !(holder->machine.specFlags & OnProc) ||
			     (holder->state & TH_IDLE)) {
				break;
			}
		}
		cpu_pause();

	} while (mach_absolute_time() < deadline);

	/*
	 * We've already kept a count via deadline of how long we spun.
	 * If dtrace is active, then we compute backwards to decide how
	 * long we spun.
	 *
	 * Note that we record a different probe id depending on whether
	 * this is a direct or indirect mutex.  This allows us to
	 * penalize only lock groups that have debug/stats enabled
	 * with dtrace processing if desired.
	 */
	if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) {
		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
		    mach_absolute_time() - (deadline - MutexSpin));
	} else {
		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
		    mach_absolute_time() - (deadline - MutexSpin));
	}
	/* The lockstat acquire event is recorded by the assembly code beneath us. */

	KERNEL_DEBUG(
		MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
		(int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);

	return retval;
}
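/*
 * Illustrative sketch (not part of the original source): the adaptive pattern
 * used by lck_mtx_lock_spinwait_x86() above - spin only while the owner is
 * making progress on another CPU, then fall back to blocking.  The helper
 * names below are hypothetical.
 */
#if 0	/* illustration only */
	deadline = mach_absolute_time() + MutexSpin;
	do {
		if (try_to_grab_lock())			/* hypothetical helper */
			return;				/* got it while spinning */
		if (owner_not_running_or_idle())	/* hypothetical helper */
			break;				/* spinning is pointless */
		cpu_pause();
	} while (mach_absolute_time() < deadline);

	block_on_lock();	/* hypothetical helper; see lck_mtx_lock_wait_x86() */
#endif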
/*
 * Routine: 	lck_mtx_lock_wait_x86
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait_x86 (
	lck_mtx_t	*mutex)
{
	thread_t	self = current_thread();
	thread_t	holder;
	integer_t	priority;
	integer_t	old_lck_mtx_pri;
	spl_t		s;
	uint64_t	sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);

	priority = self->sched_pri;

	if (priority < self->priority)
		priority = self->priority;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	if (mutex->lck_mtx_waiters == 0)
		old_lck_mtx_pri = 0;
	else
		old_lck_mtx_pri = mutex->lck_mtx_pri;

	if (old_lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;

	if ( (holder = (thread_t)mutex->lck_mtx_owner) ) {
		s = splsched();
		thread_lock(holder);

		if (holder->sched_pri < priority) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
				holder->sched_pri, priority, holder, mutex, 0);

			set_sched_pri(holder, priority);

			if (mutex->lck_mtx_promoted == 0) {
				holder->promotions++;
				holder->sched_mode |= TH_MODE_PROMOTED;

				mutex->lck_mtx_promoted = 1;
			}
		}
		thread_unlock(holder);
		splx(s);
	}
	mutex->lck_mtx_waiters++;

	assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);

	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);

	/*
	 * Record the Dtrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
			    mach_absolute_time() - sleep_start);
		}
	}
}
#if	MACH_KDB

void
db_show_one_lock(
	lock_t	*lock)
{
	db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ",
		  lock->lck_rw_shared_count,
		  lock->lck_rw_want_upgrade ? "" : "!",
		  lock->lck_rw_want_write ? "" : "!");
	db_printf("%swaiting, %scan_sleep\n",
		  (lock->lck_r_waiting || lock->lck_w_waiting) ? "" : "!",
		  lock->lck_rw_can_sleep ? "" : "!");
	db_printf("Interlock:\n");
	db_show_one_simple_lock((db_expr_t) ((vm_offset_t)simple_lock_addr(lock->lck_rw_interlock)),
			TRUE, (db_expr_t)0, (char *)0);
}

/*
 * Routines to print out simple_locks and mutexes in a nicely-formatted
 * fashion.
 */

const char *simple_lock_labels =	"ENTRY    ILK THREAD   DURATION CALLER";

void
db_show_one_simple_lock (
	db_expr_t	addr,
	boolean_t	have_addr,
	__unused db_expr_t	count,
	__unused char		* modif)
{
	simple_lock_t	saddr = (simple_lock_t) ((vm_offset_t) addr);

	if (saddr == (simple_lock_t)0 || !have_addr) {
		db_error ("No simple_lock\n");
	}
#if	USLOCK_DEBUG
	else if (saddr->lock_type != USLOCK_TAG)
		db_error ("Not a simple_lock\n");
#endif	/* USLOCK_DEBUG */

	db_printf ("%s\n", simple_lock_labels);
	db_print_simple_lock (saddr);
}

void
db_print_simple_lock (
	simple_lock_t	addr)
{
	db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
#if	USLOCK_DEBUG
	db_printf (" %08x", addr->debug.lock_thread);
	db_printf (" %08x ", addr->debug.duration[1]);
	db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
#endif	/* USLOCK_DEBUG */
}

#endif	/* MACH_KDB */