]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/locks_i386.c
xnu-3247.1.106.tar.gz
[apple/xnu.git] / osfmk / i386 / locks_i386.c
CommitLineData
91447636 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
91447636 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
91447636 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
91447636
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
91447636
A
64#include <mach_ldebug.h>
65
91447636
A
66#include <kern/locks.h>
67#include <kern/kalloc.h>
68#include <kern/misc_protos.h>
69#include <kern/thread.h>
70#include <kern/processor.h>
71#include <kern/cpu_data.h>
72#include <kern/cpu_number.h>
73#include <kern/sched_prim.h>
74#include <kern/xpr.h>
75#include <kern/debug.h>
76#include <string.h>
77
060df5ea 78#include <i386/machine_routines.h> /* machine_timeout_suspended() */
b0d623f7 79#include <machine/machine_cpu.h>
060df5ea 80#include <i386/mp.h>
91447636
A
81
82#include <sys/kdebug.h>
6d2010ae 83#include <mach/branch_predicates.h>
91447636 84
2d21ac55
A
85/*
86 * We need only enough declarations from the BSD-side to be able to
87 * test if our probe is active, and to call __dtrace_probe(). Setting
88 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
89 */
90#if CONFIG_DTRACE
91#define NEED_DTRACE_DEFS
92#include <../bsd/sys/lockstat.h>
93#endif
94
91447636
A
/*
 * Sub-codes for rw-lock trace points.  In this file they are combined as
 * MACHDBG_CODE(DBG_MACH_LOCKS, <code>) and passed to KERNEL_DEBUG().
 */
#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
#define LCK_RW_LCK_SHARED_CODE 0x102
#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
#define LCK_RW_LCK_EX_TO_SH_CODE 0x105

/*
 * Spin/wait phase codes.
 * NOTE(review): the values jump from 0x109 to 0x110 (0x10A-0x10F are
 * unused).  Trace consumers may depend on the current values, so confirm
 * before renumbering.
 */
#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
#define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
#define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
110
91447636
A
111
112#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
113
114unsigned int LcksOpts=0;
91447636
A
115
116/* Forwards */
117
91447636
A
118#if USLOCK_DEBUG
119/*
120 * Perform simple lock checks.
121 */
122int uslock_check = 1;
123int max_lock_loops = 100000000;
124decl_simple_lock_data(extern , printf_lock)
125decl_simple_lock_data(extern , panic_lock)
91447636
A
126#endif /* USLOCK_DEBUG */
127
fe8ab488 128extern unsigned int not_in_kdp;
91447636
A
129
130/*
131 * We often want to know the addresses of the callers
132 * of the various lock routines. However, this information
133 * is only used for debugging and statistics.
134 */
typedef void *pc_t;
/* Sentinel values stored in the lock debug fields when no pc/thread is recorded. */
#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
#if ANY_LOCK_DEBUG
/* Debug builds: declare a local pc and fill it with the caller's return address. */
#define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
#define DECL_PC(pc) pc_t pc;
#else /* ANY_LOCK_DEBUG */
/* Non-debug builds: DECL_PC expands to nothing. */
#define DECL_PC(pc)
#ifdef lint
/*
 * Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc) ++pc
#else /* lint */
#define OBTAIN_PC(pc)
#endif /* lint */
#endif /* ANY_LOCK_DEBUG */
152
153
154/*
155 * Portable lock package implementation of usimple_locks.
156 */
157
158#if USLOCK_DEBUG
159#define USLDBG(stmt) stmt
160void usld_lock_init(usimple_lock_t, unsigned short);
161void usld_lock_pre(usimple_lock_t, pc_t);
162void usld_lock_post(usimple_lock_t, pc_t);
163void usld_unlock(usimple_lock_t, pc_t);
164void usld_lock_try_pre(usimple_lock_t, pc_t);
165void usld_lock_try_post(usimple_lock_t, pc_t);
166int usld_lock_common_checks(usimple_lock_t, char *);
167#else /* USLOCK_DEBUG */
168#define USLDBG(stmt)
169#endif /* USLOCK_DEBUG */
170
b0d623f7
A
171
172extern int lck_rw_grab_want(lck_rw_t *lck);
173extern int lck_rw_grab_shared(lck_rw_t *lck);
174extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);
175
176
2d21ac55
A
177/*
178 * Forward definitions
179 */
180
181void lck_rw_lock_shared_gen(
182 lck_rw_t *lck);
183
b0d623f7
A
184void lck_rw_lock_exclusive_gen(
185 lck_rw_t *lck);
186
187boolean_t lck_rw_lock_shared_to_exclusive_success(
2d21ac55
A
188 lck_rw_t *lck);
189
b0d623f7
A
190boolean_t lck_rw_lock_shared_to_exclusive_failure(
191 lck_rw_t *lck,
192 int prior_lock_state);
193
194void lck_rw_lock_exclusive_to_shared_gen(
195 lck_rw_t *lck,
196 int prior_lock_state);
197
198lck_rw_type_t lck_rw_done_gen(
199 lck_rw_t *lck,
200 int prior_lock_state);
201
39236c6e
A
202void lck_rw_clear_promotions_x86(thread_t thread);
203
91447636
A
204/*
205 * Routine: lck_spin_alloc_init
206 */
207lck_spin_t *
208lck_spin_alloc_init(
209 lck_grp_t *grp,
210 lck_attr_t *attr)
211{
212 lck_spin_t *lck;
213
214 if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
215 lck_spin_init(lck, grp, attr);
216
217 return(lck);
218}
219
220/*
221 * Routine: lck_spin_free
222 */
223void
224lck_spin_free(
225 lck_spin_t *lck,
226 lck_grp_t *grp)
227{
228 lck_spin_destroy(lck, grp);
229 kfree(lck, sizeof(lck_spin_t));
230}
231
232/*
233 * Routine: lck_spin_init
234 */
235void
236lck_spin_init(
237 lck_spin_t *lck,
238 lck_grp_t *grp,
239 __unused lck_attr_t *attr)
240{
241 usimple_lock_init((usimple_lock_t) lck, 0);
242 lck_grp_reference(grp);
243 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
244}
245
246/*
247 * Routine: lck_spin_destroy
248 */
249void
250lck_spin_destroy(
251 lck_spin_t *lck,
252 lck_grp_t *grp)
253{
b0d623f7 254 if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
91447636 255 return;
b0d623f7 256 lck->interlock = LCK_SPIN_TAG_DESTROYED;
91447636
A
257 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
258 lck_grp_deallocate(grp);
259 return;
260}
261
262/*
263 * Routine: lck_spin_lock
264 */
265void
266lck_spin_lock(
267 lck_spin_t *lck)
268{
269 usimple_lock((usimple_lock_t) lck);
270}
271
272/*
273 * Routine: lck_spin_unlock
274 */
275void
276lck_spin_unlock(
277 lck_spin_t *lck)
278{
279 usimple_unlock((usimple_lock_t) lck);
280}
281
282
283/*
284 * Routine: lck_spin_try_lock
285 */
286boolean_t
287lck_spin_try_lock(
288 lck_spin_t *lck)
289{
2d21ac55 290 return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
91447636
A
291}
292
fe8ab488 293/*
3e170ce0 294 * Routine: kdp_lck_spin_is_acquired
fe8ab488
A
295 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
296 * Returns: TRUE if lock is acquired.
297 */
298boolean_t
3e170ce0 299kdp_lck_spin_is_acquired(lck_spin_t *lck) {
fe8ab488
A
300 if (not_in_kdp) {
301 panic("panic: spinlock acquired check done outside of kernel debugger");
302 }
303 return (lck->interlock != 0)? TRUE : FALSE;
304}
305
91447636
A
306/*
307 * Initialize a usimple_lock.
308 *
309 * No change in preemption state.
310 */
311void
312usimple_lock_init(
313 usimple_lock_t l,
314 __unused unsigned short tag)
315{
316#ifndef MACHINE_SIMPLE_LOCK
317 USLDBG(usld_lock_init(l, tag));
318 hw_lock_init(&l->interlock);
319#else
320 simple_lock_init((simple_lock_t)l,tag);
321#endif
322}
323
060df5ea
A
324volatile uint32_t spinlock_owner_cpu = ~0;
325volatile usimple_lock_t spinlock_timed_out;
326
fe8ab488 327uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
060df5ea
A
328 uint64_t deadline;
329 uint32_t i;
330
331 for (i = 0; i < real_ncpus; i++) {
332 if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
333 spinlock_owner_cpu = i;
6d2010ae 334 if ((uint32_t) cpu_number() == i)
060df5ea
A
335 break;
336 cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
337 cpu_NMI_interrupt(i);
338 deadline = mach_absolute_time() + (LockTimeOut * 2);
339 while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
340 cpu_pause();
341 break;
342 }
343 }
344
345 return spinlock_owner_cpu;
346}
91447636
A
347
348/*
349 * Acquire a usimple_lock.
350 *
351 * Returns with preemption disabled. Note
352 * that the hw_lock routines are responsible for
353 * maintaining preemption state.
354 */
355void
356usimple_lock(
357 usimple_lock_t l)
358{
359#ifndef MACHINE_SIMPLE_LOCK
2d21ac55 360 DECL_PC(pc);
91447636 361
b0d623f7 362 OBTAIN_PC(pc);
91447636 363 USLDBG(usld_lock_pre(l, pc));
6d2010ae
A
364
365 if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0)) {
b0d623f7 366 boolean_t uslock_acquired = FALSE;
060df5ea
A
367 while (machine_timeout_suspended()) {
368 enable_preemption();
369 if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
370 break;
6d2010ae
A
371 }
372
060df5ea
A
373 if (uslock_acquired == FALSE) {
374 uint32_t lock_cpu;
7ddcb079 375 uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
060df5ea 376 spinlock_timed_out = l;
7ddcb079
A
377 lock_cpu = spinlock_timeout_NMI(lowner);
378 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
060df5ea 379 }
b0d623f7 380 }
91447636
A
381 USLDBG(usld_lock_post(l, pc));
382#else
383 simple_lock((simple_lock_t)l);
384#endif
385}
386
387
388/*
389 * Release a usimple_lock.
390 *
391 * Returns with preemption enabled. Note
392 * that the hw_lock routines are responsible for
393 * maintaining preemption state.
394 */
395void
396usimple_unlock(
397 usimple_lock_t l)
398{
399#ifndef MACHINE_SIMPLE_LOCK
400 DECL_PC(pc);
401
b0d623f7 402 OBTAIN_PC(pc);
91447636
A
403 USLDBG(usld_unlock(l, pc));
404 hw_lock_unlock(&l->interlock);
405#else
406 simple_unlock_rwmb((simple_lock_t)l);
407#endif
408}
409
410
411/*
412 * Conditionally acquire a usimple_lock.
413 *
414 * On success, returns with preemption disabled.
415 * On failure, returns with preemption in the same state
416 * as when first invoked. Note that the hw_lock routines
417 * are responsible for maintaining preemption state.
418 *
419 * XXX No stats are gathered on a miss; I preserved this
420 * behavior from the original assembly-language code, but
421 * doesn't it make sense to log misses? XXX
422 */
423unsigned int
424usimple_lock_try(
425 usimple_lock_t l)
426{
427#ifndef MACHINE_SIMPLE_LOCK
91447636 428 unsigned int success;
2d21ac55 429 DECL_PC(pc);
91447636 430
b0d623f7 431 OBTAIN_PC(pc);
91447636
A
432 USLDBG(usld_lock_try_pre(l, pc));
433 if ((success = hw_lock_try(&l->interlock))) {
434 USLDBG(usld_lock_try_post(l, pc));
435 }
436 return success;
437#else
438 return(simple_lock_try((simple_lock_t)l));
439#endif
440}
441
442#if USLOCK_DEBUG
443/*
444 * States of a usimple_lock. The default when initializing
445 * a usimple_lock is setting it up for debug checking.
446 */
#define USLOCK_CHECKED 0x0001 /* lock is being checked */
#define USLOCK_TAKEN 0x0002 /* lock has been taken */
#define USLOCK_INIT 0xBAA0 /* lock has been initialized */
/* State given to a freshly initialized lock when uslock_check is on. */
#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
/* True when global checking is on and this lock has USLOCK_CHECKED set. */
#define USLOCK_CHECKING(l) (uslock_check && \
 ((l)->debug.state & USLOCK_CHECKED))
453
454/*
455 * Trace activities of a particularly interesting lock.
456 */
457void usl_trace(usimple_lock_t, int, pc_t, const char *);
458
459
460/*
461 * Initialize the debugging information contained
462 * in a usimple_lock.
463 */
464void
465usld_lock_init(
466 usimple_lock_t l,
467 __unused unsigned short tag)
468{
469 if (l == USIMPLE_LOCK_NULL)
470 panic("lock initialization: null lock pointer");
471 l->lock_type = USLOCK_TAG;
472 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
473 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
474 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
475 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
476 l->debug.duration[0] = l->debug.duration[1] = 0;
477 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
478 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
479 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
480}
481
482
483/*
484 * These checks apply to all usimple_locks, not just
485 * those with USLOCK_CHECKED turned on.
486 */
487int
488usld_lock_common_checks(
489 usimple_lock_t l,
490 char *caller)
491{
492 if (l == USIMPLE_LOCK_NULL)
493 panic("%s: null lock pointer", caller);
494 if (l->lock_type != USLOCK_TAG)
ebb1b9f4 495 panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
91447636 496 if (!(l->debug.state & USLOCK_INIT))
ebb1b9f4 497 panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
91447636
A
498 return USLOCK_CHECKING(l);
499}
500
501
502/*
503 * Debug checks on a usimple_lock just before attempting
504 * to acquire it.
505 */
506/* ARGSUSED */
507void
508usld_lock_pre(
509 usimple_lock_t l,
510 pc_t pc)
511{
512 char caller[] = "usimple_lock";
513
514
515 if (!usld_lock_common_checks(l, caller))
516 return;
517
518/*
519 * Note that we have a weird case where we are getting a lock when we are]
520 * in the process of putting the system to sleep. We are running with no
521 * current threads, therefore we can't tell if we are trying to retake a lock
522 * we have or someone on the other processor has it. Therefore we just
523 * ignore this test if the locking thread is 0.
524 */
525
526 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
527 l->debug.lock_thread == (void *) current_thread()) {
2d21ac55
A
528 printf("%s: lock %p already locked (at %p) by",
529 caller, l, l->debug.lock_pc);
530 printf(" current thread %p (new attempt at pc %p)\n",
91447636 531 l->debug.lock_thread, pc);
2d21ac55 532 panic("%s", caller);
91447636
A
533 }
534 mp_disable_preemption();
535 usl_trace(l, cpu_number(), pc, caller);
536 mp_enable_preemption();
537}
538
539
540/*
541 * Debug checks on a usimple_lock just after acquiring it.
542 *
543 * Pre-emption has been disabled at this point,
544 * so we are safe in using cpu_number.
545 */
546void
547usld_lock_post(
548 usimple_lock_t l,
549 pc_t pc)
550{
551 register int mycpu;
552 char caller[] = "successful usimple_lock";
553
554
555 if (!usld_lock_common_checks(l, caller))
556 return;
557
558 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7
A
559 panic("%s: lock %p became uninitialized",
560 caller, l);
91447636 561 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7
A
562 panic("%s: lock 0x%p became TAKEN by someone else",
563 caller, l);
91447636
A
564
565 mycpu = cpu_number();
566 l->debug.lock_thread = (void *)current_thread();
567 l->debug.state |= USLOCK_TAKEN;
568 l->debug.lock_pc = pc;
569 l->debug.lock_cpu = mycpu;
570
571 usl_trace(l, mycpu, pc, caller);
572}
573
574
575/*
576 * Debug checks on a usimple_lock just before
577 * releasing it. Note that the caller has not
578 * yet released the hardware lock.
579 *
580 * Preemption is still disabled, so there's
581 * no problem using cpu_number.
582 */
583void
584usld_unlock(
585 usimple_lock_t l,
586 pc_t pc)
587{
588 register int mycpu;
589 char caller[] = "usimple_unlock";
590
591
592 if (!usld_lock_common_checks(l, caller))
593 return;
594
595 mycpu = cpu_number();
596
597 if (!(l->debug.state & USLOCK_TAKEN))
b0d623f7
A
598 panic("%s: lock 0x%p hasn't been taken",
599 caller, l);
91447636 600 if (l->debug.lock_thread != (void *) current_thread())
b0d623f7
A
601 panic("%s: unlocking lock 0x%p, owned by thread %p",
602 caller, l, l->debug.lock_thread);
91447636 603 if (l->debug.lock_cpu != mycpu) {
b0d623f7
A
604 printf("%s: unlocking lock 0x%p on cpu 0x%x",
605 caller, l, mycpu);
91447636 606 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
2d21ac55 607 panic("%s", caller);
91447636
A
608 }
609 usl_trace(l, mycpu, pc, caller);
610
611 l->debug.unlock_thread = l->debug.lock_thread;
612 l->debug.lock_thread = INVALID_PC;
613 l->debug.state &= ~USLOCK_TAKEN;
614 l->debug.unlock_pc = pc;
615 l->debug.unlock_cpu = mycpu;
616}
617
618
619/*
620 * Debug checks on a usimple_lock just before
621 * attempting to acquire it.
622 *
623 * Preemption isn't guaranteed to be disabled.
624 */
625void
626usld_lock_try_pre(
627 usimple_lock_t l,
628 pc_t pc)
629{
630 char caller[] = "usimple_lock_try";
631
632 if (!usld_lock_common_checks(l, caller))
633 return;
634 mp_disable_preemption();
635 usl_trace(l, cpu_number(), pc, caller);
636 mp_enable_preemption();
637}
638
639
640/*
641 * Debug checks on a usimple_lock just after
642 * successfully attempting to acquire it.
643 *
644 * Preemption has been disabled by the
645 * lock acquisition attempt, so it's safe
646 * to use cpu_number.
647 */
648void
649usld_lock_try_post(
650 usimple_lock_t l,
651 pc_t pc)
652{
653 register int mycpu;
654 char caller[] = "successful usimple_lock_try";
655
656 if (!usld_lock_common_checks(l, caller))
657 return;
658
659 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7
A
660 panic("%s: lock 0x%p became uninitialized",
661 caller, l);
91447636 662 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7
A
663 panic("%s: lock 0x%p became TAKEN by someone else",
664 caller, l);
91447636
A
665
666 mycpu = cpu_number();
667 l->debug.lock_thread = (void *) current_thread();
668 l->debug.state |= USLOCK_TAKEN;
669 l->debug.lock_pc = pc;
670 l->debug.lock_cpu = mycpu;
671
672 usl_trace(l, mycpu, pc, caller);
673}
674
675
676/*
677 * For very special cases, set traced_lock to point to a
678 * specific lock of interest. The result is a series of
679 * XPRs showing lock operations on that lock. The lock_seq
680 * value is used to show the order of those operations.
681 */
682usimple_lock_t traced_lock;
683unsigned int lock_seq;
684
685void
686usl_trace(
687 usimple_lock_t l,
688 int mycpu,
689 pc_t pc,
690 const char * op_name)
691{
692 if (traced_lock == l) {
693 XPR(XPR_SLOCK,
694 "seq %d, cpu %d, %s @ %x\n",
b0d623f7
A
695 (uintptr_t) lock_seq, (uintptr_t) mycpu,
696 (uintptr_t) op_name, (uintptr_t) pc, 0);
91447636
A
697 lock_seq++;
698 }
699}
700
701
702#endif /* USLOCK_DEBUG */
703
91447636
A
704/*
705 * Routine: lck_rw_alloc_init
706 */
707lck_rw_t *
708lck_rw_alloc_init(
709 lck_grp_t *grp,
710 lck_attr_t *attr) {
711 lck_rw_t *lck;
712
b0d623f7
A
713 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
714 bzero(lck, sizeof(lck_rw_t));
91447636 715 lck_rw_init(lck, grp, attr);
b0d623f7
A
716 }
717
91447636
A
718 return(lck);
719}
720
721/*
722 * Routine: lck_rw_free
723 */
724void
725lck_rw_free(
726 lck_rw_t *lck,
727 lck_grp_t *grp) {
728 lck_rw_destroy(lck, grp);
729 kfree(lck, sizeof(lck_rw_t));
730}
731
732/*
733 * Routine: lck_rw_init
734 */
735void
736lck_rw_init(
737 lck_rw_t *lck,
738 lck_grp_t *grp,
0c530ab8
A
739 lck_attr_t *attr)
740{
741 lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
742 attr : &LockDefaultLckAttr;
91447636 743
2d21ac55
A
744 hw_lock_byte_init(&lck->lck_rw_interlock);
745 lck->lck_rw_want_write = FALSE;
746 lck->lck_rw_want_upgrade = FALSE;
747 lck->lck_rw_shared_count = 0;
748 lck->lck_rw_can_sleep = TRUE;
b0d623f7 749 lck->lck_r_waiting = lck->lck_w_waiting = 0;
91447636 750 lck->lck_rw_tag = 0;
2d21ac55
A
751 lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
752 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
91447636
A
753
754 lck_grp_reference(grp);
755 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
756}
757
758/*
759 * Routine: lck_rw_destroy
760 */
761void
762lck_rw_destroy(
763 lck_rw_t *lck,
b0d623f7
A
764 lck_grp_t *grp)
765{
91447636
A
766 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
767 return;
39236c6e
A
768#if MACH_LDEBUG
769 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
770#endif
91447636
A
771 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
772 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
773 lck_grp_deallocate(grp);
774 return;
775}
776
777/*
778 * Sleep locks. These use the same data structure and algorithm
779 * as the spin locks, but the process sleeps while it is waiting
780 * for the lock. These work on uniprocessor systems.
781 */
782
/* NOTE(review): not referenced in the visible part of this file. */
#define DECREMENTER_TIMEOUT 1000000

/*
 * Event that blocked readers are woken on: the address of the
 * lck_rw_tag field inside the lock.
 */
#define RW_LOCK_READER_EVENT(x) \
 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))

/*
 * Event that blocked writers wait on: the address of the
 * lck_rw_pad8 field inside the lock.
 */
#define RW_LOCK_WRITER_EVENT(x) \
 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
91447636
A
790
791/*
6d2010ae
A
792 * We disable interrupts while holding the RW interlock to prevent an
793 * interrupt from exacerbating hold time.
91447636
A
794 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
795 */
796static boolean_t
797lck_interlock_lock(lck_rw_t *lck)
798{
799 boolean_t istate;
800
801 istate = ml_set_interrupts_enabled(FALSE);
2d21ac55 802 hw_lock_byte_lock(&lck->lck_rw_interlock);
91447636
A
803
804 return istate;
805}
806
807static void
808lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
809{
2d21ac55 810 hw_lock_byte_unlock(&lck->lck_rw_interlock);
91447636
A
811 ml_set_interrupts_enabled(istate);
812}
813
0c530ab8
A
814/*
815 * This inline is used when busy-waiting for an rw lock.
816 * If interrupts were disabled when the lock primitive was called,
817 * we poll the IPI handler for pending tlb flushes.
818 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
819 */
820static inline void
821lck_rw_lock_pause(boolean_t interrupts_enabled)
822{
823 if (!interrupts_enabled)
824 handle_pending_TLB_flushes();
825 cpu_pause();
826}
827
b0d623f7
A
828
829/*
830 * compute the deadline to spin against when
831 * waiting for a change of state on a lck_rw_t
832 */
833static inline uint64_t
834lck_rw_deadline_for_spin(lck_rw_t *lck)
835{
836 if (lck->lck_rw_can_sleep) {
837 if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
838 /*
839 * there are already threads waiting on this lock... this
840 * implies that they have spun beyond their deadlines waiting for
841 * the desired state to show up so we will not bother spinning at this time...
842 * or
843 * the current number of threads sharing this lock exceeds our capacity to run them
844 * concurrently and since all states we're going to spin for require the rw_shared_count
845 * to be at 0, we'll not bother spinning since the latency for this to happen is
846 * unpredictable...
847 */
848 return (mach_absolute_time());
849 }
850 return (mach_absolute_time() + MutexSpin);
851 } else
852 return (mach_absolute_time() + (100000LL * 1000000000LL));
853}
854
855
91447636
A
/*
 *      Routine:        lck_rw_lock_exclusive_gen
 *
 *	Acquire a rw lock for exclusive (write) access.  Works in two
 *	phases, each of which spins until a deadline and then, if the
 *	lock is allowed to sleep, blocks on the writer event:
 *	  1. obtain the lck_rw_want_write bit;
 *	  2. wait for readers and any upgrader to drain.
 */
void
lck_rw_lock_exclusive_gen(
	lck_rw_t	*lck)
{
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	uint64_t	deadline = 0;
	int		slept = 0;	/* number of times we actually blocked */
	int		gotlock = 0;
	int		lockheld = 0;
	wait_result_t	res = 0;
	boolean_t	istate = -1;	/* -1: interrupt state not sampled yet */

#if	CONFIG_DTRACE
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
#endif

	/*
	 *	Try to acquire the lck_rw_want_write bit.
	 */
	while ( !lck_rw_grab_want(lck)) {

#if	CONFIG_DTRACE
		/* One-time probe setup on the first contended pass. */
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 *  start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

		/* Spin until we get the want_write bit or the deadline passes. */
		while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_want_write) {

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				/* Queue the wait under the interlock, then drop it before blocking. */
				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
			} else {
				/* want_write is clear under the interlock: claim it directly. */
				lck->lck_rw_want_write = TRUE;
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}
	/*
	 * Wait for readers (and upgrades) to finish...
	 * the test for these conditions must be done simultaneously with
	 * a check of the interlock not being held since
	 * the rw_shared_count will drop to 0 first and then want_upgrade
	 * will be set to 1 in the shared_to_exclusive scenario... those
	 * adjustments are done behind the interlock and represent an
	 * atomic change in state and must be considered as such
	 * however, once we see the read count at 0, the want_upgrade not set
	 * and the interlock not held, we are safe to proceed
	 */
	while (lck_rw_held_read_or_upgrade(lck)) {

#if	CONFIG_DTRACE
		/*
		 * Either sleeping or spinning is happening, start
		 * a timing of our delay interval now, unless the first
		 * phase already did.  Timing is only started when a
		 * lockstat probe is enabled (dtrace_ls_enabled); otherwise
		 * no spin or sleep event is recorded later.
		 */
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 *  start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif
		if (istate == -1)
			istate = ml_get_interrupts_enabled();

		deadline = lck_rw_deadline_for_spin(lck);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

		/* Spin until readers/upgrader are gone or the deadline passes. */
		while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
			lck_rw_lock_pause(istate);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, lockheld, 0);

		if ( !lockheld)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

				lck->lck_w_waiting = TRUE;

				/* Queue the wait under the interlock, then drop it before blocking. */
				res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lck, istate);
				/*
				 * must own the lock now, since we checked for
				 * readers or upgrade owner behind the interlock
				 * no need for a call to 'lck_rw_held_read_or_upgrade'
				 */
				break;
			}
		}
	}

#if	CONFIG_DTRACE
	/*
	 * Decide what latencies we suffered that are Dtrace events.
	 * If dtrace_ls_enabled is set, then we either spun or slept and
	 * wait_interval holds the time at which the delay began.
	 * At least we get out from under the interlock before we record
	 * which is the best we can do here to minimize the impact
	 * of the tracing.
	 * If dtrace_ls_enabled is not set, no probe was enabled when we
	 * started sleeping/spinning so we don't record this event.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
			    mach_absolute_time() - wait_interval, 1);
		} else {
			/*
			 * For the blocking case, we also record if when we blocked
			 * it was held for read or write, and how many readers.
			 * NOTE(review): readers_at_sleep is sampled when the
			 * first contended pass begins, without the interlock
			 * held, so the count is a snapshot from that moment.
			 */
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
#endif
}
1055
1056
1057/*
2d21ac55 1058 * Routine: lck_rw_done_gen
b0d623f7
A
1059 *
1060 * called from the assembly language wrapper...
1061 * prior_lock_state is the value in the 1st
1062 * word of the lock at the time of a successful
1063 * atomic compare and exchange with the new value...
1064 * it represents the state of the lock before we
1065 * decremented the rw_shared_count or cleared either
1066 * rw_want_upgrade or rw_want_write and
1067 * the lck_x_waiting bits... since the wrapper
1068 * routine has already changed the state atomically,
1069 * we just need to decide if we should
1070 * wake up anyone and what value to return... we do
1071 * this by examining the state of the lock before
1072 * we changed it
91447636
A
1073 */
1074lck_rw_type_t
2d21ac55 1075lck_rw_done_gen(
b0d623f7
A
1076 lck_rw_t *lck,
1077 int prior_lock_state)
91447636 1078{
b0d623f7
A
1079 lck_rw_t *fake_lck;
1080 lck_rw_type_t lock_type;
fe8ab488 1081 thread_t thread;
39236c6e
A
1082 uint32_t rwlock_count;
1083
91447636 1084 /*
b0d623f7
A
1085 * prior_lock state is a snapshot of the 1st word of the
1086 * lock in question... we'll fake up a pointer to it
1087 * and carefully not access anything beyond whats defined
1088 * in the first word of a lck_rw_t
91447636 1089 */
b0d623f7 1090 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1091
b0d623f7
A
1092 if (fake_lck->lck_rw_shared_count <= 1) {
1093 if (fake_lck->lck_w_waiting)
1094 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
91447636 1095
b0d623f7
A
1096 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1097 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1098 }
1099 if (fake_lck->lck_rw_shared_count)
1100 lock_type = LCK_RW_TYPE_SHARED;
1101 else
1102 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2d21ac55 1103
fe8ab488
A
1104 /* Check if dropping the lock means that we need to unpromote */
1105 thread = current_thread();
1106 rwlock_count = thread->rwlock_count--;
1107#if MACH_LDEBUG
1108 if (rwlock_count == 0) {
1109 panic("rw lock count underflow for thread %p", thread);
1110 }
1111#endif
1112 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1113 /* sched_flags checked without lock, but will be rechecked while clearing */
1114 lck_rw_clear_promotion(thread);
1115 }
1116
2d21ac55 1117#if CONFIG_DTRACE
b0d623f7 1118 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2d21ac55
A
1119#endif
1120
b0d623f7 1121 return(lock_type);
91447636
A
1122}
1123
1124
91447636
A
1125/*
1126 * Routine: lck_rw_unlock
1127 */
1128void
1129lck_rw_unlock(
1130 lck_rw_t *lck,
1131 lck_rw_type_t lck_rw_type)
1132{
1133 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1134 lck_rw_unlock_shared(lck);
1135 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1136 lck_rw_unlock_exclusive(lck);
1137 else
1138 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
1139}
1140
1141
1142/*
1143 * Routine: lck_rw_unlock_shared
1144 */
1145void
1146lck_rw_unlock_shared(
1147 lck_rw_t *lck)
1148{
1149 lck_rw_type_t ret;
1150
1151 ret = lck_rw_done(lck);
1152
1153 if (ret != LCK_RW_TYPE_SHARED)
1154 panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
1155}
1156
1157
1158/*
1159 * Routine: lck_rw_unlock_exclusive
1160 */
1161void
1162lck_rw_unlock_exclusive(
1163 lck_rw_t *lck)
1164{
1165 lck_rw_type_t ret;
1166
1167 ret = lck_rw_done(lck);
1168
1169 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1170 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
1171}
1172
1173
1174/*
1175 * Routine: lck_rw_lock
1176 */
1177void
1178lck_rw_lock(
1179 lck_rw_t *lck,
1180 lck_rw_type_t lck_rw_type)
1181{
1182 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1183 lck_rw_lock_shared(lck);
1184 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1185 lck_rw_lock_exclusive(lck);
1186 else
1187 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
1188}
1189
1190
1191/*
2d21ac55 1192 * Routine: lck_rw_lock_shared_gen
b0d623f7
A
1193 * Function:
1194 * assembly fast path code has determined that this lock
1195 * is held exclusively... this is where we spin/block
1196 * until we can acquire the lock in the shared mode
91447636
A
1197 */
1198void
2d21ac55 1199lck_rw_lock_shared_gen(
91447636
A
1200 lck_rw_t *lck)
1201{
3e170ce0 1202 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
b0d623f7
A
1203 uint64_t deadline = 0;
1204 int gotlock = 0;
1205 int slept = 0;
1206 wait_result_t res = 0;
1207 boolean_t istate = -1;
3e170ce0 1208
2d21ac55
A
1209#if CONFIG_DTRACE
1210 uint64_t wait_interval = 0;
b0d623f7
A
1211 int readers_at_sleep = 0;
1212 boolean_t dtrace_ls_initialized = FALSE;
1213 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
2d21ac55 1214#endif
91447636 1215
b0d623f7
A
1216 while ( !lck_rw_grab_shared(lck)) {
1217
2d21ac55 1218#if CONFIG_DTRACE
b0d623f7
A
1219 if (dtrace_ls_initialized == FALSE) {
1220 dtrace_ls_initialized = TRUE;
1221 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1222 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1223 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1224 if (dtrace_ls_enabled) {
1225 /*
1226 * Either sleeping or spinning is happening,
1227 * start a timing of our delay interval now.
1228 */
1229 readers_at_sleep = lck->lck_rw_shared_count;
1230 wait_interval = mach_absolute_time();
1231 }
1232 }
2d21ac55 1233#endif
b0d623f7
A
1234 if (istate == -1)
1235 istate = ml_get_interrupts_enabled();
91447636 1236
b0d623f7 1237 deadline = lck_rw_deadline_for_spin(lck);
0c530ab8 1238
b0d623f7 1239 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
3e170ce0 1240 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
91447636 1241
b0d623f7
A
1242 while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
1243 lck_rw_lock_pause(istate);
1244
1245 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
3e170ce0 1246 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
b0d623f7
A
1247
1248 if (gotlock)
1249 break;
1250 /*
1251 * if we get here, the deadline has expired w/o us
1252 * being able to grab the lock for read
1253 * check to see if we're allowed to do a thread_block
1254 */
1255 if (lck->lck_rw_can_sleep) {
91447636 1256
91447636 1257 istate = lck_interlock_lock(lck);
91447636 1258
b0d623f7
A
1259 if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1260 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
1261
1262 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
3e170ce0 1263 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
b0d623f7
A
1264
1265 lck->lck_r_waiting = TRUE;
1266
1267 res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
91447636 1268 lck_interlock_unlock(lck, istate);
b0d623f7
A
1269
1270 if (res == THREAD_WAITING) {
1271 res = thread_block(THREAD_CONTINUE_NULL);
1272 slept++;
1273 }
1274 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
3e170ce0 1275 trace_lck, res, slept, 0, 0);
b0d623f7
A
1276 } else {
1277 lck->lck_rw_shared_count++;
1278 lck_interlock_unlock(lck, istate);
1279 break;
91447636
A
1280 }
1281 }
91447636
A
1282 }
1283
2d21ac55 1284#if CONFIG_DTRACE
b0d623f7 1285 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1286 if (slept == 0) {
1287 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1288 } else {
1289 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1290 mach_absolute_time() - wait_interval, 0,
1291 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1292 }
1293 }
1294 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1295#endif
91447636
A
1296}
1297
1298
1299/*
b0d623f7 1300 * Routine: lck_rw_lock_shared_to_exclusive_failure
91447636 1301 * Function:
b0d623f7
A
1302 * assembly fast path code has already dropped our read
1303 * count and determined that someone else owns 'lck_rw_want_upgrade'
1304 * if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
1305 * all we need to do here is determine if a wakeup is needed
91447636 1306 */
91447636 1307boolean_t
b0d623f7
A
1308lck_rw_lock_shared_to_exclusive_failure(
1309 lck_rw_t *lck,
1310 int prior_lock_state)
91447636 1311{
b0d623f7 1312 lck_rw_t *fake_lck;
39236c6e
A
1313 thread_t thread = current_thread();
1314 uint32_t rwlock_count;
1315
1316 /* Check if dropping the lock means that we need to unpromote */
1317 rwlock_count = thread->rwlock_count--;
1318#if MACH_LDEBUG
1319 if (rwlock_count == 0) {
1320 panic("rw lock count underflow for thread %p", thread);
1321 }
1322#endif
1323 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1324 /* sched_flags checked without lock, but will be rechecked while clearing */
1325 lck_rw_clear_promotion(thread);
1326 }
91447636 1327
b0d623f7
A
1328 /*
1329 * prior_lock state is a snapshot of the 1st word of the
1330 * lock in question... we'll fake up a pointer to it
1331 * and carefully not access anything beyond whats defined
1332 * in the first word of a lck_rw_t
1333 */
1334 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1335
b0d623f7 1336 if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
91447636
A
1337 /*
1338 * Someone else has requested upgrade.
b0d623f7
A
1339 * Since we've released the read lock, wake
1340 * him up if he's blocked waiting
91447636 1341 */
b0d623f7
A
1342 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1343 }
1344 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
3e170ce0 1345 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
91447636 1346
b0d623f7
A
1347 return (FALSE);
1348}
91447636 1349
91447636 1350
b0d623f7
A
1351/*
1352 * Routine: lck_rw_lock_shared_to_exclusive_failure
1353 * Function:
1354 * assembly fast path code has already dropped our read
1355 * count and successfully acquired 'lck_rw_want_upgrade'
1356 * we just need to wait for the rest of the readers to drain
1357 * and then we can return as the exclusive holder of this lock
1358 */
1359boolean_t
1360lck_rw_lock_shared_to_exclusive_success(
1361 lck_rw_t *lck)
1362{
3e170ce0 1363 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
b0d623f7
A
1364 uint64_t deadline = 0;
1365 int slept = 0;
1366 int still_shared = 0;
1367 wait_result_t res;
1368 boolean_t istate = -1;
91447636 1369
b0d623f7
A
1370#if CONFIG_DTRACE
1371 uint64_t wait_interval = 0;
1372 int readers_at_sleep = 0;
1373 boolean_t dtrace_ls_initialized = FALSE;
1374 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1375#endif
91447636 1376
2d21ac55 1377 while (lck->lck_rw_shared_count != 0) {
b0d623f7 1378
2d21ac55 1379#if CONFIG_DTRACE
b0d623f7
A
1380 if (dtrace_ls_initialized == FALSE) {
1381 dtrace_ls_initialized = TRUE;
1382 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1383 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1384 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1385 if (dtrace_ls_enabled) {
1386 /*
1387 * Either sleeping or spinning is happening,
1388 * start a timing of our delay interval now.
1389 */
1390 readers_at_sleep = lck->lck_rw_shared_count;
1391 wait_interval = mach_absolute_time();
1392 }
2d21ac55
A
1393 }
1394#endif
b0d623f7
A
1395 if (istate == -1)
1396 istate = ml_get_interrupts_enabled();
1397
1398 deadline = lck_rw_deadline_for_spin(lck);
1399
1400 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
3e170ce0 1401 trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
b0d623f7
A
1402
1403 while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
1404 lck_rw_lock_pause(istate);
1405
1406 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
3e170ce0 1407 trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
b0d623f7
A
1408
1409 if ( !still_shared)
1410 break;
1411 /*
1412 * if we get here, the deadline has expired w/o
1413 * the rw_shared_count having drained to 0
1414 * check to see if we're allowed to do a thread_block
1415 */
1416 if (lck->lck_rw_can_sleep) {
1417
91447636 1418 istate = lck_interlock_lock(lck);
b0d623f7
A
1419
1420 if (lck->lck_rw_shared_count != 0) {
1421 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
3e170ce0 1422 trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
b0d623f7
A
1423
1424 lck->lck_w_waiting = TRUE;
91447636 1425
b0d623f7 1426 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
91447636 1427 lck_interlock_unlock(lck, istate);
b0d623f7
A
1428
1429 if (res == THREAD_WAITING) {
1430 res = thread_block(THREAD_CONTINUE_NULL);
1431 slept++;
1432 }
1433 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
3e170ce0 1434 trace_lck, res, slept, 0, 0);
b0d623f7
A
1435 } else {
1436 lck_interlock_unlock(lck, istate);
1437 break;
91447636
A
1438 }
1439 }
91447636 1440 }
2d21ac55
A
1441#if CONFIG_DTRACE
1442 /*
1443 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1444 */
b0d623f7 1445 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1446 if (slept == 0) {
1447 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1448 } else {
1449 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
1450 mach_absolute_time() - wait_interval, 1,
1451 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1452 }
1453 }
2d21ac55
A
1454 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1455#endif
1456 return (TRUE);
91447636
A
1457}
1458
b0d623f7 1459
91447636
A
1460/*
1461 * Routine: lck_rw_lock_exclusive_to_shared
b0d623f7
A
1462 * Function:
1463 * assembly fast path has already dropped
1464 * our exclusive state and bumped lck_rw_shared_count
1465 * all we need to do here is determine if anyone
1466 * needs to be awakened.
91447636
A
1467 */
1468void
b0d623f7
A
1469lck_rw_lock_exclusive_to_shared_gen(
1470 lck_rw_t *lck,
1471 int prior_lock_state)
91447636 1472{
3e170ce0
A
1473 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1474 lck_rw_t *fake_lck;
91447636 1475
b0d623f7
A
1476 /*
1477 * prior_lock state is a snapshot of the 1st word of the
1478 * lock in question... we'll fake up a pointer to it
1479 * and carefully not access anything beyond whats defined
1480 * in the first word of a lck_rw_t
1481 */
1482 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1483
b0d623f7 1484 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
3e170ce0 1485 trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
91447636 1486
b0d623f7
A
1487 /*
1488 * don't wake up anyone waiting to take the lock exclusively
1489 * since we hold a read count... when the read count drops to 0,
1490 * the writers will be woken.
1491 *
1492 * wake up any waiting readers if we don't have any writers waiting,
1493 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1494 */
1495 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
2d21ac55 1496 thread_wakeup(RW_LOCK_READER_EVENT(lck));
91447636
A
1497
1498 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
3e170ce0 1499 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
91447636 1500
2d21ac55
A
1501#if CONFIG_DTRACE
1502 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1503#endif
91447636
A
1504}
1505
1506
1507/*
1508 * Routine: lck_rw_try_lock
1509 */
1510boolean_t
1511lck_rw_try_lock(
1512 lck_rw_t *lck,
1513 lck_rw_type_t lck_rw_type)
1514{
1515 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1516 return(lck_rw_try_lock_shared(lck));
1517 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1518 return(lck_rw_try_lock_exclusive(lck));
1519 else
1520 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
1521 return(FALSE);
1522}
1523
91447636 1524
2d21ac55
A
1525void
1526lck_rw_assert(
1527 lck_rw_t *lck,
1528 unsigned int type)
1529{
1530 switch (type) {
1531 case LCK_RW_ASSERT_SHARED:
1532 if (lck->lck_rw_shared_count != 0) {
1533 return;
1534 }
1535 break;
1536 case LCK_RW_ASSERT_EXCLUSIVE:
1537 if ((lck->lck_rw_want_write ||
1538 lck->lck_rw_want_upgrade) &&
1539 lck->lck_rw_shared_count == 0) {
1540 return;
1541 }
1542 break;
1543 case LCK_RW_ASSERT_HELD:
1544 if (lck->lck_rw_want_write ||
1545 lck->lck_rw_want_upgrade ||
1546 lck->lck_rw_shared_count != 0) {
1547 return;
1548 }
1549 break;
39236c6e
A
1550 case LCK_RW_ASSERT_NOTHELD:
1551 if (!(lck->lck_rw_want_write ||
1552 lck->lck_rw_want_upgrade ||
1553 lck->lck_rw_shared_count != 0)) {
1554 return;
1555 }
1556 break;
2d21ac55
A
1557 default:
1558 break;
1559 }
1560
39236c6e
A
1561 panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type, *(uint32_t *)lck);
1562}
1563
1564/* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
1565void
1566lck_rw_clear_promotions_x86(thread_t thread)
1567{
1568#if MACH_LDEBUG
1569 /* It's fatal to leave a RW lock locked and return to userspace */
1570 panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
1571#else
1572 /* Paper over the issue */
1573 thread->rwlock_count = 0;
1574 lck_rw_clear_promotion(thread);
1575#endif
2d21ac55
A
1576}
1577
39236c6e 1578
3e170ce0
A
1579/*
1580 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
1581 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1582 */
1583boolean_t
1584kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
1585 if (not_in_kdp) {
1586 panic("panic: rw lock exclusive check done outside of kernel debugger");
1587 }
1588 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_write) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
1589}
1590
1591
6d2010ae
A
1592#ifdef MUTEX_ZONE
1593extern zone_t lck_mtx_zone;
1594#endif
91447636
A
1595/*
1596 * Routine: lck_mtx_alloc_init
1597 */
1598lck_mtx_t *
1599lck_mtx_alloc_init(
1600 lck_grp_t *grp,
1601 lck_attr_t *attr)
1602{
1603 lck_mtx_t *lck;
6d2010ae
A
1604#ifdef MUTEX_ZONE
1605 if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
1606 lck_mtx_init(lck, grp, attr);
1607#else
91447636
A
1608 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
1609 lck_mtx_init(lck, grp, attr);
6d2010ae 1610#endif
91447636
A
1611 return(lck);
1612}
1613
1614/*
1615 * Routine: lck_mtx_free
1616 */
1617void
1618lck_mtx_free(
1619 lck_mtx_t *lck,
1620 lck_grp_t *grp)
1621{
1622 lck_mtx_destroy(lck, grp);
6d2010ae
A
1623#ifdef MUTEX_ZONE
1624 zfree(lck_mtx_zone, lck);
1625#else
91447636 1626 kfree(lck, sizeof(lck_mtx_t));
6d2010ae 1627#endif
91447636
A
1628}
1629
1630/*
1631 * Routine: lck_mtx_ext_init
1632 */
1633static void
1634lck_mtx_ext_init(
1635 lck_mtx_ext_t *lck,
1636 lck_grp_t *grp,
1637 lck_attr_t *attr)
1638{
2d21ac55 1639 bzero((void *)lck, sizeof(lck_mtx_ext_t));
91447636
A
1640
1641 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636
A
1642 lck->lck_mtx_deb.type = MUTEX_TAG;
1643 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
1644 }
1645
1646 lck->lck_mtx_grp = grp;
2d21ac55
A
1647
1648 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
6d2010ae 1649 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
b0d623f7 1650
6d2010ae 1651 lck->lck_mtx.lck_mtx_is_ext = 1;
6d2010ae 1652 lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
91447636
A
1653}
1654
1655/*
1656 * Routine: lck_mtx_init
1657 */
1658void
1659lck_mtx_init(
1660 lck_mtx_t *lck,
1661 lck_grp_t *grp,
1662 lck_attr_t *attr)
1663{
1664 lck_mtx_ext_t *lck_ext;
2d21ac55
A
1665 lck_attr_t *lck_attr;
1666
1667 if (attr != LCK_ATTR_NULL)
1668 lck_attr = attr;
1669 else
1670 lck_attr = &LockDefaultLckAttr;
91447636 1671
2d21ac55 1672 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636 1673 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2d21ac55 1674 lck_mtx_ext_init(lck_ext, grp, lck_attr);
91447636
A
1675 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1676 lck->lck_mtx_ptr = lck_ext;
1677 }
1678 } else {
b0d623f7 1679 lck->lck_mtx_owner = 0;
6d2010ae 1680 lck->lck_mtx_state = 0;
91447636 1681 }
6d2010ae 1682 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
91447636
A
1683 lck_grp_reference(grp);
1684 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1685}
1686
2d21ac55
A
1687/*
1688 * Routine: lck_mtx_init_ext
1689 */
1690void
1691lck_mtx_init_ext(
1692 lck_mtx_t *lck,
1693 lck_mtx_ext_t *lck_ext,
1694 lck_grp_t *grp,
1695 lck_attr_t *attr)
1696{
1697 lck_attr_t *lck_attr;
1698
1699 if (attr != LCK_ATTR_NULL)
1700 lck_attr = attr;
1701 else
1702 lck_attr = &LockDefaultLckAttr;
1703
1704 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1705 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1706 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1707 lck->lck_mtx_ptr = lck_ext;
1708 } else {
b0d623f7 1709 lck->lck_mtx_owner = 0;
6d2010ae 1710 lck->lck_mtx_state = 0;
2d21ac55 1711 }
6d2010ae 1712 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
6d2010ae 1713
2d21ac55
A
1714 lck_grp_reference(grp);
1715 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1716}
1717
91447636
A
1718/*
1719 * Routine: lck_mtx_destroy
1720 */
1721void
1722lck_mtx_destroy(
1723 lck_mtx_t *lck,
1724 lck_grp_t *grp)
1725{
1726 boolean_t lck_is_indirect;
1727
1728 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
1729 return;
39236c6e
A
1730#if MACH_LDEBUG
1731 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
1732#endif
91447636 1733 lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
b0d623f7
A
1734
1735 lck_mtx_lock_mark_destroyed(lck);
1736
91447636
A
1737 if (lck_is_indirect)
1738 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
1739 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
1740 lck_grp_deallocate(grp);
1741 return;
1742}
1743
b0d623f7
A
1744
1745#define LCK_MTX_LCK_WAIT_CODE 0x20
1746#define LCK_MTX_LCK_WAKEUP_CODE 0x21
1747#define LCK_MTX_LCK_SPIN_CODE 0x22
1748#define LCK_MTX_LCK_ACQUIRE_CODE 0x23
1749#define LCK_MTX_LCK_DEMOTE_CODE 0x24
1750
1751
1752/*
1753 * Routine: lck_mtx_unlock_wakeup_x86
1754 *
6d2010ae
A
1755 * Invoked on unlock when there is
1756 * contention (i.e. the assembly routine sees that
1757 * that mutex->lck_mtx_waiters != 0 or
1758 * that mutex->lck_mtx_promoted != 0...
b0d623f7 1759 *
6d2010ae 1760 * neither the mutex or interlock is held
b0d623f7
A
1761 */
1762void
1763lck_mtx_unlock_wakeup_x86 (
1764 lck_mtx_t *mutex,
6d2010ae 1765 int prior_lock_state)
b0d623f7 1766{
3e170ce0
A
1767 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
1768 lck_mtx_t fake_lck;
6d2010ae
A
1769
1770 /*
1771 * prior_lock state is a snapshot of the 2nd word of the
1772 * lock in question... we'll fake up a lock with the bits
1773 * copied into place and carefully not access anything
1774 * beyond whats defined in the second word of a lck_mtx_t
1775 */
1776 fake_lck.lck_mtx_state = prior_lock_state;
1777
1778 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
3e170ce0 1779 trace_lck, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
b0d623f7 1780
6d2010ae 1781 if (__probable(fake_lck.lck_mtx_waiters)) {
6d2010ae 1782 if (fake_lck.lck_mtx_waiters > 1)
3e170ce0 1783 thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex), fake_lck.lck_mtx_pri);
6d2010ae 1784 else
3e170ce0 1785 thread_wakeup_one(LCK_MTX_EVENT(mutex));
6d2010ae 1786 }
b0d623f7 1787
6d2010ae 1788 if (__improbable(fake_lck.lck_mtx_promoted)) {
b0d623f7
A
1789 thread_t thread = current_thread();
1790
1791
6d2010ae
A
1792 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
1793 thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);
b0d623f7
A
1794
1795 if (thread->promotions > 0) {
1796 spl_t s = splsched();
1797
1798 thread_lock(thread);
1799
6d2010ae 1800 if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {
b0d623f7 1801
6d2010ae 1802 thread->sched_flags &= ~TH_SFLAG_PROMOTED;
b0d623f7 1803
fe8ab488
A
1804 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
1805 /* Thread still has a RW lock promotion */
1806 } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
6d2010ae 1807 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
3e170ce0 1808 thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
b0d623f7
A
1809
1810 set_sched_pri(thread, DEPRESSPRI);
1811 }
1812 else {
3e170ce0 1813 if (thread->base_pri < thread->sched_pri) {
6d2010ae 1814 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
3e170ce0 1815 thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
b0d623f7 1816
3e170ce0 1817 thread_recompute_sched_pri(thread, FALSE);
b0d623f7
A
1818 }
1819 }
1820 }
1821 thread_unlock(thread);
1822 splx(s);
1823 }
1824 }
6d2010ae 1825 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
3e170ce0 1826 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7
A
1827}
1828
1829
1830/*
1831 * Routine: lck_mtx_lock_acquire_x86
1832 *
1833 * Invoked on acquiring the mutex when there is
6d2010ae
A
1834 * contention (i.e. the assembly routine sees that
1835 * that mutex->lck_mtx_waiters != 0 or
1836 * thread->was_promoted_on_wakeup != 0)...
1837 *
1838 * mutex is owned... interlock is held... preemption is disabled
b0d623f7
A
1839 */
1840void
1841lck_mtx_lock_acquire_x86(
1842 lck_mtx_t *mutex)
1843{
3e170ce0
A
1844 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
1845 thread_t thread;
1846 integer_t priority;
1847 spl_t s;
b0d623f7 1848
6d2010ae 1849 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
3e170ce0 1850 trace_lck, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
b0d623f7 1851
6d2010ae
A
1852 if (mutex->lck_mtx_waiters)
1853 priority = mutex->lck_mtx_pri;
1854 else
1855 priority = 0;
b0d623f7 1856
6d2010ae 1857 thread = (thread_t)mutex->lck_mtx_owner; /* faster then current_thread() */
b0d623f7 1858
6d2010ae 1859 if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
b0d623f7 1860
6d2010ae 1861 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
3e170ce0 1862 thread->sched_pri, priority, thread->was_promoted_on_wakeup, trace_lck, 0);
b0d623f7 1863
6d2010ae
A
1864 s = splsched();
1865 thread_lock(thread);
b0d623f7 1866
39236c6e 1867 if (thread->sched_pri < priority) {
fe8ab488
A
1868 /* Do not promote past promotion ceiling */
1869 assert(priority <= MAXPRI_PROMOTE);
6d2010ae 1870 set_sched_pri(thread, priority);
39236c6e 1871 }
6d2010ae
A
1872 if (mutex->lck_mtx_promoted == 0) {
1873 mutex->lck_mtx_promoted = 1;
1874
b0d623f7 1875 thread->promotions++;
6d2010ae 1876 thread->sched_flags |= TH_SFLAG_PROMOTED;
b0d623f7 1877 }
6d2010ae
A
1878 thread->was_promoted_on_wakeup = 0;
1879
1880 thread_unlock(thread);
1881 splx(s);
b0d623f7 1882 }
6d2010ae 1883 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
3e170ce0 1884 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7
A
1885}
1886
1887
3e170ce0
A
1888static int
1889lck_mtx_interlock_try_lock(lck_mtx_t *mutex, boolean_t *istate)
1890{
1891 int retval;
1892
1893 *istate = ml_set_interrupts_enabled(FALSE);
1894 retval = lck_mtx_ilk_try_lock(mutex);
1895
1896 if (retval == 0)
1897 ml_set_interrupts_enabled(*istate);
1898
1899 return retval;
1900}
1901
1902static void
1903lck_mtx_interlock_unlock(lck_mtx_t *mutex, boolean_t istate)
1904{
1905 lck_mtx_ilk_unlock(mutex);
1906 ml_set_interrupts_enabled(istate);
1907}
1908
b0d623f7 1909
91447636 1910/*
b0d623f7 1911 * Routine: lck_mtx_lock_spinwait_x86
0c530ab8
A
1912 *
1913 * Invoked trying to acquire a mutex when there is contention but
1914 * the holder is running on another processor. We spin for up to a maximum
1915 * time waiting for the lock to be released.
1916 *
1917 * Called with the interlock unlocked.
6d2010ae
A
1918 * returns 0 if mutex acquired
1919 * returns 1 if we spun
1920 * returns 2 if we didn't spin due to the holder not running
0c530ab8 1921 */
b0d623f7
A
1922int
1923lck_mtx_lock_spinwait_x86(
1924 lck_mtx_t *mutex)
0c530ab8 1925{
3e170ce0 1926 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
b0d623f7 1927 thread_t holder;
3e170ce0
A
1928 uint64_t overall_deadline;
1929 uint64_t check_owner_deadline;
1930 uint64_t cur_time;
b0d623f7
A
1931 int retval = 1;
1932 int loopcount = 0;
0c530ab8 1933
6d2010ae 1934 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
3e170ce0 1935 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
0c530ab8 1936
3e170ce0
A
1937 cur_time = mach_absolute_time();
1938 overall_deadline = cur_time + MutexSpin;
1939 check_owner_deadline = cur_time;
b0d623f7 1940
0c530ab8
A
1941 /*
1942 * Spin while:
1943 * - mutex is locked, and
b0d623f7 1944 * - its locked as a spin lock, and
0c530ab8 1945 * - owner is running on another processor, and
2d21ac55 1946 * - owner (processor) is not idling, and
0c530ab8
A
1947 * - we haven't spun for long enough.
1948 */
b0d623f7 1949 do {
6d2010ae 1950 if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
b0d623f7
A
1951 retval = 0;
1952 break;
2d21ac55 1953 }
3e170ce0 1954 cur_time = mach_absolute_time();
b0d623f7 1955
3e170ce0
A
1956 if (cur_time >= overall_deadline)
1957 break;
1958
1959 if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
1960 boolean_t istate;
1961
1962 if (lck_mtx_interlock_try_lock(mutex, &istate)) {
1963
1964 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
1965
1966 if ( !(holder->machine.specFlags & OnProc) ||
1967 (holder->state & TH_IDLE)) {
1968
1969 lck_mtx_interlock_unlock(mutex, istate);
1970
1971 if (loopcount == 0)
1972 retval = 2;
1973 break;
1974 }
1975 }
1976 lck_mtx_interlock_unlock(mutex, istate);
1977
1978 check_owner_deadline = cur_time + (MutexSpin / 4);
b0d623f7
A
1979 }
1980 }
1981 cpu_pause();
1982
1983 loopcount++;
1984
3e170ce0 1985 } while (TRUE);
b0d623f7 1986
2d21ac55
A
1987#if CONFIG_DTRACE
1988 /*
3e170ce0 1989 * We've already kept a count via overall_deadline of how long we spun.
2d21ac55
A
1990 * If dtrace is active, then we compute backwards to decide how
1991 * long we spun.
1992 *
1993 * Note that we record a different probe id depending on whether
1994 * this is a direct or indirect mutex. This allows us to
1995 * penalize only lock groups that have debug/stats enabled
1996 * with dtrace processing if desired.
1997 */
6d2010ae 1998 if (__probable(mutex->lck_mtx_is_ext == 0)) {
b0d623f7 1999 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
3e170ce0 2000 mach_absolute_time() - (overall_deadline - MutexSpin));
2d21ac55 2001 } else {
b0d623f7 2002 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
3e170ce0 2003 mach_absolute_time() - (overall_deadline - MutexSpin));
2d21ac55
A
2004 }
2005 /* The lockstat acquire event is recorded by the assembly code beneath us. */
2006#endif
b0d623f7 2007
6d2010ae 2008 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
3e170ce0 2009 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
b0d623f7
A
2010
2011 return retval;
0c530ab8
A
2012}
2013
b0d623f7
A
2014
2015
0c530ab8 2016/*
b0d623f7
A
2017 * Routine: lck_mtx_lock_wait_x86
2018 *
2019 * Invoked in order to wait on contention.
2020 *
2021 * Called with the interlock locked and
6d2010ae
A
2022 * preemption disabled...
2023 * returns it unlocked and with preemption enabled
 *
 * Summary of what the body does, in order:
 *   1. Computes a promotion priority for the calling thread: the largest
 *      of its sched_pri, its base_pri and BASEPRI_DEFAULT, capped at
 *      MAXPRI_PROMOTE.
 *   2. Stores that value in mutex->lck_mtx_pri when there are no waiters
 *      yet or when it exceeds the stored value, and increments
 *      mutex->lck_mtx_waiters.
 *   3. If the mutex has an owner whose sched_pri is below lck_mtx_pri,
 *      raises the owner's priority with set_sched_pri() while holding
 *      splsched() and thread_lock(owner).
 *   4. Calls assert_wait() on LCK_MTX_EVENT(mutex) with THREAD_UNINT,
 *      releases the interlock with lck_mtx_ilk_unlock(), and calls
 *      thread_block().
 *
 * Parameters:
 *   mutex - the contended mutex to wait on.
 *
 * Returns nothing; the function returns after thread_block() returns.
0c530ab8
A
2024 */
2025void
b0d623f7
A
2026lck_mtx_lock_wait_x86 (
2027 lck_mtx_t *mutex)
0c530ab8 2028{
3e170ce0 2029 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
b0d623f7
A
2030 thread_t self = current_thread();
2031 thread_t holder;
2032 integer_t priority;
b0d623f7
A
2033 spl_t s;
2034#if CONFIG_DTRACE
2035 uint64_t sleep_start = 0;
2036
/*
 * Sample the entry time only when one of the mutex-block lockstat probes
 * is enabled; sleep_start stays 0 otherwise and no record is emitted at
 * the end of the function.
 */
2037 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
2038 sleep_start = mach_absolute_time();
2039 }
2040#endif
6d2010ae 2041 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
3e170ce0 2042 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
b0d623f7
A
2043
/*
 * Start from the waiter's current scheduled priority and raise it to at
 * least its base priority and at least BASEPRI_DEFAULT.
 */
2044 priority = self->sched_pri;
2045
3e170ce0
A
2046 if (priority < self->base_pri)
2047 priority = self->base_pri;
b0d623f7
A
2048 if (priority < BASEPRI_DEFAULT)
2049 priority = BASEPRI_DEFAULT;
2050
fe8ab488
A
2051 /* Do not promote past promotion ceiling */
2052 priority = MIN(priority, MAXPRI_PROMOTE);
39236c6e 2053
/*
 * Publish this waiter's priority on the mutex if it is the first waiter
 * or outranks the value already recorded, then count it as a waiter.
 */
6d2010ae 2054 if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
b0d623f7 2055 mutex->lck_mtx_pri = priority;
6d2010ae 2056 mutex->lck_mtx_waiters++;
b0d623f7 2057
/*
 * Promote the current owner (if any) when its sched_pri is below the
 * priority recorded on the mutex. The comparison is made once without
 * the thread lock and repeated after splsched() + thread_lock().
 *
 * NOTE(review): the guards compare against mutex->lck_mtx_pri, but the
 * value handed to set_sched_pri() (and traced) is the local 'priority'.
 * These differ when lck_mtx_pri was already higher than this waiter's
 * priority -- confirm that using 'priority' here is intended.
 */
6d2010ae
A
2058 if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
2059 holder->sched_pri < mutex->lck_mtx_pri ) {
b0d623f7
A
2060 s = splsched();
2061 thread_lock(holder);
2062
fe8ab488
A
2063 /* holder priority may have been bumped by another thread
2064 * before thread_lock was taken
2065 */
6d2010ae 2066 if (holder->sched_pri < mutex->lck_mtx_pri) {
b0d623f7
A
2067 KERNEL_DEBUG_CONSTANT(
2068 MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
3e170ce0 2069 holder->sched_pri, priority, thread_tid(holder), trace_lck, 0);
fe8ab488
A
2070 /* Assert that we're not altering the priority of a
2071 * thread above the MAXPRI_PROMOTE band
2072 */
2073 assert(holder->sched_pri < MAXPRI_PROMOTE);
b0d623f7
A
2074 set_sched_pri(holder, priority);
2075
/*
 * Charge a promotion to the holder only while lck_mtx_promoted is
 * still 0; the flag is then set so that this path does not count the
 * same mutex again.
 */
2076 if (mutex->lck_mtx_promoted == 0) {
2077 holder->promotions++;
6d2010ae
A
2078 holder->sched_flags |= TH_SFLAG_PROMOTED;
2079
b0d623f7
A
2080 mutex->lck_mtx_promoted = 1;
2081 }
2082 }
2083 thread_unlock(holder);
2084 splx(s);
2085 }
/*
 * Register the wait on the mutex's event (THREAD_UNINT) while the
 * interlock is still held, then drop the interlock and block.
 */
3e170ce0 2086 assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
b0d623f7
A
2087
2088 lck_mtx_ilk_unlock(mutex);
2089
2090 thread_block(THREAD_CONTINUE_NULL);
2091
/*
 * The mutex fields traced below are read after lck_mtx_ilk_unlock(),
 * i.e. this function no longer holds the interlock at this point.
 */
6d2010ae 2092 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
3e170ce0 2093 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
b0d623f7
A
2094
2095#if CONFIG_DTRACE
2096 /*
2097 * Record the Dtrace lockstat probe for blocking, block time
2098 * measured from when we were entered.
 * The probe id depends on mutex->lck_mtx_is_ext: 0 selects
 * LS_LCK_MTX_LOCK_BLOCK, non-zero selects LS_LCK_MTX_EXT_LOCK_BLOCK.
2099 */
2100 if (sleep_start) {
6d2010ae 2101 if (mutex->lck_mtx_is_ext == 0) {
b0d623f7
A
2102 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
2103 mach_absolute_time() - sleep_start);
2104 } else {
2105 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
2106 mach_absolute_time() - sleep_start);
2107 }
2108 }
2109#endif
0c530ab8 2110}
3e170ce0
A
2111
2112/*
2113 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2114 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2115 * Returns: TRUE if lock is acquired.
2116 */
2117boolean_t
2118kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
2119{
2120 if (not_in_kdp) {
2121 panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
2122 }
2123
2124 if (lck->lck_mtx_sw.lck_mtxd.lck_mtxd_ilocked || lck->lck_mtx_sw.lck_mtxd.lck_mtxd_mlocked) {
2125 return TRUE;
2126 }
2127
2128 return FALSE;
2129}
2130