apple/xnu (xnu-1699.24.8) blame: osfmk/i386/locks_i386.c
91447636 1/*
b0d623f7 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
91447636 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
91447636 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
91447636
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
64#include <mach_kdb.h>
65#include <mach_ldebug.h>
66
67#include <kern/lock.h>
68#include <kern/locks.h>
69#include <kern/kalloc.h>
70#include <kern/misc_protos.h>
71#include <kern/thread.h>
72#include <kern/processor.h>
73#include <kern/cpu_data.h>
74#include <kern/cpu_number.h>
75#include <kern/sched_prim.h>
76#include <kern/xpr.h>
77#include <kern/debug.h>
78#include <string.h>
79
80#if MACH_KDB
81#include <ddb/db_command.h>
82#include <ddb/db_output.h>
83#include <ddb/db_sym.h>
84#include <ddb/db_print.h>
85#endif /* MACH_KDB */
060df5ea 86#include <i386/machine_routines.h> /* machine_timeout_suspended() */
b0d623f7 87#include <machine/machine_cpu.h>
060df5ea 88#include <i386/mp.h>
91447636
A
89
90#include <sys/kdebug.h>
6d2010ae 91#include <mach/branch_predicates.h>
91447636 92
2d21ac55
A
93/*
94 * We need only enough declarations from the BSD-side to be able to
95 * test if our probe is active, and to call __dtrace_probe(). Setting
96 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
97 */
98#if CONFIG_DTRACE
99#define NEED_DTRACE_DEFS
100#include <../bsd/sys/lockstat.h>
101#endif
102
91447636
A
103#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
104#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
105#define LCK_RW_LCK_SHARED_CODE 0x102
106#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
107#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
108#define LCK_RW_LCK_EX_TO_SH_CODE 0x105
109
b0d623f7
A
110#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
111#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
112#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
113#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
114#define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
115#define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
116#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
117#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
118
91447636
A
119
120#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
121
122unsigned int LcksOpts=0;
91447636
A
123
124/* Forwards */
125
126#if MACH_KDB
127void db_print_simple_lock(
128 simple_lock_t addr);
91447636
A
129#endif /* MACH_KDB */
130
131
132#if USLOCK_DEBUG
133/*
134 * Perform simple lock checks.
135 */
136int uslock_check = 1;
137int max_lock_loops = 100000000;
138decl_simple_lock_data(extern , printf_lock)
139decl_simple_lock_data(extern , panic_lock)
91447636
A
140#endif /* USLOCK_DEBUG */
141
142
143/*
144 * We often want to know the addresses of the callers
145 * of the various lock routines. However, this information
146 * is only used for debugging and statistics.
147 */
148typedef void *pc_t;
149#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
150#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
151#if ANY_LOCK_DEBUG
b0d623f7 152#define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
91447636
A
153#define DECL_PC(pc) pc_t pc;
154#else /* ANY_LOCK_DEBUG */
155#define DECL_PC(pc)
156#ifdef lint
157/*
158 * Eliminate lint complaints about unused local pc variables.
159 */
b0d623f7 160#define OBTAIN_PC(pc) ++pc
91447636 161#else /* lint */
b0d623f7 162#define OBTAIN_PC(pc)
91447636
A
163#endif /* lint */
 164#endif /* ANY_LOCK_DEBUG */
165
166
167/*
168 * Portable lock package implementation of usimple_locks.
169 */
170
171#if USLOCK_DEBUG
172#define USLDBG(stmt) stmt
173void usld_lock_init(usimple_lock_t, unsigned short);
174void usld_lock_pre(usimple_lock_t, pc_t);
175void usld_lock_post(usimple_lock_t, pc_t);
176void usld_unlock(usimple_lock_t, pc_t);
177void usld_lock_try_pre(usimple_lock_t, pc_t);
178void usld_lock_try_post(usimple_lock_t, pc_t);
179int usld_lock_common_checks(usimple_lock_t, char *);
180#else /* USLOCK_DEBUG */
181#define USLDBG(stmt)
182#endif /* USLOCK_DEBUG */
183
b0d623f7
A
184
185extern int lck_rw_grab_want(lck_rw_t *lck);
186extern int lck_rw_grab_shared(lck_rw_t *lck);
187extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);
188
189
2d21ac55
A
190/*
191 * Forward definitions
192 */
193
194void lck_rw_lock_shared_gen(
195 lck_rw_t *lck);
196
b0d623f7
A
197void lck_rw_lock_exclusive_gen(
198 lck_rw_t *lck);
199
200boolean_t lck_rw_lock_shared_to_exclusive_success(
2d21ac55
A
201 lck_rw_t *lck);
202
b0d623f7
A
203boolean_t lck_rw_lock_shared_to_exclusive_failure(
204 lck_rw_t *lck,
205 int prior_lock_state);
206
207void lck_rw_lock_exclusive_to_shared_gen(
208 lck_rw_t *lck,
209 int prior_lock_state);
210
211lck_rw_type_t lck_rw_done_gen(
212 lck_rw_t *lck,
213 int prior_lock_state);
214
91447636
A
215/*
216 * Routine: lck_spin_alloc_init
217 */
218lck_spin_t *
219lck_spin_alloc_init(
220 lck_grp_t *grp,
221 lck_attr_t *attr)
222{
223 lck_spin_t *lck;
224
225 if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
226 lck_spin_init(lck, grp, attr);
227
228 return(lck);
229}
230
231/*
232 * Routine: lck_spin_free
233 */
234void
235lck_spin_free(
236 lck_spin_t *lck,
237 lck_grp_t *grp)
238{
239 lck_spin_destroy(lck, grp);
240 kfree(lck, sizeof(lck_spin_t));
241}
242
243/*
244 * Routine: lck_spin_init
245 */
246void
247lck_spin_init(
248 lck_spin_t *lck,
249 lck_grp_t *grp,
250 __unused lck_attr_t *attr)
251{
252 usimple_lock_init((usimple_lock_t) lck, 0);
253 lck_grp_reference(grp);
254 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
255}
256
257/*
258 * Routine: lck_spin_destroy
259 */
260void
261lck_spin_destroy(
262 lck_spin_t *lck,
263 lck_grp_t *grp)
264{
b0d623f7 265 if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
91447636 266 return;
b0d623f7 267 lck->interlock = LCK_SPIN_TAG_DESTROYED;
91447636
A
268 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
269 lck_grp_deallocate(grp);
270 return;
271}
272
273/*
274 * Routine: lck_spin_lock
275 */
276void
277lck_spin_lock(
278 lck_spin_t *lck)
279{
280 usimple_lock((usimple_lock_t) lck);
281}
282
283/*
284 * Routine: lck_spin_unlock
285 */
286void
287lck_spin_unlock(
288 lck_spin_t *lck)
289{
290 usimple_unlock((usimple_lock_t) lck);
291}
292
293
294/*
295 * Routine: lck_spin_try_lock
296 */
297boolean_t
298lck_spin_try_lock(
299 lck_spin_t *lck)
300{
2d21ac55 301 return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
91447636
A
302}
303
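/*
 * Editor's note: the sketch below is illustrative and not part of the
 * original file.  It shows how a client of the lck_spin_* routines
 * defined above might manage a spin lock's lifecycle, assuming a lock
 * group obtained from lck_grp_alloc_init()/lck_grp_free() in
 * kern/locks.h.  The names example_spin_demo and "example" are
 * hypothetical.
 */
#if 0	/* illustrative example, not part of the build */
static void
example_spin_demo(void)
{
	lck_grp_t	*grp;
	lck_spin_t	*slock;

	grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
	slock = lck_spin_alloc_init(grp, LCK_ATTR_NULL);

	lck_spin_lock(slock);			/* returns with preemption disabled */
	/* ... short critical section, no blocking ... */
	lck_spin_unlock(slock);

	if (lck_spin_try_lock(slock)) {		/* conditional acquire */
		lck_spin_unlock(slock);
	}

	lck_spin_free(slock, grp);		/* lck_spin_destroy() + kfree() */
	lck_grp_free(grp);
}
#endif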
304/*
305 * Initialize a usimple_lock.
306 *
307 * No change in preemption state.
308 */
309void
310usimple_lock_init(
311 usimple_lock_t l,
312 __unused unsigned short tag)
313{
314#ifndef MACHINE_SIMPLE_LOCK
315 USLDBG(usld_lock_init(l, tag));
316 hw_lock_init(&l->interlock);
317#else
318 simple_lock_init((simple_lock_t)l,tag);
319#endif
320}
321
060df5ea
A
322volatile uint32_t spinlock_owner_cpu = ~0;
323volatile usimple_lock_t spinlock_timed_out;
324
325static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
326 uint64_t deadline;
327 uint32_t i;
328
329 for (i = 0; i < real_ncpus; i++) {
330 if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
331 spinlock_owner_cpu = i;
6d2010ae 332 if ((uint32_t) cpu_number() == i)
060df5ea
A
333 break;
334 cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
335 cpu_NMI_interrupt(i);
336 deadline = mach_absolute_time() + (LockTimeOut * 2);
337 while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
338 cpu_pause();
339 break;
340 }
341 }
342
343 return spinlock_owner_cpu;
344}
91447636
A
345
346/*
347 * Acquire a usimple_lock.
348 *
349 * Returns with preemption disabled. Note
350 * that the hw_lock routines are responsible for
351 * maintaining preemption state.
352 */
353void
354usimple_lock(
355 usimple_lock_t l)
356{
357#ifndef MACHINE_SIMPLE_LOCK
2d21ac55 358 DECL_PC(pc);
91447636 359
b0d623f7 360 OBTAIN_PC(pc);
91447636 361 USLDBG(usld_lock_pre(l, pc));
6d2010ae
A
362
363 if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0)) {
b0d623f7 364 boolean_t uslock_acquired = FALSE;
060df5ea
A
365 while (machine_timeout_suspended()) {
366 enable_preemption();
367 if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
368 break;
6d2010ae
A
369 }
370
060df5ea
A
371 if (uslock_acquired == FALSE) {
372 uint32_t lock_cpu;
373 spinlock_timed_out = l;
374 lock_cpu = spinlock_timeout_NMI((uintptr_t)l->interlock.lock_data);
375 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x", l, (uintptr_t)l->interlock.lock_data, current_thread(), lock_cpu);
376 }
b0d623f7 377 }
91447636
A
378 USLDBG(usld_lock_post(l, pc));
379#else
380 simple_lock((simple_lock_t)l);
381#endif
382}
383
384
385/*
386 * Release a usimple_lock.
387 *
388 * Returns with preemption enabled. Note
389 * that the hw_lock routines are responsible for
390 * maintaining preemption state.
391 */
392void
393usimple_unlock(
394 usimple_lock_t l)
395{
396#ifndef MACHINE_SIMPLE_LOCK
397 DECL_PC(pc);
398
b0d623f7 399 OBTAIN_PC(pc);
91447636
A
400 USLDBG(usld_unlock(l, pc));
401 hw_lock_unlock(&l->interlock);
402#else
403 simple_unlock_rwmb((simple_lock_t)l);
404#endif
405}
406
407
408/*
409 * Conditionally acquire a usimple_lock.
410 *
411 * On success, returns with preemption disabled.
412 * On failure, returns with preemption in the same state
413 * as when first invoked. Note that the hw_lock routines
414 * are responsible for maintaining preemption state.
415 *
416 * XXX No stats are gathered on a miss; I preserved this
417 * behavior from the original assembly-language code, but
418 * doesn't it make sense to log misses? XXX
419 */
420unsigned int
421usimple_lock_try(
422 usimple_lock_t l)
423{
424#ifndef MACHINE_SIMPLE_LOCK
91447636 425 unsigned int success;
2d21ac55 426 DECL_PC(pc);
91447636 427
b0d623f7 428 OBTAIN_PC(pc);
91447636
A
429 USLDBG(usld_lock_try_pre(l, pc));
430 if ((success = hw_lock_try(&l->interlock))) {
431 USLDBG(usld_lock_try_post(l, pc));
432 }
433 return success;
434#else
435 return(simple_lock_try((simple_lock_t)l));
436#endif
437}
438
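/*
 * Editor's note: illustrative sketch, not part of the original file.
 * It demonstrates the preemption contract documented above for
 * usimple_lock_try(): on success the caller runs with preemption
 * disabled until usimple_unlock(); on failure preemption is left in
 * its prior state.  example_lock and example_try_path() are
 * hypothetical; the lock is assumed initialized elsewhere with
 * usimple_lock_init().
 */
#if 0	/* illustrative example, not part of the build */
decl_simple_lock_data(static, example_lock)

static boolean_t
example_try_path(void)
{
	if (!usimple_lock_try(&example_lock))
		return FALSE;			/* lock busy; preemption state unchanged */

	/* preemption is disabled here */
	usimple_unlock(&example_lock);		/* re-enables preemption */
	return TRUE;
}
#endif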
439#if USLOCK_DEBUG
440/*
441 * States of a usimple_lock. The default when initializing
442 * a usimple_lock is setting it up for debug checking.
443 */
444#define USLOCK_CHECKED 0x0001 /* lock is being checked */
445#define USLOCK_TAKEN 0x0002 /* lock has been taken */
446#define USLOCK_INIT 0xBAA0 /* lock has been initialized */
447#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
448#define USLOCK_CHECKING(l) (uslock_check && \
449 ((l)->debug.state & USLOCK_CHECKED))
450
451/*
452 * Trace activities of a particularly interesting lock.
453 */
454void usl_trace(usimple_lock_t, int, pc_t, const char *);
455
456
457/*
458 * Initialize the debugging information contained
459 * in a usimple_lock.
460 */
461void
462usld_lock_init(
463 usimple_lock_t l,
464 __unused unsigned short tag)
465{
466 if (l == USIMPLE_LOCK_NULL)
467 panic("lock initialization: null lock pointer");
468 l->lock_type = USLOCK_TAG;
469 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
470 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
471 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
472 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
473 l->debug.duration[0] = l->debug.duration[1] = 0;
474 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
475 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
476 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
477}
478
479
480/*
481 * These checks apply to all usimple_locks, not just
482 * those with USLOCK_CHECKED turned on.
483 */
484int
485usld_lock_common_checks(
486 usimple_lock_t l,
487 char *caller)
488{
489 if (l == USIMPLE_LOCK_NULL)
490 panic("%s: null lock pointer", caller);
491 if (l->lock_type != USLOCK_TAG)
ebb1b9f4 492 panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
91447636 493 if (!(l->debug.state & USLOCK_INIT))
ebb1b9f4 494 panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
91447636
A
495 return USLOCK_CHECKING(l);
496}
497
498
499/*
500 * Debug checks on a usimple_lock just before attempting
501 * to acquire it.
502 */
503/* ARGSUSED */
504void
505usld_lock_pre(
506 usimple_lock_t l,
507 pc_t pc)
508{
509 char caller[] = "usimple_lock";
510
511
512 if (!usld_lock_common_checks(l, caller))
513 return;
514
515/*
 516 * Note that we have a weird case where we are getting a lock when we are
 517 * in the process of putting the system to sleep. We are running with no
 518 * current threads, therefore we can't tell if we are trying to retake a lock
 519 * we already hold or if another processor holds it. Therefore we just
 520 * ignore this test if the locking thread is 0.
521 */
522
523 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
524 l->debug.lock_thread == (void *) current_thread()) {
2d21ac55
A
525 printf("%s: lock %p already locked (at %p) by",
526 caller, l, l->debug.lock_pc);
527 printf(" current thread %p (new attempt at pc %p)\n",
91447636 528 l->debug.lock_thread, pc);
2d21ac55 529 panic("%s", caller);
91447636
A
530 }
531 mp_disable_preemption();
532 usl_trace(l, cpu_number(), pc, caller);
533 mp_enable_preemption();
534}
535
536
537/*
538 * Debug checks on a usimple_lock just after acquiring it.
539 *
540 * Pre-emption has been disabled at this point,
541 * so we are safe in using cpu_number.
542 */
543void
544usld_lock_post(
545 usimple_lock_t l,
546 pc_t pc)
547{
548 register int mycpu;
549 char caller[] = "successful usimple_lock";
550
551
552 if (!usld_lock_common_checks(l, caller))
553 return;
554
555 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7
A
556 panic("%s: lock %p became uninitialized",
557 caller, l);
91447636 558 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7
A
559 panic("%s: lock 0x%p became TAKEN by someone else",
560 caller, l);
91447636
A
561
562 mycpu = cpu_number();
563 l->debug.lock_thread = (void *)current_thread();
564 l->debug.state |= USLOCK_TAKEN;
565 l->debug.lock_pc = pc;
566 l->debug.lock_cpu = mycpu;
567
568 usl_trace(l, mycpu, pc, caller);
569}
570
571
572/*
573 * Debug checks on a usimple_lock just before
574 * releasing it. Note that the caller has not
575 * yet released the hardware lock.
576 *
577 * Preemption is still disabled, so there's
578 * no problem using cpu_number.
579 */
580void
581usld_unlock(
582 usimple_lock_t l,
583 pc_t pc)
584{
585 register int mycpu;
586 char caller[] = "usimple_unlock";
587
588
589 if (!usld_lock_common_checks(l, caller))
590 return;
591
592 mycpu = cpu_number();
593
594 if (!(l->debug.state & USLOCK_TAKEN))
b0d623f7
A
595 panic("%s: lock 0x%p hasn't been taken",
596 caller, l);
91447636 597 if (l->debug.lock_thread != (void *) current_thread())
b0d623f7
A
598 panic("%s: unlocking lock 0x%p, owned by thread %p",
599 caller, l, l->debug.lock_thread);
91447636 600 if (l->debug.lock_cpu != mycpu) {
b0d623f7
A
601 printf("%s: unlocking lock 0x%p on cpu 0x%x",
602 caller, l, mycpu);
91447636 603 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
2d21ac55 604 panic("%s", caller);
91447636
A
605 }
606 usl_trace(l, mycpu, pc, caller);
607
608 l->debug.unlock_thread = l->debug.lock_thread;
 609 l->debug.lock_thread = INVALID_THREAD;
610 l->debug.state &= ~USLOCK_TAKEN;
611 l->debug.unlock_pc = pc;
612 l->debug.unlock_cpu = mycpu;
613}
614
615
616/*
617 * Debug checks on a usimple_lock just before
618 * attempting to acquire it.
619 *
620 * Preemption isn't guaranteed to be disabled.
621 */
622void
623usld_lock_try_pre(
624 usimple_lock_t l,
625 pc_t pc)
626{
627 char caller[] = "usimple_lock_try";
628
629 if (!usld_lock_common_checks(l, caller))
630 return;
631 mp_disable_preemption();
632 usl_trace(l, cpu_number(), pc, caller);
633 mp_enable_preemption();
634}
635
636
637/*
638 * Debug checks on a usimple_lock just after
639 * successfully attempting to acquire it.
640 *
641 * Preemption has been disabled by the
642 * lock acquisition attempt, so it's safe
643 * to use cpu_number.
644 */
645void
646usld_lock_try_post(
647 usimple_lock_t l,
648 pc_t pc)
649{
650 register int mycpu;
651 char caller[] = "successful usimple_lock_try";
652
653 if (!usld_lock_common_checks(l, caller))
654 return;
655
656 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7
A
657 panic("%s: lock 0x%p became uninitialized",
658 caller, l);
91447636 659 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7
A
660 panic("%s: lock 0x%p became TAKEN by someone else",
661 caller, l);
91447636
A
662
663 mycpu = cpu_number();
664 l->debug.lock_thread = (void *) current_thread();
665 l->debug.state |= USLOCK_TAKEN;
666 l->debug.lock_pc = pc;
667 l->debug.lock_cpu = mycpu;
668
669 usl_trace(l, mycpu, pc, caller);
670}
671
672
673/*
674 * For very special cases, set traced_lock to point to a
675 * specific lock of interest. The result is a series of
676 * XPRs showing lock operations on that lock. The lock_seq
677 * value is used to show the order of those operations.
678 */
679usimple_lock_t traced_lock;
680unsigned int lock_seq;
681
682void
683usl_trace(
684 usimple_lock_t l,
685 int mycpu,
686 pc_t pc,
687 const char * op_name)
688{
689 if (traced_lock == l) {
690 XPR(XPR_SLOCK,
691 "seq %d, cpu %d, %s @ %x\n",
b0d623f7
A
692 (uintptr_t) lock_seq, (uintptr_t) mycpu,
693 (uintptr_t) op_name, (uintptr_t) pc, 0);
91447636
A
694 lock_seq++;
695 }
696}
697
698
699#endif /* USLOCK_DEBUG */
700
701/*
702 * Routine: lock_alloc
703 * Function:
704 * Allocate a lock for external users who cannot
705 * hard-code the structure definition into their
706 * objects.
707 * For now just use kalloc, but a zone is probably
708 * warranted.
709 */
710lock_t *
711lock_alloc(
712 boolean_t can_sleep,
713 unsigned short tag,
714 unsigned short tag1)
715{
716 lock_t *l;
717
718 if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
719 lock_init(l, can_sleep, tag, tag1);
720 return(l);
721}
722
723/*
724 * Routine: lock_free
725 * Function:
726 * Free a lock allocated for external users.
727 * For now just use kfree, but a zone is probably
728 * warranted.
729 */
730void
731lock_free(
732 lock_t *l)
733{
734 kfree(l, sizeof(lock_t));
735}
736
737
738/*
739 * Routine: lock_init
740 * Function:
741 * Initialize a lock; required before use.
742 * Note that clients declare the "struct lock"
743 * variables and then initialize them, rather
744 * than getting a new one from this module.
745 */
746void
747lock_init(
748 lock_t *l,
749 boolean_t can_sleep,
750 __unused unsigned short tag,
0c530ab8 751 __unused unsigned short tag1)
91447636 752{
2d21ac55
A
753 hw_lock_byte_init(&l->lck_rw_interlock);
754 l->lck_rw_want_write = FALSE;
755 l->lck_rw_want_upgrade = FALSE;
756 l->lck_rw_shared_count = 0;
757 l->lck_rw_can_sleep = can_sleep;
0c530ab8 758 l->lck_rw_tag = tag;
2d21ac55 759 l->lck_rw_priv_excl = 1;
b0d623f7 760 l->lck_r_waiting = l->lck_w_waiting = 0;
91447636
A
761}
762
763
764/*
765 * Sleep locks. These use the same data structure and algorithm
766 * as the spin locks, but the process sleeps while it is waiting
767 * for the lock. These work on uniprocessor systems.
768 */
769
770#define DECREMENTER_TIMEOUT 1000000
771
772void
773lock_write(
774 register lock_t * l)
775{
0c530ab8 776 lck_rw_lock_exclusive(l);
91447636
A
777}
778
779void
780lock_done(
781 register lock_t * l)
782{
0c530ab8 783 (void) lck_rw_done(l);
91447636
A
784}
785
786void
787lock_read(
788 register lock_t * l)
789{
0c530ab8 790 lck_rw_lock_shared(l);
91447636
A
791}
792
793
794/*
795 * Routine: lock_read_to_write
796 * Function:
797 * Improves a read-only lock to one with
798 * write permission. If another reader has
799 * already requested an upgrade to a write lock,
800 * no lock is held upon return.
801 *
2d21ac55 802 * Returns FALSE if the upgrade *failed*.
91447636
A
803 */
804
805boolean_t
806lock_read_to_write(
807 register lock_t * l)
808{
0c530ab8 809 return lck_rw_lock_shared_to_exclusive(l);
91447636
A
810}
811
812void
813lock_write_to_read(
814 register lock_t * l)
815{
0c530ab8 816 lck_rw_lock_exclusive_to_shared(l);
8f6c56a5
A
817}
818
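/*
 * Editor's note: illustrative sketch, not part of the original file.
 * It exercises the legacy lock_*() wrappers defined above, including
 * the documented contract of lock_read_to_write(): when the upgrade
 * fails (returns FALSE) the lock is no longer held at all, so the
 * caller must re-acquire it.  example_upgrade() is hypothetical.
 */
#if 0	/* illustrative example, not part of the build */
static void
example_upgrade(lock_t *l)
{
	lock_read(l);				/* shared hold */

	if (!lock_read_to_write(l)) {
		/* upgrade failed: the read hold was dropped for us */
		lock_write(l);			/* start over, exclusive */
	}
	/* exclusive hold either way at this point */
	lock_write_to_read(l);			/* downgrade back to shared */
	lock_done(l);				/* releases whatever mode is held */
}
#endif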
8f6c56a5 819
91447636
A
820
821/*
822 * Routine: lck_rw_alloc_init
823 */
824lck_rw_t *
825lck_rw_alloc_init(
826 lck_grp_t *grp,
827 lck_attr_t *attr) {
828 lck_rw_t *lck;
829
b0d623f7
A
830 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
831 bzero(lck, sizeof(lck_rw_t));
91447636 832 lck_rw_init(lck, grp, attr);
b0d623f7
A
833 }
834
91447636
A
835 return(lck);
836}
837
838/*
839 * Routine: lck_rw_free
840 */
841void
842lck_rw_free(
843 lck_rw_t *lck,
844 lck_grp_t *grp) {
845 lck_rw_destroy(lck, grp);
846 kfree(lck, sizeof(lck_rw_t));
847}
848
849/*
850 * Routine: lck_rw_init
851 */
852void
853lck_rw_init(
854 lck_rw_t *lck,
855 lck_grp_t *grp,
0c530ab8
A
856 lck_attr_t *attr)
857{
858 lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
859 attr : &LockDefaultLckAttr;
91447636 860
2d21ac55
A
861 hw_lock_byte_init(&lck->lck_rw_interlock);
862 lck->lck_rw_want_write = FALSE;
863 lck->lck_rw_want_upgrade = FALSE;
864 lck->lck_rw_shared_count = 0;
865 lck->lck_rw_can_sleep = TRUE;
b0d623f7 866 lck->lck_r_waiting = lck->lck_w_waiting = 0;
91447636 867 lck->lck_rw_tag = 0;
2d21ac55
A
868 lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
869 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
91447636
A
870
871 lck_grp_reference(grp);
872 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
873}
874
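/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Typical lifecycle of an lck_rw_t built from the routines above.
 * lck_grp_alloc_init()/lck_grp_free() come from kern/locks.h and are
 * assumed available; example_rw_lifecycle() is hypothetical.
 */
#if 0	/* illustrative example, not part of the build */
static void
example_rw_lifecycle(void)
{
	lck_grp_t	*grp = lck_grp_alloc_init("example-rw", LCK_GRP_ATTR_NULL);
	lck_rw_t	*rwl = lck_rw_alloc_init(grp, LCK_ATTR_NULL);

	lck_rw_lock_shared(rwl);	/* many readers may hold this at once */
	lck_rw_unlock_shared(rwl);

	lck_rw_lock_exclusive(rwl);	/* single writer */
	lck_rw_unlock_exclusive(rwl);

	lck_rw_free(rwl, grp);		/* lck_rw_destroy() + kfree() */
	lck_grp_free(grp);
}
#endif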
875/*
876 * Routine: lck_rw_destroy
877 */
878void
879lck_rw_destroy(
880 lck_rw_t *lck,
b0d623f7
A
881 lck_grp_t *grp)
882{
91447636
A
883 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
884 return;
885 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
886 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
887 lck_grp_deallocate(grp);
888 return;
889}
890
891/*
892 * Sleep locks. These use the same data structure and algorithm
893 * as the spin locks, but the process sleeps while it is waiting
894 * for the lock. These work on uniprocessor systems.
895 */
896
897#define DECREMENTER_TIMEOUT 1000000
898
2d21ac55
A
899#define RW_LOCK_READER_EVENT(x) \
900 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))
901
902#define RW_LOCK_WRITER_EVENT(x) \
903 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
91447636
A
904
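/*
 * Editor's note (explanatory comment, not part of the original file):
 * these macros derive two distinct wait channels from addresses inside
 * the lock itself -- readers block on &lck->lck_rw_tag and writers on
 * &lck->lck_rw_pad8 -- so the wakeup paths below (lck_rw_done_gen(),
 * lck_rw_lock_exclusive_to_shared_gen(), ...) can call thread_wakeup()
 * on waiting readers and waiting writers independently.  The fields are
 * used only as unique event_t values; nothing is stored through them
 * for the purpose of waiting.
 */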
905/*
6d2010ae
A
906 * We disable interrupts while holding the RW interlock to prevent an
907 * interrupt from exacerbating hold time.
91447636
A
908 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
909 */
910static boolean_t
911lck_interlock_lock(lck_rw_t *lck)
912{
913 boolean_t istate;
914
915 istate = ml_set_interrupts_enabled(FALSE);
2d21ac55 916 hw_lock_byte_lock(&lck->lck_rw_interlock);
91447636
A
917
918 return istate;
919}
920
921static void
922lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
923{
2d21ac55 924 hw_lock_byte_unlock(&lck->lck_rw_interlock);
91447636
A
925 ml_set_interrupts_enabled(istate);
926}
927
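/*
 * Editor's note: illustrative sketch, not part of the original file.
 * It shows the canonical pattern used throughout this file for the
 * helpers above: take the interlock (which also disables interrupts),
 * examine or modify the lock word and wait bits, then restore the
 * saved interrupt state.  example_interlock_pattern() is hypothetical.
 */
#if 0	/* illustrative example, not part of the build */
static void
example_interlock_pattern(lck_rw_t *lck)
{
	boolean_t	istate;

	istate = lck_interlock_lock(lck);	/* interrupts off + interlock byte held */
	/* ... examine or modify the lck_rw_* fields atomically here ... */
	lck_interlock_unlock(lck, istate);	/* interlock released, interrupt state restored */
}
#endif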
0c530ab8
A
928/*
929 * This inline is used when busy-waiting for an rw lock.
930 * If interrupts were disabled when the lock primitive was called,
931 * we poll the IPI handler for pending tlb flushes.
932 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
933 */
934static inline void
935lck_rw_lock_pause(boolean_t interrupts_enabled)
936{
937 if (!interrupts_enabled)
938 handle_pending_TLB_flushes();
939 cpu_pause();
940}
941
b0d623f7
A
942
943/*
944 * compute the deadline to spin against when
945 * waiting for a change of state on a lck_rw_t
946 */
947static inline uint64_t
948lck_rw_deadline_for_spin(lck_rw_t *lck)
949{
950 if (lck->lck_rw_can_sleep) {
951 if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
952 /*
953 * there are already threads waiting on this lock... this
954 * implies that they have spun beyond their deadlines waiting for
955 * the desired state to show up so we will not bother spinning at this time...
956 * or
957 * the current number of threads sharing this lock exceeds our capacity to run them
958 * concurrently and since all states we're going to spin for require the rw_shared_count
959 * to be at 0, we'll not bother spinning since the latency for this to happen is
960 * unpredictable...
961 */
962 return (mach_absolute_time());
963 }
964 return (mach_absolute_time() + MutexSpin);
965 } else
966 return (mach_absolute_time() + (100000LL * 1000000000LL));
967}
968
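/*
 * Editor's note (explanatory comment, not part of the original file):
 * lck_rw_deadline_for_spin() above encodes three cases.  For a
 * sleepable lock that already has waiters queued, or more readers than
 * CPUs, it returns mach_absolute_time() itself, i.e. "don't bother
 * spinning".  For a sleepable lock otherwise it allows a spin window of
 * MutexSpin.  For a lock that cannot sleep it returns a deadline so far
 * in the future that the caller effectively spins until the state
 * changes.
 */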
969
91447636
A
970/*
971 * Routine: lck_rw_lock_exclusive
972 */
973void
b0d623f7 974lck_rw_lock_exclusive_gen(
91447636
A
975 lck_rw_t *lck)
976{
b0d623f7
A
977 uint64_t deadline = 0;
978 int slept = 0;
979 int gotlock = 0;
980 int lockheld = 0;
981 wait_result_t res = 0;
982 boolean_t istate = -1;
91447636 983
2d21ac55 984#if CONFIG_DTRACE
b0d623f7
A
985 boolean_t dtrace_ls_initialized = FALSE;
986 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
987 uint64_t wait_interval = 0;
988 int readers_at_sleep = 0;
2d21ac55 989#endif
91447636 990
91447636 991 /*
2d21ac55 992 * Try to acquire the lck_rw_want_write bit.
91447636 993 */
b0d623f7 994 while ( !lck_rw_grab_want(lck)) {
91447636 995
2d21ac55 996#if CONFIG_DTRACE
b0d623f7
A
997 if (dtrace_ls_initialized == FALSE) {
998 dtrace_ls_initialized = TRUE;
999 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1000 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1001 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1002 if (dtrace_ls_enabled) {
1003 /*
1004 * Either sleeping or spinning is happening,
1005 * start a timing of our delay interval now.
1006 */
1007 readers_at_sleep = lck->lck_rw_shared_count;
1008 wait_interval = mach_absolute_time();
1009 }
91447636 1010 }
2d21ac55 1011#endif
b0d623f7
A
1012 if (istate == -1)
1013 istate = ml_get_interrupts_enabled();
91447636 1014
b0d623f7
A
1015 deadline = lck_rw_deadline_for_spin(lck);
1016
1017 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1018
1019 while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
1020 lck_rw_lock_pause(istate);
1021
1022 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);
1023
1024 if (gotlock)
1025 break;
1026 /*
1027 * if we get here, the deadline has expired w/o us
1028 * being able to grab the lock exclusively
1029 * check to see if we're allowed to do a thread_block
1030 */
1031 if (lck->lck_rw_can_sleep) {
2d21ac55 1032
91447636 1033 istate = lck_interlock_lock(lck);
91447636 1034
b0d623f7 1035 if (lck->lck_rw_want_write) {
91447636 1036
b0d623f7 1037 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
91447636 1038
b0d623f7 1039 lck->lck_w_waiting = TRUE;
91447636 1040
b0d623f7
A
1041 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1042 lck_interlock_unlock(lck, istate);
91447636 1043
b0d623f7
A
1044 if (res == THREAD_WAITING) {
1045 res = thread_block(THREAD_CONTINUE_NULL);
1046 slept++;
1047 }
1048 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
1049 } else {
1050 lck->lck_rw_want_write = TRUE;
1051 lck_interlock_unlock(lck, istate);
1052 break;
1053 }
1054 }
1055 }
1056 /*
1057 * Wait for readers (and upgrades) to finish...
1058 * the test for these conditions must be done simultaneously with
1059 * a check of the interlock not being held since
1060 * the rw_shared_count will drop to 0 first and then want_upgrade
1061 * will be set to 1 in the shared_to_exclusive scenario... those
1062 * adjustments are done behind the interlock and represent an
1063 * atomic change in state and must be considered as such
1064 * however, once we see the read count at 0, the want_upgrade not set
1065 * and the interlock not held, we are safe to proceed
1066 */
1067 while (lck_rw_held_read_or_upgrade(lck)) {
2d21ac55
A
1068
1069#if CONFIG_DTRACE
1070 /*
1071 * Either sleeping or spinning is happening, start
1072 * a timing of our delay interval now. If we set it
1073 * to -1 we don't have accurate data so we cannot later
1074 * decide to record a dtrace spin or sleep event.
1075 */
b0d623f7
A
1076 if (dtrace_ls_initialized == FALSE) {
1077 dtrace_ls_initialized = TRUE;
1078 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1079 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1080 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1081 if (dtrace_ls_enabled) {
1082 /*
1083 * Either sleeping or spinning is happening,
1084 * start a timing of our delay interval now.
1085 */
1086 readers_at_sleep = lck->lck_rw_shared_count;
1087 wait_interval = mach_absolute_time();
1088 }
2d21ac55
A
1089 }
1090#endif
b0d623f7
A
1091 if (istate == -1)
1092 istate = ml_get_interrupts_enabled();
1093
1094 deadline = lck_rw_deadline_for_spin(lck);
1095
1096 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1097
1098 while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
1099 lck_rw_lock_pause(istate);
1100
1101 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);
1102
1103 if ( !lockheld)
1104 break;
1105 /*
1106 * if we get here, the deadline has expired w/o us
1107 * being able to grab the lock exclusively
1108 * check to see if we're allowed to do a thread_block
1109 */
1110 if (lck->lck_rw_can_sleep) {
91447636 1111
91447636 1112 istate = lck_interlock_lock(lck);
91447636 1113
b0d623f7
A
1114 if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
1115 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1116
1117 lck->lck_w_waiting = TRUE;
1118
1119 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
91447636 1120 lck_interlock_unlock(lck, istate);
b0d623f7
A
1121
1122 if (res == THREAD_WAITING) {
1123 res = thread_block(THREAD_CONTINUE_NULL);
1124 slept++;
1125 }
1126 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
1127 } else {
1128 lck_interlock_unlock(lck, istate);
1129 /*
1130 * must own the lock now, since we checked for
1131 * readers or upgrade owner behind the interlock
1132 * no need for a call to 'lck_rw_held_read_or_upgrade'
1133 */
1134 break;
91447636
A
1135 }
1136 }
91447636
A
1137 }
1138
2d21ac55
A
1139#if CONFIG_DTRACE
1140 /*
1141 * Decide what latencies we suffered that are Dtrace events.
1142 * If we have set wait_interval, then we either spun or slept.
1143 * At least we get out from under the interlock before we record
1144 * which is the best we can do here to minimize the impact
1145 * of the tracing.
1146 * If we have set wait_interval to -1, then dtrace was not enabled when we
1147 * started sleeping/spinning so we don't record this event.
1148 */
b0d623f7 1149 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1150 if (slept == 0) {
1151 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
1152 mach_absolute_time() - wait_interval, 1);
1153 } else {
1154 /*
1155 * For the blocking case, we also record if when we blocked
1156 * it was held for read or write, and how many readers.
1157 * Notice that above we recorded this before we dropped
1158 * the interlock so the count is accurate.
1159 */
1160 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
1161 mach_absolute_time() - wait_interval, 1,
1162 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1163 }
1164 }
1165 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1166#endif
91447636
A
1167}
1168
1169
1170/*
2d21ac55 1171 * Routine: lck_rw_done_gen
b0d623f7
A
1172 *
1173 * called from the assembly language wrapper...
1174 * prior_lock_state is the value in the 1st
1175 * word of the lock at the time of a successful
1176 * atomic compare and exchange with the new value...
1177 * it represents the state of the lock before we
1178 * decremented the rw_shared_count or cleared either
1179 * rw_want_upgrade or rw_want_write and
1180 * the lck_x_waiting bits... since the wrapper
1181 * routine has already changed the state atomically,
1182 * we just need to decide if we should
1183 * wake up anyone and what value to return... we do
1184 * this by examining the state of the lock before
1185 * we changed it
91447636
A
1186 */
1187lck_rw_type_t
2d21ac55 1188lck_rw_done_gen(
b0d623f7
A
1189 lck_rw_t *lck,
1190 int prior_lock_state)
91447636 1191{
b0d623f7
A
1192 lck_rw_t *fake_lck;
1193 lck_rw_type_t lock_type;
91447636
A
1194
1195 /*
b0d623f7
A
 1196 * prior_lock_state is a snapshot of the 1st word of the
 1197 * lock in question... we'll fake up a pointer to it
 1198 * and carefully not access anything beyond what's defined
1199 * in the first word of a lck_rw_t
91447636 1200 */
b0d623f7 1201 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1202
b0d623f7
A
1203 if (fake_lck->lck_rw_shared_count <= 1) {
1204 if (fake_lck->lck_w_waiting)
1205 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
91447636 1206
b0d623f7
A
1207 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1208 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1209 }
1210 if (fake_lck->lck_rw_shared_count)
1211 lock_type = LCK_RW_TYPE_SHARED;
1212 else
1213 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2d21ac55
A
1214
1215#if CONFIG_DTRACE
b0d623f7 1216 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2d21ac55
A
1217#endif
1218
b0d623f7 1219 return(lock_type);
91447636
A
1220}
1221
1222
91447636
A
1223/*
1224 * Routine: lck_rw_unlock
1225 */
1226void
1227lck_rw_unlock(
1228 lck_rw_t *lck,
1229 lck_rw_type_t lck_rw_type)
1230{
1231 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1232 lck_rw_unlock_shared(lck);
1233 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1234 lck_rw_unlock_exclusive(lck);
1235 else
1236 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
1237}
1238
1239
1240/*
1241 * Routine: lck_rw_unlock_shared
1242 */
1243void
1244lck_rw_unlock_shared(
1245 lck_rw_t *lck)
1246{
1247 lck_rw_type_t ret;
1248
1249 ret = lck_rw_done(lck);
1250
1251 if (ret != LCK_RW_TYPE_SHARED)
1252 panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
1253}
1254
1255
1256/*
1257 * Routine: lck_rw_unlock_exclusive
1258 */
1259void
1260lck_rw_unlock_exclusive(
1261 lck_rw_t *lck)
1262{
1263 lck_rw_type_t ret;
1264
1265 ret = lck_rw_done(lck);
1266
1267 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1268 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
1269}
1270
1271
1272/*
1273 * Routine: lck_rw_lock
1274 */
1275void
1276lck_rw_lock(
1277 lck_rw_t *lck,
1278 lck_rw_type_t lck_rw_type)
1279{
1280 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1281 lck_rw_lock_shared(lck);
1282 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1283 lck_rw_lock_exclusive(lck);
1284 else
1285 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
1286}
1287
1288
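/*
 * Editor's note: illustrative sketch, not part of the original file.
 * The type-parameterized wrappers above are convenient when the hold
 * mode is chosen at run time; lck_rw_done() can be used instead of the
 * matching unlock when the caller no longer tracks which mode it
 * holds.  example_typed_hold() is hypothetical.
 */
#if 0	/* illustrative example, not part of the build */
static void
example_typed_hold(lck_rw_t *rwl, boolean_t need_exclusive)
{
	lck_rw_type_t	type;

	type = need_exclusive ? LCK_RW_TYPE_EXCLUSIVE : LCK_RW_TYPE_SHARED;

	lck_rw_lock(rwl, type);
	/* ... critical section ... */
	lck_rw_unlock(rwl, type);	/* or: (void) lck_rw_done(rwl); */
}
#endif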
1289/*
2d21ac55 1290 * Routine: lck_rw_lock_shared_gen
b0d623f7
A
1291 * Function:
1292 * assembly fast path code has determined that this lock
1293 * is held exclusively... this is where we spin/block
1294 * until we can acquire the lock in the shared mode
91447636
A
1295 */
1296void
2d21ac55 1297lck_rw_lock_shared_gen(
91447636
A
1298 lck_rw_t *lck)
1299{
b0d623f7
A
1300 uint64_t deadline = 0;
1301 int gotlock = 0;
1302 int slept = 0;
1303 wait_result_t res = 0;
1304 boolean_t istate = -1;
1305
2d21ac55
A
1306#if CONFIG_DTRACE
1307 uint64_t wait_interval = 0;
b0d623f7
A
1308 int readers_at_sleep = 0;
1309 boolean_t dtrace_ls_initialized = FALSE;
1310 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
2d21ac55 1311#endif
91447636 1312
b0d623f7
A
1313 while ( !lck_rw_grab_shared(lck)) {
1314
2d21ac55 1315#if CONFIG_DTRACE
b0d623f7
A
1316 if (dtrace_ls_initialized == FALSE) {
1317 dtrace_ls_initialized = TRUE;
1318 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1319 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1320 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1321 if (dtrace_ls_enabled) {
1322 /*
1323 * Either sleeping or spinning is happening,
1324 * start a timing of our delay interval now.
1325 */
1326 readers_at_sleep = lck->lck_rw_shared_count;
1327 wait_interval = mach_absolute_time();
1328 }
1329 }
2d21ac55 1330#endif
b0d623f7
A
1331 if (istate == -1)
1332 istate = ml_get_interrupts_enabled();
91447636 1333
b0d623f7 1334 deadline = lck_rw_deadline_for_spin(lck);
0c530ab8 1335
b0d623f7
A
1336 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1337 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
91447636 1338
b0d623f7
A
1339 while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
1340 lck_rw_lock_pause(istate);
1341
1342 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1343 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
1344
1345 if (gotlock)
1346 break;
1347 /*
1348 * if we get here, the deadline has expired w/o us
1349 * being able to grab the lock for read
1350 * check to see if we're allowed to do a thread_block
1351 */
1352 if (lck->lck_rw_can_sleep) {
91447636 1353
91447636 1354 istate = lck_interlock_lock(lck);
91447636 1355
b0d623f7
A
1356 if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1357 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
1358
1359 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1360 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1361
1362 lck->lck_r_waiting = TRUE;
1363
1364 res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
91447636 1365 lck_interlock_unlock(lck, istate);
b0d623f7
A
1366
1367 if (res == THREAD_WAITING) {
1368 res = thread_block(THREAD_CONTINUE_NULL);
1369 slept++;
1370 }
1371 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1372 (int)lck, res, slept, 0, 0);
1373 } else {
1374 lck->lck_rw_shared_count++;
1375 lck_interlock_unlock(lck, istate);
1376 break;
91447636
A
1377 }
1378 }
91447636
A
1379 }
1380
2d21ac55 1381#if CONFIG_DTRACE
b0d623f7 1382 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1383 if (slept == 0) {
1384 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1385 } else {
1386 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1387 mach_absolute_time() - wait_interval, 0,
1388 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1389 }
1390 }
1391 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1392#endif
91447636
A
1393}
1394
1395
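/*
 * Editor's note: illustrative sketch, not part of the original file.
 * lck_rw_lock_shared_to_exclusive() (the fast path that lands in the
 * _success/_failure routines below) follows the same contract as the
 * legacy lock_read_to_write(): on failure the shared hold has already
 * been dropped.  example_rw_upgrade() is hypothetical.
 */
#if 0	/* illustrative example, not part of the build */
static void
example_rw_upgrade(lck_rw_t *rwl)
{
	lck_rw_lock_shared(rwl);

	if (!lck_rw_lock_shared_to_exclusive(rwl)) {
		/* upgrade lost the race: we hold nothing here */
		lck_rw_lock_exclusive(rwl);
	}
	/* exclusive hold either way */
	lck_rw_unlock_exclusive(rwl);
}
#endif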
1396/*
b0d623f7 1397 * Routine: lck_rw_lock_shared_to_exclusive_failure
91447636 1398 * Function:
b0d623f7
A
1399 * assembly fast path code has already dropped our read
1400 * count and determined that someone else owns 'lck_rw_want_upgrade'
 1401 * if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting'
1402 * all we need to do here is determine if a wakeup is needed
91447636 1403 */
91447636 1404boolean_t
b0d623f7
A
1405lck_rw_lock_shared_to_exclusive_failure(
1406 lck_rw_t *lck,
1407 int prior_lock_state)
91447636 1408{
b0d623f7 1409 lck_rw_t *fake_lck;
91447636 1410
b0d623f7
A
1411 /*
 1412 * prior_lock_state is a snapshot of the 1st word of the
 1413 * lock in question... we'll fake up a pointer to it
 1414 * and carefully not access anything beyond what's defined
1415 * in the first word of a lck_rw_t
1416 */
1417 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1418
b0d623f7 1419 if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
91447636
A
1420 /*
1421 * Someone else has requested upgrade.
b0d623f7
A
1422 * Since we've released the read lock, wake
1423 * him up if he's blocked waiting
91447636 1424 */
b0d623f7
A
1425 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1426 }
1427 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1428 (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
91447636 1429
b0d623f7
A
1430 return (FALSE);
1431}
91447636 1432
91447636 1433
b0d623f7
A
1434/*
 1435 * Routine: lck_rw_lock_shared_to_exclusive_success
1436 * Function:
1437 * assembly fast path code has already dropped our read
1438 * count and successfully acquired 'lck_rw_want_upgrade'
1439 * we just need to wait for the rest of the readers to drain
1440 * and then we can return as the exclusive holder of this lock
1441 */
1442boolean_t
1443lck_rw_lock_shared_to_exclusive_success(
1444 lck_rw_t *lck)
1445{
1446 uint64_t deadline = 0;
1447 int slept = 0;
1448 int still_shared = 0;
1449 wait_result_t res;
1450 boolean_t istate = -1;
91447636 1451
b0d623f7
A
1452#if CONFIG_DTRACE
1453 uint64_t wait_interval = 0;
1454 int readers_at_sleep = 0;
1455 boolean_t dtrace_ls_initialized = FALSE;
1456 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1457#endif
91447636 1458
2d21ac55 1459 while (lck->lck_rw_shared_count != 0) {
b0d623f7 1460
2d21ac55 1461#if CONFIG_DTRACE
b0d623f7
A
1462 if (dtrace_ls_initialized == FALSE) {
1463 dtrace_ls_initialized = TRUE;
1464 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1465 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1466 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1467 if (dtrace_ls_enabled) {
1468 /*
1469 * Either sleeping or spinning is happening,
1470 * start a timing of our delay interval now.
1471 */
1472 readers_at_sleep = lck->lck_rw_shared_count;
1473 wait_interval = mach_absolute_time();
1474 }
2d21ac55
A
1475 }
1476#endif
b0d623f7
A
1477 if (istate == -1)
1478 istate = ml_get_interrupts_enabled();
1479
1480 deadline = lck_rw_deadline_for_spin(lck);
1481
1482 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1483 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1484
1485 while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
1486 lck_rw_lock_pause(istate);
1487
1488 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1489 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1490
1491 if ( !still_shared)
1492 break;
1493 /*
1494 * if we get here, the deadline has expired w/o
1495 * the rw_shared_count having drained to 0
1496 * check to see if we're allowed to do a thread_block
1497 */
1498 if (lck->lck_rw_can_sleep) {
1499
91447636 1500 istate = lck_interlock_lock(lck);
b0d623f7
A
1501
1502 if (lck->lck_rw_shared_count != 0) {
1503 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1504 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1505
1506 lck->lck_w_waiting = TRUE;
91447636 1507
b0d623f7 1508 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
91447636 1509 lck_interlock_unlock(lck, istate);
b0d623f7
A
1510
1511 if (res == THREAD_WAITING) {
1512 res = thread_block(THREAD_CONTINUE_NULL);
1513 slept++;
1514 }
1515 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1516 (int)lck, res, slept, 0, 0);
1517 } else {
1518 lck_interlock_unlock(lck, istate);
1519 break;
91447636
A
1520 }
1521 }
91447636 1522 }
2d21ac55
A
1523#if CONFIG_DTRACE
1524 /*
1525 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1526 */
b0d623f7 1527 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1528 if (slept == 0) {
1529 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1530 } else {
1531 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
1532 mach_absolute_time() - wait_interval, 1,
1533 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1534 }
1535 }
2d21ac55
A
1536 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1537#endif
1538 return (TRUE);
91447636
A
1539}
1540
b0d623f7 1541
91447636
A
1542/*
1543 * Routine: lck_rw_lock_exclusive_to_shared
b0d623f7
A
1544 * Function:
1545 * assembly fast path has already dropped
1546 * our exclusive state and bumped lck_rw_shared_count
1547 * all we need to do here is determine if anyone
1548 * needs to be awakened.
91447636
A
1549 */
1550void
b0d623f7
A
1551lck_rw_lock_exclusive_to_shared_gen(
1552 lck_rw_t *lck,
1553 int prior_lock_state)
91447636 1554{
b0d623f7 1555 lck_rw_t *fake_lck;
91447636 1556
b0d623f7
A
1557 /*
 1558 * prior_lock_state is a snapshot of the 1st word of the
 1559 * lock in question... we'll fake up a pointer to it
 1560 * and carefully not access anything beyond what's defined
1561 * in the first word of a lck_rw_t
1562 */
1563 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1564
b0d623f7
A
1565 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1566 (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
91447636 1567
b0d623f7
A
1568 /*
1569 * don't wake up anyone waiting to take the lock exclusively
1570 * since we hold a read count... when the read count drops to 0,
1571 * the writers will be woken.
1572 *
1573 * wake up any waiting readers if we don't have any writers waiting,
1574 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1575 */
1576 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
2d21ac55 1577 thread_wakeup(RW_LOCK_READER_EVENT(lck));
91447636
A
1578
1579 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
2d21ac55 1580 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
91447636 1581
2d21ac55
A
1582#if CONFIG_DTRACE
1583 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1584#endif
91447636
A
1585}
1586
1587
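/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Downgrading keeps the caller on the reader side without ever fully
 * releasing the lock; lck_rw_lock_exclusive_to_shared() is the fast
 * path that hands off to the _gen routine above.  example_downgrade()
 * is hypothetical.
 */
#if 0	/* illustrative example, not part of the build */
static void
example_downgrade(lck_rw_t *rwl)
{
	lck_rw_lock_exclusive(rwl);
	/* ... publish an update ... */
	lck_rw_lock_exclusive_to_shared(rwl);	/* now held shared; never released in between */
	/* ... continue reading the published state ... */
	lck_rw_unlock_shared(rwl);
}
#endif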
1588/*
1589 * Routine: lck_rw_try_lock
1590 */
1591boolean_t
1592lck_rw_try_lock(
1593 lck_rw_t *lck,
1594 lck_rw_type_t lck_rw_type)
1595{
1596 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1597 return(lck_rw_try_lock_shared(lck));
1598 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1599 return(lck_rw_try_lock_exclusive(lck));
1600 else
1601 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
1602 return(FALSE);
1603}
1604
91447636 1605
2d21ac55
A
1606void
1607lck_rw_assert(
1608 lck_rw_t *lck,
1609 unsigned int type)
1610{
1611 switch (type) {
1612 case LCK_RW_ASSERT_SHARED:
1613 if (lck->lck_rw_shared_count != 0) {
1614 return;
1615 }
1616 break;
1617 case LCK_RW_ASSERT_EXCLUSIVE:
1618 if ((lck->lck_rw_want_write ||
1619 lck->lck_rw_want_upgrade) &&
1620 lck->lck_rw_shared_count == 0) {
1621 return;
1622 }
1623 break;
1624 case LCK_RW_ASSERT_HELD:
1625 if (lck->lck_rw_want_write ||
1626 lck->lck_rw_want_upgrade ||
1627 lck->lck_rw_shared_count != 0) {
1628 return;
1629 }
1630 break;
1631 default:
1632 break;
1633 }
1634
b0d623f7 1635 panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
2d21ac55
A
1636}
1637
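/*
 * Editor's note: illustrative sketch, not part of the original file.
 * lck_rw_assert() above is typically used to document and enforce a
 * locking precondition at the top of a routine; it panics when the
 * assertion does not hold.  example_requires_lock() is hypothetical.
 */
#if 0	/* illustrative example, not part of the build */
static void
example_requires_lock(lck_rw_t *rwl)
{
	/* panics unless the caller holds rwl in some mode */
	lck_rw_assert(rwl, LCK_RW_ASSERT_HELD);

	/* stronger form: the caller must hold rwl exclusively */
	lck_rw_assert(rwl, LCK_RW_ASSERT_EXCLUSIVE);
}
#endif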
6d2010ae
A
1638#ifdef MUTEX_ZONE
1639extern zone_t lck_mtx_zone;
1640#endif
91447636
A
1641/*
1642 * Routine: lck_mtx_alloc_init
1643 */
1644lck_mtx_t *
1645lck_mtx_alloc_init(
1646 lck_grp_t *grp,
1647 lck_attr_t *attr)
1648{
1649 lck_mtx_t *lck;
6d2010ae
A
1650#ifdef MUTEX_ZONE
1651 if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
1652 lck_mtx_init(lck, grp, attr);
1653#else
91447636
A
1654 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
1655 lck_mtx_init(lck, grp, attr);
6d2010ae 1656#endif
91447636
A
1657 return(lck);
1658}
1659
1660/*
1661 * Routine: lck_mtx_free
1662 */
1663void
1664lck_mtx_free(
1665 lck_mtx_t *lck,
1666 lck_grp_t *grp)
1667{
1668 lck_mtx_destroy(lck, grp);
6d2010ae
A
1669#ifdef MUTEX_ZONE
1670 zfree(lck_mtx_zone, lck);
1671#else
91447636 1672 kfree(lck, sizeof(lck_mtx_t));
6d2010ae 1673#endif
91447636
A
1674}
1675
1676/*
1677 * Routine: lck_mtx_ext_init
1678 */
1679static void
1680lck_mtx_ext_init(
1681 lck_mtx_ext_t *lck,
1682 lck_grp_t *grp,
1683 lck_attr_t *attr)
1684{
2d21ac55 1685 bzero((void *)lck, sizeof(lck_mtx_ext_t));
91447636
A
1686
1687 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636
A
1688 lck->lck_mtx_deb.type = MUTEX_TAG;
1689 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
1690 }
1691
1692 lck->lck_mtx_grp = grp;
2d21ac55
A
1693
1694 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
6d2010ae 1695 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
b0d623f7 1696
6d2010ae
A
1697 lck->lck_mtx.lck_mtx_is_ext = 1;
1698#if defined(__x86_64__)
1699 lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
1700#endif
91447636
A
1701}
1702
1703/*
1704 * Routine: lck_mtx_init
1705 */
1706void
1707lck_mtx_init(
1708 lck_mtx_t *lck,
1709 lck_grp_t *grp,
1710 lck_attr_t *attr)
1711{
1712 lck_mtx_ext_t *lck_ext;
2d21ac55
A
1713 lck_attr_t *lck_attr;
1714
1715 if (attr != LCK_ATTR_NULL)
1716 lck_attr = attr;
1717 else
1718 lck_attr = &LockDefaultLckAttr;
91447636 1719
2d21ac55 1720 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636 1721 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2d21ac55 1722 lck_mtx_ext_init(lck_ext, grp, lck_attr);
91447636
A
1723 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1724 lck->lck_mtx_ptr = lck_ext;
1725 }
1726 } else {
b0d623f7 1727 lck->lck_mtx_owner = 0;
6d2010ae 1728 lck->lck_mtx_state = 0;
91447636 1729 }
6d2010ae
A
1730#if defined(__x86_64__)
1731 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
1732#endif
91447636
A
1733 lck_grp_reference(grp);
1734 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1735}
1736
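/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Typical lifecycle of a mutex built from the routines above.
 * lck_mtx_lock()/lck_mtx_unlock() are the fast-path entry points
 * declared in kern/locks.h and are assumed available here;
 * example_mtx_lifecycle() is hypothetical.
 */
#if 0	/* illustrative example, not part of the build */
static void
example_mtx_lifecycle(void)
{
	lck_grp_t	*grp = lck_grp_alloc_init("example-mtx", LCK_GRP_ATTR_NULL);
	lck_mtx_t	*mtx = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);

	lck_mtx_lock(mtx);		/* may block; owner may be priority-promoted */
	/* ... critical section that is allowed to sleep ... */
	lck_mtx_unlock(mtx);

	lck_mtx_free(mtx, grp);		/* lck_mtx_destroy() + free */
	lck_grp_free(grp);
}
#endif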
2d21ac55
A
1737/*
1738 * Routine: lck_mtx_init_ext
1739 */
1740void
1741lck_mtx_init_ext(
1742 lck_mtx_t *lck,
1743 lck_mtx_ext_t *lck_ext,
1744 lck_grp_t *grp,
1745 lck_attr_t *attr)
1746{
1747 lck_attr_t *lck_attr;
1748
1749 if (attr != LCK_ATTR_NULL)
1750 lck_attr = attr;
1751 else
1752 lck_attr = &LockDefaultLckAttr;
1753
1754 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1755 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1756 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1757 lck->lck_mtx_ptr = lck_ext;
1758 } else {
b0d623f7 1759 lck->lck_mtx_owner = 0;
6d2010ae 1760 lck->lck_mtx_state = 0;
2d21ac55 1761 }
6d2010ae
A
1762#if defined(__x86_64__)
1763 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
1764#endif
1765
2d21ac55
A
1766 lck_grp_reference(grp);
1767 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1768}
1769
91447636
A
1770/*
1771 * Routine: lck_mtx_destroy
1772 */
1773void
1774lck_mtx_destroy(
1775 lck_mtx_t *lck,
1776 lck_grp_t *grp)
1777{
1778 boolean_t lck_is_indirect;
1779
1780 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
1781 return;
1782 lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
b0d623f7
A
1783
1784 lck_mtx_lock_mark_destroyed(lck);
1785
91447636
A
1786 if (lck_is_indirect)
1787 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
1788 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
1789 lck_grp_deallocate(grp);
1790 return;
1791}
1792
b0d623f7
A
1793
1794#define LCK_MTX_LCK_WAIT_CODE 0x20
1795#define LCK_MTX_LCK_WAKEUP_CODE 0x21
1796#define LCK_MTX_LCK_SPIN_CODE 0x22
1797#define LCK_MTX_LCK_ACQUIRE_CODE 0x23
1798#define LCK_MTX_LCK_DEMOTE_CODE 0x24
1799
1800
1801/*
1802 * Routine: lck_mtx_unlock_wakeup_x86
1803 *
6d2010ae
A
 1804 * Invoked on unlock when there is
 1805 * contention (i.e. the assembly routine sees that
 1806 * mutex->lck_mtx_waiters != 0 or
 1807 * mutex->lck_mtx_promoted != 0)...
b0d623f7 1808 *
6d2010ae 1809 * neither the mutex nor the interlock is held
b0d623f7
A
1810 */
1811void
1812lck_mtx_unlock_wakeup_x86 (
1813 lck_mtx_t *mutex,
6d2010ae 1814 int prior_lock_state)
b0d623f7 1815{
6d2010ae
A
1816 lck_mtx_t fake_lck;
1817
1818 /*
 1819 * prior_lock_state is a snapshot of the 2nd word of the
 1820 * lock in question... we'll fake up a lock with the bits
 1821 * copied into place and carefully not access anything
 1822 * beyond what's defined in the second word of a lck_mtx_t
1823 */
1824 fake_lck.lck_mtx_state = prior_lock_state;
1825
1826 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
1827 mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
b0d623f7 1828
6d2010ae 1829 if (__probable(fake_lck.lck_mtx_waiters)) {
b0d623f7 1830
6d2010ae
A
1831 if (fake_lck.lck_mtx_waiters > 1)
1832 thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
1833 else
1834 thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
1835 }
b0d623f7 1836
6d2010ae 1837 if (__improbable(fake_lck.lck_mtx_promoted)) {
b0d623f7
A
1838 thread_t thread = current_thread();
1839
1840
6d2010ae
A
1841 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
1842 thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);
b0d623f7
A
1843
1844 if (thread->promotions > 0) {
1845 spl_t s = splsched();
1846
1847 thread_lock(thread);
1848
6d2010ae 1849 if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {
b0d623f7 1850
6d2010ae 1851 thread->sched_flags &= ~TH_SFLAG_PROMOTED;
b0d623f7 1852
6d2010ae
A
1853 if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
1854 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
1855 thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
b0d623f7
A
1856
1857 set_sched_pri(thread, DEPRESSPRI);
1858 }
1859 else {
1860 if (thread->priority < thread->sched_pri) {
6d2010ae
A
1861 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
1862 thread->sched_pri, thread->priority, 0, mutex, 0);
b0d623f7 1863
6d2010ae 1864 SCHED(compute_priority)(thread, FALSE);
b0d623f7
A
1865 }
1866 }
1867 }
1868 thread_unlock(thread);
1869 splx(s);
1870 }
1871 }
6d2010ae
A
1872 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
1873 mutex, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7
A
1874}
1875
1876
1877/*
1878 * Routine: lck_mtx_lock_acquire_x86
1879 *
1880 * Invoked on acquiring the mutex when there is
6d2010ae
A
 1881 * contention (i.e. the assembly routine sees that
 1882 * mutex->lck_mtx_waiters != 0 or
1883 * thread->was_promoted_on_wakeup != 0)...
1884 *
1885 * mutex is owned... interlock is held... preemption is disabled
b0d623f7
A
1886 */
1887void
1888lck_mtx_lock_acquire_x86(
1889 lck_mtx_t *mutex)
1890{
6d2010ae 1891 thread_t thread;
b0d623f7 1892 integer_t priority;
6d2010ae 1893 spl_t s;
b0d623f7 1894
6d2010ae
A
 1895 thread = (thread_t)mutex->lck_mtx_owner;	/* faster than current_thread() */
 1896
 1897 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
 1898 	     mutex, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 1899
 1900 if (mutex->lck_mtx_waiters)
 1901 	priority = mutex->lck_mtx_pri;
 1902 else
 1903 	priority = 0;
b0d623f7 1904
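	/*
	 * Boost the new owner if its scheduled priority is below the ceiling
	 * recorded for this mutex, or if it was promoted while being woken.
	 * lck_mtx_promoted and thread->promotions record the boost so that
	 * lck_mtx_unlock_wakeup_x86() can undo it when the mutex is released.
	 */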
6d2010ae 1905 if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
b0d623f7 1906
6d2010ae
A
1907 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
1908 thread->sched_pri, priority, thread->was_promoted_on_wakeup, mutex, 0);
b0d623f7 1909
6d2010ae
A
1910 s = splsched();
1911 thread_lock(thread);
b0d623f7 1912
6d2010ae
A
1913 if (thread->sched_pri < priority)
1914 set_sched_pri(thread, priority);
b0d623f7 1915
6d2010ae
A
1916 if (mutex->lck_mtx_promoted == 0) {
1917 mutex->lck_mtx_promoted = 1;
1918
b0d623f7 1919 thread->promotions++;
6d2010ae 1920 thread->sched_flags |= TH_SFLAG_PROMOTED;
b0d623f7 1921 }
6d2010ae
A
1922 thread->was_promoted_on_wakeup = 0;
1923
1924 thread_unlock(thread);
1925 splx(s);
b0d623f7 1926 }
6d2010ae
A
1927 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
1928 mutex, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7
A
1929}
1930
1931
1932
91447636 1933/*
b0d623f7 1934 * Routine: lck_mtx_lock_spinwait_x86
0c530ab8
A
1935 *
 1936 * Invoked when trying to acquire a mutex that is contended but
 1937 * whose holder is running on another processor.  We spin for up to a
 1938 * maximum time waiting for the lock to be released.
1939 *
1940 * Called with the interlock unlocked.
6d2010ae
A
1941 * returns 0 if mutex acquired
1942 * returns 1 if we spun
1943 * returns 2 if we didn't spin due to the holder not running
0c530ab8 1944 */
b0d623f7
A
1945int
1946lck_mtx_lock_spinwait_x86(
1947 lck_mtx_t *mutex)
0c530ab8 1948{
b0d623f7
A
1949 thread_t holder;
1950 uint64_t deadline;
1951 int retval = 1;
1952 int loopcount = 0;
0c530ab8 1953
6d2010ae
A
1954
1955 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1956 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
0c530ab8
A
1957
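	/*
	 * MutexSpin is the spin budget, expressed in mach_absolute_time()
	 * units; the deadline below bounds how long the adaptive spin may run.
	 */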
1958 deadline = mach_absolute_time() + MutexSpin;
b0d623f7 1959
0c530ab8
A
1960 /*
1961 * Spin while:
1962 * - mutex is locked, and
b0d623f7 1963 * - it's locked as a spin lock, and
0c530ab8 1964 * - owner is running on another processor, and
2d21ac55 1965 * - owner (processor) is not idling, and
0c530ab8
A
1966 * - we haven't spun for long enough.
1967 */
b0d623f7 1968 do {
6d2010ae 1969 if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
b0d623f7
A
1970 retval = 0;
1971 break;
2d21ac55 1972 }
b0d623f7
A
1973 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
1974
1975 if ( !(holder->machine.specFlags & OnProc) ||
1976 (holder->state & TH_IDLE)) {
1977 if (loopcount == 0)
1978 retval = 2;
1979 break;
1980 }
1981 }
1982 cpu_pause();
1983
1984 loopcount++;
1985
1986 } while (mach_absolute_time() < deadline);
1987
1988
2d21ac55
A
1989#if CONFIG_DTRACE
 1990 /*
 1991 * The spin deadline implicitly records when we started spinning.
 1992 * If dtrace is active, we work backwards from it to recover the
 1993 * elapsed spin time: (deadline - MutexSpin) is when the spin began,
 1994 * so mach_absolute_time() - (deadline - MutexSpin) is how long we spun.
 1995 *
 1996 * Note that we record a different probe id for direct and indirect
 1997 * mutexes; this allows dtrace processing to penalize only lock
 1998 * groups that have debug/stats enabled, if desired.
 1999 */
6d2010ae 2000 if (__probable(mutex->lck_mtx_is_ext == 0)) {
b0d623f7 2001 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
2d21ac55
A
2002 mach_absolute_time() - (deadline - MutexSpin));
2003 } else {
b0d623f7 2004 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
2d21ac55
A
2005 mach_absolute_time() - (deadline - MutexSpin));
2006 }
2007 /* The lockstat acquire event is recorded by the assembly code beneath us. */
2008#endif
b0d623f7 2009
6d2010ae
A
2010 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2011 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
b0d623f7
A
2012
2013 return retval;
0c530ab8
A
2014}
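/*
 * Sketch of the expected caller pattern (the actual call sites live in the
 * i386 lock assembly and are not shown in this file): a contended
 * lck_mtx_lock() first calls lck_mtx_lock_spinwait_x86(); only when that
 * returns non-zero (the mutex was not grabbed while spinning) does it take
 * the interlock and block in lck_mtx_lock_wait_x86() below.
 */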
2015
b0d623f7
A
2016
2017
0c530ab8 2018/*
b0d623f7
A
2019 * Routine: lck_mtx_lock_wait_x86
2020 *
 2021 * Invoked to block when the mutex is contended.
 2022 *
 2023 * Called with the interlock locked and
6d2010ae
A
 2024 * preemption disabled...
 2025 * returns with the interlock unlocked and preemption enabled
0c530ab8
A
2026 */
2027void
b0d623f7
A
2028lck_mtx_lock_wait_x86 (
2029 lck_mtx_t *mutex)
0c530ab8 2030{
b0d623f7
A
2031 thread_t self = current_thread();
2032 thread_t holder;
2033 integer_t priority;
b0d623f7
A
2034 spl_t s;
2035#if CONFIG_DTRACE
2036 uint64_t sleep_start = 0;
2037
2038 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
2039 sleep_start = mach_absolute_time();
2040 }
2041#endif
6d2010ae
A
2042 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
2043 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
b0d623f7
A
2044
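	/*
	 * Compute the priority this waiter contributes to the mutex: the
	 * larger of its scheduled and base priority, but never below
	 * BASEPRI_DEFAULT.  lck_mtx_pri then tracks the highest such value
	 * across the current set of waiters.
	 */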
2045 priority = self->sched_pri;
2046
2047 if (priority < self->priority)
2048 priority = self->priority;
2049 if (priority < BASEPRI_DEFAULT)
2050 priority = BASEPRI_DEFAULT;
2051
6d2010ae 2052 if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
b0d623f7 2053 mutex->lck_mtx_pri = priority;
6d2010ae 2054 mutex->lck_mtx_waiters++;
b0d623f7 2055
6d2010ae
A
2056 if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
2057 holder->sched_pri < mutex->lck_mtx_pri ) {
b0d623f7
A
2058
2059 s = splsched();
2060 thread_lock(holder);
2061
6d2010ae 2062 if (holder->sched_pri < mutex->lck_mtx_pri) {
b0d623f7
A
2063 KERNEL_DEBUG_CONSTANT(
2064 MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
6d2010ae 2065 holder->sched_pri, priority, thread_tid(holder), mutex, 0);
b0d623f7
A
2066
2067 set_sched_pri(holder, priority);
2068
2069 if (mutex->lck_mtx_promoted == 0) {
2070 holder->promotions++;
6d2010ae
A
2071 holder->sched_flags |= TH_SFLAG_PROMOTED;
2072
b0d623f7
A
2073 mutex->lck_mtx_promoted = 1;
2074 }
2075 }
2076 thread_unlock(holder);
2077 splx(s);
2078 }
b0d623f7
A
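	/*
	 * Sleep on the same event that lck_mtx_unlock_wakeup_x86() posts: the
	 * address of the last 32-bit word of the lck_mtx_t.  The interlock is
	 * dropped only after assert_wait() has queued this thread, so the
	 * wakeup cannot slip in between.
	 */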
2079 assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
2080
2081 lck_mtx_ilk_unlock(mutex);
2082
2083 thread_block(THREAD_CONTINUE_NULL);
2084
6d2010ae
A
2085 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
2086 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
b0d623f7
A
2087
2088#if CONFIG_DTRACE
2089 /*
 2090 * Record the DTrace lockstat probe for blocking; the block time
 2091 * is measured from when this routine was entered.
2092 */
2093 if (sleep_start) {
6d2010ae 2094 if (mutex->lck_mtx_is_ext == 0) {
b0d623f7
A
2095 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
2096 mach_absolute_time() - sleep_start);
2097 } else {
2098 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
2099 mach_absolute_time() - sleep_start);
2100 }
2101 }
2102#endif
0c530ab8
A
2103}
2104
91447636 2105
2d21ac55 2106#if MACH_KDB
91447636
A
2107
2108void
2109db_show_one_lock(
2110 lock_t *lock)
2111{
2112 db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ",
2d21ac55
A
2113 lock->lck_rw_shared_count,
2114 lock->lck_rw_want_upgrade ? "" : "!",
2115 lock->lck_rw_want_write ? "" : "!");
91447636 2116 db_printf("%swaiting, %scan_sleep\n",
2d21ac55
A
2117 (lock->lck_r_waiting || lock->lck_w_waiting) ? "" : "!",
2118 lock->lck_rw_can_sleep ? "" : "!");
91447636 2119 db_printf("Interlock:\n");
2d21ac55 2120 db_show_one_simple_lock((db_expr_t) ((vm_offset_t)simple_lock_addr(lock->lck_rw_interlock)),
91447636
A
2121 TRUE, (db_expr_t)0, (char *)0);
2122}
2123
91447636
A
2124/*
2125 * Routines to print out simple_locks and mutexes in a nicely-formatted
2126 * fashion.
2127 */
2128
0c530ab8 2129const char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER";
91447636
A
2130
2131void
2132db_show_one_simple_lock (
2133 db_expr_t addr,
2134 boolean_t have_addr,
0c530ab8
A
2135 __unused db_expr_t count,
2136 __unused char * modif)
91447636 2137{
0c530ab8 2138 simple_lock_t saddr = (simple_lock_t) ((vm_offset_t) addr);
91447636
A
2139
2140 if (saddr == (simple_lock_t)0 || !have_addr) {
2141 db_error ("No simple_lock\n");
2142 }
2143#if USLOCK_DEBUG
2144 else if (saddr->lock_type != USLOCK_TAG)
2145 db_error ("Not a simple_lock\n");
2146#endif /* USLOCK_DEBUG */
2147
2148 db_printf ("%s\n", simple_lock_labels);
2149 db_print_simple_lock (saddr);
2150}
2151
2152void
2153db_print_simple_lock (
2154 simple_lock_t addr)
2155{
2156
2157 db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
2158#if USLOCK_DEBUG
2159 db_printf (" %08x", addr->debug.lock_thread);
2160 db_printf (" %08x ", addr->debug.duration[1]);
2161 db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
2162#endif /* USLOCK_DEBUG */
2163 db_printf ("\n");
2164}
2165
91447636 2166#endif /* MACH_KDB */