]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/locks_i386.c
xnu-1456.1.26.tar.gz
[apple/xnu.git] / osfmk / i386 / locks_i386.c
CommitLineData
91447636 1/*
b0d623f7 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
91447636 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
91447636 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
91447636
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
64#include <mach_kdb.h>
65#include <mach_ldebug.h>
66
67#include <kern/lock.h>
68#include <kern/locks.h>
69#include <kern/kalloc.h>
70#include <kern/misc_protos.h>
71#include <kern/thread.h>
72#include <kern/processor.h>
73#include <kern/cpu_data.h>
74#include <kern/cpu_number.h>
75#include <kern/sched_prim.h>
76#include <kern/xpr.h>
77#include <kern/debug.h>
78#include <string.h>
79
b0d623f7 80#include <i386/mp.h> /* mp_recent_debugger_activity() */
91447636
A
81#if MACH_KDB
82#include <ddb/db_command.h>
83#include <ddb/db_output.h>
84#include <ddb/db_sym.h>
85#include <ddb/db_print.h>
86#endif /* MACH_KDB */
87
b0d623f7 88#include <machine/machine_cpu.h>
91447636
A
89
90#include <sys/kdebug.h>
91
2d21ac55
A
92/*
93 * We need only enough declarations from the BSD-side to be able to
94 * test if our probe is active, and to call __dtrace_probe(). Setting
95 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
96 */
97#if CONFIG_DTRACE
98#define NEED_DTRACE_DEFS
99#include <../bsd/sys/lockstat.h>
100#endif
101
91447636
A
102#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
103#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
104#define LCK_RW_LCK_SHARED_CODE 0x102
105#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
106#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
107#define LCK_RW_LCK_EX_TO_SH_CODE 0x105
108
b0d623f7
A
109#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
110#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
111#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
112#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
113#define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
114#define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
115#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
116#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
117
91447636
A
118
119#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
120
121unsigned int LcksOpts=0;
91447636
A
122
123/* Forwards */
124
125#if MACH_KDB
126void db_print_simple_lock(
127 simple_lock_t addr);
91447636
A
128#endif /* MACH_KDB */
129
130
131#if USLOCK_DEBUG
132/*
133 * Perform simple lock checks.
134 */
135int uslock_check = 1;
136int max_lock_loops = 100000000;
137decl_simple_lock_data(extern , printf_lock)
138decl_simple_lock_data(extern , panic_lock)
91447636
A
139#endif /* USLOCK_DEBUG */
140
141
142/*
143 * We often want to know the addresses of the callers
144 * of the various lock routines. However, this information
145 * is only used for debugging and statistics.
146 */
147typedef void *pc_t;
148#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
149#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
150#if ANY_LOCK_DEBUG
b0d623f7 151#define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
91447636
A
152#define DECL_PC(pc) pc_t pc;
153#else /* ANY_LOCK_DEBUG */
154#define DECL_PC(pc)
155#ifdef lint
156/*
157 * Eliminate lint complaints about unused local pc variables.
158 */
b0d623f7 159#define OBTAIN_PC(pc) ++pc
91447636 160#else /* lint */
b0d623f7 161#define OBTAIN_PC(pc)
91447636
A
162#endif /* lint */
163#endif /* ANY_LOCK_DEBUG */
164
165
166/*
167 * Portable lock package implementation of usimple_locks.
168 */
169
170#if USLOCK_DEBUG
171#define USLDBG(stmt) stmt
172void usld_lock_init(usimple_lock_t, unsigned short);
173void usld_lock_pre(usimple_lock_t, pc_t);
174void usld_lock_post(usimple_lock_t, pc_t);
175void usld_unlock(usimple_lock_t, pc_t);
176void usld_lock_try_pre(usimple_lock_t, pc_t);
177void usld_lock_try_post(usimple_lock_t, pc_t);
178int usld_lock_common_checks(usimple_lock_t, char *);
179#else /* USLOCK_DEBUG */
180#define USLDBG(stmt)
181#endif /* USLOCK_DEBUG */
182
b0d623f7
A
183
184extern int lck_rw_grab_want(lck_rw_t *lck);
185extern int lck_rw_grab_shared(lck_rw_t *lck);
186extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);
187
188
2d21ac55
A
189/*
190 * Forward definitions
191 */
192
193void lck_rw_lock_shared_gen(
194 lck_rw_t *lck);
195
b0d623f7
A
196void lck_rw_lock_exclusive_gen(
197 lck_rw_t *lck);
198
199boolean_t lck_rw_lock_shared_to_exclusive_success(
2d21ac55
A
200 lck_rw_t *lck);
201
b0d623f7
A
202boolean_t lck_rw_lock_shared_to_exclusive_failure(
203 lck_rw_t *lck,
204 int prior_lock_state);
205
206void lck_rw_lock_exclusive_to_shared_gen(
207 lck_rw_t *lck,
208 int prior_lock_state);
209
210lck_rw_type_t lck_rw_done_gen(
211 lck_rw_t *lck,
212 int prior_lock_state);
213
214
91447636
A
215/*
216 * Routine: lck_spin_alloc_init
217 */
218lck_spin_t *
219lck_spin_alloc_init(
220 lck_grp_t *grp,
221 lck_attr_t *attr)
222{
223 lck_spin_t *lck;
224
225 if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
226 lck_spin_init(lck, grp, attr);
227
228 return(lck);
229}
230
231/*
232 * Routine: lck_spin_free
233 */
234void
235lck_spin_free(
236 lck_spin_t *lck,
237 lck_grp_t *grp)
238{
239 lck_spin_destroy(lck, grp);
240 kfree(lck, sizeof(lck_spin_t));
241}
242
243/*
244 * Routine: lck_spin_init
245 */
246void
247lck_spin_init(
248 lck_spin_t *lck,
249 lck_grp_t *grp,
250 __unused lck_attr_t *attr)
251{
252 usimple_lock_init((usimple_lock_t) lck, 0);
253 lck_grp_reference(grp);
254 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
255}
256
257/*
258 * Routine: lck_spin_destroy
259 */
260void
261lck_spin_destroy(
262 lck_spin_t *lck,
263 lck_grp_t *grp)
264{
b0d623f7 265 if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
91447636 266 return;
b0d623f7 267 lck->interlock = LCK_SPIN_TAG_DESTROYED;
91447636
A
268 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
269 lck_grp_deallocate(grp);
270 return;
271}
272
273/*
274 * Routine: lck_spin_lock
275 */
276void
277lck_spin_lock(
278 lck_spin_t *lck)
279{
280 usimple_lock((usimple_lock_t) lck);
281}
282
283/*
284 * Routine: lck_spin_unlock
285 */
286void
287lck_spin_unlock(
288 lck_spin_t *lck)
289{
290 usimple_unlock((usimple_lock_t) lck);
291}
292
293
294/*
295 * Routine: lck_spin_try_lock
296 */
297boolean_t
298lck_spin_try_lock(
299 lck_spin_t *lck)
300{
2d21ac55 301 return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
91447636
A
302}
303
304/*
305 * Initialize a usimple_lock.
306 *
307 * No change in preemption state.
308 */
309void
310usimple_lock_init(
311 usimple_lock_t l,
312 __unused unsigned short tag)
313{
314#ifndef MACHINE_SIMPLE_LOCK
315 USLDBG(usld_lock_init(l, tag));
316 hw_lock_init(&l->interlock);
317#else
318 simple_lock_init((simple_lock_t)l,tag);
319#endif
320}
321
322
323/*
324 * Acquire a usimple_lock.
325 *
326 * Returns with preemption disabled. Note
327 * that the hw_lock routines are responsible for
328 * maintaining preemption state.
329 */
330void
331usimple_lock(
332 usimple_lock_t l)
333{
334#ifndef MACHINE_SIMPLE_LOCK
2d21ac55 335 DECL_PC(pc);
91447636 336
b0d623f7 337 OBTAIN_PC(pc);
91447636
A
338 USLDBG(usld_lock_pre(l, pc));
339
b0d623f7
A
340 if(!hw_lock_to(&l->interlock, LockTimeOutTSC)) {/* Try to get the lock
341 * with a timeout */
342 boolean_t uslock_acquired = FALSE;
343 while (mp_recent_debugger_activity() &&
344 !(uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)));
345 if (uslock_acquired == FALSE)
346 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p", l, (uintptr_t)l->interlock.lock_data, current_thread());
347 }
91447636
A
348 USLDBG(usld_lock_post(l, pc));
349#else
350 simple_lock((simple_lock_t)l);
351#endif
352}
353
354
355/*
356 * Release a usimple_lock.
357 *
358 * Returns with preemption enabled. Note
359 * that the hw_lock routines are responsible for
360 * maintaining preemption state.
361 */
362void
363usimple_unlock(
364 usimple_lock_t l)
365{
366#ifndef MACHINE_SIMPLE_LOCK
367 DECL_PC(pc);
368
b0d623f7 369 OBTAIN_PC(pc);
91447636
A
370 USLDBG(usld_unlock(l, pc));
371 hw_lock_unlock(&l->interlock);
372#else
373 simple_unlock_rwmb((simple_lock_t)l);
374#endif
375}
376
377
378/*
379 * Conditionally acquire a usimple_lock.
380 *
381 * On success, returns with preemption disabled.
382 * On failure, returns with preemption in the same state
383 * as when first invoked. Note that the hw_lock routines
384 * are responsible for maintaining preemption state.
385 *
386 * XXX No stats are gathered on a miss; I preserved this
387 * behavior from the original assembly-language code, but
388 * doesn't it make sense to log misses? XXX
389 */
390unsigned int
391usimple_lock_try(
392 usimple_lock_t l)
393{
394#ifndef MACHINE_SIMPLE_LOCK
91447636 395 unsigned int success;
2d21ac55 396 DECL_PC(pc);
91447636 397
b0d623f7 398 OBTAIN_PC(pc);
91447636
A
399 USLDBG(usld_lock_try_pre(l, pc));
400 if ((success = hw_lock_try(&l->interlock))) {
401 USLDBG(usld_lock_try_post(l, pc));
402 }
403 return success;
404#else
405 return(simple_lock_try((simple_lock_t)l));
406#endif
407}
408
409#if USLOCK_DEBUG
410/*
411 * States of a usimple_lock. The default when initializing
412 * a usimple_lock is setting it up for debug checking.
413 */
414#define USLOCK_CHECKED 0x0001 /* lock is being checked */
415#define USLOCK_TAKEN 0x0002 /* lock has been taken */
416#define USLOCK_INIT 0xBAA0 /* lock has been initialized */
417#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
418#define USLOCK_CHECKING(l) (uslock_check && \
419 ((l)->debug.state & USLOCK_CHECKED))
420
421/*
422 * Trace activities of a particularly interesting lock.
423 */
424void usl_trace(usimple_lock_t, int, pc_t, const char *);
425
426
427/*
428 * Initialize the debugging information contained
429 * in a usimple_lock.
430 */
431void
432usld_lock_init(
433 usimple_lock_t l,
434 __unused unsigned short tag)
435{
436 if (l == USIMPLE_LOCK_NULL)
437 panic("lock initialization: null lock pointer");
438 l->lock_type = USLOCK_TAG;
439 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
440 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
441 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
442 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
443 l->debug.duration[0] = l->debug.duration[1] = 0;
444 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
445 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
446 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
447}
448
449
450/*
451 * These checks apply to all usimple_locks, not just
452 * those with USLOCK_CHECKED turned on.
453 */
454int
455usld_lock_common_checks(
456 usimple_lock_t l,
457 char *caller)
458{
459 if (l == USIMPLE_LOCK_NULL)
460 panic("%s: null lock pointer", caller);
461 if (l->lock_type != USLOCK_TAG)
b0d623f7 462 panic("%s: 0x%p is not a usimple lock", caller, l);
91447636 463 if (!(l->debug.state & USLOCK_INIT))
b0d623f7
A
464 panic("%s: %p is not an initialized lock",
465 caller, l);
91447636
A
466 return USLOCK_CHECKING(l);
467}
468
469
470/*
471 * Debug checks on a usimple_lock just before attempting
472 * to acquire it.
473 */
474/* ARGSUSED */
475void
476usld_lock_pre(
477 usimple_lock_t l,
478 pc_t pc)
479{
480 char caller[] = "usimple_lock";
481
482
483 if (!usld_lock_common_checks(l, caller))
484 return;
485
486/*
487 * Note that we have a weird case where we are getting a lock when we are]
488 * in the process of putting the system to sleep. We are running with no
489 * current threads, therefore we can't tell if we are trying to retake a lock
490 * we have or someone on the other processor has it. Therefore we just
491 * ignore this test if the locking thread is 0.
492 */
493
494 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
495 l->debug.lock_thread == (void *) current_thread()) {
2d21ac55
A
496 printf("%s: lock %p already locked (at %p) by",
497 caller, l, l->debug.lock_pc);
498 printf(" current thread %p (new attempt at pc %p)\n",
91447636 499 l->debug.lock_thread, pc);
2d21ac55 500 panic("%s", caller);
91447636
A
501 }
502 mp_disable_preemption();
503 usl_trace(l, cpu_number(), pc, caller);
504 mp_enable_preemption();
505}
506
507
508/*
509 * Debug checks on a usimple_lock just after acquiring it.
510 *
511 * Pre-emption has been disabled at this point,
512 * so we are safe in using cpu_number.
513 */
514void
515usld_lock_post(
516 usimple_lock_t l,
517 pc_t pc)
518{
519 register int mycpu;
520 char caller[] = "successful usimple_lock";
521
522
523 if (!usld_lock_common_checks(l, caller))
524 return;
525
526 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7
A
527 panic("%s: lock %p became uninitialized",
528 caller, l);
91447636 529 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7
A
530 panic("%s: lock 0x%p became TAKEN by someone else",
531 caller, l);
91447636
A
532
533 mycpu = cpu_number();
534 l->debug.lock_thread = (void *)current_thread();
535 l->debug.state |= USLOCK_TAKEN;
536 l->debug.lock_pc = pc;
537 l->debug.lock_cpu = mycpu;
538
539 usl_trace(l, mycpu, pc, caller);
540}
541
542
543/*
544 * Debug checks on a usimple_lock just before
545 * releasing it. Note that the caller has not
546 * yet released the hardware lock.
547 *
548 * Preemption is still disabled, so there's
549 * no problem using cpu_number.
550 */
551void
552usld_unlock(
553 usimple_lock_t l,
554 pc_t pc)
555{
556 register int mycpu;
557 char caller[] = "usimple_unlock";
558
559
560 if (!usld_lock_common_checks(l, caller))
561 return;
562
563 mycpu = cpu_number();
564
565 if (!(l->debug.state & USLOCK_TAKEN))
b0d623f7
A
566 panic("%s: lock 0x%p hasn't been taken",
567 caller, l);
91447636 568 if (l->debug.lock_thread != (void *) current_thread())
b0d623f7
A
569 panic("%s: unlocking lock 0x%p, owned by thread %p",
570 caller, l, l->debug.lock_thread);
91447636 571 if (l->debug.lock_cpu != mycpu) {
b0d623f7
A
572 printf("%s: unlocking lock 0x%p on cpu 0x%x",
573 caller, l, mycpu);
91447636 574 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
2d21ac55 575 panic("%s", caller);
91447636
A
576 }
577 usl_trace(l, mycpu, pc, caller);
578
579 l->debug.unlock_thread = l->debug.lock_thread;
580 l->debug.lock_thread = INVALID_PC;
581 l->debug.state &= ~USLOCK_TAKEN;
582 l->debug.unlock_pc = pc;
583 l->debug.unlock_cpu = mycpu;
584}
585
586
587/*
588 * Debug checks on a usimple_lock just before
589 * attempting to acquire it.
590 *
591 * Preemption isn't guaranteed to be disabled.
592 */
593void
594usld_lock_try_pre(
595 usimple_lock_t l,
596 pc_t pc)
597{
598 char caller[] = "usimple_lock_try";
599
600 if (!usld_lock_common_checks(l, caller))
601 return;
602 mp_disable_preemption();
603 usl_trace(l, cpu_number(), pc, caller);
604 mp_enable_preemption();
605}
606
607
608/*
609 * Debug checks on a usimple_lock just after
610 * successfully attempting to acquire it.
611 *
612 * Preemption has been disabled by the
613 * lock acquisition attempt, so it's safe
614 * to use cpu_number.
615 */
616void
617usld_lock_try_post(
618 usimple_lock_t l,
619 pc_t pc)
620{
621 register int mycpu;
622 char caller[] = "successful usimple_lock_try";
623
624 if (!usld_lock_common_checks(l, caller))
625 return;
626
627 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7
A
628 panic("%s: lock 0x%p became uninitialized",
629 caller, l);
91447636 630 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7
A
631 panic("%s: lock 0x%p became TAKEN by someone else",
632 caller, l);
91447636
A
633
634 mycpu = cpu_number();
635 l->debug.lock_thread = (void *) current_thread();
636 l->debug.state |= USLOCK_TAKEN;
637 l->debug.lock_pc = pc;
638 l->debug.lock_cpu = mycpu;
639
640 usl_trace(l, mycpu, pc, caller);
641}
642
643
644/*
645 * For very special cases, set traced_lock to point to a
646 * specific lock of interest. The result is a series of
647 * XPRs showing lock operations on that lock. The lock_seq
648 * value is used to show the order of those operations.
649 */
650usimple_lock_t traced_lock;
651unsigned int lock_seq;
652
653void
654usl_trace(
655 usimple_lock_t l,
656 int mycpu,
657 pc_t pc,
658 const char * op_name)
659{
660 if (traced_lock == l) {
661 XPR(XPR_SLOCK,
662 "seq %d, cpu %d, %s @ %x\n",
b0d623f7
A
663 (uintptr_t) lock_seq, (uintptr_t) mycpu,
664 (uintptr_t) op_name, (uintptr_t) pc, 0);
91447636
A
665 lock_seq++;
666 }
667}
668
669
670#endif /* USLOCK_DEBUG */
671
672/*
673 * Routine: lock_alloc
674 * Function:
675 * Allocate a lock for external users who cannot
676 * hard-code the structure definition into their
677 * objects.
678 * For now just use kalloc, but a zone is probably
679 * warranted.
680 */
681lock_t *
682lock_alloc(
683 boolean_t can_sleep,
684 unsigned short tag,
685 unsigned short tag1)
686{
687 lock_t *l;
688
689 if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
690 lock_init(l, can_sleep, tag, tag1);
691 return(l);
692}
693
694/*
695 * Routine: lock_free
696 * Function:
697 * Free a lock allocated for external users.
698 * For now just use kfree, but a zone is probably
699 * warranted.
700 */
701void
702lock_free(
703 lock_t *l)
704{
705 kfree(l, sizeof(lock_t));
706}
707
708
709/*
710 * Routine: lock_init
711 * Function:
712 * Initialize a lock; required before use.
713 * Note that clients declare the "struct lock"
714 * variables and then initialize them, rather
715 * than getting a new one from this module.
716 */
717void
718lock_init(
719 lock_t *l,
720 boolean_t can_sleep,
721 __unused unsigned short tag,
0c530ab8 722 __unused unsigned short tag1)
91447636 723{
2d21ac55
A
724 hw_lock_byte_init(&l->lck_rw_interlock);
725 l->lck_rw_want_write = FALSE;
726 l->lck_rw_want_upgrade = FALSE;
727 l->lck_rw_shared_count = 0;
728 l->lck_rw_can_sleep = can_sleep;
0c530ab8 729 l->lck_rw_tag = tag;
2d21ac55 730 l->lck_rw_priv_excl = 1;
b0d623f7 731 l->lck_r_waiting = l->lck_w_waiting = 0;
91447636
A
732}
733
734
735/*
736 * Sleep locks. These use the same data structure and algorithm
737 * as the spin locks, but the process sleeps while it is waiting
738 * for the lock. These work on uniprocessor systems.
739 */
740
741#define DECREMENTER_TIMEOUT 1000000
742
743void
744lock_write(
745 register lock_t * l)
746{
0c530ab8 747 lck_rw_lock_exclusive(l);
91447636
A
748}
749
750void
751lock_done(
752 register lock_t * l)
753{
0c530ab8 754 (void) lck_rw_done(l);
91447636
A
755}
756
757void
758lock_read(
759 register lock_t * l)
760{
0c530ab8 761 lck_rw_lock_shared(l);
91447636
A
762}
763
764
765/*
766 * Routine: lock_read_to_write
767 * Function:
768 * Improves a read-only lock to one with
769 * write permission. If another reader has
770 * already requested an upgrade to a write lock,
771 * no lock is held upon return.
772 *
2d21ac55 773 * Returns FALSE if the upgrade *failed*.
91447636
A
774 */
775
776boolean_t
777lock_read_to_write(
778 register lock_t * l)
779{
0c530ab8 780 return lck_rw_lock_shared_to_exclusive(l);
91447636
A
781}
782
783void
784lock_write_to_read(
785 register lock_t * l)
786{
0c530ab8 787 lck_rw_lock_exclusive_to_shared(l);
8f6c56a5
A
788}
789
8f6c56a5 790
91447636
A
791
792/*
793 * Routine: lck_rw_alloc_init
794 */
795lck_rw_t *
796lck_rw_alloc_init(
797 lck_grp_t *grp,
798 lck_attr_t *attr) {
799 lck_rw_t *lck;
800
b0d623f7
A
801 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
802 bzero(lck, sizeof(lck_rw_t));
91447636 803 lck_rw_init(lck, grp, attr);
b0d623f7
A
804 }
805
91447636
A
806 return(lck);
807}
808
809/*
810 * Routine: lck_rw_free
811 */
812void
813lck_rw_free(
814 lck_rw_t *lck,
815 lck_grp_t *grp) {
816 lck_rw_destroy(lck, grp);
817 kfree(lck, sizeof(lck_rw_t));
818}
819
820/*
821 * Routine: lck_rw_init
822 */
823void
824lck_rw_init(
825 lck_rw_t *lck,
826 lck_grp_t *grp,
0c530ab8
A
827 lck_attr_t *attr)
828{
829 lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
830 attr : &LockDefaultLckAttr;
91447636 831
2d21ac55
A
832 hw_lock_byte_init(&lck->lck_rw_interlock);
833 lck->lck_rw_want_write = FALSE;
834 lck->lck_rw_want_upgrade = FALSE;
835 lck->lck_rw_shared_count = 0;
836 lck->lck_rw_can_sleep = TRUE;
b0d623f7 837 lck->lck_r_waiting = lck->lck_w_waiting = 0;
91447636 838 lck->lck_rw_tag = 0;
2d21ac55
A
839 lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
840 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
91447636
A
841
842 lck_grp_reference(grp);
843 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
844}
845
846/*
847 * Routine: lck_rw_destroy
848 */
849void
850lck_rw_destroy(
851 lck_rw_t *lck,
b0d623f7
A
852 lck_grp_t *grp)
853{
91447636
A
854 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
855 return;
856 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
857 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
858 lck_grp_deallocate(grp);
859 return;
860}
861
862/*
863 * Sleep locks. These use the same data structure and algorithm
864 * as the spin locks, but the process sleeps while it is waiting
865 * for the lock. These work on uniprocessor systems.
866 */
867
868#define DECREMENTER_TIMEOUT 1000000
869
2d21ac55
A
870#define RW_LOCK_READER_EVENT(x) \
871 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))
872
873#define RW_LOCK_WRITER_EVENT(x) \
874 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
91447636
A
875
876/*
877 * We need to disable interrupts while holding the mutex interlock
878 * to prevent an IPI intervening.
879 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
880 */
881static boolean_t
882lck_interlock_lock(lck_rw_t *lck)
883{
884 boolean_t istate;
885
886 istate = ml_set_interrupts_enabled(FALSE);
2d21ac55 887 hw_lock_byte_lock(&lck->lck_rw_interlock);
91447636
A
888
889 return istate;
890}
891
892static void
893lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
894{
2d21ac55 895 hw_lock_byte_unlock(&lck->lck_rw_interlock);
91447636
A
896 ml_set_interrupts_enabled(istate);
897}
898
0c530ab8
A
899/*
900 * This inline is used when busy-waiting for an rw lock.
901 * If interrupts were disabled when the lock primitive was called,
902 * we poll the IPI handler for pending tlb flushes.
903 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
904 */
905static inline void
906lck_rw_lock_pause(boolean_t interrupts_enabled)
907{
908 if (!interrupts_enabled)
909 handle_pending_TLB_flushes();
910 cpu_pause();
911}
912
b0d623f7
A
913
914/*
915 * compute the deadline to spin against when
916 * waiting for a change of state on a lck_rw_t
917 */
918static inline uint64_t
919lck_rw_deadline_for_spin(lck_rw_t *lck)
920{
921 if (lck->lck_rw_can_sleep) {
922 if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
923 /*
924 * there are already threads waiting on this lock... this
925 * implies that they have spun beyond their deadlines waiting for
926 * the desired state to show up so we will not bother spinning at this time...
927 * or
928 * the current number of threads sharing this lock exceeds our capacity to run them
929 * concurrently and since all states we're going to spin for require the rw_shared_count
930 * to be at 0, we'll not bother spinning since the latency for this to happen is
931 * unpredictable...
932 */
933 return (mach_absolute_time());
934 }
935 return (mach_absolute_time() + MutexSpin);
936 } else
937 return (mach_absolute_time() + (100000LL * 1000000000LL));
938}
939
940
91447636
A
941/*
942 * Routine: lck_rw_lock_exclusive
943 */
944void
b0d623f7 945lck_rw_lock_exclusive_gen(
91447636
A
946 lck_rw_t *lck)
947{
b0d623f7
A
948 uint64_t deadline = 0;
949 int slept = 0;
950 int gotlock = 0;
951 int lockheld = 0;
952 wait_result_t res = 0;
953 boolean_t istate = -1;
91447636 954
2d21ac55 955#if CONFIG_DTRACE
b0d623f7
A
956 boolean_t dtrace_ls_initialized = FALSE;
957 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
958 uint64_t wait_interval = 0;
959 int readers_at_sleep = 0;
2d21ac55 960#endif
91447636 961
91447636 962 /*
2d21ac55 963 * Try to acquire the lck_rw_want_write bit.
91447636 964 */
b0d623f7 965 while ( !lck_rw_grab_want(lck)) {
91447636 966
2d21ac55 967#if CONFIG_DTRACE
b0d623f7
A
968 if (dtrace_ls_initialized == FALSE) {
969 dtrace_ls_initialized = TRUE;
970 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
971 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
972 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
973 if (dtrace_ls_enabled) {
974 /*
975 * Either sleeping or spinning is happening,
976 * start a timing of our delay interval now.
977 */
978 readers_at_sleep = lck->lck_rw_shared_count;
979 wait_interval = mach_absolute_time();
980 }
91447636 981 }
2d21ac55 982#endif
b0d623f7
A
983 if (istate == -1)
984 istate = ml_get_interrupts_enabled();
91447636 985
b0d623f7
A
986 deadline = lck_rw_deadline_for_spin(lck);
987
988 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
989
990 while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
991 lck_rw_lock_pause(istate);
992
993 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);
994
995 if (gotlock)
996 break;
997 /*
998 * if we get here, the deadline has expired w/o us
999 * being able to grab the lock exclusively
1000 * check to see if we're allowed to do a thread_block
1001 */
1002 if (lck->lck_rw_can_sleep) {
2d21ac55 1003
91447636 1004 istate = lck_interlock_lock(lck);
91447636 1005
b0d623f7 1006 if (lck->lck_rw_want_write) {
91447636 1007
b0d623f7 1008 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
91447636 1009
b0d623f7 1010 lck->lck_w_waiting = TRUE;
91447636 1011
b0d623f7
A
1012 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1013 lck_interlock_unlock(lck, istate);
91447636 1014
b0d623f7
A
1015 if (res == THREAD_WAITING) {
1016 res = thread_block(THREAD_CONTINUE_NULL);
1017 slept++;
1018 }
1019 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
1020 } else {
1021 lck->lck_rw_want_write = TRUE;
1022 lck_interlock_unlock(lck, istate);
1023 break;
1024 }
1025 }
1026 }
1027 /*
1028 * Wait for readers (and upgrades) to finish...
1029 * the test for these conditions must be done simultaneously with
1030 * a check of the interlock not being held since
1031 * the rw_shared_count will drop to 0 first and then want_upgrade
1032 * will be set to 1 in the shared_to_exclusive scenario... those
1033 * adjustments are done behind the interlock and represent an
1034 * atomic change in state and must be considered as such
1035 * however, once we see the read count at 0, the want_upgrade not set
1036 * and the interlock not held, we are safe to proceed
1037 */
1038 while (lck_rw_held_read_or_upgrade(lck)) {
2d21ac55
A
1039
1040#if CONFIG_DTRACE
1041 /*
1042 * Either sleeping or spinning is happening, start
1043 * a timing of our delay interval now. If we set it
1044 * to -1 we don't have accurate data so we cannot later
1045 * decide to record a dtrace spin or sleep event.
1046 */
b0d623f7
A
1047 if (dtrace_ls_initialized == FALSE) {
1048 dtrace_ls_initialized = TRUE;
1049 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1050 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1051 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1052 if (dtrace_ls_enabled) {
1053 /*
1054 * Either sleeping or spinning is happening,
1055 * start a timing of our delay interval now.
1056 */
1057 readers_at_sleep = lck->lck_rw_shared_count;
1058 wait_interval = mach_absolute_time();
1059 }
2d21ac55
A
1060 }
1061#endif
b0d623f7
A
1062 if (istate == -1)
1063 istate = ml_get_interrupts_enabled();
1064
1065 deadline = lck_rw_deadline_for_spin(lck);
1066
1067 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1068
1069 while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
1070 lck_rw_lock_pause(istate);
1071
1072 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);
1073
1074 if ( !lockheld)
1075 break;
1076 /*
1077 * if we get here, the deadline has expired w/o us
1078 * being able to grab the lock exclusively
1079 * check to see if we're allowed to do a thread_block
1080 */
1081 if (lck->lck_rw_can_sleep) {
91447636 1082
91447636 1083 istate = lck_interlock_lock(lck);
91447636 1084
b0d623f7
A
1085 if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
1086 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1087
1088 lck->lck_w_waiting = TRUE;
1089
1090 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
91447636 1091 lck_interlock_unlock(lck, istate);
b0d623f7
A
1092
1093 if (res == THREAD_WAITING) {
1094 res = thread_block(THREAD_CONTINUE_NULL);
1095 slept++;
1096 }
1097 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
1098 } else {
1099 lck_interlock_unlock(lck, istate);
1100 /*
1101 * must own the lock now, since we checked for
1102 * readers or upgrade owner behind the interlock
1103 * no need for a call to 'lck_rw_held_read_or_upgrade'
1104 */
1105 break;
91447636
A
1106 }
1107 }
91447636
A
1108 }
1109
2d21ac55
A
1110#if CONFIG_DTRACE
1111 /*
1112 * Decide what latencies we suffered that are Dtrace events.
1113 * If we have set wait_interval, then we either spun or slept.
1114 * At least we get out from under the interlock before we record
1115 * which is the best we can do here to minimize the impact
1116 * of the tracing.
1117 * If we have set wait_interval to -1, then dtrace was not enabled when we
1118 * started sleeping/spinning so we don't record this event.
1119 */
b0d623f7 1120 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1121 if (slept == 0) {
1122 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
1123 mach_absolute_time() - wait_interval, 1);
1124 } else {
1125 /*
1126 * For the blocking case, we also record if when we blocked
1127 * it was held for read or write, and how many readers.
1128 * Notice that above we recorded this before we dropped
1129 * the interlock so the count is accurate.
1130 */
1131 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
1132 mach_absolute_time() - wait_interval, 1,
1133 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1134 }
1135 }
1136 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1137#endif
91447636
A
1138}
1139
1140
1141/*
2d21ac55 1142 * Routine: lck_rw_done_gen
b0d623f7
A
1143 *
1144 * called from the assembly language wrapper...
1145 * prior_lock_state is the value in the 1st
1146 * word of the lock at the time of a successful
1147 * atomic compare and exchange with the new value...
1148 * it represents the state of the lock before we
1149 * decremented the rw_shared_count or cleared either
1150 * rw_want_upgrade or rw_want_write and
1151 * the lck_x_waiting bits... since the wrapper
1152 * routine has already changed the state atomically,
1153 * we just need to decide if we should
1154 * wake up anyone and what value to return... we do
1155 * this by examining the state of the lock before
1156 * we changed it
91447636
A
1157 */
1158lck_rw_type_t
2d21ac55 1159lck_rw_done_gen(
b0d623f7
A
1160 lck_rw_t *lck,
1161 int prior_lock_state)
91447636 1162{
b0d623f7
A
1163 lck_rw_t *fake_lck;
1164 lck_rw_type_t lock_type;
91447636
A
1165
1166 /*
b0d623f7
A
1167 * prior_lock state is a snapshot of the 1st word of the
1168 * lock in question... we'll fake up a pointer to it
1169 * and carefully not access anything beyond whats defined
1170 * in the first word of a lck_rw_t
91447636 1171 */
b0d623f7 1172 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1173
b0d623f7
A
1174 if (fake_lck->lck_rw_shared_count <= 1) {
1175 if (fake_lck->lck_w_waiting)
1176 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
91447636 1177
b0d623f7
A
1178 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1179 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1180 }
1181 if (fake_lck->lck_rw_shared_count)
1182 lock_type = LCK_RW_TYPE_SHARED;
1183 else
1184 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2d21ac55
A
1185
1186#if CONFIG_DTRACE
b0d623f7 1187 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2d21ac55
A
1188#endif
1189
b0d623f7 1190 return(lock_type);
91447636
A
1191}
1192
1193
91447636
A
1194/*
1195 * Routine: lck_rw_unlock
1196 */
1197void
1198lck_rw_unlock(
1199 lck_rw_t *lck,
1200 lck_rw_type_t lck_rw_type)
1201{
1202 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1203 lck_rw_unlock_shared(lck);
1204 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1205 lck_rw_unlock_exclusive(lck);
1206 else
1207 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
1208}
1209
1210
1211/*
1212 * Routine: lck_rw_unlock_shared
1213 */
1214void
1215lck_rw_unlock_shared(
1216 lck_rw_t *lck)
1217{
1218 lck_rw_type_t ret;
1219
1220 ret = lck_rw_done(lck);
1221
1222 if (ret != LCK_RW_TYPE_SHARED)
1223 panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
1224}
1225
1226
1227/*
1228 * Routine: lck_rw_unlock_exclusive
1229 */
1230void
1231lck_rw_unlock_exclusive(
1232 lck_rw_t *lck)
1233{
1234 lck_rw_type_t ret;
1235
1236 ret = lck_rw_done(lck);
1237
1238 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1239 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
1240}
1241
1242
1243/*
1244 * Routine: lck_rw_lock
1245 */
1246void
1247lck_rw_lock(
1248 lck_rw_t *lck,
1249 lck_rw_type_t lck_rw_type)
1250{
1251 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1252 lck_rw_lock_shared(lck);
1253 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1254 lck_rw_lock_exclusive(lck);
1255 else
1256 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
1257}
1258
1259
1260/*
2d21ac55 1261 * Routine: lck_rw_lock_shared_gen
b0d623f7
A
1262 * Function:
1263 * assembly fast path code has determined that this lock
1264 * is held exclusively... this is where we spin/block
1265 * until we can acquire the lock in the shared mode
91447636
A
1266 */
1267void
2d21ac55 1268lck_rw_lock_shared_gen(
91447636
A
1269 lck_rw_t *lck)
1270{
b0d623f7
A
1271 uint64_t deadline = 0;
1272 int gotlock = 0;
1273 int slept = 0;
1274 wait_result_t res = 0;
1275 boolean_t istate = -1;
1276
2d21ac55
A
1277#if CONFIG_DTRACE
1278 uint64_t wait_interval = 0;
b0d623f7
A
1279 int readers_at_sleep = 0;
1280 boolean_t dtrace_ls_initialized = FALSE;
1281 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
2d21ac55 1282#endif
91447636 1283
b0d623f7
A
1284 while ( !lck_rw_grab_shared(lck)) {
1285
2d21ac55 1286#if CONFIG_DTRACE
b0d623f7
A
1287 if (dtrace_ls_initialized == FALSE) {
1288 dtrace_ls_initialized = TRUE;
1289 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1290 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1291 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1292 if (dtrace_ls_enabled) {
1293 /*
1294 * Either sleeping or spinning is happening,
1295 * start a timing of our delay interval now.
1296 */
1297 readers_at_sleep = lck->lck_rw_shared_count;
1298 wait_interval = mach_absolute_time();
1299 }
1300 }
2d21ac55 1301#endif
b0d623f7
A
1302 if (istate == -1)
1303 istate = ml_get_interrupts_enabled();
91447636 1304
b0d623f7 1305 deadline = lck_rw_deadline_for_spin(lck);
0c530ab8 1306
b0d623f7
A
1307 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1308 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
91447636 1309
b0d623f7
A
1310 while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
1311 lck_rw_lock_pause(istate);
1312
1313 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1314 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
1315
1316 if (gotlock)
1317 break;
1318 /*
1319 * if we get here, the deadline has expired w/o us
1320 * being able to grab the lock for read
1321 * check to see if we're allowed to do a thread_block
1322 */
1323 if (lck->lck_rw_can_sleep) {
91447636 1324
91447636 1325 istate = lck_interlock_lock(lck);
91447636 1326
b0d623f7
A
1327 if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1328 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
1329
1330 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1331 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1332
1333 lck->lck_r_waiting = TRUE;
1334
1335 res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
91447636 1336 lck_interlock_unlock(lck, istate);
b0d623f7
A
1337
1338 if (res == THREAD_WAITING) {
1339 res = thread_block(THREAD_CONTINUE_NULL);
1340 slept++;
1341 }
1342 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1343 (int)lck, res, slept, 0, 0);
1344 } else {
1345 lck->lck_rw_shared_count++;
1346 lck_interlock_unlock(lck, istate);
1347 break;
91447636
A
1348 }
1349 }
91447636
A
1350 }
1351
2d21ac55 1352#if CONFIG_DTRACE
b0d623f7 1353 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1354 if (slept == 0) {
1355 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1356 } else {
1357 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1358 mach_absolute_time() - wait_interval, 0,
1359 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1360 }
1361 }
1362 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1363#endif
91447636
A
1364}
1365
1366
1367/*
b0d623f7 1368 * Routine: lck_rw_lock_shared_to_exclusive_failure
91447636 1369 * Function:
b0d623f7
A
1370 * assembly fast path code has already dropped our read
1371 * count and determined that someone else owns 'lck_rw_want_upgrade'
1372 * if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
1373 * all we need to do here is determine if a wakeup is needed
91447636 1374 */
91447636 1375boolean_t
b0d623f7
A
1376lck_rw_lock_shared_to_exclusive_failure(
1377 lck_rw_t *lck,
1378 int prior_lock_state)
91447636 1379{
b0d623f7 1380 lck_rw_t *fake_lck;
91447636 1381
b0d623f7
A
1382 /*
1383 * prior_lock state is a snapshot of the 1st word of the
1384 * lock in question... we'll fake up a pointer to it
1385 * and carefully not access anything beyond whats defined
1386 * in the first word of a lck_rw_t
1387 */
1388 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1389
b0d623f7 1390 if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
91447636
A
1391 /*
1392 * Someone else has requested upgrade.
b0d623f7
A
1393 * Since we've released the read lock, wake
1394 * him up if he's blocked waiting
91447636 1395 */
b0d623f7
A
1396 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1397 }
1398 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1399 (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
91447636 1400
b0d623f7
A
1401 return (FALSE);
1402}
91447636 1403
91447636 1404
b0d623f7
A
1405/*
1406 * Routine: lck_rw_lock_shared_to_exclusive_failure
1407 * Function:
1408 * assembly fast path code has already dropped our read
1409 * count and successfully acquired 'lck_rw_want_upgrade'
1410 * we just need to wait for the rest of the readers to drain
1411 * and then we can return as the exclusive holder of this lock
1412 */
1413boolean_t
1414lck_rw_lock_shared_to_exclusive_success(
1415 lck_rw_t *lck)
1416{
1417 uint64_t deadline = 0;
1418 int slept = 0;
1419 int still_shared = 0;
1420 wait_result_t res;
1421 boolean_t istate = -1;
91447636 1422
b0d623f7
A
1423#if CONFIG_DTRACE
1424 uint64_t wait_interval = 0;
1425 int readers_at_sleep = 0;
1426 boolean_t dtrace_ls_initialized = FALSE;
1427 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1428#endif
91447636 1429
2d21ac55 1430 while (lck->lck_rw_shared_count != 0) {
b0d623f7 1431
2d21ac55 1432#if CONFIG_DTRACE
b0d623f7
A
1433 if (dtrace_ls_initialized == FALSE) {
1434 dtrace_ls_initialized = TRUE;
1435 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1436 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1437 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1438 if (dtrace_ls_enabled) {
1439 /*
1440 * Either sleeping or spinning is happening,
1441 * start a timing of our delay interval now.
1442 */
1443 readers_at_sleep = lck->lck_rw_shared_count;
1444 wait_interval = mach_absolute_time();
1445 }
2d21ac55
A
1446 }
1447#endif
b0d623f7
A
1448 if (istate == -1)
1449 istate = ml_get_interrupts_enabled();
1450
1451 deadline = lck_rw_deadline_for_spin(lck);
1452
1453 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1454 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1455
1456 while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
1457 lck_rw_lock_pause(istate);
1458
1459 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1460 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1461
1462 if ( !still_shared)
1463 break;
1464 /*
1465 * if we get here, the deadline has expired w/o
1466 * the rw_shared_count having drained to 0
1467 * check to see if we're allowed to do a thread_block
1468 */
1469 if (lck->lck_rw_can_sleep) {
1470
91447636 1471 istate = lck_interlock_lock(lck);
b0d623f7
A
1472
1473 if (lck->lck_rw_shared_count != 0) {
1474 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1475 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1476
1477 lck->lck_w_waiting = TRUE;
91447636 1478
b0d623f7 1479 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
91447636 1480 lck_interlock_unlock(lck, istate);
b0d623f7
A
1481
1482 if (res == THREAD_WAITING) {
1483 res = thread_block(THREAD_CONTINUE_NULL);
1484 slept++;
1485 }
1486 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1487 (int)lck, res, slept, 0, 0);
1488 } else {
1489 lck_interlock_unlock(lck, istate);
1490 break;
91447636
A
1491 }
1492 }
91447636 1493 }
2d21ac55
A
1494#if CONFIG_DTRACE
1495 /*
1496 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1497 */
b0d623f7 1498 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1499 if (slept == 0) {
1500 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1501 } else {
1502 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
1503 mach_absolute_time() - wait_interval, 1,
1504 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1505 }
1506 }
2d21ac55
A
1507 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1508#endif
1509 return (TRUE);
91447636
A
1510}
1511
b0d623f7 1512
91447636
A
1513/*
1514 * Routine: lck_rw_lock_exclusive_to_shared
b0d623f7
A
1515 * Function:
1516 * assembly fast path has already dropped
1517 * our exclusive state and bumped lck_rw_shared_count
1518 * all we need to do here is determine if anyone
1519 * needs to be awakened.
91447636
A
1520 */
1521void
b0d623f7
A
1522lck_rw_lock_exclusive_to_shared_gen(
1523 lck_rw_t *lck,
1524 int prior_lock_state)
91447636 1525{
b0d623f7 1526 lck_rw_t *fake_lck;
91447636 1527
b0d623f7
A
1528 /*
1529 * prior_lock state is a snapshot of the 1st word of the
1530 * lock in question... we'll fake up a pointer to it
1531 * and carefully not access anything beyond whats defined
1532 * in the first word of a lck_rw_t
1533 */
1534 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1535
b0d623f7
A
1536 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1537 (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
91447636 1538
b0d623f7
A
1539 /*
1540 * don't wake up anyone waiting to take the lock exclusively
1541 * since we hold a read count... when the read count drops to 0,
1542 * the writers will be woken.
1543 *
1544 * wake up any waiting readers if we don't have any writers waiting,
1545 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1546 */
1547 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
2d21ac55 1548 thread_wakeup(RW_LOCK_READER_EVENT(lck));
91447636
A
1549
1550 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
2d21ac55 1551 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
91447636 1552
2d21ac55
A
1553#if CONFIG_DTRACE
1554 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1555#endif
91447636
A
1556}
1557
1558
1559/*
1560 * Routine: lck_rw_try_lock
1561 */
1562boolean_t
1563lck_rw_try_lock(
1564 lck_rw_t *lck,
1565 lck_rw_type_t lck_rw_type)
1566{
1567 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1568 return(lck_rw_try_lock_shared(lck));
1569 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1570 return(lck_rw_try_lock_exclusive(lck));
1571 else
1572 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
1573 return(FALSE);
1574}
1575
91447636 1576
2d21ac55
A
1577void
1578lck_rw_assert(
1579 lck_rw_t *lck,
1580 unsigned int type)
1581{
1582 switch (type) {
1583 case LCK_RW_ASSERT_SHARED:
1584 if (lck->lck_rw_shared_count != 0) {
1585 return;
1586 }
1587 break;
1588 case LCK_RW_ASSERT_EXCLUSIVE:
1589 if ((lck->lck_rw_want_write ||
1590 lck->lck_rw_want_upgrade) &&
1591 lck->lck_rw_shared_count == 0) {
1592 return;
1593 }
1594 break;
1595 case LCK_RW_ASSERT_HELD:
1596 if (lck->lck_rw_want_write ||
1597 lck->lck_rw_want_upgrade ||
1598 lck->lck_rw_shared_count != 0) {
1599 return;
1600 }
1601 break;
1602 default:
1603 break;
1604 }
1605
b0d623f7 1606 panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
2d21ac55
A
1607}
1608
91447636
A
1609/*
1610 * Routine: lck_mtx_alloc_init
1611 */
1612lck_mtx_t *
1613lck_mtx_alloc_init(
1614 lck_grp_t *grp,
1615 lck_attr_t *attr)
1616{
1617 lck_mtx_t *lck;
1618
1619 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
1620 lck_mtx_init(lck, grp, attr);
1621
1622 return(lck);
1623}
1624
1625/*
1626 * Routine: lck_mtx_free
1627 */
1628void
1629lck_mtx_free(
1630 lck_mtx_t *lck,
1631 lck_grp_t *grp)
1632{
1633 lck_mtx_destroy(lck, grp);
1634 kfree(lck, sizeof(lck_mtx_t));
1635}
1636
1637/*
1638 * Routine: lck_mtx_ext_init
1639 */
1640static void
1641lck_mtx_ext_init(
1642 lck_mtx_ext_t *lck,
1643 lck_grp_t *grp,
1644 lck_attr_t *attr)
1645{
2d21ac55 1646 bzero((void *)lck, sizeof(lck_mtx_ext_t));
91447636
A
1647
1648 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636
A
1649 lck->lck_mtx_deb.type = MUTEX_TAG;
1650 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
1651 }
1652
1653 lck->lck_mtx_grp = grp;
2d21ac55
A
1654
1655 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
1656 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
b0d623f7
A
1657
1658 lck->lck_mtx.lck_mtx_ptr = (void *)LCK_MTX_PTR_EXTENDED;
91447636
A
1659}
1660
1661/*
1662 * Routine: lck_mtx_init
1663 */
1664void
1665lck_mtx_init(
1666 lck_mtx_t *lck,
1667 lck_grp_t *grp,
1668 lck_attr_t *attr)
1669{
1670 lck_mtx_ext_t *lck_ext;
2d21ac55
A
1671 lck_attr_t *lck_attr;
1672
1673 if (attr != LCK_ATTR_NULL)
1674 lck_attr = attr;
1675 else
1676 lck_attr = &LockDefaultLckAttr;
91447636 1677
2d21ac55 1678 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636 1679 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2d21ac55 1680 lck_mtx_ext_init(lck_ext, grp, lck_attr);
91447636
A
1681 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1682 lck->lck_mtx_ptr = lck_ext;
b0d623f7 1683 lck->lck_mtx_ilocked = 1;
91447636
A
1684 }
1685 } else {
b0d623f7
A
1686 lck->lck_mtx_owner = 0;
1687 lck->lck_mtx_ptr = 0;
91447636
A
1688 lck->lck_mtx_waiters = 0;
1689 lck->lck_mtx_pri = 0;
b0d623f7
A
1690 lck->lck_mtx_ilocked = 0;
1691 lck->lck_mtx_mlocked = 0;
1692 lck->lck_mtx_promoted = 0;
1693 lck->lck_mtx_spin = 0;
91447636
A
1694 }
1695 lck_grp_reference(grp);
1696 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1697}
1698
2d21ac55
A
1699/*
1700 * Routine: lck_mtx_init_ext
1701 */
1702void
1703lck_mtx_init_ext(
1704 lck_mtx_t *lck,
1705 lck_mtx_ext_t *lck_ext,
1706 lck_grp_t *grp,
1707 lck_attr_t *attr)
1708{
1709 lck_attr_t *lck_attr;
1710
1711 if (attr != LCK_ATTR_NULL)
1712 lck_attr = attr;
1713 else
1714 lck_attr = &LockDefaultLckAttr;
1715
1716 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1717 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1718 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1719 lck->lck_mtx_ptr = lck_ext;
b0d623f7 1720 lck->lck_mtx_ilocked = 1;
2d21ac55 1721 } else {
b0d623f7
A
1722 lck->lck_mtx_owner = 0;
1723 lck->lck_mtx_ptr = 0;
2d21ac55
A
1724 lck->lck_mtx_waiters = 0;
1725 lck->lck_mtx_pri = 0;
b0d623f7
A
1726 lck->lck_mtx_ilocked = 0;
1727 lck->lck_mtx_mlocked = 0;
1728 lck->lck_mtx_promoted = 0;
1729 lck->lck_mtx_spin = 0;
2d21ac55
A
1730 }
1731 lck_grp_reference(grp);
1732 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1733}
1734
91447636
A
1735/*
1736 * Routine: lck_mtx_destroy
1737 */
1738void
1739lck_mtx_destroy(
1740 lck_mtx_t *lck,
1741 lck_grp_t *grp)
1742{
1743 boolean_t lck_is_indirect;
1744
1745 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
1746 return;
1747 lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
b0d623f7
A
1748
1749 lck_mtx_lock_mark_destroyed(lck);
1750
91447636
A
1751 if (lck_is_indirect)
1752 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
1753 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
1754 lck_grp_deallocate(grp);
1755 return;
1756}
1757
b0d623f7
A
1758
/*
 * Sub-codes passed to MACHDBG_CODE(DBG_MACH_LOCKS, ...) by the mutex
 * slow-path routines in this file when emitting KERNEL_DEBUG events.
 */
#define LCK_MTX_LCK_WAIT_CODE		0x20	/* lck_mtx_lock_wait_x86 */
#define LCK_MTX_LCK_WAKEUP_CODE		0x21	/* lck_mtx_unlock_wakeup_x86 */
#define LCK_MTX_LCK_SPIN_CODE		0x22	/* lck_mtx_lock_spinwait_x86 */
#define LCK_MTX_LCK_ACQUIRE_CODE	0x23	/* lck_mtx_lock_acquire_x86 */
#define LCK_MTX_LCK_DEMOTE_CODE		0x24	/* promotion dropped on unlock */
1764
1765
1766/*
1767 * Routine: lck_mtx_unlock_wakeup_x86
1768 *
1769 * Invoked on unlock when there is contention.
1770 *
1771 */
1772void
1773lck_mtx_unlock_wakeup_x86 (
1774 lck_mtx_t *mutex,
1775 int owner_was_promoted)
1776{
1777
1778 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START, (int)mutex, owner_was_promoted, mutex->lck_mtx_waiters, 0, 0);
1779
1780 if (lck_mtx_lock_decr_waiter(mutex))
1781 thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
1782
1783 if (owner_was_promoted) {
1784 thread_t thread = current_thread();
1785
1786
1787 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), thread->promotions,
1788 thread->sched_mode & TH_MODE_PROMOTED, 0, 0);
1789
1790 if (thread->promotions > 0) {
1791 spl_t s = splsched();
1792
1793 thread_lock(thread);
1794
1795 if (--thread->promotions == 0 && (thread->sched_mode & TH_MODE_PROMOTED)) {
1796
1797 thread->sched_mode &= ~TH_MODE_PROMOTED;
1798
1799 if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
1800 KERNEL_DEBUG_CONSTANT(
1801 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
1802 thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
1803
1804 set_sched_pri(thread, DEPRESSPRI);
1805 }
1806 else {
1807 if (thread->priority < thread->sched_pri) {
1808 KERNEL_DEBUG_CONSTANT(
1809 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
1810 thread->sched_pri, thread->priority, 0, mutex, 0);
1811
1812 compute_priority(thread, FALSE);
1813 }
1814 }
1815 }
1816 thread_unlock(thread);
1817 splx(s);
1818 }
1819 }
1820 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
1821}
1822
1823
1824/*
1825 * Routine: lck_mtx_lock_acquire_x86
1826 *
1827 * Invoked on acquiring the mutex when there is
1828 * contention.
1829 * mutex is owned... interlock is not held
1830 */
1831void
1832lck_mtx_lock_acquire_x86(
1833 lck_mtx_t *mutex)
1834{
1835 thread_t thread = current_thread();
1836 integer_t priority;
1837
1838 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
1839
1840 priority = lck_mtx_lock_get_pri(mutex);
1841
1842 if (thread->sched_pri < priority) {
1843
1844 if (lck_mtx_lock_mark_promoted(mutex)) {
1845 spl_t s = splsched();
1846
1847 thread_lock(thread);
1848
1849 if (thread->sched_pri < priority) {
1850
1851 KERNEL_DEBUG_CONSTANT(
1852 MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
1853 thread->sched_pri, priority, 0, mutex, 0);
1854
1855 set_sched_pri(thread, priority);
1856 }
1857 thread->promotions++;
1858 thread->sched_mode |= TH_MODE_PROMOTED;
1859
1860 thread_unlock(thread);
1861 splx(s);
1862 }
1863 }
1864 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
1865}
1866
1867
1868
91447636 1869/*
b0d623f7 1870 * Routine: lck_mtx_lock_spinwait_x86
0c530ab8
A
1871 *
1872 * Invoked trying to acquire a mutex when there is contention but
1873 * the holder is running on another processor. We spin for up to a maximum
1874 * time waiting for the lock to be released.
1875 *
1876 * Called with the interlock unlocked.
1877 */
b0d623f7
A
1878int
1879lck_mtx_lock_spinwait_x86(
1880 lck_mtx_t *mutex)
0c530ab8 1881{
b0d623f7
A
1882 thread_t holder;
1883 uint64_t deadline;
1884 int retval = 1;
1885 int loopcount = 0;
0c530ab8 1886
2d21ac55 1887 KERNEL_DEBUG(
b0d623f7
A
1888 MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1889 (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
0c530ab8
A
1890
1891 deadline = mach_absolute_time() + MutexSpin;
b0d623f7 1892
0c530ab8
A
1893 /*
1894 * Spin while:
1895 * - mutex is locked, and
b0d623f7 1896 * - its locked as a spin lock, and
0c530ab8 1897 * - owner is running on another processor, and
2d21ac55 1898 * - owner (processor) is not idling, and
0c530ab8
A
1899 * - we haven't spun for long enough.
1900 */
b0d623f7
A
1901 do {
1902 if (lck_mtx_lock_grab_mutex(mutex)) {
1903 retval = 0;
1904 break;
2d21ac55 1905 }
b0d623f7
A
1906 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
1907
1908 if ( !(holder->machine.specFlags & OnProc) ||
1909 (holder->state & TH_IDLE)) {
1910 if (loopcount == 0)
1911 retval = 2;
1912 break;
1913 }
1914 }
1915 cpu_pause();
1916
1917 loopcount++;
1918
1919 } while (mach_absolute_time() < deadline);
1920
1921
2d21ac55
A
1922#if CONFIG_DTRACE
1923 /*
1924 * We've already kept a count via deadline of how long we spun.
1925 * If dtrace is active, then we compute backwards to decide how
1926 * long we spun.
1927 *
1928 * Note that we record a different probe id depending on whether
1929 * this is a direct or indirect mutex. This allows us to
1930 * penalize only lock groups that have debug/stats enabled
1931 * with dtrace processing if desired.
1932 */
b0d623f7
A
1933 if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) {
1934 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
2d21ac55
A
1935 mach_absolute_time() - (deadline - MutexSpin));
1936 } else {
b0d623f7 1937 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
2d21ac55
A
1938 mach_absolute_time() - (deadline - MutexSpin));
1939 }
1940 /* The lockstat acquire event is recorded by the assembly code beneath us. */
1941#endif
b0d623f7
A
1942
1943 KERNEL_DEBUG(
1944 MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
1945 (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
1946
1947 return retval;
0c530ab8
A
1948}
1949
b0d623f7
A
1950
1951
0c530ab8 1952/*
b0d623f7
A
1953 * Routine: lck_mtx_lock_wait_x86
1954 *
1955 * Invoked in order to wait on contention.
1956 *
1957 * Called with the interlock locked and
1958 * returns it unlocked.
0c530ab8
A
1959 */
1960void
b0d623f7
A
1961lck_mtx_lock_wait_x86 (
1962 lck_mtx_t *mutex)
0c530ab8 1963{
b0d623f7
A
1964 thread_t self = current_thread();
1965 thread_t holder;
1966 integer_t priority;
1967 integer_t old_lck_mtx_pri;
1968 spl_t s;
1969#if CONFIG_DTRACE
1970 uint64_t sleep_start = 0;
1971
1972 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
1973 sleep_start = mach_absolute_time();
1974 }
1975#endif
1976 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
1977
1978 priority = self->sched_pri;
1979
1980 if (priority < self->priority)
1981 priority = self->priority;
1982 if (priority < BASEPRI_DEFAULT)
1983 priority = BASEPRI_DEFAULT;
1984
1985 if (mutex->lck_mtx_waiters == 0)
1986 old_lck_mtx_pri = 0;
1987 else
1988 old_lck_mtx_pri = mutex->lck_mtx_pri;
1989
1990 if (old_lck_mtx_pri < priority)
1991 mutex->lck_mtx_pri = priority;
1992
1993 if ( (holder = (thread_t)mutex->lck_mtx_owner) ) {
1994
1995 s = splsched();
1996 thread_lock(holder);
1997
1998 if (holder->sched_pri < priority) {
1999 KERNEL_DEBUG_CONSTANT(
2000 MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
2001 holder->sched_pri, priority, holder, mutex, 0);
2002
2003 set_sched_pri(holder, priority);
2004
2005 if (mutex->lck_mtx_promoted == 0) {
2006 holder->promotions++;
2007 holder->sched_mode |= TH_MODE_PROMOTED;
2008
2009 mutex->lck_mtx_promoted = 1;
2010 }
2011 }
2012 thread_unlock(holder);
2013 splx(s);
2014 }
2015 mutex->lck_mtx_waiters++;
2016
2017 assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
2018
2019 lck_mtx_ilk_unlock(mutex);
2020
2021 thread_block(THREAD_CONTINUE_NULL);
2022
2023 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
2024
2025#if CONFIG_DTRACE
2026 /*
2027 * Record the Dtrace lockstat probe for blocking, block time
2028 * measured from when we were entered.
2029 */
2030 if (sleep_start) {
2031 if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) {
2032 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
2033 mach_absolute_time() - sleep_start);
2034 } else {
2035 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
2036 mach_absolute_time() - sleep_start);
2037 }
2038 }
2039#endif
0c530ab8
A
2040}
2041
91447636 2042
2d21ac55 2043#if MACH_KDB
91447636
A
2044
2045void
2046db_show_one_lock(
2047 lock_t *lock)
2048{
2049 db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ",
2d21ac55
A
2050 lock->lck_rw_shared_count,
2051 lock->lck_rw_want_upgrade ? "" : "!",
2052 lock->lck_rw_want_write ? "" : "!");
91447636 2053 db_printf("%swaiting, %scan_sleep\n",
2d21ac55
A
2054 (lock->lck_r_waiting || lock->lck_w_waiting) ? "" : "!",
2055 lock->lck_rw_can_sleep ? "" : "!");
91447636 2056 db_printf("Interlock:\n");
2d21ac55 2057 db_show_one_simple_lock((db_expr_t) ((vm_offset_t)simple_lock_addr(lock->lck_rw_interlock)),
91447636
A
2058 TRUE, (db_expr_t)0, (char *)0);
2059}
2060
91447636
A
2061/*
2062 * Routines to print out simple_locks and mutexes in a nicely-formatted
2063 * fashion.
2064 */
2065
0c530ab8 2066const char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER";
91447636
A
2067
2068void
2069db_show_one_simple_lock (
2070 db_expr_t addr,
2071 boolean_t have_addr,
0c530ab8
A
2072 __unused db_expr_t count,
2073 __unused char * modif)
91447636 2074{
0c530ab8 2075 simple_lock_t saddr = (simple_lock_t) ((vm_offset_t) addr);
91447636
A
2076
2077 if (saddr == (simple_lock_t)0 || !have_addr) {
2078 db_error ("No simple_lock\n");
2079 }
2080#if USLOCK_DEBUG
2081 else if (saddr->lock_type != USLOCK_TAG)
2082 db_error ("Not a simple_lock\n");
2083#endif /* USLOCK_DEBUG */
2084
2085 db_printf ("%s\n", simple_lock_labels);
2086 db_print_simple_lock (saddr);
2087}
2088
2089void
2090db_print_simple_lock (
2091 simple_lock_t addr)
2092{
2093
2094 db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
2095#if USLOCK_DEBUG
2096 db_printf (" %08x", addr->debug.lock_thread);
2097 db_printf (" %08x ", addr->debug.duration[1]);
2098 db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
2099#endif /* USLOCK_DEBUG */
2100 db_printf ("\n");
2101}
2102
91447636 2103#endif /* MACH_KDB */