1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
64 #include <mach_kdb.h>
65 #include <mach_ldebug.h>
66
67 #include <kern/lock.h>
68 #include <kern/locks.h>
69 #include <kern/kalloc.h>
70 #include <kern/misc_protos.h>
71 #include <kern/thread.h>
72 #include <kern/processor.h>
73 #include <kern/cpu_data.h>
74 #include <kern/cpu_number.h>
75 #include <kern/sched_prim.h>
76 #include <kern/xpr.h>
77 #include <kern/debug.h>
78 #include <string.h>
79
80 #if MACH_KDB
81 #include <ddb/db_command.h>
82 #include <ddb/db_output.h>
83 #include <ddb/db_sym.h>
84 #include <ddb/db_print.h>
85 #endif /* MACH_KDB */
86 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
87 #include <machine/machine_cpu.h>
88 #include <i386/mp.h>
89
90 #include <sys/kdebug.h>
91
92 /*
93 * We need only enough declarations from the BSD-side to be able to
94 * test if our probe is active, and to call __dtrace_probe(). Setting
95 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
96 */
97 #if CONFIG_DTRACE
98 #define NEED_DTRACE_DEFS
99 #include <../bsd/sys/lockstat.h>
100 #endif
101
102 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
103 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
104 #define LCK_RW_LCK_SHARED_CODE 0x102
105 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
106 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
107 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
108
109 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
110 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
111 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
112 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
113 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
114 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
115 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
116 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
117
118
119 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
120
121 unsigned int LcksOpts=0;
122
123 /* Forwards */
124
125 #if MACH_KDB
126 void db_print_simple_lock(
127 simple_lock_t addr);
128 #endif /* MACH_KDB */
129
130
131 #if USLOCK_DEBUG
132 /*
133 * Perform simple lock checks.
134 */
135 int uslock_check = 1;
136 int max_lock_loops = 100000000;
137 decl_simple_lock_data(extern , printf_lock)
138 decl_simple_lock_data(extern , panic_lock)
139 #endif /* USLOCK_DEBUG */
140
141
142 /*
143 * We often want to know the addresses of the callers
144 * of the various lock routines. However, this information
145 * is only used for debugging and statistics.
146 */
147 typedef void *pc_t;
148 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
149 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
150 #if ANY_LOCK_DEBUG
151 #define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
152 #define DECL_PC(pc) pc_t pc;
153 #else /* ANY_LOCK_DEBUG */
154 #define DECL_PC(pc)
155 #ifdef lint
156 /*
157 * Eliminate lint complaints about unused local pc variables.
158 */
159 #define OBTAIN_PC(pc) ++pc
160 #else /* lint */
161 #define OBTAIN_PC(pc)
162 #endif /* lint */
163 #endif /* ANY_LOCK_DEBUG */
164
165
166 /*
167 * Portable lock package implementation of usimple_locks.
168 */
169
170 #if USLOCK_DEBUG
171 #define USLDBG(stmt) stmt
172 void usld_lock_init(usimple_lock_t, unsigned short);
173 void usld_lock_pre(usimple_lock_t, pc_t);
174 void usld_lock_post(usimple_lock_t, pc_t);
175 void usld_unlock(usimple_lock_t, pc_t);
176 void usld_lock_try_pre(usimple_lock_t, pc_t);
177 void usld_lock_try_post(usimple_lock_t, pc_t);
178 int usld_lock_common_checks(usimple_lock_t, char *);
179 #else /* USLOCK_DEBUG */
180 #define USLDBG(stmt)
181 #endif /* USLOCK_DEBUG */
182
183
184 extern int lck_rw_grab_want(lck_rw_t *lck);
185 extern int lck_rw_grab_shared(lck_rw_t *lck);
186 extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);
187
188
189 /*
190 * Forward definitions
191 */
192
193 void lck_rw_lock_shared_gen(
194 lck_rw_t *lck);
195
196 void lck_rw_lock_exclusive_gen(
197 lck_rw_t *lck);
198
199 boolean_t lck_rw_lock_shared_to_exclusive_success(
200 lck_rw_t *lck);
201
202 boolean_t lck_rw_lock_shared_to_exclusive_failure(
203 lck_rw_t *lck,
204 int prior_lock_state);
205
206 void lck_rw_lock_exclusive_to_shared_gen(
207 lck_rw_t *lck,
208 int prior_lock_state);
209
210 lck_rw_type_t lck_rw_done_gen(
211 lck_rw_t *lck,
212 int prior_lock_state);
213
214
215 /*
216 * Routine: lck_spin_alloc_init
217 */
218 lck_spin_t *
219 lck_spin_alloc_init(
220 lck_grp_t *grp,
221 lck_attr_t *attr)
222 {
223 lck_spin_t *lck;
224
225 if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
226 lck_spin_init(lck, grp, attr);
227
228 return(lck);
229 }
230
231 /*
232 * Routine: lck_spin_free
233 */
234 void
235 lck_spin_free(
236 lck_spin_t *lck,
237 lck_grp_t *grp)
238 {
239 lck_spin_destroy(lck, grp);
240 kfree(lck, sizeof(lck_spin_t));
241 }
242
243 /*
244 * Routine: lck_spin_init
245 */
246 void
247 lck_spin_init(
248 lck_spin_t *lck,
249 lck_grp_t *grp,
250 __unused lck_attr_t *attr)
251 {
252 usimple_lock_init((usimple_lock_t) lck, 0);
253 lck_grp_reference(grp);
254 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
255 }
256
257 /*
258 * Routine: lck_spin_destroy
259 */
260 void
261 lck_spin_destroy(
262 lck_spin_t *lck,
263 lck_grp_t *grp)
264 {
265 if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
266 return;
267 lck->interlock = LCK_SPIN_TAG_DESTROYED;
268 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
269 lck_grp_deallocate(grp);
270 return;
271 }
272
273 /*
274 * Routine: lck_spin_lock
275 */
276 void
277 lck_spin_lock(
278 lck_spin_t *lck)
279 {
280 usimple_lock((usimple_lock_t) lck);
281 }
282
283 /*
284 * Routine: lck_spin_unlock
285 */
286 void
287 lck_spin_unlock(
288 lck_spin_t *lck)
289 {
290 usimple_unlock((usimple_lock_t) lck);
291 }
292
293
294 /*
295 * Routine: lck_spin_try_lock
296 */
297 boolean_t
298 lck_spin_try_lock(
299 lck_spin_t *lck)
300 {
301 return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
302 }
303
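/*
 * Illustrative sketch: a minimal client of the lck_spin_* interface above,
 * assuming a lock group created with the standard kern/locks.h routines.
 * The group name "example" and the surrounding code are for illustration
 * only, not part of this module.
 *
 *	lck_grp_t	*grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
 *	lck_spin_t	*sl  = lck_spin_alloc_init(grp, LCK_ATTR_NULL);
 *
 *	lck_spin_lock(sl);		(returns with preemption disabled)
 *	...short, non-blocking critical section...
 *	lck_spin_unlock(sl);		(preemption re-enabled)
 *
 *	lck_spin_free(sl, grp);
 *	lck_grp_free(grp);
 */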
304 /*
305 * Initialize a usimple_lock.
306 *
307 * No change in preemption state.
308 */
309 void
310 usimple_lock_init(
311 usimple_lock_t l,
312 __unused unsigned short tag)
313 {
314 #ifndef MACHINE_SIMPLE_LOCK
315 USLDBG(usld_lock_init(l, tag));
316 hw_lock_init(&l->interlock);
317 #else
318 simple_lock_init((simple_lock_t)l,tag);
319 #endif
320 }
321
322 volatile uint32_t spinlock_owner_cpu = ~0;
323 volatile usimple_lock_t spinlock_timed_out;
324
325 static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
326 uint64_t deadline;
327 uint32_t i;
328
329 for (i = 0; i < real_ncpus; i++) {
330 if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
331 spinlock_owner_cpu = i;
332 if ((uint32_t)cpu_number() == i)
333 break;
334 cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
335 cpu_NMI_interrupt(i);
336 deadline = mach_absolute_time() + (LockTimeOut * 2);
337 while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
338 cpu_pause();
339 break;
340 }
341 }
342
343 return spinlock_owner_cpu;
344 }
345
346 /*
347 * Acquire a usimple_lock.
348 *
349 * Returns with preemption disabled. Note
350 * that the hw_lock routines are responsible for
351 * maintaining preemption state.
352 */
353 void
354 usimple_lock(
355 usimple_lock_t l)
356 {
357 #ifndef MACHINE_SIMPLE_LOCK
358 DECL_PC(pc);
359
360 OBTAIN_PC(pc);
361 USLDBG(usld_lock_pre(l, pc));
362 /* Try to get the lock with a timeout */
363 if(!hw_lock_to(&l->interlock, LockTimeOutTSC)) {
364 boolean_t uslock_acquired = FALSE;
365 while (machine_timeout_suspended()) {
366 enable_preemption();
367 if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
368 break;
369 }
370 if (uslock_acquired == FALSE) {
371 uint32_t lock_cpu;
372 spinlock_timed_out = l;
373 lock_cpu = spinlock_timeout_NMI((uintptr_t)l->interlock.lock_data);
374 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x", l, (uintptr_t)l->interlock.lock_data, current_thread(), lock_cpu);
375 }
376 }
377 USLDBG(usld_lock_post(l, pc));
378 #else
379 simple_lock((simple_lock_t)l);
380 #endif
381 }
382
383
384 /*
385 * Release a usimple_lock.
386 *
387 * Returns with preemption enabled. Note
388 * that the hw_lock routines are responsible for
389 * maintaining preemption state.
390 */
391 void
392 usimple_unlock(
393 usimple_lock_t l)
394 {
395 #ifndef MACHINE_SIMPLE_LOCK
396 DECL_PC(pc);
397
398 OBTAIN_PC(pc);
399 USLDBG(usld_unlock(l, pc));
400 hw_lock_unlock(&l->interlock);
401 #else
402 simple_unlock_rwmb((simple_lock_t)l);
403 #endif
404 }
405
406
407 /*
408 * Conditionally acquire a usimple_lock.
409 *
410 * On success, returns with preemption disabled.
411 * On failure, returns with preemption in the same state
412 * as when first invoked. Note that the hw_lock routines
413 * are responsible for maintaining preemption state.
414 *
415 * XXX No stats are gathered on a miss; I preserved this
416 * behavior from the original assembly-language code, but
417 * doesn't it make sense to log misses? XXX
418 */
419 unsigned int
420 usimple_lock_try(
421 usimple_lock_t l)
422 {
423 #ifndef MACHINE_SIMPLE_LOCK
424 unsigned int success;
425 DECL_PC(pc);
426
427 OBTAIN_PC(pc);
428 USLDBG(usld_lock_try_pre(l, pc));
429 if ((success = hw_lock_try(&l->interlock))) {
430 USLDBG(usld_lock_try_post(l, pc));
431 }
432 return success;
433 #else
434 return(simple_lock_try((simple_lock_t)l));
435 #endif
436 }
437
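/*
 * Illustrative sketch of the usual usimple_lock_try() pattern, following the
 * contract described above: on failure no lock is held and preemption is
 * unchanged.  do_work() and do_fallback() are placeholder names.
 *
 *	if (usimple_lock_try(l)) {
 *		...success: preemption is now disabled...
 *		do_work();
 *		usimple_unlock(l);
 *	} else {
 *		...failure: lock not held, preemption state unchanged...
 *		do_fallback();
 *	}
 */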
438 #if USLOCK_DEBUG
439 /*
440 * States of a usimple_lock. The default when initializing
441 * a usimple_lock is setting it up for debug checking.
442 */
443 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
444 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
445 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
446 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
447 #define USLOCK_CHECKING(l) (uslock_check && \
448 ((l)->debug.state & USLOCK_CHECKED))
449
450 /*
451 * Trace activities of a particularly interesting lock.
452 */
453 void usl_trace(usimple_lock_t, int, pc_t, const char *);
454
455
456 /*
457 * Initialize the debugging information contained
458 * in a usimple_lock.
459 */
460 void
461 usld_lock_init(
462 usimple_lock_t l,
463 __unused unsigned short tag)
464 {
465 if (l == USIMPLE_LOCK_NULL)
466 panic("lock initialization: null lock pointer");
467 l->lock_type = USLOCK_TAG;
468 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
469 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
470 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
471 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
472 l->debug.duration[0] = l->debug.duration[1] = 0;
473 l->debug.unlock_cpu = 0;
474 l->debug.unlock_pc = INVALID_PC;
475 l->debug.unlock_thread = INVALID_THREAD;
476 }
477
478
479 /*
480 * These checks apply to all usimple_locks, not just
481 * those with USLOCK_CHECKED turned on.
482 */
483 int
484 usld_lock_common_checks(
485 usimple_lock_t l,
486 char *caller)
487 {
488 if (l == USIMPLE_LOCK_NULL)
489 panic("%s: null lock pointer", caller);
490 if (l->lock_type != USLOCK_TAG)
491 panic("%s: 0x%p is not a usimple lock", caller, l);
492 if (!(l->debug.state & USLOCK_INIT))
493 panic("%s: %p is not an initialized lock",
494 caller, l);
495 return USLOCK_CHECKING(l);
496 }
497
498
499 /*
500 * Debug checks on a usimple_lock just before attempting
501 * to acquire it.
502 */
503 /* ARGSUSED */
504 void
505 usld_lock_pre(
506 usimple_lock_t l,
507 pc_t pc)
508 {
509 char caller[] = "usimple_lock";
510
511
512 if (!usld_lock_common_checks(l, caller))
513 return;
514
515 /*
516 * Note that we have a weird case where we are getting a lock when we are
517 * in the process of putting the system to sleep. We are running with no
518 * current threads, therefore we can't tell whether we are trying to retake a lock
519 * we already hold or whether another processor holds it. Therefore we just
520 * ignore this test if the locking thread is 0.
521 */
522
523 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
524 l->debug.lock_thread == (void *) current_thread()) {
525 printf("%s: lock %p already locked (at %p) by",
526 caller, l, l->debug.lock_pc);
527 printf(" current thread %p (new attempt at pc %p)\n",
528 l->debug.lock_thread, pc);
529 panic("%s", caller);
530 }
531 mp_disable_preemption();
532 usl_trace(l, cpu_number(), pc, caller);
533 mp_enable_preemption();
534 }
535
536
537 /*
538 * Debug checks on a usimple_lock just after acquiring it.
539 *
540 * Pre-emption has been disabled at this point,
541 * so we are safe in using cpu_number.
542 */
543 void
544 usld_lock_post(
545 usimple_lock_t l,
546 pc_t pc)
547 {
548 register int mycpu;
549 char caller[] = "successful usimple_lock";
550
551
552 if (!usld_lock_common_checks(l, caller))
553 return;
554
555 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
556 panic("%s: lock %p became uninitialized",
557 caller, l);
558 if ((l->debug.state & USLOCK_TAKEN))
559 panic("%s: lock 0x%p became TAKEN by someone else",
560 caller, l);
561
562 mycpu = cpu_number();
563 l->debug.lock_thread = (void *)current_thread();
564 l->debug.state |= USLOCK_TAKEN;
565 l->debug.lock_pc = pc;
566 l->debug.lock_cpu = mycpu;
567
568 usl_trace(l, mycpu, pc, caller);
569 }
570
571
572 /*
573 * Debug checks on a usimple_lock just before
574 * releasing it. Note that the caller has not
575 * yet released the hardware lock.
576 *
577 * Preemption is still disabled, so there's
578 * no problem using cpu_number.
579 */
580 void
581 usld_unlock(
582 usimple_lock_t l,
583 pc_t pc)
584 {
585 register int mycpu;
586 char caller[] = "usimple_unlock";
587
588
589 if (!usld_lock_common_checks(l, caller))
590 return;
591
592 mycpu = cpu_number();
593
594 if (!(l->debug.state & USLOCK_TAKEN))
595 panic("%s: lock 0x%p hasn't been taken",
596 caller, l);
597 if (l->debug.lock_thread != (void *) current_thread())
598 panic("%s: unlocking lock 0x%p, owned by thread %p",
599 caller, l, l->debug.lock_thread);
600 if (l->debug.lock_cpu != mycpu) {
601 printf("%s: unlocking lock 0x%p on cpu 0x%x",
602 caller, l, mycpu);
603 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
604 panic("%s", caller);
605 }
606 usl_trace(l, mycpu, pc, caller);
607
608 l->debug.unlock_thread = l->debug.lock_thread;
609 l->debug.lock_thread = INVALID_THREAD;
610 l->debug.state &= ~USLOCK_TAKEN;
611 l->debug.unlock_pc = pc;
612 l->debug.unlock_cpu = mycpu;
613 }
614
615
616 /*
617 * Debug checks on a usimple_lock just before
618 * attempting to acquire it.
619 *
620 * Preemption isn't guaranteed to be disabled.
621 */
622 void
623 usld_lock_try_pre(
624 usimple_lock_t l,
625 pc_t pc)
626 {
627 char caller[] = "usimple_lock_try";
628
629 if (!usld_lock_common_checks(l, caller))
630 return;
631 mp_disable_preemption();
632 usl_trace(l, cpu_number(), pc, caller);
633 mp_enable_preemption();
634 }
635
636
637 /*
638 * Debug checks on a usimple_lock just after
639 * successfully attempting to acquire it.
640 *
641 * Preemption has been disabled by the
642 * lock acquisition attempt, so it's safe
643 * to use cpu_number.
644 */
645 void
646 usld_lock_try_post(
647 usimple_lock_t l,
648 pc_t pc)
649 {
650 register int mycpu;
651 char caller[] = "successful usimple_lock_try";
652
653 if (!usld_lock_common_checks(l, caller))
654 return;
655
656 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
657 panic("%s: lock 0x%p became uninitialized",
658 caller, l);
659 if ((l->debug.state & USLOCK_TAKEN))
660 panic("%s: lock 0x%p became TAKEN by someone else",
661 caller, l);
662
663 mycpu = cpu_number();
664 l->debug.lock_thread = (void *) current_thread();
665 l->debug.state |= USLOCK_TAKEN;
666 l->debug.lock_pc = pc;
667 l->debug.lock_cpu = mycpu;
668
669 usl_trace(l, mycpu, pc, caller);
670 }
671
672
673 /*
674 * For very special cases, set traced_lock to point to a
675 * specific lock of interest. The result is a series of
676 * XPRs showing lock operations on that lock. The lock_seq
677 * value is used to show the order of those operations.
678 */
679 usimple_lock_t traced_lock;
680 unsigned int lock_seq;
681
682 void
683 usl_trace(
684 usimple_lock_t l,
685 int mycpu,
686 pc_t pc,
687 const char * op_name)
688 {
689 if (traced_lock == l) {
690 XPR(XPR_SLOCK,
691 "seq %d, cpu %d, %s @ %x\n",
692 (uintptr_t) lock_seq, (uintptr_t) mycpu,
693 (uintptr_t) op_name, (uintptr_t) pc, 0);
694 lock_seq++;
695 }
696 }
697
698
699 #endif /* USLOCK_DEBUG */
700
701 /*
702 * Routine: lock_alloc
703 * Function:
704 * Allocate a lock for external users who cannot
705 * hard-code the structure definition into their
706 * objects.
707 * For now just use kalloc, but a zone is probably
708 * warranted.
709 */
710 lock_t *
711 lock_alloc(
712 boolean_t can_sleep,
713 unsigned short tag,
714 unsigned short tag1)
715 {
716 lock_t *l;
717
718 if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
719 lock_init(l, can_sleep, tag, tag1);
720 return(l);
721 }
722
723 /*
724 * Routine: lock_free
725 * Function:
726 * Free a lock allocated for external users.
727 * For now just use kfree, but a zone is probably
728 * warranted.
729 */
730 void
731 lock_free(
732 lock_t *l)
733 {
734 kfree(l, sizeof(lock_t));
735 }
736
737
738 /*
739 * Routine: lock_init
740 * Function:
741 * Initialize a lock; required before use.
742 * Note that clients declare the "struct lock"
743 * variables and then initialize them, rather
744 * than getting a new one from this module.
745 */
746 void
747 lock_init(
748 lock_t *l,
749 boolean_t can_sleep,
750 __unused unsigned short tag,
751 __unused unsigned short tag1)
752 {
753 hw_lock_byte_init(&l->lck_rw_interlock);
754 l->lck_rw_want_write = FALSE;
755 l->lck_rw_want_upgrade = FALSE;
756 l->lck_rw_shared_count = 0;
757 l->lck_rw_can_sleep = can_sleep;
758 l->lck_rw_tag = tag;
759 l->lck_rw_priv_excl = 1;
760 l->lck_r_waiting = l->lck_w_waiting = 0;
761 }
762
763
764 /*
765 * Sleep locks. These use the same data structure and algorithm
766 * as the spin locks, but the process sleeps while it is waiting
767 * for the lock. These work on uniprocessor systems.
768 */
769
770 #define DECREMENTER_TIMEOUT 1000000
771
772 void
773 lock_write(
774 register lock_t * l)
775 {
776 lck_rw_lock_exclusive(l);
777 }
778
779 void
780 lock_done(
781 register lock_t * l)
782 {
783 (void) lck_rw_done(l);
784 }
785
786 void
787 lock_read(
788 register lock_t * l)
789 {
790 lck_rw_lock_shared(l);
791 }
792
793
794 /*
795 * Routine: lock_read_to_write
796 * Function:
797 * Improves a read-only lock to one with
798 * write permission. If another reader has
799 * already requested an upgrade to a write lock,
800 * no lock is held upon return.
801 *
802 * Returns FALSE if the upgrade *failed*.
803 */
804
805 boolean_t
806 lock_read_to_write(
807 register lock_t * l)
808 {
809 return lck_rw_lock_shared_to_exclusive(l);
810 }
811
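/*
 * Illustrative sketch of the upgrade contract described above: when
 * lock_read_to_write() returns FALSE the lock has been dropped entirely, so
 * the caller must re-acquire it from scratch rather than assume a read hold.
 *
 *	lock_read(l);
 *	if (!lock_read_to_write(l)) {
 *		...upgrade lost the race; no lock is held here...
 *		lock_write(l);
 *	}
 *	...exclusive access...
 *	lock_done(l);
 */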
812 void
813 lock_write_to_read(
814 register lock_t * l)
815 {
816 lck_rw_lock_exclusive_to_shared(l);
817 }
818
819
820
821 /*
822 * Routine: lck_rw_alloc_init
823 */
824 lck_rw_t *
825 lck_rw_alloc_init(
826 lck_grp_t *grp,
827 lck_attr_t *attr) {
828 lck_rw_t *lck;
829
830 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
831 bzero(lck, sizeof(lck_rw_t));
832 lck_rw_init(lck, grp, attr);
833 }
834
835 return(lck);
836 }
837
838 /*
839 * Routine: lck_rw_free
840 */
841 void
842 lck_rw_free(
843 lck_rw_t *lck,
844 lck_grp_t *grp) {
845 lck_rw_destroy(lck, grp);
846 kfree(lck, sizeof(lck_rw_t));
847 }
848
849 /*
850 * Routine: lck_rw_init
851 */
852 void
853 lck_rw_init(
854 lck_rw_t *lck,
855 lck_grp_t *grp,
856 lck_attr_t *attr)
857 {
858 lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
859 attr : &LockDefaultLckAttr;
860
861 hw_lock_byte_init(&lck->lck_rw_interlock);
862 lck->lck_rw_want_write = FALSE;
863 lck->lck_rw_want_upgrade = FALSE;
864 lck->lck_rw_shared_count = 0;
865 lck->lck_rw_can_sleep = TRUE;
866 lck->lck_r_waiting = lck->lck_w_waiting = 0;
867 lck->lck_rw_tag = 0;
868 lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
869 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
870
871 lck_grp_reference(grp);
872 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
873 }
874
875 /*
876 * Routine: lck_rw_destroy
877 */
878 void
879 lck_rw_destroy(
880 lck_rw_t *lck,
881 lck_grp_t *grp)
882 {
883 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
884 return;
885 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
886 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
887 lck_grp_deallocate(grp);
888 return;
889 }
890
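/*
 * Illustrative sketch: a minimal lck_rw_t client built on the routines above.
 * The group name "example" is for illustration only.
 *
 *	lck_grp_t	*grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
 *	lck_rw_t	*rw  = lck_rw_alloc_init(grp, LCK_ATTR_NULL);
 *
 *	lck_rw_lock_shared(rw);		(many readers may hold this at once)
 *	...read-only access...
 *	lck_rw_unlock_shared(rw);
 *
 *	lck_rw_lock_exclusive(rw);	(single writer)
 *	...read/write access...
 *	lck_rw_unlock_exclusive(rw);
 *
 *	lck_rw_free(rw, grp);
 *	lck_grp_free(grp);
 */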
891 /*
892 * Sleep locks. These use the same data structure and algorithm
893 * as the spin locks, but the process sleeps while it is waiting
894 * for the lock. These work on uniprocessor systems.
895 */
896
897 #define DECREMENTER_TIMEOUT 1000000
898
899 #define RW_LOCK_READER_EVENT(x) \
900 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))
901
902 #define RW_LOCK_WRITER_EVENT(x) \
903 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
904
905 /*
906 * We need to disable interrupts while holding the mutex interlock
907 * to prevent an IPI intervening.
908 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
909 */
910 static boolean_t
911 lck_interlock_lock(lck_rw_t *lck)
912 {
913 boolean_t istate;
914
915 istate = ml_set_interrupts_enabled(FALSE);
916 hw_lock_byte_lock(&lck->lck_rw_interlock);
917
918 return istate;
919 }
920
921 static void
922 lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
923 {
924 hw_lock_byte_unlock(&lck->lck_rw_interlock);
925 ml_set_interrupts_enabled(istate);
926 }
927
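/*
 * Illustrative sketch of how the two helpers above are paired by the slow
 * paths in this file when examining or updating lck_rw_t state.
 *
 *	boolean_t istate;
 *
 *	istate = lck_interlock_lock(lck);	(interrupts off, interlock held)
 *	...inspect or modify the lock word and wait bits...
 *	lck_interlock_unlock(lck, istate);	(restores prior interrupt state)
 */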
928 /*
929 * This inline is used when busy-waiting for an rw lock.
930 * If interrupts were disabled when the lock primitive was called,
931 * we poll the IPI handler for pending tlb flushes.
932 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
933 */
934 static inline void
935 lck_rw_lock_pause(boolean_t interrupts_enabled)
936 {
937 if (!interrupts_enabled)
938 handle_pending_TLB_flushes();
939 cpu_pause();
940 }
941
942
943 /*
944 * compute the deadline to spin against when
945 * waiting for a change of state on a lck_rw_t
946 */
947 static inline uint64_t
948 lck_rw_deadline_for_spin(lck_rw_t *lck)
949 {
950 if (lck->lck_rw_can_sleep) {
951 if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
952 /*
953 * there are already threads waiting on this lock... this
954 * implies that they have spun beyond their deadlines waiting for
955 * the desired state to show up so we will not bother spinning at this time...
956 * or
957 * the current number of threads sharing this lock exceeds our capacity to run them
958 * concurrently and since all states we're going to spin for require the rw_shared_count
959 * to be at 0, we'll not bother spinning since the latency for this to happen is
960 * unpredictable...
961 */
962 return (mach_absolute_time());
963 }
964 return (mach_absolute_time() + MutexSpin);
965 } else
966 return (mach_absolute_time() + (100000LL * 1000000000LL));
967 }
968
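/*
 * Illustrative sketch of how the deadline computed above is consumed by the
 * slow paths below: spin for the desired state until either it shows up or
 * the deadline passes, then fall back to blocking if the lock can sleep.
 *
 *	deadline = lck_rw_deadline_for_spin(lck);
 *
 *	while (((gotlock = lck_rw_grab_want(lck)) == 0) &&
 *	       mach_absolute_time() < deadline)
 *		lck_rw_lock_pause(istate);
 *
 *	if (!gotlock && lck->lck_rw_can_sleep)
 *		...take the interlock and block (see lck_rw_lock_exclusive_gen)...
 */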
969
970 /*
971 * Routine: lck_rw_lock_exclusive_gen
972 */
973 void
974 lck_rw_lock_exclusive_gen(
975 lck_rw_t *lck)
976 {
977 uint64_t deadline = 0;
978 int slept = 0;
979 int gotlock = 0;
980 int lockheld = 0;
981 wait_result_t res = 0;
982 boolean_t istate = -1;
983
984 #if CONFIG_DTRACE
985 boolean_t dtrace_ls_initialized = FALSE;
986 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
987 uint64_t wait_interval = 0;
988 int readers_at_sleep = 0;
989 #endif
990
991 /*
992 * Try to acquire the lck_rw_want_write bit.
993 */
994 while ( !lck_rw_grab_want(lck)) {
995
996 #if CONFIG_DTRACE
997 if (dtrace_ls_initialized == FALSE) {
998 dtrace_ls_initialized = TRUE;
999 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1000 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1001 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1002 if (dtrace_ls_enabled) {
1003 /*
1004 * Either sleeping or spinning is happening,
1005 * start a timing of our delay interval now.
1006 */
1007 readers_at_sleep = lck->lck_rw_shared_count;
1008 wait_interval = mach_absolute_time();
1009 }
1010 }
1011 #endif
1012 if (istate == -1)
1013 istate = ml_get_interrupts_enabled();
1014
1015 deadline = lck_rw_deadline_for_spin(lck);
1016
1017 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1018
1019 while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
1020 lck_rw_lock_pause(istate);
1021
1022 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);
1023
1024 if (gotlock)
1025 break;
1026 /*
1027 * if we get here, the deadline has expired w/o us
1028 * being able to grab the lock exclusively
1029 * check to see if we're allowed to do a thread_block
1030 */
1031 if (lck->lck_rw_can_sleep) {
1032
1033 istate = lck_interlock_lock(lck);
1034
1035 if (lck->lck_rw_want_write) {
1036
1037 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1038
1039 lck->lck_w_waiting = TRUE;
1040
1041 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1042 lck_interlock_unlock(lck, istate);
1043
1044 if (res == THREAD_WAITING) {
1045 res = thread_block(THREAD_CONTINUE_NULL);
1046 slept++;
1047 }
1048 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
1049 } else {
1050 lck->lck_rw_want_write = TRUE;
1051 lck_interlock_unlock(lck, istate);
1052 break;
1053 }
1054 }
1055 }
1056 /*
1057 * Wait for readers (and upgrades) to finish...
1058 * the test for these conditions must be done simultaneously with
1059 * a check of the interlock not being held since
1060 * the rw_shared_count will drop to 0 first and then want_upgrade
1061 * will be set to 1 in the shared_to_exclusive scenario... those
1062 * adjustments are done behind the interlock and represent an
1063 * atomic change in state and must be considered as such
1064 * however, once we see the read count at 0, the want_upgrade not set
1065 * and the interlock not held, we are safe to proceed
1066 */
1067 while (lck_rw_held_read_or_upgrade(lck)) {
1068
1069 #if CONFIG_DTRACE
1070 /*
1071 * Either sleeping or spinning is happening, start
1072 * a timing of our delay interval now. If we set it
1073 * to -1 we don't have accurate data so we cannot later
1074 * decide to record a dtrace spin or sleep event.
1075 */
1076 if (dtrace_ls_initialized == FALSE) {
1077 dtrace_ls_initialized = TRUE;
1078 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1079 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1080 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1081 if (dtrace_ls_enabled) {
1082 /*
1083 * Either sleeping or spinning is happening,
1084 * start a timing of our delay interval now.
1085 */
1086 readers_at_sleep = lck->lck_rw_shared_count;
1087 wait_interval = mach_absolute_time();
1088 }
1089 }
1090 #endif
1091 if (istate == -1)
1092 istate = ml_get_interrupts_enabled();
1093
1094 deadline = lck_rw_deadline_for_spin(lck);
1095
1096 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1097
1098 while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
1099 lck_rw_lock_pause(istate);
1100
1101 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);
1102
1103 if ( !lockheld)
1104 break;
1105 /*
1106 * if we get here, the deadline has expired w/o us
1107 * being able to grab the lock exclusively
1108 * check to see if we're allowed to do a thread_block
1109 */
1110 if (lck->lck_rw_can_sleep) {
1111
1112 istate = lck_interlock_lock(lck);
1113
1114 if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
1115 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1116
1117 lck->lck_w_waiting = TRUE;
1118
1119 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1120 lck_interlock_unlock(lck, istate);
1121
1122 if (res == THREAD_WAITING) {
1123 res = thread_block(THREAD_CONTINUE_NULL);
1124 slept++;
1125 }
1126 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
1127 } else {
1128 lck_interlock_unlock(lck, istate);
1129 /*
1130 * must own the lock now, since we checked for
1131 * readers or upgrade owner behind the interlock
1132 * no need for a call to 'lck_rw_held_read_or_upgrade'
1133 */
1134 break;
1135 }
1136 }
1137 }
1138
1139 #if CONFIG_DTRACE
1140 /*
1141 * Decide what latencies we suffered that are Dtrace events.
1142 * If we have set wait_interval, then we either spun or slept.
1143 * At least we get out from under the interlock before we record,
1144 * which is the best we can do here to minimize the impact
1145 * of the tracing.
1146 * If we have set wait_interval to -1, then dtrace was not enabled when we
1147 * started sleeping/spinning so we don't record this event.
1148 */
1149 if (dtrace_ls_enabled == TRUE) {
1150 if (slept == 0) {
1151 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
1152 mach_absolute_time() - wait_interval, 1);
1153 } else {
1154 /*
1155 * For the blocking case, we also record whether, when we blocked,
1156 * it was held for read or write, and how many readers there were.
1157 * Notice that above we recorded this before we dropped
1158 * the interlock so the count is accurate.
1159 */
1160 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
1161 mach_absolute_time() - wait_interval, 1,
1162 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1163 }
1164 }
1165 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1166 #endif
1167 }
1168
1169
1170 /*
1171 * Routine: lck_rw_done_gen
1172 *
1173 * called from the assembly language wrapper...
1174 * prior_lock_state is the value in the 1st
1175 * word of the lock at the time of a successful
1176 * atomic compare and exchange with the new value...
1177 * it represents the state of the lock before we
1178 * decremented the rw_shared_count or cleared either
1179 * rw_want_upgrade or rw_want_write and
1180 * the lck_x_waiting bits... since the wrapper
1181 * routine has already changed the state atomically,
1182 * we just need to decide if we should
1183 * wake up anyone and what value to return... we do
1184 * this by examining the state of the lock before
1185 * we changed it
1186 */
1187 lck_rw_type_t
1188 lck_rw_done_gen(
1189 lck_rw_t *lck,
1190 int prior_lock_state)
1191 {
1192 lck_rw_t *fake_lck;
1193 lck_rw_type_t lock_type;
1194
1195 /*
1196 * prior_lock_state is a snapshot of the 1st word of the
1197 * lock in question... we'll fake up a pointer to it
1198 * and carefully not access anything beyond what's defined
1199 * in the first word of a lck_rw_t
1200 */
1201 fake_lck = (lck_rw_t *)&prior_lock_state;
1202
1203 if (fake_lck->lck_rw_shared_count <= 1) {
1204 if (fake_lck->lck_w_waiting)
1205 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1206
1207 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1208 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1209 }
1210 if (fake_lck->lck_rw_shared_count)
1211 lock_type = LCK_RW_TYPE_SHARED;
1212 else
1213 lock_type = LCK_RW_TYPE_EXCLUSIVE;
1214
1215 #if CONFIG_DTRACE
1216 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
1217 #endif
1218
1219 return(lock_type);
1220 }
1221
1222
1223 /*
1224 * Routine: lck_rw_unlock
1225 */
1226 void
1227 lck_rw_unlock(
1228 lck_rw_t *lck,
1229 lck_rw_type_t lck_rw_type)
1230 {
1231 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1232 lck_rw_unlock_shared(lck);
1233 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1234 lck_rw_unlock_exclusive(lck);
1235 else
1236 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
1237 }
1238
1239
1240 /*
1241 * Routine: lck_rw_unlock_shared
1242 */
1243 void
1244 lck_rw_unlock_shared(
1245 lck_rw_t *lck)
1246 {
1247 lck_rw_type_t ret;
1248
1249 ret = lck_rw_done(lck);
1250
1251 if (ret != LCK_RW_TYPE_SHARED)
1252 panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
1253 }
1254
1255
1256 /*
1257 * Routine: lck_rw_unlock_exclusive
1258 */
1259 void
1260 lck_rw_unlock_exclusive(
1261 lck_rw_t *lck)
1262 {
1263 lck_rw_type_t ret;
1264
1265 ret = lck_rw_done(lck);
1266
1267 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1268 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
1269 }
1270
1271
1272 /*
1273 * Routine: lck_rw_lock
1274 */
1275 void
1276 lck_rw_lock(
1277 lck_rw_t *lck,
1278 lck_rw_type_t lck_rw_type)
1279 {
1280 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1281 lck_rw_lock_shared(lck);
1282 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1283 lck_rw_lock_exclusive(lck);
1284 else
1285 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
1286 }
1287
1288
1289 /*
1290 * Routine: lck_rw_lock_shared_gen
1291 * Function:
1292 * assembly fast path code has determined that this lock
1293 * is held exclusively... this is where we spin/block
1294 * until we can acquire the lock in the shared mode
1295 */
1296 void
1297 lck_rw_lock_shared_gen(
1298 lck_rw_t *lck)
1299 {
1300 uint64_t deadline = 0;
1301 int gotlock = 0;
1302 int slept = 0;
1303 wait_result_t res = 0;
1304 boolean_t istate = -1;
1305
1306 #if CONFIG_DTRACE
1307 uint64_t wait_interval = 0;
1308 int readers_at_sleep = 0;
1309 boolean_t dtrace_ls_initialized = FALSE;
1310 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1311 #endif
1312
1313 while ( !lck_rw_grab_shared(lck)) {
1314
1315 #if CONFIG_DTRACE
1316 if (dtrace_ls_initialized == FALSE) {
1317 dtrace_ls_initialized = TRUE;
1318 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1319 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1320 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1321 if (dtrace_ls_enabled) {
1322 /*
1323 * Either sleeping or spinning is happening,
1324 * start a timing of our delay interval now.
1325 */
1326 readers_at_sleep = lck->lck_rw_shared_count;
1327 wait_interval = mach_absolute_time();
1328 }
1329 }
1330 #endif
1331 if (istate == -1)
1332 istate = ml_get_interrupts_enabled();
1333
1334 deadline = lck_rw_deadline_for_spin(lck);
1335
1336 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1337 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1338
1339 while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
1340 lck_rw_lock_pause(istate);
1341
1342 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1343 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
1344
1345 if (gotlock)
1346 break;
1347 /*
1348 * if we get here, the deadline has expired w/o us
1349 * being able to grab the lock for read
1350 * check to see if we're allowed to do a thread_block
1351 */
1352 if (lck->lck_rw_can_sleep) {
1353
1354 istate = lck_interlock_lock(lck);
1355
1356 if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1357 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
1358
1359 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1360 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1361
1362 lck->lck_r_waiting = TRUE;
1363
1364 res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
1365 lck_interlock_unlock(lck, istate);
1366
1367 if (res == THREAD_WAITING) {
1368 res = thread_block(THREAD_CONTINUE_NULL);
1369 slept++;
1370 }
1371 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1372 (int)lck, res, slept, 0, 0);
1373 } else {
1374 lck->lck_rw_shared_count++;
1375 lck_interlock_unlock(lck, istate);
1376 break;
1377 }
1378 }
1379 }
1380
1381 #if CONFIG_DTRACE
1382 if (dtrace_ls_enabled == TRUE) {
1383 if (slept == 0) {
1384 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1385 } else {
1386 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1387 mach_absolute_time() - wait_interval, 0,
1388 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1389 }
1390 }
1391 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1392 #endif
1393 }
1394
1395
1396 /*
1397 * Routine: lck_rw_lock_shared_to_exclusive_failure
1398 * Function:
1399 * assembly fast path code has already dropped our read
1400 * count and determined that someone else owns 'lck_rw_want_upgrade'
1401 * if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
1402 * all we need to do here is determine if a wakeup is needed
1403 */
1404 boolean_t
1405 lck_rw_lock_shared_to_exclusive_failure(
1406 lck_rw_t *lck,
1407 int prior_lock_state)
1408 {
1409 lck_rw_t *fake_lck;
1410
1411 /*
1412 * prior_lock_state is a snapshot of the 1st word of the
1413 * lock in question... we'll fake up a pointer to it
1414 * and carefully not access anything beyond what's defined
1415 * in the first word of a lck_rw_t
1416 */
1417 fake_lck = (lck_rw_t *)&prior_lock_state;
1418
1419 if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
1420 /*
1421 * Someone else has requested upgrade.
1422 * Since we've released the read lock, wake
1423 * him up if he's blocked waiting
1424 */
1425 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1426 }
1427 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1428 (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1429
1430 return (FALSE);
1431 }
1432
1433
1434 /*
1435 * Routine: lck_rw_lock_shared_to_exclusive_success
1436 * Function:
1437 * assembly fast path code has already dropped our read
1438 * count and successfully acquired 'lck_rw_want_upgrade'
1439 * we just need to wait for the rest of the readers to drain
1440 * and then we can return as the exclusive holder of this lock
1441 */
1442 boolean_t
1443 lck_rw_lock_shared_to_exclusive_success(
1444 lck_rw_t *lck)
1445 {
1446 uint64_t deadline = 0;
1447 int slept = 0;
1448 int still_shared = 0;
1449 wait_result_t res;
1450 boolean_t istate = -1;
1451
1452 #if CONFIG_DTRACE
1453 uint64_t wait_interval = 0;
1454 int readers_at_sleep = 0;
1455 boolean_t dtrace_ls_initialized = FALSE;
1456 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1457 #endif
1458
1459 while (lck->lck_rw_shared_count != 0) {
1460
1461 #if CONFIG_DTRACE
1462 if (dtrace_ls_initialized == FALSE) {
1463 dtrace_ls_initialized = TRUE;
1464 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1465 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1466 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1467 if (dtrace_ls_enabled) {
1468 /*
1469 * Either sleeping or spinning is happening,
1470 * start a timing of our delay interval now.
1471 */
1472 readers_at_sleep = lck->lck_rw_shared_count;
1473 wait_interval = mach_absolute_time();
1474 }
1475 }
1476 #endif
1477 if (istate == -1)
1478 istate = ml_get_interrupts_enabled();
1479
1480 deadline = lck_rw_deadline_for_spin(lck);
1481
1482 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1483 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1484
1485 while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
1486 lck_rw_lock_pause(istate);
1487
1488 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1489 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1490
1491 if ( !still_shared)
1492 break;
1493 /*
1494 * if we get here, the deadline has expired w/o
1495 * the rw_shared_count having drained to 0
1496 * check to see if we're allowed to do a thread_block
1497 */
1498 if (lck->lck_rw_can_sleep) {
1499
1500 istate = lck_interlock_lock(lck);
1501
1502 if (lck->lck_rw_shared_count != 0) {
1503 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1504 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1505
1506 lck->lck_w_waiting = TRUE;
1507
1508 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1509 lck_interlock_unlock(lck, istate);
1510
1511 if (res == THREAD_WAITING) {
1512 res = thread_block(THREAD_CONTINUE_NULL);
1513 slept++;
1514 }
1515 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1516 (int)lck, res, slept, 0, 0);
1517 } else {
1518 lck_interlock_unlock(lck, istate);
1519 break;
1520 }
1521 }
1522 }
1523 #if CONFIG_DTRACE
1524 /*
1525 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1526 */
1527 if (dtrace_ls_enabled == TRUE) {
1528 if (slept == 0) {
1529 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1530 } else {
1531 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
1532 mach_absolute_time() - wait_interval, 1,
1533 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1534 }
1535 }
1536 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1537 #endif
1538 return (TRUE);
1539 }
1540
1541
1542 /*
1543 * Routine: lck_rw_lock_exclusive_to_shared_gen
1544 * Function:
1545 * assembly fast path has already dropped
1546 * our exclusive state and bumped lck_rw_shared_count
1547 * all we need to do here is determine if anyone
1548 * needs to be awakened.
1549 */
1550 void
1551 lck_rw_lock_exclusive_to_shared_gen(
1552 lck_rw_t *lck,
1553 int prior_lock_state)
1554 {
1555 lck_rw_t *fake_lck;
1556
1557 /*
1558 * prior_lock_state is a snapshot of the 1st word of the
1559 * lock in question... we'll fake up a pointer to it
1560 * and carefully not access anything beyond what's defined
1561 * in the first word of a lck_rw_t
1562 */
1563 fake_lck = (lck_rw_t *)&prior_lock_state;
1564
1565 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1566 (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
1567
1568 /*
1569 * don't wake up anyone waiting to take the lock exclusively
1570 * since we hold a read count... when the read count drops to 0,
1571 * the writers will be woken.
1572 *
1573 * wake up any waiting readers if we don't have any writers waiting,
1574 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1575 */
1576 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1577 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1578
1579 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1580 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1581
1582 #if CONFIG_DTRACE
1583 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1584 #endif
1585 }
1586
1587
1588 /*
1589 * Routine: lck_rw_try_lock
1590 */
1591 boolean_t
1592 lck_rw_try_lock(
1593 lck_rw_t *lck,
1594 lck_rw_type_t lck_rw_type)
1595 {
1596 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1597 return(lck_rw_try_lock_shared(lck));
1598 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1599 return(lck_rw_try_lock_exclusive(lck));
1600 else
1601 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
1602 return(FALSE);
1603 }
1604
1605
1606 void
1607 lck_rw_assert(
1608 lck_rw_t *lck,
1609 unsigned int type)
1610 {
1611 switch (type) {
1612 case LCK_RW_ASSERT_SHARED:
1613 if (lck->lck_rw_shared_count != 0) {
1614 return;
1615 }
1616 break;
1617 case LCK_RW_ASSERT_EXCLUSIVE:
1618 if ((lck->lck_rw_want_write ||
1619 lck->lck_rw_want_upgrade) &&
1620 lck->lck_rw_shared_count == 0) {
1621 return;
1622 }
1623 break;
1624 case LCK_RW_ASSERT_HELD:
1625 if (lck->lck_rw_want_write ||
1626 lck->lck_rw_want_upgrade ||
1627 lck->lck_rw_shared_count != 0) {
1628 return;
1629 }
1630 break;
1631 default:
1632 break;
1633 }
1634
1635 panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
1636 }
1637
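/*
 * Illustrative sketch: typical lck_rw_assert() checks in code that depends on
 * a caller-held rw lock (the panic above fires when the assertion fails).
 *
 *	lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);	(at least one read hold)
 *	lck_rw_assert(lck, LCK_RW_ASSERT_EXCLUSIVE);	(write or upgrade hold, no readers)
 *	lck_rw_assert(lck, LCK_RW_ASSERT_HELD);		(held in either mode)
 */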
1638 /*
1639 * Routine: lck_mtx_alloc_init
1640 */
1641 lck_mtx_t *
1642 lck_mtx_alloc_init(
1643 lck_grp_t *grp,
1644 lck_attr_t *attr)
1645 {
1646 lck_mtx_t *lck;
1647
1648 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
1649 lck_mtx_init(lck, grp, attr);
1650
1651 return(lck);
1652 }
1653
1654 /*
1655 * Routine: lck_mtx_free
1656 */
1657 void
1658 lck_mtx_free(
1659 lck_mtx_t *lck,
1660 lck_grp_t *grp)
1661 {
1662 lck_mtx_destroy(lck, grp);
1663 kfree(lck, sizeof(lck_mtx_t));
1664 }
1665
1666 /*
1667 * Routine: lck_mtx_ext_init
1668 */
1669 static void
1670 lck_mtx_ext_init(
1671 lck_mtx_ext_t *lck,
1672 lck_grp_t *grp,
1673 lck_attr_t *attr)
1674 {
1675 bzero((void *)lck, sizeof(lck_mtx_ext_t));
1676
1677 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1678 lck->lck_mtx_deb.type = MUTEX_TAG;
1679 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
1680 }
1681
1682 lck->lck_mtx_grp = grp;
1683
1684 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
1685 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
1686
1687 lck->lck_mtx.lck_mtx_ptr = (void *)LCK_MTX_PTR_EXTENDED;
1688 }
1689
1690 /*
1691 * Routine: lck_mtx_init
1692 */
1693 void
1694 lck_mtx_init(
1695 lck_mtx_t *lck,
1696 lck_grp_t *grp,
1697 lck_attr_t *attr)
1698 {
1699 lck_mtx_ext_t *lck_ext;
1700 lck_attr_t *lck_attr;
1701
1702 if (attr != LCK_ATTR_NULL)
1703 lck_attr = attr;
1704 else
1705 lck_attr = &LockDefaultLckAttr;
1706
1707 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1708 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
1709 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1710 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1711 lck->lck_mtx_ptr = lck_ext;
1712 lck->lck_mtx_ilocked = 1;
1713 }
1714 } else {
1715 lck->lck_mtx_owner = 0;
1716 lck->lck_mtx_ptr = 0;
1717 lck->lck_mtx_waiters = 0;
1718 lck->lck_mtx_pri = 0;
1719 lck->lck_mtx_ilocked = 0;
1720 lck->lck_mtx_mlocked = 0;
1721 lck->lck_mtx_promoted = 0;
1722 lck->lck_mtx_spin = 0;
1723 }
1724 lck_grp_reference(grp);
1725 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1726 }
1727
1728 /*
1729 * Routine: lck_mtx_init_ext
1730 */
1731 void
1732 lck_mtx_init_ext(
1733 lck_mtx_t *lck,
1734 lck_mtx_ext_t *lck_ext,
1735 lck_grp_t *grp,
1736 lck_attr_t *attr)
1737 {
1738 lck_attr_t *lck_attr;
1739
1740 if (attr != LCK_ATTR_NULL)
1741 lck_attr = attr;
1742 else
1743 lck_attr = &LockDefaultLckAttr;
1744
1745 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1746 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1747 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1748 lck->lck_mtx_ptr = lck_ext;
1749 lck->lck_mtx_ilocked = 1;
1750 } else {
1751 lck->lck_mtx_owner = 0;
1752 lck->lck_mtx_ptr = 0;
1753 lck->lck_mtx_waiters = 0;
1754 lck->lck_mtx_pri = 0;
1755 lck->lck_mtx_ilocked = 0;
1756 lck->lck_mtx_mlocked = 0;
1757 lck->lck_mtx_promoted = 0;
1758 lck->lck_mtx_spin = 0;
1759 }
1760 lck_grp_reference(grp);
1761 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1762 }
1763
1764 /*
1765 * Routine: lck_mtx_destroy
1766 */
1767 void
1768 lck_mtx_destroy(
1769 lck_mtx_t *lck,
1770 lck_grp_t *grp)
1771 {
1772 boolean_t lck_is_indirect;
1773
1774 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
1775 return;
1776 lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
1777
1778 lck_mtx_lock_mark_destroyed(lck);
1779
1780 if (lck_is_indirect)
1781 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
1782 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
1783 lck_grp_deallocate(grp);
1784 return;
1785 }
1786
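/*
 * Illustrative sketch: a minimal mutex client of the routines above.  The
 * group name "example" is for illustration only; lck_mtx_lock() and
 * lck_mtx_unlock() themselves live in the i386 assembly fast path rather
 * than in this file.
 *
 *	lck_grp_t	*grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
 *	lck_mtx_t	*m   = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);
 *
 *	lck_mtx_lock(m);		(may block; not for interrupt context)
 *	...critical section...
 *	lck_mtx_unlock(m);
 *
 *	lck_mtx_free(m, grp);
 *	lck_grp_free(grp);
 */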
1787
1788 #define LCK_MTX_LCK_WAIT_CODE 0x20
1789 #define LCK_MTX_LCK_WAKEUP_CODE 0x21
1790 #define LCK_MTX_LCK_SPIN_CODE 0x22
1791 #define LCK_MTX_LCK_ACQUIRE_CODE 0x23
1792 #define LCK_MTX_LCK_DEMOTE_CODE 0x24
1793
1794
1795 /*
1796 * Routine: lck_mtx_unlock_wakeup_x86
1797 *
1798 * Invoked on unlock when there is contention.
1799 *
1800 */
1801 void
1802 lck_mtx_unlock_wakeup_x86 (
1803 lck_mtx_t *mutex,
1804 int owner_was_promoted)
1805 {
1806
1807 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START, (int)mutex, owner_was_promoted, mutex->lck_mtx_waiters, 0, 0);
1808
1809 if (lck_mtx_lock_decr_waiter(mutex))
1810 thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
1811
1812 if (owner_was_promoted) {
1813 thread_t thread = current_thread();
1814
1815
1816 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), thread->promotions,
1817 thread->sched_mode & TH_MODE_PROMOTED, 0, 0);
1818
1819 if (thread->promotions > 0) {
1820 spl_t s = splsched();
1821
1822 thread_lock(thread);
1823
1824 if (--thread->promotions == 0 && (thread->sched_mode & TH_MODE_PROMOTED)) {
1825
1826 thread->sched_mode &= ~TH_MODE_PROMOTED;
1827
1828 if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
1829 KERNEL_DEBUG_CONSTANT(
1830 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
1831 thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
1832
1833 set_sched_pri(thread, DEPRESSPRI);
1834 }
1835 else {
1836 if (thread->priority < thread->sched_pri) {
1837 KERNEL_DEBUG_CONSTANT(
1838 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
1839 thread->sched_pri, thread->priority, 0, mutex, 0);
1840
1841 compute_priority(thread, FALSE);
1842 }
1843 }
1844 }
1845 thread_unlock(thread);
1846 splx(s);
1847 }
1848 }
1849 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
1850 }
1851
1852
1853 /*
1854 * Routine: lck_mtx_lock_acquire_x86
1855 *
1856 * Invoked on acquiring the mutex when there is
1857 * contention.
1858 * mutex is owned... interlock is not held
1859 */
1860 void
1861 lck_mtx_lock_acquire_x86(
1862 lck_mtx_t *mutex)
1863 {
1864 thread_t thread = current_thread();
1865 integer_t priority;
1866
1867 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
1868
1869 priority = lck_mtx_lock_get_pri(mutex);
1870
1871 if (thread->sched_pri < priority) {
1872
1873 if (lck_mtx_lock_mark_promoted(mutex)) {
1874 spl_t s = splsched();
1875
1876 thread_lock(thread);
1877
1878 if (thread->sched_pri < priority) {
1879
1880 KERNEL_DEBUG_CONSTANT(
1881 MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
1882 thread->sched_pri, priority, 0, mutex, 0);
1883
1884 set_sched_pri(thread, priority);
1885 }
1886 thread->promotions++;
1887 thread->sched_mode |= TH_MODE_PROMOTED;
1888
1889 thread_unlock(thread);
1890 splx(s);
1891 }
1892 }
1893 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
1894 }
1895
1896
1897
1898 /*
1899 * Routine: lck_mtx_lock_spinwait_x86
1900 *
1901 * Invoked trying to acquire a mutex when there is contention but
1902 * the holder is running on another processor. We spin for up to a maximum
1903 * time waiting for the lock to be released.
1904 *
1905 * Called with the interlock unlocked.
1906 */
1907 int
1908 lck_mtx_lock_spinwait_x86(
1909 lck_mtx_t *mutex)
1910 {
1911 thread_t holder;
1912 uint64_t deadline;
1913 int retval = 1;
1914 int loopcount = 0;
1915
1916 KERNEL_DEBUG(
1917 MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1918 (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
1919
1920 deadline = mach_absolute_time() + MutexSpin;
1921
1922 /*
1923 * Spin while:
1924 * - mutex is locked, and
1925 * - it's locked as a spin lock, and
1926 * - owner is running on another processor, and
1927 * - owner (processor) is not idling, and
1928 * - we haven't spun for long enough.
1929 */
1930 do {
1931 if (lck_mtx_lock_grab_mutex(mutex)) {
1932 retval = 0;
1933 break;
1934 }
1935 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
1936
1937 if ( !(holder->machine.specFlags & OnProc) ||
1938 (holder->state & TH_IDLE)) {
1939 if (loopcount == 0)
1940 retval = 2;
1941 break;
1942 }
1943 }
1944 cpu_pause();
1945
1946 loopcount++;
1947
1948 } while (mach_absolute_time() < deadline);
1949
1950
1951 #if CONFIG_DTRACE
1952 /*
1953 * We've already kept a count via deadline of how long we spun.
1954 * If dtrace is active, then we compute backwards to decide how
1955 * long we spun.
1956 *
1957 * Note that we record a different probe id depending on whether
1958 * this is a direct or indirect mutex. This allows us to
1959 * penalize only lock groups that have debug/stats enabled
1960 * with dtrace processing if desired.
1961 */
1962 if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) {
1963 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
1964 mach_absolute_time() - (deadline - MutexSpin));
1965 } else {
1966 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
1967 mach_absolute_time() - (deadline - MutexSpin));
1968 }
1969 /* The lockstat acquire event is recorded by the assembly code beneath us. */
1970 #endif
1971
1972 KERNEL_DEBUG(
1973 MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
1974 (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
1975
1976 return retval;
1977 }
1978
1979
1980
1981 /*
1982 * Routine: lck_mtx_lock_wait_x86
1983 *
1984 * Invoked in order to wait on contention.
1985 *
1986 * Called with the interlock locked and
1987 * returns it unlocked.
1988 */
1989 void
1990 lck_mtx_lock_wait_x86 (
1991 lck_mtx_t *mutex)
1992 {
1993 thread_t self = current_thread();
1994 thread_t holder;
1995 integer_t priority;
1996 integer_t old_lck_mtx_pri;
1997 spl_t s;
1998 #if CONFIG_DTRACE
1999 uint64_t sleep_start = 0;
2000
2001 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
2002 sleep_start = mach_absolute_time();
2003 }
2004 #endif
2005 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
2006
2007 priority = self->sched_pri;
2008
2009 if (priority < self->priority)
2010 priority = self->priority;
2011 if (priority < BASEPRI_DEFAULT)
2012 priority = BASEPRI_DEFAULT;
2013
2014 if (mutex->lck_mtx_waiters == 0)
2015 old_lck_mtx_pri = 0;
2016 else
2017 old_lck_mtx_pri = mutex->lck_mtx_pri;
2018
2019 if (old_lck_mtx_pri < priority)
2020 mutex->lck_mtx_pri = priority;
2021
2022 if ( (holder = (thread_t)mutex->lck_mtx_owner) ) {
2023
2024 s = splsched();
2025 thread_lock(holder);
2026
2027 if (holder->sched_pri < priority) {
2028 KERNEL_DEBUG_CONSTANT(
2029 MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
2030 holder->sched_pri, priority, holder, mutex, 0);
2031
2032 set_sched_pri(holder, priority);
2033
2034 if (mutex->lck_mtx_promoted == 0) {
2035 holder->promotions++;
2036 holder->sched_mode |= TH_MODE_PROMOTED;
2037
2038 mutex->lck_mtx_promoted = 1;
2039 }
2040 }
2041 thread_unlock(holder);
2042 splx(s);
2043 }
2044 mutex->lck_mtx_waiters++;
2045
2046 assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
2047
2048 lck_mtx_ilk_unlock(mutex);
2049
2050 thread_block(THREAD_CONTINUE_NULL);
2051
2052 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
2053
2054 #if CONFIG_DTRACE
2055 /*
2056 * Record the Dtrace lockstat probe for blocking, block time
2057 * measured from when we were entered.
2058 */
2059 if (sleep_start) {
2060 if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) {
2061 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
2062 mach_absolute_time() - sleep_start);
2063 } else {
2064 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
2065 mach_absolute_time() - sleep_start);
2066 }
2067 }
2068 #endif
2069 }
2070
2071
2072 #if MACH_KDB
2073
2074 void
2075 db_show_one_lock(
2076 lock_t *lock)
2077 {
2078 db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ",
2079 lock->lck_rw_shared_count,
2080 lock->lck_rw_want_upgrade ? "" : "!",
2081 lock->lck_rw_want_write ? "" : "!");
2082 db_printf("%swaiting, %scan_sleep\n",
2083 (lock->lck_r_waiting || lock->lck_w_waiting) ? "" : "!",
2084 lock->lck_rw_can_sleep ? "" : "!");
2085 db_printf("Interlock:\n");
2086 db_show_one_simple_lock((db_expr_t) ((vm_offset_t)simple_lock_addr(lock->lck_rw_interlock)),
2087 TRUE, (db_expr_t)0, (char *)0);
2088 }
2089
2090 /*
2091 * Routines to print out simple_locks and mutexes in a nicely-formatted
2092 * fashion.
2093 */
2094
2095 const char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER";
2096
2097 void
2098 db_show_one_simple_lock (
2099 db_expr_t addr,
2100 boolean_t have_addr,
2101 __unused db_expr_t count,
2102 __unused char * modif)
2103 {
2104 simple_lock_t saddr = (simple_lock_t) ((vm_offset_t) addr);
2105
2106 if (saddr == (simple_lock_t)0 || !have_addr) {
2107 db_error ("No simple_lock\n");
2108 }
2109 #if USLOCK_DEBUG
2110 else if (saddr->lock_type != USLOCK_TAG)
2111 db_error ("Not a simple_lock\n");
2112 #endif /* USLOCK_DEBUG */
2113
2114 db_printf ("%s\n", simple_lock_labels);
2115 db_print_simple_lock (saddr);
2116 }
2117
2118 void
2119 db_print_simple_lock (
2120 simple_lock_t addr)
2121 {
2122
2123 db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
2124 #if USLOCK_DEBUG
2125 db_printf (" %08x", addr->debug.lock_thread);
2126 db_printf (" %08x ", addr->debug.duration[1]);
2127 db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
2128 #endif /* USLOCK_DEBUG */
2129 db_printf ("\n");
2130 }
2131
2132 #endif /* MACH_KDB */