[apple/xnu.git] / osfmk / i386 / locks_i386.c (xnu-2782.20.48)
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
64 #include <mach_ldebug.h>
65
66 #include <kern/locks.h>
67 #include <kern/kalloc.h>
68 #include <kern/misc_protos.h>
69 #include <kern/thread.h>
70 #include <kern/processor.h>
71 #include <kern/cpu_data.h>
72 #include <kern/cpu_number.h>
73 #include <kern/sched_prim.h>
74 #include <kern/xpr.h>
75 #include <kern/debug.h>
76 #include <string.h>
77
78 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
79 #include <machine/machine_cpu.h>
80 #include <i386/mp.h>
81
82 #include <sys/kdebug.h>
83 #include <mach/branch_predicates.h>
84
85 /*
86 * We need only enough declarations from the BSD-side to be able to
87 * test if our probe is active, and to call __dtrace_probe(). Setting
88 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
89 */
90 #if CONFIG_DTRACE
91 #define NEED_DTRACE_DEFS
92 #include <../bsd/sys/lockstat.h>
93 #endif
94
95 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
96 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
97 #define LCK_RW_LCK_SHARED_CODE 0x102
98 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
99 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
100 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
101
102 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
103 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
104 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
105 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
106 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
107 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
108 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
109 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
110
111
112 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
113
114 unsigned int LcksOpts=0;
115
116 /* Forwards */
117
118 #if USLOCK_DEBUG
119 /*
120 * Perform simple lock checks.
121 */
122 int uslock_check = 1;
123 int max_lock_loops = 100000000;
124 decl_simple_lock_data(extern , printf_lock)
125 decl_simple_lock_data(extern , panic_lock)
126 #endif /* USLOCK_DEBUG */
127
128 extern unsigned int not_in_kdp;
129
130 /*
131 * We often want to know the addresses of the callers
132 * of the various lock routines. However, this information
133 * is only used for debugging and statistics.
134 */
135 typedef void *pc_t;
136 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
137 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
138 #if ANY_LOCK_DEBUG
139 #define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
140 #define DECL_PC(pc) pc_t pc;
141 #else /* ANY_LOCK_DEBUG */
142 #define DECL_PC(pc)
143 #ifdef lint
144 /*
145 * Eliminate lint complaints about unused local pc variables.
146 */
147 #define OBTAIN_PC(pc) ++pc
148 #else /* lint */
149 #define OBTAIN_PC(pc)
150 #endif /* lint */
151 #endif /* ANY_LOCK_DEBUG */
152
153
154 /*
155 * Portable lock package implementation of usimple_locks.
156 */
157
158 #if USLOCK_DEBUG
159 #define USLDBG(stmt) stmt
160 void usld_lock_init(usimple_lock_t, unsigned short);
161 void usld_lock_pre(usimple_lock_t, pc_t);
162 void usld_lock_post(usimple_lock_t, pc_t);
163 void usld_unlock(usimple_lock_t, pc_t);
164 void usld_lock_try_pre(usimple_lock_t, pc_t);
165 void usld_lock_try_post(usimple_lock_t, pc_t);
166 int usld_lock_common_checks(usimple_lock_t, char *);
167 #else /* USLOCK_DEBUG */
168 #define USLDBG(stmt)
169 #endif /* USLOCK_DEBUG */
170
171
172 extern int lck_rw_grab_want(lck_rw_t *lck);
173 extern int lck_rw_grab_shared(lck_rw_t *lck);
174 extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);
175
176
177 /*
178 * Forward definitions
179 */
180
181 void lck_rw_lock_shared_gen(
182 lck_rw_t *lck);
183
184 void lck_rw_lock_exclusive_gen(
185 lck_rw_t *lck);
186
187 boolean_t lck_rw_lock_shared_to_exclusive_success(
188 lck_rw_t *lck);
189
190 boolean_t lck_rw_lock_shared_to_exclusive_failure(
191 lck_rw_t *lck,
192 int prior_lock_state);
193
194 void lck_rw_lock_exclusive_to_shared_gen(
195 lck_rw_t *lck,
196 int prior_lock_state);
197
198 lck_rw_type_t lck_rw_done_gen(
199 lck_rw_t *lck,
200 int prior_lock_state);
201
202 void lck_rw_clear_promotions_x86(thread_t thread);
203
204 /*
205 * Routine: lck_spin_alloc_init
206 */
207 lck_spin_t *
208 lck_spin_alloc_init(
209 lck_grp_t *grp,
210 lck_attr_t *attr)
211 {
212 lck_spin_t *lck;
213
214 if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
215 lck_spin_init(lck, grp, attr);
216
217 return(lck);
218 }
219
220 /*
221 * Routine: lck_spin_free
222 */
223 void
224 lck_spin_free(
225 lck_spin_t *lck,
226 lck_grp_t *grp)
227 {
228 lck_spin_destroy(lck, grp);
229 kfree(lck, sizeof(lck_spin_t));
230 }
231
232 /*
233 * Routine: lck_spin_init
234 */
235 void
236 lck_spin_init(
237 lck_spin_t *lck,
238 lck_grp_t *grp,
239 __unused lck_attr_t *attr)
240 {
241 usimple_lock_init((usimple_lock_t) lck, 0);
242 lck_grp_reference(grp);
243 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
244 }
245
246 /*
247 * Routine: lck_spin_destroy
248 */
249 void
250 lck_spin_destroy(
251 lck_spin_t *lck,
252 lck_grp_t *grp)
253 {
254 if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
255 return;
256 lck->interlock = LCK_SPIN_TAG_DESTROYED;
257 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
258 lck_grp_deallocate(grp);
259 return;
260 }
261
262 /*
263 * Routine: lck_spin_lock
264 */
265 void
266 lck_spin_lock(
267 lck_spin_t *lck)
268 {
269 usimple_lock((usimple_lock_t) lck);
270 }
271
272 /*
273 * Routine: lck_spin_unlock
274 */
275 void
276 lck_spin_unlock(
277 lck_spin_t *lck)
278 {
279 usimple_unlock((usimple_lock_t) lck);
280 }
281
282
283 /*
284 * Routine: lck_spin_try_lock
285 */
286 boolean_t
287 lck_spin_try_lock(
288 lck_spin_t *lck)
289 {
290 return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
291 }
292
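/*
 * Illustrative usage sketch for the spin lock KPI above; this is not part
 * of the file's implementation and the group/lock names are hypothetical:
 *
 *	lck_grp_t  *grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
 *	lck_spin_t *sl  = lck_spin_alloc_init(grp, LCK_ATTR_NULL);
 *
 *	lck_spin_lock(sl);
 *	... short, non-blocking critical section (preemption is disabled) ...
 *	lck_spin_unlock(sl);
 *
 *	if (lck_spin_try_lock(sl)) {
 *		... acquired without spinning ...
 *		lck_spin_unlock(sl);
 *	}
 *
 *	lck_spin_free(sl, grp);
 *	lck_grp_free(grp);
 */
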
293 /*
294 * Routine: lck_spin_is_acquired
295 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
296 * Returns: TRUE if lock is acquired.
297 */
298 boolean_t
299 lck_spin_is_acquired(lck_spin_t *lck) {
300 if (not_in_kdp) {
301 panic("panic: spinlock acquired check done outside of kernel debugger");
302 }
303 return (lck->interlock != 0)? TRUE : FALSE;
304 }
305
306 /*
307 * Initialize a usimple_lock.
308 *
309 * No change in preemption state.
310 */
311 void
312 usimple_lock_init(
313 usimple_lock_t l,
314 __unused unsigned short tag)
315 {
316 #ifndef MACHINE_SIMPLE_LOCK
317 USLDBG(usld_lock_init(l, tag));
318 hw_lock_init(&l->interlock);
319 #else
320 simple_lock_init((simple_lock_t)l,tag);
321 #endif
322 }
323
324 volatile uint32_t spinlock_owner_cpu = ~0;
325 volatile usimple_lock_t spinlock_timed_out;
326
327 uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
328 uint64_t deadline;
329 uint32_t i;
330
331 for (i = 0; i < real_ncpus; i++) {
332 if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
333 spinlock_owner_cpu = i;
334 if ((uint32_t) cpu_number() == i)
335 break;
336 cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
337 cpu_NMI_interrupt(i);
338 deadline = mach_absolute_time() + (LockTimeOut * 2);
339 while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
340 cpu_pause();
341 break;
342 }
343 }
344
345 return spinlock_owner_cpu;
346 }
347
348 /*
349 * Acquire a usimple_lock.
350 *
351 * Returns with preemption disabled. Note
352 * that the hw_lock routines are responsible for
353 * maintaining preemption state.
354 */
355 void
356 usimple_lock(
357 usimple_lock_t l)
358 {
359 #ifndef MACHINE_SIMPLE_LOCK
360 DECL_PC(pc);
361
362 OBTAIN_PC(pc);
363 USLDBG(usld_lock_pre(l, pc));
364
365 if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0)) {
366 boolean_t uslock_acquired = FALSE;
367 while (machine_timeout_suspended()) {
368 enable_preemption();
369 if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
370 break;
371 }
372
373 if (uslock_acquired == FALSE) {
374 uint32_t lock_cpu;
375 uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
376 spinlock_timed_out = l;
377 lock_cpu = spinlock_timeout_NMI(lowner);
378 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
379 }
380 }
381 USLDBG(usld_lock_post(l, pc));
382 #else
383 simple_lock((simple_lock_t)l);
384 #endif
385 }
386
387
388 /*
389 * Release a usimple_lock.
390 *
391 * Returns with preemption enabled. Note
392 * that the hw_lock routines are responsible for
393 * maintaining preemption state.
394 */
395 void
396 usimple_unlock(
397 usimple_lock_t l)
398 {
399 #ifndef MACHINE_SIMPLE_LOCK
400 DECL_PC(pc);
401
402 OBTAIN_PC(pc);
403 USLDBG(usld_unlock(l, pc));
404 hw_lock_unlock(&l->interlock);
405 #else
406 simple_unlock_rwmb((simple_lock_t)l);
407 #endif
408 }
409
410
411 /*
412 * Conditionally acquire a usimple_lock.
413 *
414 * On success, returns with preemption disabled.
415 * On failure, returns with preemption in the same state
416 * as when first invoked. Note that the hw_lock routines
417 * are responsible for maintaining preemption state.
418 *
419 * XXX No stats are gathered on a miss; I preserved this
420 * behavior from the original assembly-language code, but
421 * doesn't it make sense to log misses? XXX
422 */
423 unsigned int
424 usimple_lock_try(
425 usimple_lock_t l)
426 {
427 #ifndef MACHINE_SIMPLE_LOCK
428 unsigned int success;
429 DECL_PC(pc);
430
431 OBTAIN_PC(pc);
432 USLDBG(usld_lock_try_pre(l, pc));
433 if ((success = hw_lock_try(&l->interlock))) {
434 USLDBG(usld_lock_try_post(l, pc));
435 }
436 return success;
437 #else
438 return(simple_lock_try((simple_lock_t)l));
439 #endif
440 }
441
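/*
 * Illustrative sketch of the portable simple lock interface that wraps the
 * usimple_lock routines above (assumes the standard simple_lock macros;
 * the lock name is hypothetical):
 *
 *	decl_simple_lock_data(static, example_slock)
 *
 *	simple_lock_init(&example_slock, 0);
 *	simple_lock(&example_slock);		(returns with preemption disabled)
 *	... critical section ...
 *	simple_unlock(&example_slock);		(preemption re-enabled)
 */
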
442 #if USLOCK_DEBUG
443 /*
444 * States of a usimple_lock. The default when initializing
445 * a usimple_lock is setting it up for debug checking.
446 */
447 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
448 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
449 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
450 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
451 #define USLOCK_CHECKING(l) (uslock_check && \
452 ((l)->debug.state & USLOCK_CHECKED))
453
454 /*
455 * Trace activities of a particularly interesting lock.
456 */
457 void usl_trace(usimple_lock_t, int, pc_t, const char *);
458
459
460 /*
461 * Initialize the debugging information contained
462 * in a usimple_lock.
463 */
464 void
465 usld_lock_init(
466 usimple_lock_t l,
467 __unused unsigned short tag)
468 {
469 if (l == USIMPLE_LOCK_NULL)
470 panic("lock initialization: null lock pointer");
471 l->lock_type = USLOCK_TAG;
472 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
473 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
474 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
475 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
476 l->debug.duration[0] = l->debug.duration[1] = 0;
477 l->debug.unlock_cpu = 0;
478 l->debug.unlock_pc = INVALID_PC;
479 l->debug.unlock_thread = INVALID_THREAD;
480 }
481
482
483 /*
484 * These checks apply to all usimple_locks, not just
485 * those with USLOCK_CHECKED turned on.
486 */
487 int
488 usld_lock_common_checks(
489 usimple_lock_t l,
490 char *caller)
491 {
492 if (l == USIMPLE_LOCK_NULL)
493 panic("%s: null lock pointer", caller);
494 if (l->lock_type != USLOCK_TAG)
495 panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
496 if (!(l->debug.state & USLOCK_INIT))
497 panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
498 return USLOCK_CHECKING(l);
499 }
500
501
502 /*
503 * Debug checks on a usimple_lock just before attempting
504 * to acquire it.
505 */
506 /* ARGSUSED */
507 void
508 usld_lock_pre(
509 usimple_lock_t l,
510 pc_t pc)
511 {
512 char caller[] = "usimple_lock";
513
514
515 if (!usld_lock_common_checks(l, caller))
516 return;
517
518 /*
519 * Note that we have a weird case where we are getting a lock when we are
520 * in the process of putting the system to sleep. We are running with no
521 * current threads, therefore we can't tell if we are trying to retake a lock
522 * we have or someone on the other processor has it. Therefore we just
523 * ignore this test if the locking thread is 0.
524 */
525
526 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
527 l->debug.lock_thread == (void *) current_thread()) {
528 printf("%s: lock %p already locked (at %p) by",
529 caller, l, l->debug.lock_pc);
530 printf(" current thread %p (new attempt at pc %p)\n",
531 l->debug.lock_thread, pc);
532 panic("%s", caller);
533 }
534 mp_disable_preemption();
535 usl_trace(l, cpu_number(), pc, caller);
536 mp_enable_preemption();
537 }
538
539
540 /*
541 * Debug checks on a usimple_lock just after acquiring it.
542 *
543 * Pre-emption has been disabled at this point,
544 * so we are safe in using cpu_number.
545 */
546 void
547 usld_lock_post(
548 usimple_lock_t l,
549 pc_t pc)
550 {
551 register int mycpu;
552 char caller[] = "successful usimple_lock";
553
554
555 if (!usld_lock_common_checks(l, caller))
556 return;
557
558 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
559 panic("%s: lock %p became uninitialized",
560 caller, l);
561 if ((l->debug.state & USLOCK_TAKEN))
562 panic("%s: lock 0x%p became TAKEN by someone else",
563 caller, l);
564
565 mycpu = cpu_number();
566 l->debug.lock_thread = (void *)current_thread();
567 l->debug.state |= USLOCK_TAKEN;
568 l->debug.lock_pc = pc;
569 l->debug.lock_cpu = mycpu;
570
571 usl_trace(l, mycpu, pc, caller);
572 }
573
574
575 /*
576 * Debug checks on a usimple_lock just before
577 * releasing it. Note that the caller has not
578 * yet released the hardware lock.
579 *
580 * Preemption is still disabled, so there's
581 * no problem using cpu_number.
582 */
583 void
584 usld_unlock(
585 usimple_lock_t l,
586 pc_t pc)
587 {
588 register int mycpu;
589 char caller[] = "usimple_unlock";
590
591
592 if (!usld_lock_common_checks(l, caller))
593 return;
594
595 mycpu = cpu_number();
596
597 if (!(l->debug.state & USLOCK_TAKEN))
598 panic("%s: lock 0x%p hasn't been taken",
599 caller, l);
600 if (l->debug.lock_thread != (void *) current_thread())
601 panic("%s: unlocking lock 0x%p, owned by thread %p",
602 caller, l, l->debug.lock_thread);
603 if (l->debug.lock_cpu != mycpu) {
604 printf("%s: unlocking lock 0x%p on cpu 0x%x",
605 caller, l, mycpu);
606 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
607 panic("%s", caller);
608 }
609 usl_trace(l, mycpu, pc, caller);
610
611 l->debug.unlock_thread = l->debug.lock_thread;
612 l->debug.lock_thread = INVALID_THREAD;
613 l->debug.state &= ~USLOCK_TAKEN;
614 l->debug.unlock_pc = pc;
615 l->debug.unlock_cpu = mycpu;
616 }
617
618
619 /*
620 * Debug checks on a usimple_lock just before
621 * attempting to acquire it.
622 *
623 * Preemption isn't guaranteed to be disabled.
624 */
625 void
626 usld_lock_try_pre(
627 usimple_lock_t l,
628 pc_t pc)
629 {
630 char caller[] = "usimple_lock_try";
631
632 if (!usld_lock_common_checks(l, caller))
633 return;
634 mp_disable_preemption();
635 usl_trace(l, cpu_number(), pc, caller);
636 mp_enable_preemption();
637 }
638
639
640 /*
641 * Debug checks on a usimple_lock just after
642 * successfully attempting to acquire it.
643 *
644 * Preemption has been disabled by the
645 * lock acquisition attempt, so it's safe
646 * to use cpu_number.
647 */
648 void
649 usld_lock_try_post(
650 usimple_lock_t l,
651 pc_t pc)
652 {
653 register int mycpu;
654 char caller[] = "successful usimple_lock_try";
655
656 if (!usld_lock_common_checks(l, caller))
657 return;
658
659 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
660 panic("%s: lock 0x%p became uninitialized",
661 caller, l);
662 if ((l->debug.state & USLOCK_TAKEN))
663 panic("%s: lock 0x%p became TAKEN by someone else",
664 caller, l);
665
666 mycpu = cpu_number();
667 l->debug.lock_thread = (void *) current_thread();
668 l->debug.state |= USLOCK_TAKEN;
669 l->debug.lock_pc = pc;
670 l->debug.lock_cpu = mycpu;
671
672 usl_trace(l, mycpu, pc, caller);
673 }
674
675
676 /*
677 * For very special cases, set traced_lock to point to a
678 * specific lock of interest. The result is a series of
679 * XPRs showing lock operations on that lock. The lock_seq
680 * value is used to show the order of those operations.
681 */
682 usimple_lock_t traced_lock;
683 unsigned int lock_seq;
684
685 void
686 usl_trace(
687 usimple_lock_t l,
688 int mycpu,
689 pc_t pc,
690 const char * op_name)
691 {
692 if (traced_lock == l) {
693 XPR(XPR_SLOCK,
694 "seq %d, cpu %d, %s @ %x\n",
695 (uintptr_t) lock_seq, (uintptr_t) mycpu,
696 (uintptr_t) op_name, (uintptr_t) pc, 0);
697 lock_seq++;
698 }
699 }
700
701
702 #endif /* USLOCK_DEBUG */
703
704 /*
705 * Routine: lck_rw_alloc_init
706 */
707 lck_rw_t *
708 lck_rw_alloc_init(
709 lck_grp_t *grp,
710 lck_attr_t *attr) {
711 lck_rw_t *lck;
712
713 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
714 bzero(lck, sizeof(lck_rw_t));
715 lck_rw_init(lck, grp, attr);
716 }
717
718 return(lck);
719 }
720
721 /*
722 * Routine: lck_rw_free
723 */
724 void
725 lck_rw_free(
726 lck_rw_t *lck,
727 lck_grp_t *grp) {
728 lck_rw_destroy(lck, grp);
729 kfree(lck, sizeof(lck_rw_t));
730 }
731
732 /*
733 * Routine: lck_rw_init
734 */
735 void
736 lck_rw_init(
737 lck_rw_t *lck,
738 lck_grp_t *grp,
739 lck_attr_t *attr)
740 {
741 lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
742 attr : &LockDefaultLckAttr;
743
744 hw_lock_byte_init(&lck->lck_rw_interlock);
745 lck->lck_rw_want_write = FALSE;
746 lck->lck_rw_want_upgrade = FALSE;
747 lck->lck_rw_shared_count = 0;
748 lck->lck_rw_can_sleep = TRUE;
749 lck->lck_r_waiting = lck->lck_w_waiting = 0;
750 lck->lck_rw_tag = 0;
751 lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
752 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
753
754 lck_grp_reference(grp);
755 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
756 }
757
758 /*
759 * Routine: lck_rw_destroy
760 */
761 void
762 lck_rw_destroy(
763 lck_rw_t *lck,
764 lck_grp_t *grp)
765 {
766 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
767 return;
768 #if MACH_LDEBUG
769 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
770 #endif
771 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
772 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
773 lck_grp_deallocate(grp);
774 return;
775 }
776
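/*
 * Illustrative usage sketch for the rw lock KPI implemented below (not part
 * of this file's implementation; the names are hypothetical):
 *
 *	lck_rw_t *rw = lck_rw_alloc_init(grp, LCK_ATTR_NULL);
 *
 *	lck_rw_lock_shared(rw);			(many readers may hold this at once)
 *	... read-only access ...
 *	lck_rw_unlock_shared(rw);
 *
 *	lck_rw_lock_exclusive(rw);		(single writer)
 *	... modify the protected state ...
 *	lck_rw_unlock_exclusive(rw);
 *
 *	lck_rw_free(rw, grp);
 */
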
777 /*
778 * Sleep locks. These use the same data structure and algorithm
779 * as the spin locks, but the process sleeps while it is waiting
780 * for the lock. These work on uniprocessor systems.
781 */
782
783 #define DECREMENTER_TIMEOUT 1000000
784
785 #define RW_LOCK_READER_EVENT(x) \
786 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))
787
788 #define RW_LOCK_WRITER_EVENT(x) \
789 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
790
791 /*
792 * We disable interrupts while holding the RW interlock to prevent an
793 * interrupt from exacerbating hold time.
794 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
795 */
796 static boolean_t
797 lck_interlock_lock(lck_rw_t *lck)
798 {
799 boolean_t istate;
800
801 istate = ml_set_interrupts_enabled(FALSE);
802 hw_lock_byte_lock(&lck->lck_rw_interlock);
803
804 return istate;
805 }
806
807 static void
808 lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
809 {
810 hw_lock_byte_unlock(&lck->lck_rw_interlock);
811 ml_set_interrupts_enabled(istate);
812 }
813
814 /*
815 * This inline is used when busy-waiting for an rw lock.
816 * If interrupts were disabled when the lock primitive was called,
817 * we poll the IPI handler for pending tlb flushes.
818 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
819 */
820 static inline void
821 lck_rw_lock_pause(boolean_t interrupts_enabled)
822 {
823 if (!interrupts_enabled)
824 handle_pending_TLB_flushes();
825 cpu_pause();
826 }
827
828
829 /*
830 * compute the deadline to spin against when
831 * waiting for a change of state on a lck_rw_t
832 */
833 static inline uint64_t
834 lck_rw_deadline_for_spin(lck_rw_t *lck)
835 {
836 if (lck->lck_rw_can_sleep) {
837 if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
838 /*
839 * there are already threads waiting on this lock... this
840 * implies that they have spun beyond their deadlines waiting for
841 * the desired state to show up so we will not bother spinning at this time...
842 * or
843 * the current number of threads sharing this lock exceeds our capacity to run them
844 * concurrently and since all states we're going to spin for require the rw_shared_count
845 * to be at 0, we'll not bother spinning since the latency for this to happen is
846 * unpredictable...
847 */
848 return (mach_absolute_time());
849 }
850 return (mach_absolute_time() + MutexSpin);
851 } else
852 return (mach_absolute_time() + (100000LL * 1000000000LL));
853 }
854
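/*
 * The exclusive, shared and shared-to-exclusive paths below all follow the
 * same spin-then-block skeleton around this deadline; a condensed sketch
 * (state_available() stands in for the per-path predicate and is not a
 * real routine):
 *
 *	deadline = lck_rw_deadline_for_spin(lck);
 *	while ( !state_available(lck) && mach_absolute_time() < deadline)
 *		lck_rw_lock_pause(istate);
 *	if ( !state_available(lck) && lck->lck_rw_can_sleep) {
 *		istate = lck_interlock_lock(lck);
 *		... re-check under the interlock, then assert_wait()
 *		    and thread_block() if still unavailable ...
 *		lck_interlock_unlock(lck, istate);
 *	}
 */
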
855
856 /*
857 * Routine: lck_rw_lock_exclusive_gen
858 */
859 void
860 lck_rw_lock_exclusive_gen(
861 lck_rw_t *lck)
862 {
863 uint64_t deadline = 0;
864 int slept = 0;
865 int gotlock = 0;
866 int lockheld = 0;
867 wait_result_t res = 0;
868 boolean_t istate = -1;
869
870 #if CONFIG_DTRACE
871 boolean_t dtrace_ls_initialized = FALSE;
872 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
873 uint64_t wait_interval = 0;
874 int readers_at_sleep = 0;
875 #endif
876
877 /*
878 * Try to acquire the lck_rw_want_write bit.
879 */
880 while ( !lck_rw_grab_want(lck)) {
881
882 #if CONFIG_DTRACE
883 if (dtrace_ls_initialized == FALSE) {
884 dtrace_ls_initialized = TRUE;
885 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
886 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
887 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
888 if (dtrace_ls_enabled) {
889 /*
890 * Either sleeping or spinning is happening,
891 * start a timing of our delay interval now.
892 */
893 readers_at_sleep = lck->lck_rw_shared_count;
894 wait_interval = mach_absolute_time();
895 }
896 }
897 #endif
898 if (istate == -1)
899 istate = ml_get_interrupts_enabled();
900
901 deadline = lck_rw_deadline_for_spin(lck);
902
903 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
904
905 while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
906 lck_rw_lock_pause(istate);
907
908 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);
909
910 if (gotlock)
911 break;
912 /*
913 * if we get here, the deadline has expired w/o us
914 * being able to grab the lock exclusively
915 * check to see if we're allowed to do a thread_block
916 */
917 if (lck->lck_rw_can_sleep) {
918
919 istate = lck_interlock_lock(lck);
920
921 if (lck->lck_rw_want_write) {
922
923 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
924
925 lck->lck_w_waiting = TRUE;
926
927 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
928 lck_interlock_unlock(lck, istate);
929
930 if (res == THREAD_WAITING) {
931 res = thread_block(THREAD_CONTINUE_NULL);
932 slept++;
933 }
934 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
935 } else {
936 lck->lck_rw_want_write = TRUE;
937 lck_interlock_unlock(lck, istate);
938 break;
939 }
940 }
941 }
942 /*
943 * Wait for readers (and upgrades) to finish...
944 * the test for these conditions must be done simultaneously with
945 * a check of the interlock not being held since
946 * the rw_shared_count will drop to 0 first and then want_upgrade
947 * will be set to 1 in the shared_to_exclusive scenario... those
948 * adjustments are done behind the interlock and represent an
949 * atomic change in state and must be considered as such
950 * however, once we see the read count at 0, the want_upgrade not set
951 * and the interlock not held, we are safe to proceed
952 */
953 while (lck_rw_held_read_or_upgrade(lck)) {
954
955 #if CONFIG_DTRACE
956 /*
957 * Either sleeping or spinning is happening, start
958 * a timing of our delay interval now. If we set it
959 * to -1 we don't have accurate data so we cannot later
960 * decide to record a dtrace spin or sleep event.
961 */
962 if (dtrace_ls_initialized == FALSE) {
963 dtrace_ls_initialized = TRUE;
964 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
965 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
966 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
967 if (dtrace_ls_enabled) {
968 /*
969 * Either sleeping or spinning is happening,
970 * start a timing of our delay interval now.
971 */
972 readers_at_sleep = lck->lck_rw_shared_count;
973 wait_interval = mach_absolute_time();
974 }
975 }
976 #endif
977 if (istate == -1)
978 istate = ml_get_interrupts_enabled();
979
980 deadline = lck_rw_deadline_for_spin(lck);
981
982 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
983
984 while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
985 lck_rw_lock_pause(istate);
986
987 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);
988
989 if ( !lockheld)
990 break;
991 /*
992 * if we get here, the deadline has expired w/o us
993 * being able to grab the lock exclusively
994 * check to see if we're allowed to do a thread_block
995 */
996 if (lck->lck_rw_can_sleep) {
997
998 istate = lck_interlock_lock(lck);
999
1000 if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
1001 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1002
1003 lck->lck_w_waiting = TRUE;
1004
1005 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1006 lck_interlock_unlock(lck, istate);
1007
1008 if (res == THREAD_WAITING) {
1009 res = thread_block(THREAD_CONTINUE_NULL);
1010 slept++;
1011 }
1012 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
1013 } else {
1014 lck_interlock_unlock(lck, istate);
1015 /*
1016 * must own the lock now, since we checked for
1017 * readers or upgrade owner behind the interlock
1018 * no need for a call to 'lck_rw_held_read_or_upgrade'
1019 */
1020 break;
1021 }
1022 }
1023 }
1024
1025 #if CONFIG_DTRACE
1026 /*
1027 * Decide what latencies we suffered that are Dtrace events.
1028 * If we have set wait_interval, then we either spun or slept.
1029 * At least we get out from under the interlock before we record
1030 * which is the best we can do here to minimize the impact
1031 * of the tracing.
1032 * If we have set wait_interval to -1, then dtrace was not enabled when we
1033 * started sleeping/spinning so we don't record this event.
1034 */
1035 if (dtrace_ls_enabled == TRUE) {
1036 if (slept == 0) {
1037 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
1038 mach_absolute_time() - wait_interval, 1);
1039 } else {
1040 /*
1041 * For the blocking case, we also record if when we blocked
1042 * it was held for read or write, and how many readers.
1043 * Notice that above we recorded this before we dropped
1044 * the interlock so the count is accurate.
1045 */
1046 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
1047 mach_absolute_time() - wait_interval, 1,
1048 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1049 }
1050 }
1051 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1052 #endif
1053 }
1054
1055
1056 /*
1057 * Routine: lck_rw_done_gen
1058 *
1059 * called from the assembly language wrapper...
1060 * prior_lock_state is the value in the 1st
1061 * word of the lock at the time of a successful
1062 * atomic compare and exchange with the new value...
1063 * it represents the state of the lock before we
1064 * decremented the rw_shared_count or cleared either
1065 * rw_want_upgrade or rw_want_write and
1066 * the lck_x_waiting bits... since the wrapper
1067 * routine has already changed the state atomically,
1068 * we just need to decide if we should
1069 * wake up anyone and what value to return... we do
1070 * this by examining the state of the lock before
1071 * we changed it
1072 */
1073 lck_rw_type_t
1074 lck_rw_done_gen(
1075 lck_rw_t *lck,
1076 int prior_lock_state)
1077 {
1078 lck_rw_t *fake_lck;
1079 lck_rw_type_t lock_type;
1080 thread_t thread;
1081 uint32_t rwlock_count;
1082
1083 /*
1084 * prior_lock_state is a snapshot of the 1st word of the
1085 * lock in question... we'll fake up a pointer to it
1086 * and carefully not access anything beyond what's defined
1087 * in the first word of a lck_rw_t
1088 */
1089 fake_lck = (lck_rw_t *)&prior_lock_state;
1090
1091 if (fake_lck->lck_rw_shared_count <= 1) {
1092 if (fake_lck->lck_w_waiting)
1093 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1094
1095 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1096 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1097 }
1098 if (fake_lck->lck_rw_shared_count)
1099 lock_type = LCK_RW_TYPE_SHARED;
1100 else
1101 lock_type = LCK_RW_TYPE_EXCLUSIVE;
1102
1103 /* Check if dropping the lock means that we need to unpromote */
1104 thread = current_thread();
1105 rwlock_count = thread->rwlock_count--;
1106 #if MACH_LDEBUG
1107 if (rwlock_count == 0) {
1108 panic("rw lock count underflow for thread %p", thread);
1109 }
1110 #endif
1111 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1112 /* sched_flags checked without lock, but will be rechecked while clearing */
1113 lck_rw_clear_promotion(thread);
1114 }
1115
1116 #if CONFIG_DTRACE
1117 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
1118 #endif
1119
1120 return(lock_type);
1121 }
1122
1123
1124 /*
1125 * Routine: lck_rw_unlock
1126 */
1127 void
1128 lck_rw_unlock(
1129 lck_rw_t *lck,
1130 lck_rw_type_t lck_rw_type)
1131 {
1132 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1133 lck_rw_unlock_shared(lck);
1134 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1135 lck_rw_unlock_exclusive(lck);
1136 else
1137 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
1138 }
1139
1140
1141 /*
1142 * Routine: lck_rw_unlock_shared
1143 */
1144 void
1145 lck_rw_unlock_shared(
1146 lck_rw_t *lck)
1147 {
1148 lck_rw_type_t ret;
1149
1150 ret = lck_rw_done(lck);
1151
1152 if (ret != LCK_RW_TYPE_SHARED)
1153 panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
1154 }
1155
1156
1157 /*
1158 * Routine: lck_rw_unlock_exclusive
1159 */
1160 void
1161 lck_rw_unlock_exclusive(
1162 lck_rw_t *lck)
1163 {
1164 lck_rw_type_t ret;
1165
1166 ret = lck_rw_done(lck);
1167
1168 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1169 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
1170 }
1171
1172
1173 /*
1174 * Routine: lck_rw_lock
1175 */
1176 void
1177 lck_rw_lock(
1178 lck_rw_t *lck,
1179 lck_rw_type_t lck_rw_type)
1180 {
1181 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1182 lck_rw_lock_shared(lck);
1183 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1184 lck_rw_lock_exclusive(lck);
1185 else
1186 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
1187 }
1188
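/*
 * Illustrative sketch: the type-dispatched entry points above are used in
 * matched pairs (the lock name is hypothetical):
 *
 *	lck_rw_lock(rw, LCK_RW_TYPE_SHARED);
 *	... read-only access ...
 *	lck_rw_unlock(rw, LCK_RW_TYPE_SHARED);
 *
 * lck_rw_done() may be used instead of lck_rw_unlock() when the caller does
 * not track which mode it holds; it returns the mode that was released.
 */
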
1189
1190 /*
1191 * Routine: lck_rw_lock_shared_gen
1192 * Function:
1193 * assembly fast path code has determined that this lock
1194 * is held exclusively... this is where we spin/block
1195 * until we can acquire the lock in the shared mode
1196 */
1197 void
1198 lck_rw_lock_shared_gen(
1199 lck_rw_t *lck)
1200 {
1201 uint64_t deadline = 0;
1202 int gotlock = 0;
1203 int slept = 0;
1204 wait_result_t res = 0;
1205 boolean_t istate = -1;
1206
1207 #if CONFIG_DTRACE
1208 uint64_t wait_interval = 0;
1209 int readers_at_sleep = 0;
1210 boolean_t dtrace_ls_initialized = FALSE;
1211 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1212 #endif
1213
1214 while ( !lck_rw_grab_shared(lck)) {
1215
1216 #if CONFIG_DTRACE
1217 if (dtrace_ls_initialized == FALSE) {
1218 dtrace_ls_initialized = TRUE;
1219 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1220 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1221 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1222 if (dtrace_ls_enabled) {
1223 /*
1224 * Either sleeping or spinning is happening,
1225 * start a timing of our delay interval now.
1226 */
1227 readers_at_sleep = lck->lck_rw_shared_count;
1228 wait_interval = mach_absolute_time();
1229 }
1230 }
1231 #endif
1232 if (istate == -1)
1233 istate = ml_get_interrupts_enabled();
1234
1235 deadline = lck_rw_deadline_for_spin(lck);
1236
1237 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1238 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1239
1240 while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
1241 lck_rw_lock_pause(istate);
1242
1243 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1244 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
1245
1246 if (gotlock)
1247 break;
1248 /*
1249 * if we get here, the deadline has expired w/o us
1250 * being able to grab the lock for read
1251 * check to see if we're allowed to do a thread_block
1252 */
1253 if (lck->lck_rw_can_sleep) {
1254
1255 istate = lck_interlock_lock(lck);
1256
1257 if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1258 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
1259
1260 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1261 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1262
1263 lck->lck_r_waiting = TRUE;
1264
1265 res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
1266 lck_interlock_unlock(lck, istate);
1267
1268 if (res == THREAD_WAITING) {
1269 res = thread_block(THREAD_CONTINUE_NULL);
1270 slept++;
1271 }
1272 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1273 (int)lck, res, slept, 0, 0);
1274 } else {
1275 lck->lck_rw_shared_count++;
1276 lck_interlock_unlock(lck, istate);
1277 break;
1278 }
1279 }
1280 }
1281
1282 #if CONFIG_DTRACE
1283 if (dtrace_ls_enabled == TRUE) {
1284 if (slept == 0) {
1285 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1286 } else {
1287 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1288 mach_absolute_time() - wait_interval, 0,
1289 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1290 }
1291 }
1292 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1293 #endif
1294 }
1295
1296
1297 /*
1298 * Routine: lck_rw_lock_shared_to_exclusive_failure
1299 * Function:
1300 * assembly fast path code has already dropped our read
1301 * count and determined that someone else owns 'lck_rw_want_upgrade'
1302 * if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
1303 * all we need to do here is determine if a wakeup is needed
1304 */
1305 boolean_t
1306 lck_rw_lock_shared_to_exclusive_failure(
1307 lck_rw_t *lck,
1308 int prior_lock_state)
1309 {
1310 lck_rw_t *fake_lck;
1311 thread_t thread = current_thread();
1312 uint32_t rwlock_count;
1313
1314 /* Check if dropping the lock means that we need to unpromote */
1315 rwlock_count = thread->rwlock_count--;
1316 #if MACH_LDEBUG
1317 if (rwlock_count == 0) {
1318 panic("rw lock count underflow for thread %p", thread);
1319 }
1320 #endif
1321 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1322 /* sched_flags checked without lock, but will be rechecked while clearing */
1323 lck_rw_clear_promotion(thread);
1324 }
1325
1326 /*
1327 * prior_lock_state is a snapshot of the 1st word of the
1328 * lock in question... we'll fake up a pointer to it
1329 * and carefully not access anything beyond what's defined
1330 * in the first word of a lck_rw_t
1331 */
1332 fake_lck = (lck_rw_t *)&prior_lock_state;
1333
1334 if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
1335 /*
1336 * Someone else has requested upgrade.
1337 * Since we've released the read lock, wake
1338 * him up if he's blocked waiting
1339 */
1340 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1341 }
1342 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1343 (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1344
1345 return (FALSE);
1346 }
1347
1348
1349 /*
1350 * Routine: lck_rw_lock_shared_to_exclusive_success
1351 * Function:
1352 * assembly fast path code has already dropped our read
1353 * count and successfully acquired 'lck_rw_want_upgrade'
1354 * we just need to wait for the rest of the readers to drain
1355 * and then we can return as the exclusive holder of this lock
1356 */
1357 boolean_t
1358 lck_rw_lock_shared_to_exclusive_success(
1359 lck_rw_t *lck)
1360 {
1361 uint64_t deadline = 0;
1362 int slept = 0;
1363 int still_shared = 0;
1364 wait_result_t res;
1365 boolean_t istate = -1;
1366
1367 #if CONFIG_DTRACE
1368 uint64_t wait_interval = 0;
1369 int readers_at_sleep = 0;
1370 boolean_t dtrace_ls_initialized = FALSE;
1371 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1372 #endif
1373
1374 while (lck->lck_rw_shared_count != 0) {
1375
1376 #if CONFIG_DTRACE
1377 if (dtrace_ls_initialized == FALSE) {
1378 dtrace_ls_initialized = TRUE;
1379 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1380 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1381 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1382 if (dtrace_ls_enabled) {
1383 /*
1384 * Either sleeping or spinning is happening,
1385 * start a timing of our delay interval now.
1386 */
1387 readers_at_sleep = lck->lck_rw_shared_count;
1388 wait_interval = mach_absolute_time();
1389 }
1390 }
1391 #endif
1392 if (istate == -1)
1393 istate = ml_get_interrupts_enabled();
1394
1395 deadline = lck_rw_deadline_for_spin(lck);
1396
1397 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1398 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1399
1400 while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
1401 lck_rw_lock_pause(istate);
1402
1403 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1404 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1405
1406 if ( !still_shared)
1407 break;
1408 /*
1409 * if we get here, the deadline has expired w/o
1410 * the rw_shared_count having drained to 0
1411 * check to see if we're allowed to do a thread_block
1412 */
1413 if (lck->lck_rw_can_sleep) {
1414
1415 istate = lck_interlock_lock(lck);
1416
1417 if (lck->lck_rw_shared_count != 0) {
1418 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1419 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1420
1421 lck->lck_w_waiting = TRUE;
1422
1423 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1424 lck_interlock_unlock(lck, istate);
1425
1426 if (res == THREAD_WAITING) {
1427 res = thread_block(THREAD_CONTINUE_NULL);
1428 slept++;
1429 }
1430 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1431 (int)lck, res, slept, 0, 0);
1432 } else {
1433 lck_interlock_unlock(lck, istate);
1434 break;
1435 }
1436 }
1437 }
1438 #if CONFIG_DTRACE
1439 /*
1440 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1441 */
1442 if (dtrace_ls_enabled == TRUE) {
1443 if (slept == 0) {
1444 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1445 } else {
1446 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
1447 mach_absolute_time() - wait_interval, 1,
1448 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1449 }
1450 }
1451 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1452 #endif
1453 return (TRUE);
1454 }
1455
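/*
 * Illustrative caller-side upgrade pattern (the fast path lives in assembly;
 * this sketch only shows the KPI contract):
 *
 *	lck_rw_lock_shared(rw);
 *	...
 *	if (lck_rw_lock_shared_to_exclusive(rw)) {
 *		... upgrade succeeded: the lock is now held exclusively ...
 *		lck_rw_unlock_exclusive(rw);
 *	} else {
 *		... upgrade failed: the read hold was dropped (see the
 *		    _failure path above), so reacquire if still needed ...
 *		lck_rw_lock_exclusive(rw);
 *		lck_rw_unlock_exclusive(rw);
 *	}
 */
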
1456
1457 /*
1458 * Routine: lck_rw_lock_exclusive_to_shared
1459 * Function:
1460 * assembly fast path has already dropped
1461 * our exclusive state and bumped lck_rw_shared_count
1462 * all we need to do here is determine if anyone
1463 * needs to be awakened.
1464 */
1465 void
1466 lck_rw_lock_exclusive_to_shared_gen(
1467 lck_rw_t *lck,
1468 int prior_lock_state)
1469 {
1470 lck_rw_t *fake_lck;
1471
1472 /*
1473 * prior_lock_state is a snapshot of the 1st word of the
1474 * lock in question... we'll fake up a pointer to it
1475 * and carefully not access anything beyond what's defined
1476 * in the first word of a lck_rw_t
1477 */
1478 fake_lck = (lck_rw_t *)&prior_lock_state;
1479
1480 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1481 (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
1482
1483 /*
1484 * don't wake up anyone waiting to take the lock exclusively
1485 * since we hold a read count... when the read count drops to 0,
1486 * the writers will be woken.
1487 *
1488 * wake up any waiting readers if we don't have any writers waiting,
1489 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1490 */
1491 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1492 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1493
1494 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1495 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1496
1497 #if CONFIG_DTRACE
1498 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1499 #endif
1500 }
1501
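/*
 * Illustrative downgrade pattern (sketch only; the lock name is hypothetical):
 *
 *	lck_rw_lock_exclusive(rw);
 *	... perform the update ...
 *	lck_rw_lock_exclusive_to_shared(rw);	(keep reading, let other readers in)
 *	... read-only access ...
 *	lck_rw_unlock_shared(rw);
 */
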
1502
1503 /*
1504 * Routine: lck_rw_try_lock
1505 */
1506 boolean_t
1507 lck_rw_try_lock(
1508 lck_rw_t *lck,
1509 lck_rw_type_t lck_rw_type)
1510 {
1511 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1512 return(lck_rw_try_lock_shared(lck));
1513 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1514 return(lck_rw_try_lock_exclusive(lck));
1515 else
1516 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
1517 return(FALSE);
1518 }
1519
1520
1521 void
1522 lck_rw_assert(
1523 lck_rw_t *lck,
1524 unsigned int type)
1525 {
1526 switch (type) {
1527 case LCK_RW_ASSERT_SHARED:
1528 if (lck->lck_rw_shared_count != 0) {
1529 return;
1530 }
1531 break;
1532 case LCK_RW_ASSERT_EXCLUSIVE:
1533 if ((lck->lck_rw_want_write ||
1534 lck->lck_rw_want_upgrade) &&
1535 lck->lck_rw_shared_count == 0) {
1536 return;
1537 }
1538 break;
1539 case LCK_RW_ASSERT_HELD:
1540 if (lck->lck_rw_want_write ||
1541 lck->lck_rw_want_upgrade ||
1542 lck->lck_rw_shared_count != 0) {
1543 return;
1544 }
1545 break;
1546 case LCK_RW_ASSERT_NOTHELD:
1547 if (!(lck->lck_rw_want_write ||
1548 lck->lck_rw_want_upgrade ||
1549 lck->lck_rw_shared_count != 0)) {
1550 return;
1551 }
1552 break;
1553 default:
1554 break;
1555 }
1556
1557 panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type, *(uint32_t *)lck);
1558 }
1559
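/*
 * Illustrative use of lck_rw_assert() (sketch only):
 *
 *	lck_rw_lock_shared(rw);
 *	lck_rw_assert(rw, LCK_RW_ASSERT_SHARED);	(panics if not held shared)
 *	lck_rw_unlock_shared(rw);
 *	lck_rw_assert(rw, LCK_RW_ASSERT_NOTHELD);
 */
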
1560 /* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
1561 void
1562 lck_rw_clear_promotions_x86(thread_t thread)
1563 {
1564 #if MACH_LDEBUG
1565 /* It's fatal to leave a RW lock locked and return to userspace */
1566 panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
1567 #else
1568 /* Paper over the issue */
1569 thread->rwlock_count = 0;
1570 lck_rw_clear_promotion(thread);
1571 #endif
1572 }
1573
1574
1575 #ifdef MUTEX_ZONE
1576 extern zone_t lck_mtx_zone;
1577 #endif
1578 /*
1579 * Routine: lck_mtx_alloc_init
1580 */
1581 lck_mtx_t *
1582 lck_mtx_alloc_init(
1583 lck_grp_t *grp,
1584 lck_attr_t *attr)
1585 {
1586 lck_mtx_t *lck;
1587 #ifdef MUTEX_ZONE
1588 if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
1589 lck_mtx_init(lck, grp, attr);
1590 #else
1591 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
1592 lck_mtx_init(lck, grp, attr);
1593 #endif
1594 return(lck);
1595 }
1596
1597 /*
1598 * Routine: lck_mtx_free
1599 */
1600 void
1601 lck_mtx_free(
1602 lck_mtx_t *lck,
1603 lck_grp_t *grp)
1604 {
1605 lck_mtx_destroy(lck, grp);
1606 #ifdef MUTEX_ZONE
1607 zfree(lck_mtx_zone, lck);
1608 #else
1609 kfree(lck, sizeof(lck_mtx_t));
1610 #endif
1611 }
1612
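/*
 * Illustrative usage sketch for the mutex KPI (not part of this file's
 * implementation; the names are hypothetical):
 *
 *	lck_mtx_t *mtx = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);
 *
 *	lck_mtx_lock(mtx);
 *	... may block; must not be taken from interrupt context ...
 *	lck_mtx_unlock(mtx);
 *
 *	lck_mtx_free(mtx, grp);
 */
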
1613 /*
1614 * Routine: lck_mtx_ext_init
1615 */
1616 static void
1617 lck_mtx_ext_init(
1618 lck_mtx_ext_t *lck,
1619 lck_grp_t *grp,
1620 lck_attr_t *attr)
1621 {
1622 bzero((void *)lck, sizeof(lck_mtx_ext_t));
1623
1624 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1625 lck->lck_mtx_deb.type = MUTEX_TAG;
1626 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
1627 }
1628
1629 lck->lck_mtx_grp = grp;
1630
1631 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
1632 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
1633
1634 lck->lck_mtx.lck_mtx_is_ext = 1;
1635 lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
1636 }
1637
1638 /*
1639 * Routine: lck_mtx_init
1640 */
1641 void
1642 lck_mtx_init(
1643 lck_mtx_t *lck,
1644 lck_grp_t *grp,
1645 lck_attr_t *attr)
1646 {
1647 lck_mtx_ext_t *lck_ext;
1648 lck_attr_t *lck_attr;
1649
1650 if (attr != LCK_ATTR_NULL)
1651 lck_attr = attr;
1652 else
1653 lck_attr = &LockDefaultLckAttr;
1654
1655 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1656 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
1657 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1658 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1659 lck->lck_mtx_ptr = lck_ext;
1660 }
1661 } else {
1662 lck->lck_mtx_owner = 0;
1663 lck->lck_mtx_state = 0;
1664 }
1665 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
1666 lck_grp_reference(grp);
1667 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1668 }
1669
1670 /*
1671 * Routine: lck_mtx_init_ext
1672 */
1673 void
1674 lck_mtx_init_ext(
1675 lck_mtx_t *lck,
1676 lck_mtx_ext_t *lck_ext,
1677 lck_grp_t *grp,
1678 lck_attr_t *attr)
1679 {
1680 lck_attr_t *lck_attr;
1681
1682 if (attr != LCK_ATTR_NULL)
1683 lck_attr = attr;
1684 else
1685 lck_attr = &LockDefaultLckAttr;
1686
1687 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1688 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1689 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1690 lck->lck_mtx_ptr = lck_ext;
1691 } else {
1692 lck->lck_mtx_owner = 0;
1693 lck->lck_mtx_state = 0;
1694 }
1695 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
1696
1697 lck_grp_reference(grp);
1698 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1699 }
1700
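/*
 * Illustrative sketch of initializing an embedded mutex with debugging
 * enabled, which routes through lck_mtx_ext_init() above (assumes the
 * standard lck_attr_* KPIs; the names are hypothetical):
 *
 *	lck_attr_t *attr = lck_attr_alloc_init();
 *	lck_attr_setdebug(attr);		(sets LCK_ATTR_DEBUG)
 *	lck_mtx_init(&some_object->mtx, grp, attr);
 *	lck_attr_free(attr);
 */
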
1701 /*
1702 * Routine: lck_mtx_destroy
1703 */
1704 void
1705 lck_mtx_destroy(
1706 lck_mtx_t *lck,
1707 lck_grp_t *grp)
1708 {
1709 boolean_t lck_is_indirect;
1710
1711 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
1712 return;
1713 #if MACH_LDEBUG
1714 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
1715 #endif
1716 lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
1717
1718 lck_mtx_lock_mark_destroyed(lck);
1719
1720 if (lck_is_indirect)
1721 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
1722 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
1723 lck_grp_deallocate(grp);
1724 return;
1725 }
1726
1727
1728 #define LCK_MTX_LCK_WAIT_CODE 0x20
1729 #define LCK_MTX_LCK_WAKEUP_CODE 0x21
1730 #define LCK_MTX_LCK_SPIN_CODE 0x22
1731 #define LCK_MTX_LCK_ACQUIRE_CODE 0x23
1732 #define LCK_MTX_LCK_DEMOTE_CODE 0x24
1733
1734
1735 /*
1736 * Routine: lck_mtx_unlock_wakeup_x86
1737 *
1738 * Invoked on unlock when there is
1739 * contention (i.e. the assembly routine sees that
1740 * mutex->lck_mtx_waiters != 0 or
1741 * mutex->lck_mtx_promoted != 0)...
1742 *
1743 * neither the mutex nor the interlock is held
1744 */
1745 void
1746 lck_mtx_unlock_wakeup_x86 (
1747 lck_mtx_t *mutex,
1748 int prior_lock_state)
1749 {
1750 lck_mtx_t fake_lck;
1751
1752 /*
1753 * prior_lock_state is a snapshot of the 2nd word of the
1754 * lock in question... we'll fake up a lock with the bits
1755 * copied into place and carefully not access anything
1756 * beyond what's defined in the second word of a lck_mtx_t
1757 */
1758 fake_lck.lck_mtx_state = prior_lock_state;
1759
1760 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
1761 mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
1762
1763 if (__probable(fake_lck.lck_mtx_waiters)) {
1764 if (fake_lck.lck_mtx_waiters > 1)
1765 thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
1766 else
1767 thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
1768 }
1769
1770 if (__improbable(fake_lck.lck_mtx_promoted)) {
1771 thread_t thread = current_thread();
1772
1773
1774 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
1775 thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);
1776
1777 if (thread->promotions > 0) {
1778 spl_t s = splsched();
1779
1780 thread_lock(thread);
1781
1782 if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {
1783
1784 thread->sched_flags &= ~TH_SFLAG_PROMOTED;
1785
1786 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
1787 /* Thread still has a RW lock promotion */
1788 } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
1789 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
1790 thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
1791
1792 set_sched_pri(thread, DEPRESSPRI);
1793 }
1794 else {
1795 if (thread->priority < thread->sched_pri) {
1796 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
1797 thread->sched_pri, thread->priority, 0, mutex, 0);
1798
1799 SCHED(compute_priority)(thread, FALSE);
1800 }
1801 }
1802 }
1803 thread_unlock(thread);
1804 splx(s);
1805 }
1806 }
1807 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
1808 mutex, 0, mutex->lck_mtx_waiters, 0, 0);
1809 }
1810
1811
1812 /*
1813 * Routine: lck_mtx_lock_acquire_x86
1814 *
1815 * Invoked on acquiring the mutex when there is
1816 * contention (i.e. the assembly routine sees that
1817 * mutex->lck_mtx_waiters != 0 or
1818 * thread->was_promoted_on_wakeup != 0)...
1819 *
1820 * mutex is owned... interlock is held... preemption is disabled
1821 */
1822 void
1823 lck_mtx_lock_acquire_x86(
1824 lck_mtx_t *mutex)
1825 {
1826 thread_t thread;
1827 integer_t priority;
1828 spl_t s;
1829
1830 thread = (thread_t)mutex->lck_mtx_owner; /* faster than current_thread() */
1831
1832 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
1833 mutex, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
1834
1835 if (mutex->lck_mtx_waiters)
1836 priority = mutex->lck_mtx_pri;
1837 else
1838 priority = 0;
1839
1840 if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
1841
1842 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
1843 thread->sched_pri, priority, thread->was_promoted_on_wakeup, mutex, 0);
1844
1845 s = splsched();
1846 thread_lock(thread);
1847
1848 if (thread->sched_pri < priority) {
1849 /* Do not promote past promotion ceiling */
1850 assert(priority <= MAXPRI_PROMOTE);
1851 set_sched_pri(thread, priority);
1852 }
1853 if (mutex->lck_mtx_promoted == 0) {
1854 mutex->lck_mtx_promoted = 1;
1855
1856 thread->promotions++;
1857 thread->sched_flags |= TH_SFLAG_PROMOTED;
1858 }
1859 thread->was_promoted_on_wakeup = 0;
1860
1861 thread_unlock(thread);
1862 splx(s);
1863 }
1864 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
1865 mutex, 0, mutex->lck_mtx_waiters, 0, 0);
1866 }
1867
1868
1869
1870 /*
1871 * Routine: lck_mtx_lock_spinwait_x86
1872 *
1873 * Invoked trying to acquire a mutex when there is contention but
1874 * the holder is running on another processor. We spin for up to a maximum
1875 * time waiting for the lock to be released.
1876 *
1877 * Called with the interlock unlocked.
1878 * returns 0 if mutex acquired
1879 * returns 1 if we spun
1880 * returns 2 if we didn't spin due to the holder not running
1881 */
1882 int
1883 lck_mtx_lock_spinwait_x86(
1884 lck_mtx_t *mutex)
1885 {
1886 thread_t holder;
1887 uint64_t deadline;
1888 int retval = 1;
1889 int loopcount = 0;
1890
1891
1892 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1893 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
1894
1895 deadline = mach_absolute_time() + MutexSpin;
1896
1897 /*
1898 * Spin while:
1899 * - mutex is locked, and
1900 * - it's locked as a spin lock, and
1901 * - owner is running on another processor, and
1902 * - owner (processor) is not idling, and
1903 * - we haven't spun for long enough.
1904 */
1905 do {
1906 if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
1907 retval = 0;
1908 break;
1909 }
1910 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
1911
1912 if ( !(holder->machine.specFlags & OnProc) ||
1913 (holder->state & TH_IDLE)) {
1914 if (loopcount == 0)
1915 retval = 2;
1916 break;
1917 }
1918 }
1919 cpu_pause();
1920
1921 loopcount++;
1922
1923 } while (mach_absolute_time() < deadline);
1924
1925
1926 #if CONFIG_DTRACE
1927 /*
1928 * We've already kept a count via deadline of how long we spun.
1929 * If dtrace is active, then we compute backwards to decide how
1930 * long we spun.
1931 *
1932 * Note that we record a different probe id depending on whether
1933 * this is a direct or indirect mutex. This allows us to
1934 * penalize only lock groups that have debug/stats enabled
1935 * with dtrace processing if desired.
1936 */
1937 if (__probable(mutex->lck_mtx_is_ext == 0)) {
1938 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
1939 mach_absolute_time() - (deadline - MutexSpin));
1940 } else {
1941 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
1942 mach_absolute_time() - (deadline - MutexSpin));
1943 }
1944 /* The lockstat acquire event is recorded by the assembly code beneath us. */
1945 #endif
1946
1947 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
1948 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
1949
1950 return retval;
1951 }
1952
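/*
 * Sketch of how the return value is interpreted; the real caller is the
 * assembly lock path, so this is illustrative only:
 *
 *	switch (lck_mtx_lock_spinwait_x86(mutex)) {
 *	case 0:		the mutex was grabbed while spinning: done
 *	case 1:		we spun until the deadline: take the interlock
 *			and block via lck_mtx_lock_wait_x86()
 *	case 2:		the holder isn't running: skip the spin and block
 *	}
 */
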
1953
1954
1955 /*
1956 * Routine: lck_mtx_lock_wait_x86
1957 *
1958 * Invoked in order to wait on contention.
1959 *
1960 * Called with the interlock locked and
1961 * preemption disabled...
1962 * returns it unlocked and with preemption enabled
1963 */
1964 void
1965 lck_mtx_lock_wait_x86 (
1966 lck_mtx_t *mutex)
1967 {
1968 thread_t self = current_thread();
1969 thread_t holder;
1970 integer_t priority;
1971 spl_t s;
1972 #if CONFIG_DTRACE
1973 uint64_t sleep_start = 0;
1974
1975 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
1976 sleep_start = mach_absolute_time();
1977 }
1978 #endif
1979 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
1980 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
1981
1982 priority = self->sched_pri;
1983
1984 if (priority < self->priority)
1985 priority = self->priority;
1986 if (priority < BASEPRI_DEFAULT)
1987 priority = BASEPRI_DEFAULT;
1988
1989 /* Do not promote past promotion ceiling */
1990 priority = MIN(priority, MAXPRI_PROMOTE);
1991
1992 if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
1993 mutex->lck_mtx_pri = priority;
1994 mutex->lck_mtx_waiters++;
1995
1996 if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
1997 holder->sched_pri < mutex->lck_mtx_pri ) {
1998 s = splsched();
1999 thread_lock(holder);
2000
2001 /* holder priority may have been bumped by another thread
2002 * before thread_lock was taken
2003 */
2004 if (holder->sched_pri < mutex->lck_mtx_pri) {
2005 KERNEL_DEBUG_CONSTANT(
2006 MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
2007 holder->sched_pri, priority, thread_tid(holder), mutex, 0);
2008 /* Assert that we're not altering the priority of a
2009 * thread above the MAXPRI_PROMOTE band
2010 */
2011 assert(holder->sched_pri < MAXPRI_PROMOTE);
2012 set_sched_pri(holder, priority);
2013
2014 if (mutex->lck_mtx_promoted == 0) {
2015 holder->promotions++;
2016 holder->sched_flags |= TH_SFLAG_PROMOTED;
2017
2018 mutex->lck_mtx_promoted = 1;
2019 }
2020 }
2021 thread_unlock(holder);
2022 splx(s);
2023 }
2024 assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
2025
2026 lck_mtx_ilk_unlock(mutex);
2027
2028 thread_block(THREAD_CONTINUE_NULL);
2029
2030 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
2031 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
2032
2033 #if CONFIG_DTRACE
2034 /*
2035 * Record the Dtrace lockstat probe for blocking, block time
2036 * measured from when we were entered.
2037 */
2038 if (sleep_start) {
2039 if (mutex->lck_mtx_is_ext == 0) {
2040 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
2041 mach_absolute_time() - sleep_start);
2042 } else {
2043 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
2044 mach_absolute_time() - sleep_start);
2045 }
2046 }
2047 #endif
2048 }