]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/locks_i386.c
xnu-1228.0.2.tar.gz
[apple/xnu.git] / osfmk / i386 / locks_i386.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
64 #include <mach_kdb.h>
65 #include <mach_ldebug.h>
66
67 #include <kern/lock.h>
68 #include <kern/locks.h>
69 #include <kern/kalloc.h>
70 #include <kern/misc_protos.h>
71 #include <kern/thread.h>
72 #include <kern/processor.h>
73 #include <kern/cpu_data.h>
74 #include <kern/cpu_number.h>
75 #include <kern/sched_prim.h>
76 #include <kern/xpr.h>
77 #include <kern/debug.h>
78 #include <string.h>
79
80 #if MACH_KDB
81 #include <ddb/db_command.h>
82 #include <ddb/db_output.h>
83 #include <ddb/db_sym.h>
84 #include <ddb/db_print.h>
85 #endif /* MACH_KDB */
86
87 #include <i386/machine_cpu.h>
88
89 #include <sys/kdebug.h>
90
91 /*
92 * We need only enough declarations from the BSD-side to be able to
93 * test if our probe is active, and to call __dtrace_probe(). Setting
94 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
95 */
96 #if CONFIG_DTRACE
97 #define NEED_DTRACE_DEFS
98 #include <../bsd/sys/lockstat.h>
99 #endif
100
/*
 * Sub-codes combined with DBG_MACH_LOCKS through MACHDBG_CODE() when the
 * read/write lock slow paths in this file emit KERNEL_DEBUG trace points.
 */
#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100 /* writer waiting for lck_rw_want_write */
#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101 /* writer waiting for readers/upgrade to drain */
#define LCK_RW_LCK_SHARED_CODE 0x102 /* reader waiting in lck_rw_lock_shared_gen */
/* NOTE(review): the next three are presumably used by the upgrade/downgrade routines - confirm. */
#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
#define LCK_RW_LCK_EX_TO_SH_CODE 0x105

/* NOTE(review): presumably the mutex-spin trace code; confirm against its users. */
#define LCK_MTX_LCK_SPIN 0x200

/* Non-zero when any of the lock debugging options is compiled in. */
#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

unsigned int LcksOpts=0;
/*
 * Spin budget consulted by the rw lock slow paths before they consider
 * blocking: element [0] is used for locks that cannot sleep, element [1]
 * for locks that can.  A value of 0 means "do not spin".
 */
unsigned int lock_wait_time[2] = { (unsigned int)-1, 0 } ;
114
115 /* Forwards */
116
#if MACH_KDB
/* Kernel-debugger pretty printers; only declared when MACH_KDB is configured. */
void db_print_simple_lock(
	simple_lock_t addr);

void db_print_mutex(
	mutex_t * addr);
#endif /* MACH_KDB */
124
125
#if USLOCK_DEBUG
/*
 * Perform simple lock checks.
 *
 * uslock_check gates the per-lock checking selected in usld_lock_init()
 * and tested by USLOCK_CHECKING().
 */
int uslock_check = 1;
/* NOTE(review): max_lock_loops is not referenced in this part of the file. */
int max_lock_loops = 100000000;
/* Locks defined elsewhere (declared extern here). */
decl_simple_lock_data(extern , printf_lock)
decl_simple_lock_data(extern , panic_lock)
#if MACH_KDB
decl_simple_lock_data(extern , kdb_lock)
#endif /* MACH_KDB */
#endif /* USLOCK_DEBUG */
138
139
/*
 * We often want to know the addresses of the callers
 * of the various lock routines. However, this information
 * is only used for debugging and statistics.
 *
 * DECL_PC(pc) declares a local to hold the caller's pc and
 * OBTAIN_PC(pc, l) fills it in.  Both expand to nothing unless
 * ANY_LOCK_DEBUG is set (or, for OBTAIN_PC, under lint).
 */
typedef void *pc_t;
/* Placeholder values stored in the debug fields when no pc/thread is recorded. */
#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
#if ANY_LOCK_DEBUG
#define OBTAIN_PC(pc,l) ((pc) = (void *) GET_RETURN_PC(&(l)))
#define DECL_PC(pc) pc_t pc;
#else /* ANY_LOCK_DEBUG */
#define DECL_PC(pc)
#ifdef lint
/*
 * Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc,l) ++pc
#else /* lint */
#define OBTAIN_PC(pc,l)
#endif /* lint */
#endif /* ANY_LOCK_DEBUG */
162
163
/*
 * Portable lock package implementation of usimple_locks.
 *
 * USLDBG(stmt) compiles its argument only when USLOCK_DEBUG is set, so
 * the usld_* debug hooks disappear entirely from non-debug builds.
 */

#if USLOCK_DEBUG
#define USLDBG(stmt) stmt
void usld_lock_init(usimple_lock_t, unsigned short);
void usld_lock_pre(usimple_lock_t, pc_t);
void usld_lock_post(usimple_lock_t, pc_t);
void usld_unlock(usimple_lock_t, pc_t);
void usld_lock_try_pre(usimple_lock_t, pc_t);
void usld_lock_try_post(usimple_lock_t, pc_t);
int usld_lock_common_checks(usimple_lock_t, char *);
#else /* USLOCK_DEBUG */
#define USLDBG(stmt)
#endif /* USLOCK_DEBUG */
180
/*
 * Forward definitions
 *
 * Prototypes for the generic read/write lock routines defined later in
 * this file.
 */

void lck_rw_lock_shared_gen(
	lck_rw_t *lck);

lck_rw_type_t lck_rw_done_gen(
	lck_rw_t *lck);
190
191 /*
192 * Routine: lck_spin_alloc_init
193 */
194 lck_spin_t *
195 lck_spin_alloc_init(
196 lck_grp_t *grp,
197 lck_attr_t *attr)
198 {
199 lck_spin_t *lck;
200
201 if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
202 lck_spin_init(lck, grp, attr);
203
204 return(lck);
205 }
206
207 /*
208 * Routine: lck_spin_free
209 */
210 void
211 lck_spin_free(
212 lck_spin_t *lck,
213 lck_grp_t *grp)
214 {
215 lck_spin_destroy(lck, grp);
216 kfree(lck, sizeof(lck_spin_t));
217 }
218
219 /*
220 * Routine: lck_spin_init
221 */
222 void
223 lck_spin_init(
224 lck_spin_t *lck,
225 lck_grp_t *grp,
226 __unused lck_attr_t *attr)
227 {
228 usimple_lock_init((usimple_lock_t) lck, 0);
229 lck_grp_reference(grp);
230 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
231 }
232
233 /*
234 * Routine: lck_spin_destroy
235 */
236 void
237 lck_spin_destroy(
238 lck_spin_t *lck,
239 lck_grp_t *grp)
240 {
241 if (lck->lck_spin_data[0] == LCK_SPIN_TAG_DESTROYED)
242 return;
243 lck->lck_spin_data[0] = LCK_SPIN_TAG_DESTROYED;
244 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
245 lck_grp_deallocate(grp);
246 return;
247 }
248
249 /*
250 * Routine: lck_spin_lock
251 */
252 void
253 lck_spin_lock(
254 lck_spin_t *lck)
255 {
256 usimple_lock((usimple_lock_t) lck);
257 }
258
259 /*
260 * Routine: lck_spin_unlock
261 */
262 void
263 lck_spin_unlock(
264 lck_spin_t *lck)
265 {
266 usimple_unlock((usimple_lock_t) lck);
267 }
268
269
270 /*
271 * Routine: lck_spin_try_lock
272 */
273 boolean_t
274 lck_spin_try_lock(
275 lck_spin_t *lck)
276 {
277 return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
278 }
279
280 /*
281 * Initialize a usimple_lock.
282 *
283 * No change in preemption state.
284 */
285 void
286 usimple_lock_init(
287 usimple_lock_t l,
288 __unused unsigned short tag)
289 {
290 #ifndef MACHINE_SIMPLE_LOCK
291 USLDBG(usld_lock_init(l, tag));
292 hw_lock_init(&l->interlock);
293 #else
294 simple_lock_init((simple_lock_t)l,tag);
295 #endif
296 }
297
298
299 /*
300 * Acquire a usimple_lock.
301 *
302 * Returns with preemption disabled. Note
303 * that the hw_lock routines are responsible for
304 * maintaining preemption state.
305 */
306 void
307 usimple_lock(
308 usimple_lock_t l)
309 {
310 #ifndef MACHINE_SIMPLE_LOCK
311 DECL_PC(pc);
312
313 OBTAIN_PC(pc, l);
314 USLDBG(usld_lock_pre(l, pc));
315
316 if(!hw_lock_to(&l->interlock, LockTimeOutTSC)) /* Try to get the lock with a timeout */
317 panic("simple lock deadlock detection: lock=%p, cpu=%d, owning thread=0x%x", l, cpu_number(), l->interlock.lock_data);
318
319 USLDBG(usld_lock_post(l, pc));
320 #else
321 simple_lock((simple_lock_t)l);
322 #endif
323 }
324
325
326 /*
327 * Release a usimple_lock.
328 *
329 * Returns with preemption enabled. Note
330 * that the hw_lock routines are responsible for
331 * maintaining preemption state.
332 */
333 void
334 usimple_unlock(
335 usimple_lock_t l)
336 {
337 #ifndef MACHINE_SIMPLE_LOCK
338 DECL_PC(pc);
339
340 OBTAIN_PC(pc, l);
341 USLDBG(usld_unlock(l, pc));
342 hw_lock_unlock(&l->interlock);
343 #else
344 simple_unlock_rwmb((simple_lock_t)l);
345 #endif
346 }
347
348
349 /*
350 * Conditionally acquire a usimple_lock.
351 *
352 * On success, returns with preemption disabled.
353 * On failure, returns with preemption in the same state
354 * as when first invoked. Note that the hw_lock routines
355 * are responsible for maintaining preemption state.
356 *
357 * XXX No stats are gathered on a miss; I preserved this
358 * behavior from the original assembly-language code, but
359 * doesn't it make sense to log misses? XXX
360 */
361 unsigned int
362 usimple_lock_try(
363 usimple_lock_t l)
364 {
365 #ifndef MACHINE_SIMPLE_LOCK
366 unsigned int success;
367 DECL_PC(pc);
368
369 OBTAIN_PC(pc, l);
370 USLDBG(usld_lock_try_pre(l, pc));
371 if ((success = hw_lock_try(&l->interlock))) {
372 USLDBG(usld_lock_try_post(l, pc));
373 }
374 return success;
375 #else
376 return(simple_lock_try((simple_lock_t)l));
377 #endif
378 }
379
380 #if USLOCK_DEBUG
/*
 * States of a usimple_lock. The default when initializing
 * a usimple_lock is setting it up for debug checking.
 *
 * The values are bit flags kept in l->debug.state; USLOCK_CHECKING(l)
 * is true only when checking is globally enabled (uslock_check) and
 * the lock itself carries USLOCK_CHECKED.
 */
#define USLOCK_CHECKED 0x0001 /* lock is being checked */
#define USLOCK_TAKEN 0x0002 /* lock has been taken */
#define USLOCK_INIT 0xBAA0 /* lock has been initialized */
#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
#define USLOCK_CHECKING(l) (uslock_check && \
	((l)->debug.state & USLOCK_CHECKED))

/*
 * Trace activities of a particularly interesting lock.
 */
void usl_trace(usimple_lock_t, int, pc_t, const char *);
396
397
398 /*
399 * Initialize the debugging information contained
400 * in a usimple_lock.
401 */
402 void
403 usld_lock_init(
404 usimple_lock_t l,
405 __unused unsigned short tag)
406 {
407 if (l == USIMPLE_LOCK_NULL)
408 panic("lock initialization: null lock pointer");
409 l->lock_type = USLOCK_TAG;
410 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
411 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
412 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
413 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
414 l->debug.duration[0] = l->debug.duration[1] = 0;
415 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
416 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
417 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
418 }
419
420
421 /*
422 * These checks apply to all usimple_locks, not just
423 * those with USLOCK_CHECKED turned on.
424 */
425 int
426 usld_lock_common_checks(
427 usimple_lock_t l,
428 char *caller)
429 {
430 if (l == USIMPLE_LOCK_NULL)
431 panic("%s: null lock pointer", caller);
432 if (l->lock_type != USLOCK_TAG)
433 panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l);
434 if (!(l->debug.state & USLOCK_INIT))
435 panic("%s: 0x%x is not an initialized lock",
436 caller, (integer_t) l);
437 return USLOCK_CHECKING(l);
438 }
439
440
441 /*
442 * Debug checks on a usimple_lock just before attempting
443 * to acquire it.
444 */
445 /* ARGSUSED */
446 void
447 usld_lock_pre(
448 usimple_lock_t l,
449 pc_t pc)
450 {
451 char caller[] = "usimple_lock";
452
453
454 if (!usld_lock_common_checks(l, caller))
455 return;
456
457 /*
458 * Note that we have a weird case where we are getting a lock when we are]
459 * in the process of putting the system to sleep. We are running with no
460 * current threads, therefore we can't tell if we are trying to retake a lock
461 * we have or someone on the other processor has it. Therefore we just
462 * ignore this test if the locking thread is 0.
463 */
464
465 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
466 l->debug.lock_thread == (void *) current_thread()) {
467 printf("%s: lock %p already locked (at %p) by",
468 caller, l, l->debug.lock_pc);
469 printf(" current thread %p (new attempt at pc %p)\n",
470 l->debug.lock_thread, pc);
471 panic("%s", caller);
472 }
473 mp_disable_preemption();
474 usl_trace(l, cpu_number(), pc, caller);
475 mp_enable_preemption();
476 }
477
478
479 /*
480 * Debug checks on a usimple_lock just after acquiring it.
481 *
482 * Pre-emption has been disabled at this point,
483 * so we are safe in using cpu_number.
484 */
485 void
486 usld_lock_post(
487 usimple_lock_t l,
488 pc_t pc)
489 {
490 register int mycpu;
491 char caller[] = "successful usimple_lock";
492
493
494 if (!usld_lock_common_checks(l, caller))
495 return;
496
497 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
498 panic("%s: lock 0x%x became uninitialized",
499 caller, (integer_t) l);
500 if ((l->debug.state & USLOCK_TAKEN))
501 panic("%s: lock 0x%x became TAKEN by someone else",
502 caller, (integer_t) l);
503
504 mycpu = cpu_number();
505 l->debug.lock_thread = (void *)current_thread();
506 l->debug.state |= USLOCK_TAKEN;
507 l->debug.lock_pc = pc;
508 l->debug.lock_cpu = mycpu;
509
510 usl_trace(l, mycpu, pc, caller);
511 }
512
513
514 /*
515 * Debug checks on a usimple_lock just before
516 * releasing it. Note that the caller has not
517 * yet released the hardware lock.
518 *
519 * Preemption is still disabled, so there's
520 * no problem using cpu_number.
521 */
522 void
523 usld_unlock(
524 usimple_lock_t l,
525 pc_t pc)
526 {
527 register int mycpu;
528 char caller[] = "usimple_unlock";
529
530
531 if (!usld_lock_common_checks(l, caller))
532 return;
533
534 mycpu = cpu_number();
535
536 if (!(l->debug.state & USLOCK_TAKEN))
537 panic("%s: lock 0x%x hasn't been taken",
538 caller, (integer_t) l);
539 if (l->debug.lock_thread != (void *) current_thread())
540 panic("%s: unlocking lock 0x%x, owned by thread %p",
541 caller, (integer_t) l, l->debug.lock_thread);
542 if (l->debug.lock_cpu != mycpu) {
543 printf("%s: unlocking lock 0x%x on cpu 0x%x",
544 caller, (integer_t) l, mycpu);
545 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
546 panic("%s", caller);
547 }
548 usl_trace(l, mycpu, pc, caller);
549
550 l->debug.unlock_thread = l->debug.lock_thread;
551 l->debug.lock_thread = INVALID_PC;
552 l->debug.state &= ~USLOCK_TAKEN;
553 l->debug.unlock_pc = pc;
554 l->debug.unlock_cpu = mycpu;
555 }
556
557
558 /*
559 * Debug checks on a usimple_lock just before
560 * attempting to acquire it.
561 *
562 * Preemption isn't guaranteed to be disabled.
563 */
564 void
565 usld_lock_try_pre(
566 usimple_lock_t l,
567 pc_t pc)
568 {
569 char caller[] = "usimple_lock_try";
570
571 if (!usld_lock_common_checks(l, caller))
572 return;
573 mp_disable_preemption();
574 usl_trace(l, cpu_number(), pc, caller);
575 mp_enable_preemption();
576 }
577
578
579 /*
580 * Debug checks on a usimple_lock just after
581 * successfully attempting to acquire it.
582 *
583 * Preemption has been disabled by the
584 * lock acquisition attempt, so it's safe
585 * to use cpu_number.
586 */
587 void
588 usld_lock_try_post(
589 usimple_lock_t l,
590 pc_t pc)
591 {
592 register int mycpu;
593 char caller[] = "successful usimple_lock_try";
594
595 if (!usld_lock_common_checks(l, caller))
596 return;
597
598 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
599 panic("%s: lock 0x%x became uninitialized",
600 caller, (integer_t) l);
601 if ((l->debug.state & USLOCK_TAKEN))
602 panic("%s: lock 0x%x became TAKEN by someone else",
603 caller, (integer_t) l);
604
605 mycpu = cpu_number();
606 l->debug.lock_thread = (void *) current_thread();
607 l->debug.state |= USLOCK_TAKEN;
608 l->debug.lock_pc = pc;
609 l->debug.lock_cpu = mycpu;
610
611 usl_trace(l, mycpu, pc, caller);
612 }
613
614
615 /*
616 * For very special cases, set traced_lock to point to a
617 * specific lock of interest. The result is a series of
618 * XPRs showing lock operations on that lock. The lock_seq
619 * value is used to show the order of those operations.
620 */
621 usimple_lock_t traced_lock;
622 unsigned int lock_seq;
623
624 void
625 usl_trace(
626 usimple_lock_t l,
627 int mycpu,
628 pc_t pc,
629 const char * op_name)
630 {
631 if (traced_lock == l) {
632 XPR(XPR_SLOCK,
633 "seq %d, cpu %d, %s @ %x\n",
634 (integer_t) lock_seq, (integer_t) mycpu,
635 (integer_t) op_name, (integer_t) pc, 0);
636 lock_seq++;
637 }
638 }
639
640
641 #endif /* USLOCK_DEBUG */
642
643 /*
644 * Routine: lock_alloc
645 * Function:
646 * Allocate a lock for external users who cannot
647 * hard-code the structure definition into their
648 * objects.
649 * For now just use kalloc, but a zone is probably
650 * warranted.
651 */
652 lock_t *
653 lock_alloc(
654 boolean_t can_sleep,
655 unsigned short tag,
656 unsigned short tag1)
657 {
658 lock_t *l;
659
660 if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
661 lock_init(l, can_sleep, tag, tag1);
662 return(l);
663 }
664
665 /*
666 * Routine: lock_free
667 * Function:
668 * Free a lock allocated for external users.
669 * For now just use kfree, but a zone is probably
670 * warranted.
671 */
672 void
673 lock_free(
674 lock_t *l)
675 {
676 kfree(l, sizeof(lock_t));
677 }
678
679
680 /*
681 * Routine: lock_init
682 * Function:
683 * Initialize a lock; required before use.
684 * Note that clients declare the "struct lock"
685 * variables and then initialize them, rather
686 * than getting a new one from this module.
687 */
688 void
689 lock_init(
690 lock_t *l,
691 boolean_t can_sleep,
692 __unused unsigned short tag,
693 __unused unsigned short tag1)
694 {
695 hw_lock_byte_init(&l->lck_rw_interlock);
696 l->lck_rw_want_write = FALSE;
697 l->lck_rw_want_upgrade = FALSE;
698 l->lck_rw_shared_count = 0;
699 l->lck_rw_can_sleep = can_sleep;
700 l->lck_rw_tag = tag;
701 l->lck_rw_priv_excl = 1;
702 }
703
704
/*
 * Sleep locks. These use the same data structure and algorithm
 * as the spin locks, but the process sleeps while it is waiting
 * for the lock. These work on uniprocessor systems.
 */

/*
 * Under MACH_LDEBUG, the number of wait-loop passes a lock routine
 * makes before it drops into Debugger() with a timeout message.
 */
#define DECREMENTER_TIMEOUT 1000000
712
713 void
714 lock_write(
715 register lock_t * l)
716 {
717 lck_rw_lock_exclusive(l);
718 }
719
720 void
721 lock_done(
722 register lock_t * l)
723 {
724 (void) lck_rw_done(l);
725 }
726
727 void
728 lock_read(
729 register lock_t * l)
730 {
731 lck_rw_lock_shared(l);
732 }
733
734
735 /*
736 * Routine: lock_read_to_write
737 * Function:
738 * Improves a read-only lock to one with
739 * write permission. If another reader has
740 * already requested an upgrade to a write lock,
741 * no lock is held upon return.
742 *
743 * Returns FALSE if the upgrade *failed*.
744 */
745
746 boolean_t
747 lock_read_to_write(
748 register lock_t * l)
749 {
750 return lck_rw_lock_shared_to_exclusive(l);
751 }
752
753 void
754 lock_write_to_read(
755 register lock_t * l)
756 {
757 lck_rw_lock_exclusive_to_shared(l);
758 }
759
760
761
762 /*
763 * Routine: lck_rw_alloc_init
764 */
765 lck_rw_t *
766 lck_rw_alloc_init(
767 lck_grp_t *grp,
768 lck_attr_t *attr) {
769 lck_rw_t *lck;
770
771 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0)
772 lck_rw_init(lck, grp, attr);
773
774 return(lck);
775 }
776
777 /*
778 * Routine: lck_rw_free
779 */
780 void
781 lck_rw_free(
782 lck_rw_t *lck,
783 lck_grp_t *grp) {
784 lck_rw_destroy(lck, grp);
785 kfree(lck, sizeof(lck_rw_t));
786 }
787
788 /*
789 * Routine: lck_rw_init
790 */
791 void
792 lck_rw_init(
793 lck_rw_t *lck,
794 lck_grp_t *grp,
795 lck_attr_t *attr)
796 {
797 lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
798 attr : &LockDefaultLckAttr;
799
800 hw_lock_byte_init(&lck->lck_rw_interlock);
801 lck->lck_rw_want_write = FALSE;
802 lck->lck_rw_want_upgrade = FALSE;
803 lck->lck_rw_shared_count = 0;
804 lck->lck_rw_can_sleep = TRUE;
805 lck->lck_rw_tag = 0;
806 lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
807 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
808
809 lck_grp_reference(grp);
810 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
811 }
812
813 /*
814 * Routine: lck_rw_destroy
815 */
816 void
817 lck_rw_destroy(
818 lck_rw_t *lck,
819 lck_grp_t *grp) {
820 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
821 return;
822 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
823 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
824 lck_grp_deallocate(grp);
825 return;
826 }
827
/*
 * Sleep locks. These use the same data structure and algorithm
 * as the spin locks, but the process sleeps while it is waiting
 * for the lock. These work on uniprocessor systems.
 */

/*
 * Under MACH_LDEBUG, the number of wait-loop passes a lock routine
 * makes before it drops into Debugger() with a timeout message.
 * (Same value as the definition earlier in this file.)
 */
#define DECREMENTER_TIMEOUT 1000000

/*
 * Wait events for blocked readers and blocked writers.  Each is the
 * address of a different field inside the lock (lck_rw_tag for readers,
 * lck_rw_pad8 for writers), so the two classes of waiter can be woken
 * independently.
 */
#define RW_LOCK_READER_EVENT(x) \
	((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))

#define RW_LOCK_WRITER_EVENT(x) \
	((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
841
842 /*
843 * We need to disable interrupts while holding the mutex interlock
844 * to prevent an IPI intervening.
845 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
846 */
847 static boolean_t
848 lck_interlock_lock(lck_rw_t *lck)
849 {
850 boolean_t istate;
851
852 istate = ml_set_interrupts_enabled(FALSE);
853 hw_lock_byte_lock(&lck->lck_rw_interlock);
854
855 return istate;
856 }
857
858 static void
859 lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
860 {
861 hw_lock_byte_unlock(&lck->lck_rw_interlock);
862 ml_set_interrupts_enabled(istate);
863 }
864
865 /*
866 * This inline is used when busy-waiting for an rw lock.
867 * If interrupts were disabled when the lock primitive was called,
868 * we poll the IPI handler for pending tlb flushes.
869 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
870 */
871 static inline void
872 lck_rw_lock_pause(boolean_t interrupts_enabled)
873 {
874 if (!interrupts_enabled)
875 handle_pending_TLB_flushes();
876 cpu_pause();
877 }
878
879 /*
880 * Routine: lck_rw_lock_exclusive
881 */
882 void
883 lck_rw_lock_exclusive(
884 lck_rw_t *lck)
885 {
886 int i;
887 wait_result_t res;
888 #if MACH_LDEBUG
889 int decrementer;
890 #endif /* MACH_LDEBUG */
891 boolean_t istate;
892 #if CONFIG_DTRACE
893 uint64_t wait_interval = 0;
894 int slept = 0;
895 int readers_at_sleep;
896 #endif
897
898 istate = lck_interlock_lock(lck);
899 #if CONFIG_DTRACE
900 readers_at_sleep = lck->lck_rw_shared_count;
901 #endif
902
903 #if MACH_LDEBUG
904 decrementer = DECREMENTER_TIMEOUT;
905 #endif /* MACH_LDEBUG */
906
907 /*
908 * Try to acquire the lck_rw_want_write bit.
909 */
910 while (lck->lck_rw_want_write) {
911
912 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
913 /*
914 * Either sleeping or spinning is happening, start
915 * a timing of our delay interval now.
916 */
917 #if CONFIG_DTRACE
918 if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
919 wait_interval = mach_absolute_time();
920 } else {
921 wait_interval = -1;
922 }
923 #endif
924
925
926 i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];
927 if (i != 0) {
928 lck_interlock_unlock(lck, istate);
929 #if MACH_LDEBUG
930 if (!--decrementer)
931 Debugger("timeout - lck_rw_want_write");
932 #endif /* MACH_LDEBUG */
933 while (--i != 0 && lck->lck_rw_want_write)
934 lck_rw_lock_pause(istate);
935 istate = lck_interlock_lock(lck);
936 }
937
938 if (lck->lck_rw_can_sleep && lck->lck_rw_want_write) {
939 lck->lck_w_waiting = TRUE;
940 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
941 if (res == THREAD_WAITING) {
942 lck_interlock_unlock(lck, istate);
943 res = thread_block(THREAD_CONTINUE_NULL);
944 #if CONFIG_DTRACE
945 slept = 1;
946 #endif
947 istate = lck_interlock_lock(lck);
948 }
949 }
950 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)lck, res, 0, 0, 0);
951 }
952 lck->lck_rw_want_write = TRUE;
953
954 /* Wait for readers (and upgrades) to finish */
955
956 #if MACH_LDEBUG
957 decrementer = DECREMENTER_TIMEOUT;
958 #endif /* MACH_LDEBUG */
959 while ((lck->lck_rw_shared_count != 0) || lck->lck_rw_want_upgrade) {
960
961 i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];
962
963 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START,
964 (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, i, 0);
965
966 #if CONFIG_DTRACE
967 /*
968 * Either sleeping or spinning is happening, start
969 * a timing of our delay interval now. If we set it
970 * to -1 we don't have accurate data so we cannot later
971 * decide to record a dtrace spin or sleep event.
972 */
973 if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
974 wait_interval = mach_absolute_time();
975 } else {
976 wait_interval = (unsigned) -1;
977 }
978 #endif
979
980 if (i != 0) {
981 lck_interlock_unlock(lck, istate);
982 #if MACH_LDEBUG
983 if (!--decrementer)
984 Debugger("timeout - wait for readers");
985 #endif /* MACH_LDEBUG */
986 while (--i != 0 && (lck->lck_rw_shared_count != 0 ||
987 lck->lck_rw_want_upgrade))
988 lck_rw_lock_pause(istate);
989 istate = lck_interlock_lock(lck);
990 }
991
992 if (lck->lck_rw_can_sleep && (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade)) {
993 lck->lck_w_waiting = TRUE;
994 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
995 if (res == THREAD_WAITING) {
996 lck_interlock_unlock(lck, istate);
997 res = thread_block(THREAD_CONTINUE_NULL);
998 #if CONFIG_DTRACE
999 slept = 1;
1000 #endif
1001 istate = lck_interlock_lock(lck);
1002 }
1003 }
1004 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END,
1005 (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, res, 0);
1006 }
1007
1008 lck_interlock_unlock(lck, istate);
1009 #if CONFIG_DTRACE
1010 /*
1011 * Decide what latencies we suffered that are Dtrace events.
1012 * If we have set wait_interval, then we either spun or slept.
1013 * At least we get out from under the interlock before we record
1014 * which is the best we can do here to minimize the impact
1015 * of the tracing.
1016 * If we have set wait_interval to -1, then dtrace was not enabled when we
1017 * started sleeping/spinning so we don't record this event.
1018 */
1019 if (wait_interval != 0 && wait_interval != (unsigned) -1) {
1020 if (slept == 0) {
1021 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
1022 mach_absolute_time() - wait_interval, 1);
1023 } else {
1024 /*
1025 * For the blocking case, we also record if when we blocked
1026 * it was held for read or write, and how many readers.
1027 * Notice that above we recorded this before we dropped
1028 * the interlock so the count is accurate.
1029 */
1030 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
1031 mach_absolute_time() - wait_interval, 1,
1032 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1033 }
1034 }
1035 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1036 #endif
1037 }
1038
1039
1040 /*
1041 * Routine: lck_rw_done_gen
1042 */
1043 lck_rw_type_t
1044 lck_rw_done_gen(
1045 lck_rw_t *lck)
1046 {
1047 boolean_t wakeup_readers = FALSE;
1048 boolean_t wakeup_writers = FALSE;
1049 lck_rw_type_t lck_rw_type;
1050 boolean_t istate;
1051
1052 istate = lck_interlock_lock(lck);
1053
1054 if (lck->lck_rw_shared_count != 0) {
1055 lck_rw_type = LCK_RW_TYPE_SHARED;
1056 lck->lck_rw_shared_count--;
1057 }
1058 else {
1059 lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1060 if (lck->lck_rw_want_upgrade)
1061 lck->lck_rw_want_upgrade = FALSE;
1062 else
1063 lck->lck_rw_want_write = FALSE;
1064 }
1065
1066 /*
1067 * There is no reason to wakeup a waiting thread
1068 * if the read-count is non-zero. Consider:
1069 * we must be dropping a read lock
1070 * threads are waiting only if one wants a write lock
1071 * if there are still readers, they can't proceed
1072 */
1073
1074 if (lck->lck_rw_shared_count == 0) {
1075 if (lck->lck_w_waiting) {
1076 lck->lck_w_waiting = FALSE;
1077 wakeup_writers = TRUE;
1078 }
1079 if (!(lck->lck_rw_priv_excl && wakeup_writers == TRUE) &&
1080 lck->lck_r_waiting) {
1081 lck->lck_r_waiting = FALSE;
1082 wakeup_readers = TRUE;
1083 }
1084 }
1085
1086 lck_interlock_unlock(lck, istate);
1087
1088 if (wakeup_readers)
1089 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1090 if (wakeup_writers)
1091 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1092
1093 #if CONFIG_DTRACE
1094 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE ? 1 : 0));
1095 #endif
1096
1097 return(lck_rw_type);
1098 }
1099
1100
1101
1102
1103 /*
1104 * Routine: lck_rw_unlock
1105 */
1106 void
1107 lck_rw_unlock(
1108 lck_rw_t *lck,
1109 lck_rw_type_t lck_rw_type)
1110 {
1111 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1112 lck_rw_unlock_shared(lck);
1113 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1114 lck_rw_unlock_exclusive(lck);
1115 else
1116 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
1117 }
1118
1119
1120 /*
1121 * Routine: lck_rw_unlock_shared
1122 */
1123 void
1124 lck_rw_unlock_shared(
1125 lck_rw_t *lck)
1126 {
1127 lck_rw_type_t ret;
1128
1129 ret = lck_rw_done(lck);
1130
1131 if (ret != LCK_RW_TYPE_SHARED)
1132 panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
1133 }
1134
1135
1136 /*
1137 * Routine: lck_rw_unlock_exclusive
1138 */
1139 void
1140 lck_rw_unlock_exclusive(
1141 lck_rw_t *lck)
1142 {
1143 lck_rw_type_t ret;
1144
1145 ret = lck_rw_done(lck);
1146
1147 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1148 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
1149 }
1150
1151
1152 /*
1153 * Routine: lck_rw_lock
1154 */
1155 void
1156 lck_rw_lock(
1157 lck_rw_t *lck,
1158 lck_rw_type_t lck_rw_type)
1159 {
1160 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1161 lck_rw_lock_shared(lck);
1162 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1163 lck_rw_lock_exclusive(lck);
1164 else
1165 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
1166 }
1167
1168
1169 /*
1170 * Routine: lck_rw_lock_shared_gen
1171 */
1172 void
1173 lck_rw_lock_shared_gen(
1174 lck_rw_t *lck)
1175 {
1176 int i;
1177 wait_result_t res;
1178 #if MACH_LDEBUG
1179 int decrementer;
1180 #endif /* MACH_LDEBUG */
1181 boolean_t istate;
1182 #if CONFIG_DTRACE
1183 uint64_t wait_interval = 0;
1184 int slept = 0;
1185 int readers_at_sleep;
1186 #endif
1187
1188 istate = lck_interlock_lock(lck);
1189 #if CONFIG_DTRACE
1190 readers_at_sleep = lck->lck_rw_shared_count;
1191 #endif
1192
1193 #if MACH_LDEBUG
1194 decrementer = DECREMENTER_TIMEOUT;
1195 #endif /* MACH_LDEBUG */
1196 while ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1197 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
1198
1199 i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];
1200
1201 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START,
1202 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, i, 0);
1203 #if CONFIG_DTRACE
1204 if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK]) && wait_interval == 0) {
1205 wait_interval = mach_absolute_time();
1206 } else {
1207 wait_interval = -1;
1208 }
1209 #endif
1210
1211 if (i != 0) {
1212 lck_interlock_unlock(lck, istate);
1213 #if MACH_LDEBUG
1214 if (!--decrementer)
1215 Debugger("timeout - wait no writers");
1216 #endif /* MACH_LDEBUG */
1217 while (--i != 0 &&
1218 (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1219 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl))
1220 lck_rw_lock_pause(istate);
1221 istate = lck_interlock_lock(lck);
1222 }
1223
1224 if (lck->lck_rw_can_sleep &&
1225 (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1226 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
1227 lck->lck_r_waiting = TRUE;
1228 res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
1229 if (res == THREAD_WAITING) {
1230 lck_interlock_unlock(lck, istate);
1231 res = thread_block(THREAD_CONTINUE_NULL);
1232 #if CONFIG_DTRACE
1233 slept = 1;
1234 #endif
1235 istate = lck_interlock_lock(lck);
1236 }
1237 }
1238 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END,
1239 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, res, 0);
1240 }
1241
1242 lck->lck_rw_shared_count++;
1243
1244 lck_interlock_unlock(lck, istate);
1245 #if CONFIG_DTRACE
1246 if (wait_interval != 0 && wait_interval != (unsigned) -1) {
1247 if (slept == 0) {
1248 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1249 } else {
1250 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1251 mach_absolute_time() - wait_interval, 0,
1252 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1253 }
1254 }
1255 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1256 #endif
1257 }
1258
1259
1260 /*
1261 * Routine: lck_rw_lock_shared_to_exclusive
1262 * Function:
1263 * Improves a read-only lock to one with
1264 * write permission. If another reader has
1265 * already requested an upgrade to a write lock,
1266 * no lock is held upon return.
1267 *
1268 * Returns FALSE if the upgrade *failed*.
1269 */
1270
1271 boolean_t
1272 lck_rw_lock_shared_to_exclusive(
1273 lck_rw_t *lck)
1274 {
1275 int i;
1276 boolean_t do_wakeup = FALSE;
1277 wait_result_t res;
1278 #if MACH_LDEBUG
1279 int decrementer;
1280 #endif /* MACH_LDEBUG */
1281 boolean_t istate;
1282 #if CONFIG_DTRACE
1283 uint64_t wait_interval = 0;
1284 int slept = 0;
1285 int readers_at_sleep = 0;
1286 #endif
1287
1288 istate = lck_interlock_lock(lck);
1289
1290 lck->lck_rw_shared_count--;
1291
1292 if (lck->lck_rw_want_upgrade) {
1293 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START,
1294 (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1295
1296 /*
1297 * Someone else has requested upgrade.
1298 * Since we've released a read lock, wake
1299 * him up.
1300 */
1301 if (lck->lck_w_waiting && (lck->lck_rw_shared_count == 0)) {
1302 lck->lck_w_waiting = FALSE;
1303 do_wakeup = TRUE;
1304 }
1305
1306 lck_interlock_unlock(lck, istate);
1307
1308 if (do_wakeup)
1309 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1310
1311 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END,
1312 (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1313
1314 return (FALSE);
1315 }
1316
1317 lck->lck_rw_want_upgrade = TRUE;
1318
1319 #if MACH_LDEBUG
1320 decrementer = DECREMENTER_TIMEOUT;
1321 #endif /* MACH_LDEBUG */
1322 while (lck->lck_rw_shared_count != 0) {
1323 #if CONFIG_DTRACE
1324 if (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] && wait_interval == 0) {
1325 wait_interval = mach_absolute_time();
1326 readers_at_sleep = lck->lck_rw_shared_count;
1327 } else {
1328 wait_interval = -1;
1329 }
1330 #endif
1331 i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];
1332
1333 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START,
1334 (int)lck, lck->lck_rw_shared_count, i, 0, 0);
1335
1336 if (i != 0) {
1337 lck_interlock_unlock(lck, istate);
1338 #if MACH_LDEBUG
1339 if (!--decrementer)
1340 Debugger("timeout - lck_rw_shared_count");
1341 #endif /* MACH_LDEBUG */
1342 while (--i != 0 && lck->lck_rw_shared_count != 0)
1343 lck_rw_lock_pause(istate);
1344 istate = lck_interlock_lock(lck);
1345 }
1346
1347 if (lck->lck_rw_can_sleep && lck->lck_rw_shared_count != 0) {
1348 lck->lck_w_waiting = TRUE;
1349 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1350 if (res == THREAD_WAITING) {
1351 lck_interlock_unlock(lck, istate);
1352 res = thread_block(THREAD_CONTINUE_NULL);
1353 #if CONFIG_DTRACE
1354 slept = 1;
1355 #endif
1356 istate = lck_interlock_lock(lck);
1357 }
1358 }
1359 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END,
1360 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1361 }
1362
1363 lck_interlock_unlock(lck, istate);
1364 #if CONFIG_DTRACE
1365 /*
1366 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1367 */
1368 if (wait_interval != 0 && wait_interval != (unsigned) -1 && readers_at_sleep) {
1369 if (slept == 0) {
1370 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1371 } else {
1372 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
1373 mach_absolute_time() - wait_interval, 1,
1374 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1375 }
1376 }
1377
1378 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1379 #endif
1380 return (TRUE);
1381 }
1382
1383 /*
1384 * Routine: lck_rw_lock_exclusive_to_shared
1385 */
1386 void
1387 lck_rw_lock_exclusive_to_shared(
1388 lck_rw_t *lck)
1389 {
1390 boolean_t wakeup_readers = FALSE;
1391 boolean_t wakeup_writers = FALSE;
1392 boolean_t istate;
1393
1394 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1395 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1396
1397 istate = lck_interlock_lock(lck);
1398
1399 lck->lck_rw_shared_count++;
1400 if (lck->lck_rw_want_upgrade)
1401 lck->lck_rw_want_upgrade = FALSE;
1402 else
1403 lck->lck_rw_want_write = FALSE;
1404
1405 if (lck->lck_w_waiting) {
1406 lck->lck_w_waiting = FALSE;
1407 wakeup_writers = TRUE;
1408 }
1409 if (!(lck->lck_rw_priv_excl && wakeup_writers == TRUE) &&
1410 lck->lck_r_waiting) {
1411 lck->lck_r_waiting = FALSE;
1412 wakeup_readers = TRUE;
1413 }
1414
1415 lck_interlock_unlock(lck, istate);
1416
1417 if (wakeup_readers)
1418 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1419 if (wakeup_writers)
1420 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1421
1422 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1423 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1424
1425 #if CONFIG_DTRACE
1426 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1427 #endif
1428 }
1429
1430
1431 /*
1432 * Routine: lck_rw_try_lock
1433 */
1434 boolean_t
1435 lck_rw_try_lock(
1436 lck_rw_t *lck,
1437 lck_rw_type_t lck_rw_type)
1438 {
1439 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1440 return(lck_rw_try_lock_shared(lck));
1441 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1442 return(lck_rw_try_lock_exclusive(lck));
1443 else
1444 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
1445 return(FALSE);
1446 }
1447
1448 /*
1449 * Routine: lck_rw_try_lock_exclusive
1450 * Function:
1451 * Tries to get a write lock.
1452 *
1453 * Returns FALSE if the lock is not held on return.
1454 */
1455
1456 boolean_t
1457 lck_rw_try_lock_exclusive(
1458 lck_rw_t *lck)
1459 {
1460 boolean_t istate;
1461
1462 istate = lck_interlock_lock(lck);
1463
1464 if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade || lck->lck_rw_shared_count) {
1465 /*
1466 * Can't get lock.
1467 */
1468 lck_interlock_unlock(lck, istate);
1469 return(FALSE);
1470 }
1471
1472 /*
1473 * Have lock.
1474 */
1475
1476 lck->lck_rw_want_write = TRUE;
1477
1478 lck_interlock_unlock(lck, istate);
1479
1480 #if CONFIG_DTRACE
1481 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lck, 1);
1482 #endif
1483 return(TRUE);
1484 }
1485
1486 /*
1487 * Routine: lck_rw_try_lock_shared
1488 * Function:
1489 * Tries to get a read lock.
1490 *
1491 * Returns FALSE if the lock is not held on return.
1492 */
1493
1494 boolean_t
1495 lck_rw_try_lock_shared(
1496 lck_rw_t *lck)
1497 {
1498 boolean_t istate;
1499
1500 istate = lck_interlock_lock(lck);
1501 /* No reader priority check here... */
1502 if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) {
1503 lck_interlock_unlock(lck, istate);
1504 return(FALSE);
1505 }
1506
1507 lck->lck_rw_shared_count++;
1508
1509 lck_interlock_unlock(lck, istate);
1510
1511 #if CONFIG_DTRACE
1512 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lck, 0);
1513 #endif
1514 return(TRUE);
1515 }
1516
1517 void
1518 lck_rw_assert(
1519 lck_rw_t *lck,
1520 unsigned int type)
1521 {
1522 switch (type) {
1523 case LCK_RW_ASSERT_SHARED:
1524 if (lck->lck_rw_shared_count != 0) {
1525 return;
1526 }
1527 break;
1528 case LCK_RW_ASSERT_EXCLUSIVE:
1529 if ((lck->lck_rw_want_write ||
1530 lck->lck_rw_want_upgrade) &&
1531 lck->lck_rw_shared_count == 0) {
1532 return;
1533 }
1534 break;
1535 case LCK_RW_ASSERT_HELD:
1536 if (lck->lck_rw_want_write ||
1537 lck->lck_rw_want_upgrade ||
1538 lck->lck_rw_shared_count != 0) {
1539 return;
1540 }
1541 break;
1542 default:
1543 break;
1544 }
1545
1546 panic("rw lock (%p) not held (mode=%u)\n", lck, type);
1547 }
1548
1549 /*
1550 * Routine: lck_mtx_alloc_init
1551 */
1552 lck_mtx_t *
1553 lck_mtx_alloc_init(
1554 lck_grp_t *grp,
1555 lck_attr_t *attr)
1556 {
1557 lck_mtx_t *lck;
1558
1559 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
1560 lck_mtx_init(lck, grp, attr);
1561
1562 return(lck);
1563 }
1564
1565 /*
1566 * Routine: lck_mtx_free
1567 */
1568 void
1569 lck_mtx_free(
1570 lck_mtx_t *lck,
1571 lck_grp_t *grp)
1572 {
1573 lck_mtx_destroy(lck, grp);
1574 kfree(lck, sizeof(lck_mtx_t));
1575 }
1576
1577 /*
1578 * Routine: lck_mtx_ext_init
1579 */
1580 static void
1581 lck_mtx_ext_init(
1582 lck_mtx_ext_t *lck,
1583 lck_grp_t *grp,
1584 lck_attr_t *attr)
1585 {
1586 bzero((void *)lck, sizeof(lck_mtx_ext_t));
1587
1588 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1589 lck->lck_mtx_deb.type = MUTEX_TAG;
1590 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
1591 }
1592
1593 lck->lck_mtx_grp = grp;
1594
1595 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
1596 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
1597 }
1598
1599 /*
1600 * Routine: lck_mtx_init
1601 */
1602 void
1603 lck_mtx_init(
1604 lck_mtx_t *lck,
1605 lck_grp_t *grp,
1606 lck_attr_t *attr)
1607 {
1608 lck_mtx_ext_t *lck_ext;
1609 lck_attr_t *lck_attr;
1610
1611 if (attr != LCK_ATTR_NULL)
1612 lck_attr = attr;
1613 else
1614 lck_attr = &LockDefaultLckAttr;
1615
1616 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1617 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
1618 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1619 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1620 lck->lck_mtx_ptr = lck_ext;
1621 }
1622 } else {
1623 lck->lck_mtx_ilk = 0;
1624 lck->lck_mtx_locked = 0;
1625 lck->lck_mtx_waiters = 0;
1626 lck->lck_mtx_pri = 0;
1627 }
1628 lck_grp_reference(grp);
1629 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1630 }
1631
1632 /*
1633 * Routine: lck_mtx_init_ext
1634 */
1635 void
1636 lck_mtx_init_ext(
1637 lck_mtx_t *lck,
1638 lck_mtx_ext_t *lck_ext,
1639 lck_grp_t *grp,
1640 lck_attr_t *attr)
1641 {
1642 lck_attr_t *lck_attr;
1643
1644 if (attr != LCK_ATTR_NULL)
1645 lck_attr = attr;
1646 else
1647 lck_attr = &LockDefaultLckAttr;
1648
1649 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1650 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1651 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1652 lck->lck_mtx_ptr = lck_ext;
1653 } else {
1654 lck->lck_mtx_ilk = 0;
1655 lck->lck_mtx_locked = 0;
1656 lck->lck_mtx_waiters = 0;
1657 lck->lck_mtx_pri = 0;
1658 }
1659 lck_grp_reference(grp);
1660 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1661 }
1662
1663 /*
1664 * Routine: lck_mtx_destroy
1665 */
1666 void
1667 lck_mtx_destroy(
1668 lck_mtx_t *lck,
1669 lck_grp_t *grp)
1670 {
1671 boolean_t lck_is_indirect;
1672
1673 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
1674 return;
1675 lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
1676 lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
1677 if (lck_is_indirect)
1678 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
1679 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
1680 lck_grp_deallocate(grp);
1681 return;
1682 }
1683
1684 /*
1685 * Routine: lck_mtx_lock_spinwait
1686 *
1687 * Invoked trying to acquire a mutex when there is contention but
1688 * the holder is running on another processor. We spin for up to a maximum
1689 * time waiting for the lock to be released.
1690 *
1691 * Called with the interlock unlocked.
1692 */
1693 void
1694 lck_mtx_lock_spinwait(
1695 lck_mtx_t *lck)
1696 {
1697 thread_t holder;
1698 volatile lck_mtx_t *mutex;
1699 uint64_t deadline;
1700
1701 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
1702 mutex = lck;
1703 else
1704 mutex = &lck->lck_mtx_ptr->lck_mtx;
1705
1706 KERNEL_DEBUG(
1707 MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN) | DBG_FUNC_NONE,
1708 (int)lck, (int)mutex->lck_mtx_locked, 0, 0, 0);
1709
1710 deadline = mach_absolute_time() + MutexSpin;
1711 /*
1712 * Spin while:
1713 * - mutex is locked, and
1714 * - its locked as a spin lock, or
1715 * - owner is running on another processor, and
1716 * - owner (processor) is not idling, and
1717 * - we haven't spun for long enough.
1718 */
1719 while ((holder = (thread_t) mutex->lck_mtx_locked) != NULL) {
1720 if ((holder == (thread_t)MUTEX_LOCKED_AS_SPIN) ||
1721 ((holder->machine.specFlags & OnProc) != 0 &&
1722 (holder->state & TH_IDLE) == 0 &&
1723 mach_absolute_time() < deadline)) {
1724 cpu_pause();
1725 continue;
1726 }
1727 break;
1728 }
1729 #if CONFIG_DTRACE
1730 /*
1731 * We've already kept a count via deadline of how long we spun.
1732 * If dtrace is active, then we compute backwards to decide how
1733 * long we spun.
1734 *
1735 * Note that we record a different probe id depending on whether
1736 * this is a direct or indirect mutex. This allows us to
1737 * penalize only lock groups that have debug/stats enabled
1738 * with dtrace processing if desired.
1739 */
1740 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1741 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lck,
1742 mach_absolute_time() - (deadline - MutexSpin));
1743 } else {
1744 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lck,
1745 mach_absolute_time() - (deadline - MutexSpin));
1746 }
1747 /* The lockstat acquire event is recorded by the assembly code beneath us. */
1748 #endif
1749 }
1750
1751 /*
1752 * Called from assembly code when a destroyed mutex is detected
1753 * during a lock/unlock/try/convert
1754 */
1755
1756 void
1757 lck_mtx_interlock_panic(
1758 lck_mtx_t *lck)
1759 {
1760 panic("trying to interlock destroyed mutex %p", lck);
1761 }
1762
1763
1764 #if MACH_KDB
1765
1766 void
1767 db_show_one_lock(
1768 lock_t *lock)
1769 {
1770 db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ",
1771 lock->lck_rw_shared_count,
1772 lock->lck_rw_want_upgrade ? "" : "!",
1773 lock->lck_rw_want_write ? "" : "!");
1774 db_printf("%swaiting, %scan_sleep\n",
1775 (lock->lck_r_waiting || lock->lck_w_waiting) ? "" : "!",
1776 lock->lck_rw_can_sleep ? "" : "!");
1777 db_printf("Interlock:\n");
1778 db_show_one_simple_lock((db_expr_t) ((vm_offset_t)simple_lock_addr(lock->lck_rw_interlock)),
1779 TRUE, (db_expr_t)0, (char *)0);
1780 }
1781
1782 #endif /* MACH_KDB */
1783
1784 /*
1785 * The C portion of the mutex package. These routines are only invoked
1786 * if the optimized assembler routines can't do the work.
1787 */
1788
1789 /*
1790 * Routine: lock_alloc
1791 * Function:
1792 * Allocate a mutex for external users who cannot
1793 * hard-code the structure definition into their
1794 * objects.
1795 * For now just use kalloc, but a zone is probably
1796 * warranted.
1797 */
1798 mutex_t *
1799 mutex_alloc(
1800 unsigned short tag)
1801 {
1802 mutex_t *m;
1803
1804 if ((m = (mutex_t *)kalloc(sizeof(mutex_t))) != 0)
1805 mutex_init(m, tag);
1806 return(m);
1807 }
1808
1809 /*
1810 * Routine: mutex_free
1811 * Function:
1812 * Free a mutex allocated for external users.
1813 * For now just use kfree, but a zone is probably
1814 * warranted.
1815 */
1816 void
1817 mutex_free(
1818 mutex_t *m)
1819 {
1820 kfree(m, sizeof(mutex_t));
1821 }
1822
1823
1824 #if MACH_KDB
/*
 * Routines to print out simple_locks and mutexes in a nicely-formatted
 * fashion.
 */

/* Column headings printed above a simple lock entry. */
const char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER";

/* Column headings printed above a mutex entry. */
const char *mutex_labels = "ENTRY LOCKED WAITERS THREAD CALLER";
1832
1833 void
1834 db_show_one_simple_lock (
1835 db_expr_t addr,
1836 boolean_t have_addr,
1837 __unused db_expr_t count,
1838 __unused char * modif)
1839 {
1840 simple_lock_t saddr = (simple_lock_t) ((vm_offset_t) addr);
1841
1842 if (saddr == (simple_lock_t)0 || !have_addr) {
1843 db_error ("No simple_lock\n");
1844 }
1845 #if USLOCK_DEBUG
1846 else if (saddr->lock_type != USLOCK_TAG)
1847 db_error ("Not a simple_lock\n");
1848 #endif /* USLOCK_DEBUG */
1849
1850 db_printf ("%s\n", simple_lock_labels);
1851 db_print_simple_lock (saddr);
1852 }
1853
1854 void
1855 db_print_simple_lock (
1856 simple_lock_t addr)
1857 {
1858
1859 db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
1860 #if USLOCK_DEBUG
1861 db_printf (" %08x", addr->debug.lock_thread);
1862 db_printf (" %08x ", addr->debug.duration[1]);
1863 db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
1864 #endif /* USLOCK_DEBUG */
1865 db_printf ("\n");
1866 }
1867
1868 void
1869 db_show_one_mutex (
1870 db_expr_t addr,
1871 boolean_t have_addr,
1872 __unused db_expr_t count,
1873 __unused char * modif)
1874 {
1875 mutex_t * maddr = (mutex_t *)((vm_offset_t) addr);
1876
1877 if (maddr == (mutex_t *)0 || !have_addr)
1878 db_error ("No mutex\n");
1879 #if MACH_LDEBUG
1880 else if (maddr->type != MUTEX_TAG)
1881 db_error ("Not a mutex\n");
1882 #endif /* MACH_LDEBUG */
1883
1884 db_printf ("%s\n", mutex_labels);
1885 db_print_mutex (maddr);
1886 }
1887
1888 void
1889 db_print_mutex (
1890 mutex_t * addr)
1891 {
1892 db_printf ("%08x %6d %7d",
1893 addr, *addr, addr->lck_mtx.lck_mtx_waiters);
1894 #if MACH_LDEBUG
1895 db_printf (" %08x ", addr->thread);
1896 db_printsym (addr->pc, DB_STGY_ANY);
1897 #endif /* MACH_LDEBUG */
1898 db_printf ("\n");
1899 }
1900
1901 #endif /* MACH_KDB */