/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

#include <mach_ldebug.h>
#include <debug.h>

#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach_debug/lockgroup_info.h>

#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <string.h>

#include <sys/kdebug.h>

#if CONFIG_DTRACE
/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe().  Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>
#endif

#define	LCK_MTX_SLEEP_CODE		0
#define	LCK_MTX_SLEEP_DEADLINE_CODE	1
#define	LCK_MTX_LCK_WAIT_CODE		2
#define	LCK_MTX_UNLCK_WAKEUP_CODE	3

static queue_head_t	lck_grp_queue;
static unsigned int	lck_grp_cnt;

decl_lck_mtx_data(static, lck_grp_lock)
static lck_mtx_ext_t	lck_grp_lock_ext;

lck_grp_attr_t	LockDefaultGroupAttr;
lck_grp_t	LockCompatGroup;
lck_attr_t	LockDefaultLckAttr;

/*
 * Routine:	lck_mod_init
 */

void
lck_mod_init(
	void)
{
	/*
	 * Obtain "lcks" options: this currently controls lock statistics
	 */
	if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
		LcksOpts = 0;

	queue_init(&lck_grp_queue);

	/*
	 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here.
	 * This avoids grabbing the lck_grp_lock before it is initialized.
	 */

	bzero(&LockCompatGroup, sizeof(lck_grp_t));
	(void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);

	if (LcksOpts & enaLkStat)
		LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT;
	else
		LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;

	LockCompatGroup.lck_grp_refcnt = 1;

	enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
	lck_grp_cnt = 1;

	lck_grp_attr_setdefault(&LockDefaultGroupAttr);
	lck_attr_setdefault(&LockDefaultLckAttr);

	lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
}

/*
 * Routine:	lck_grp_attr_alloc_init
 */

lck_grp_attr_t *
lck_grp_attr_alloc_init(
	void)
{
	lck_grp_attr_t	*attr;

	if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0)
		lck_grp_attr_setdefault(attr);

	return(attr);
}

/*
 * Routine:	lck_grp_attr_setdefault
 */

void
lck_grp_attr_setdefault(
	lck_grp_attr_t	*attr)
{
	if (LcksOpts & enaLkStat)
		attr->grp_attr_val = LCK_GRP_ATTR_STAT;
	else
		attr->grp_attr_val = 0;
}

/*
 * Routine:	lck_grp_attr_setstat
 */

void
lck_grp_attr_setstat(
	lck_grp_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT);
}

/*
 * Routine:	lck_grp_attr_free
 */

void
lck_grp_attr_free(
	lck_grp_attr_t	*attr)
{
	kfree(attr, sizeof(lck_grp_attr_t));
}

/*
 * Routine:	lck_grp_alloc_init
 */

lck_grp_t *
lck_grp_alloc_init(
	const char	*grp_name,
	lck_grp_attr_t	*attr)
{
	lck_grp_t	*grp;

	if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0)
		lck_grp_init(grp, grp_name, attr);

	return(grp);
}

/*
 * Routine:	lck_grp_init
 */

void
lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
{
	bzero((void *)grp, sizeof(lck_grp_t));

	(void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);

	if (attr != LCK_GRP_ATTR_NULL)
		grp->lck_grp_attr = attr->grp_attr_val;
	else if (LcksOpts & enaLkStat)
		grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
	else
		grp->lck_grp_attr = LCK_ATTR_NONE;

	grp->lck_grp_refcnt = 1;

	lck_mtx_lock(&lck_grp_lock);
	enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
	lck_grp_cnt++;
	lck_mtx_unlock(&lck_grp_lock);
}
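
/*
 * Example (illustrative sketch, not part of the original file): a typical
 * client creates a group once and then initializes its locks against it.
 * The identifiers below ("com.example.subsys", my_grp, my_attr, my_mtx)
 * are hypothetical.
 *
 *	lck_grp_t	*my_grp  = lck_grp_alloc_init("com.example.subsys",
 *						      LCK_GRP_ATTR_NULL);
 *	lck_attr_t	*my_attr = lck_attr_alloc_init();
 *	lck_mtx_t	*my_mtx  = lck_mtx_alloc_init(my_grp, my_attr);
 *
 *	lck_mtx_lock(my_mtx);
 *	... critical section ...
 *	lck_mtx_unlock(my_mtx);
 */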

/*
 * Routine:	lck_grp_free
 */

void
lck_grp_free(
	lck_grp_t	*grp)
{
	lck_mtx_lock(&lck_grp_lock);
	lck_grp_cnt--;
	(void)remque((queue_entry_t)grp);
	lck_mtx_unlock(&lck_grp_lock);
	lck_grp_deallocate(grp);
}

/*
 * Routine:	lck_grp_reference
 */

void
lck_grp_reference(
	lck_grp_t	*grp)
{
	(void)hw_atomic_add(&grp->lck_grp_refcnt, 1);
}

/*
 * Routine:	lck_grp_deallocate
 */

void
lck_grp_deallocate(
	lck_grp_t	*grp)
{
	if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0)
		kfree(grp, sizeof(lck_grp_t));
}

/*
 * Routine:	lck_grp_lckcnt_incr
 */

void
lck_grp_lckcnt_incr(
	lck_grp_t	*grp,
	lck_type_t	lck_type)
{
	unsigned int	*lckcnt;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	default:
		panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
		return;
	}

	(void)hw_atomic_add(lckcnt, 1);
}

/*
 * Routine:	lck_grp_lckcnt_decr
 */

void
lck_grp_lckcnt_decr(
	lck_grp_t	*grp,
	lck_type_t	lck_type)
{
	unsigned int	*lckcnt;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	default:
		panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
		return;
	}

	(void)hw_atomic_sub(lckcnt, 1);
}

/*
 * Routine:	lck_attr_alloc_init
 */

lck_attr_t *
lck_attr_alloc_init(
	void)
{
	lck_attr_t	*attr;

	if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0)
		lck_attr_setdefault(attr);

	return(attr);
}

/*
 * Routine:	lck_attr_setdefault
 */

void
lck_attr_setdefault(
	lck_attr_t	*attr)
{
#if __i386__ || __x86_64__
#if !DEBUG
	if (LcksOpts & enaLkDeb)
		attr->lck_attr_val = LCK_ATTR_DEBUG;
	else
		attr->lck_attr_val = LCK_ATTR_NONE;
#else
	attr->lck_attr_val = LCK_ATTR_DEBUG;
#endif	/* !DEBUG */
#else
#error Unknown architecture.
#endif	/* __i386__ || __x86_64__ */
}

/*
 * Routine:	lck_attr_setdebug
 */
void
lck_attr_setdebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG);
}

/*
 * Routine:	lck_attr_cleardebug
 */
void
lck_attr_cleardebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG);
}

/*
 * Routine:	lck_attr_rw_shared_priority
 */
void
lck_attr_rw_shared_priority(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY);
}

/*
 * Routine:	lck_attr_free
 */
void
lck_attr_free(
	lck_attr_t	*attr)
{
	kfree(attr, sizeof(lck_attr_t));
}
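
/*
 * Example (illustrative sketch): lock debugging can be requested per-lock
 * by setting LCK_ATTR_DEBUG on a private attribute before the lock is
 * initialized.  "my_grp" and "dbg_attr" are hypothetical.
 *
 *	lck_attr_t *dbg_attr = lck_attr_alloc_init();
 *	lck_attr_setdebug(dbg_attr);
 *	lck_spin_t *sl = lck_spin_alloc_init(my_grp, dbg_attr);
 *	...
 *	lck_spin_free(sl, my_grp);
 *	lck_attr_free(dbg_attr);
 */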

/*
 * Routine:	lck_spin_sleep
 */
wait_result_t
lck_spin_sleep(
	lck_spin_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
			lck_spin_lock(lck);
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_spin_unlock(lck);

	return res;
}

/*
 * Routine:	lck_spin_sleep_deadline
 */
wait_result_t
lck_spin_sleep_deadline(
	lck_spin_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
			lck_spin_lock(lck);
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_spin_unlock(lck);

	return res;
}

/*
 * Routine:	lck_mtx_clear_promoted
 *
 * Handle clearing of TH_SFLAG_PROMOTED,
 * adjusting thread priority as needed.
 *
 * Called with thread lock held
 */
static void
lck_mtx_clear_promoted (
	thread_t		thread,
	__kdebug_only uintptr_t	trace_lck)
{
	thread->sched_flags &= ~TH_SFLAG_PROMOTED;

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		/* Thread still has a RW lock promotion */
	} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
			thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
		set_sched_pri(thread, DEPRESSPRI);
	} else {
		if (thread->base_pri < thread->sched_pri) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
				thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
		}
		thread_recompute_sched_pri(thread, FALSE);
	}
}

/*
 * Routine:	lck_mtx_sleep
 */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	thread_t	thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
		     VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We overload the RW lock promotion to give us a priority ceiling
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
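
/*
 * Example (illustrative sketch): the usual condition-wait pattern built on
 * lck_mtx_sleep().  "my_mtx", "my_flag" and "my_event" are hypothetical.
 *
 *	lck_mtx_lock(my_mtx);
 *	while (!my_flag)
 *		(void) lck_mtx_sleep(my_mtx, LCK_SLEEP_DEFAULT,
 *		    (event_t) &my_event, THREAD_UNINT);
 *	... my_flag is set and the mutex is held ...
 *	lck_mtx_unlock(my_mtx);
 *
 * The waking side sets my_flag under the mutex and then calls
 * thread_wakeup((event_t) &my_event).
 */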

/*
 * Routine:	lck_mtx_sleep_deadline
 */
wait_result_t
lck_mtx_sleep_deadline(
	lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;
	thread_t	thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
		     VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * See lck_mtx_sleep().
		 */
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}

/*
 * Routine:	lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
	lck_mtx_t	*lck,
	thread_t	holder)
{
	thread_t		self = current_thread();
	lck_mtx_t		*mutex;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	__kdebug_only uintptr_t	trace_holder = VM_KERNEL_UNSLIDE_OR_PERM(holder);
	integer_t		priority;
	spl_t			s = splsched();
#if	CONFIG_DTRACE
	uint64_t		sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, trace_lck, trace_holder, 0, 0, 0);

	priority = self->sched_pri;
	if (priority < self->base_pri)
		priority = self->base_pri;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	/* Do not promote past promotion ceiling */
	priority = MIN(priority, MAXPRI_PROMOTE);

	thread_lock(holder);
	if (mutex->lck_mtx_pri == 0)
		holder->promotions++;
	holder->sched_flags |= TH_SFLAG_PROMOTED;
	if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
			holder->sched_pri, priority, trace_holder, trace_lck, 0);
		set_sched_pri(holder, priority);
	}
	thread_unlock(holder);
	splx(s);

	if (mutex->lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;
	if (self->pending_promoter[self->pending_promoter_index] == NULL) {
		self->pending_promoter[self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}
	else
	if (self->pending_promoter[self->pending_promoter_index] != mutex) {
		self->pending_promoter[++self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}

	assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if	CONFIG_DTRACE
	/*
	 * Record the DTrace lockstat probe for blocking; block time is
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}

/*
 * Routine:	lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
	lck_mtx_t		*lck)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;
	integer_t		priority;
	spl_t			s;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
		thread->pending_promoter[thread->pending_promoter_index] = NULL;
		if (thread->pending_promoter_index > 0)
			thread->pending_promoter_index--;
		mutex->lck_mtx_waiters--;
	}

	if (mutex->lck_mtx_waiters)
		priority = mutex->lck_mtx_pri;
	else {
		mutex->lck_mtx_pri = 0;
		priority = 0;
	}

	if (priority || thread->was_promoted_on_wakeup) {
		s = splsched();
		thread_lock(thread);

		if (priority) {
			thread->promotions++;
			thread->sched_flags |= TH_SFLAG_PROMOTED;
			if (thread->sched_pri < priority) {
				KERNEL_DEBUG_CONSTANT(
					MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
					thread->sched_pri, priority, 0, trace_lck, 0);
				/* Do not promote past promotion ceiling */
				assert(priority <= MAXPRI_PROMOTE);
				set_sched_pri(thread, priority);
			}
		}
		if (thread->was_promoted_on_wakeup) {
			thread->was_promoted_on_wakeup = 0;
			if (thread->promotions == 0)
				lck_mtx_clear_promoted(thread, trace_lck);
		}

		thread_unlock(thread);
		splx(s);
	}

#if CONFIG_DTRACE
	if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
		}
	}
#endif
	return (mutex->lck_mtx_waiters);
}

/*
 * Routine:	lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 */
void
lck_mtx_unlock_wakeup (
	lck_mtx_t	*lck,
	thread_t	holder)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread != holder)
		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder), 0, 0, 0);

	assert(mutex->lck_mtx_waiters > 0);
	if (mutex->lck_mtx_waiters > 1)
		thread_wakeup_one_with_pri(LCK_MTX_EVENT(lck), lck->lck_mtx_pri);
	else
		thread_wakeup_one(LCK_MTX_EVENT(lck));

	if (thread->promotions > 0) {
		spl_t	s = splsched();

		thread_lock(thread);
		if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED))
			lck_mtx_clear_promoted(thread, trace_lck);
		thread_unlock(thread);
		splx(s);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

void
lck_mtx_unlockspin_wakeup (
	lck_mtx_t	*lck)
{
	assert(lck->lck_mtx_waiters > 0);
	thread_wakeup_one(LCK_MTX_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, VM_KERNEL_UNSLIDE_OR_PERM(lck), 0, 0, 1, 0);
#if CONFIG_DTRACE
	/*
	 * When there are waiters, we skip the hot-patch spot in the
	 * fastpath, so we record it here.
	 */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}

/*
 * Routine:	mutex_pause
 *
 * Called by former callers of simple_lock_pause().
 */
#define	MAX_COLLISION_COUNTS	32
#define	MAX_COLLISION		8

unsigned int max_collision_count[MAX_COLLISION_COUNTS];

uint32_t collision_backoffs[MAX_COLLISION] = {
	10, 50, 100, 200, 400, 600, 800, 1000
};

void
mutex_pause(uint32_t collisions)
{
	wait_result_t	wait_result;
	uint32_t	back_off;

	if (collisions >= MAX_COLLISION_COUNTS)
		collisions = MAX_COLLISION_COUNTS - 1;
	max_collision_count[collisions]++;

	if (collisions >= MAX_COLLISION)
		collisions = MAX_COLLISION - 1;
	back_off = collision_backoffs[collisions];

	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
	assert(wait_result == THREAD_WAITING);

	wait_result = thread_block(THREAD_CONTINUE_NULL);
	assert(wait_result == THREAD_TIMED_OUT);
}

unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;

void
lck_mtx_yield(
	lck_mtx_t	*lck)
{
	int	waiters;

#if DEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

	if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)
		waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
	else
		waiters = lck->lck_mtx_waiters;

	if (!waiters) {
		mutex_yield_no_wait++;
	} else {
		mutex_yield_wait++;
		lck_mtx_unlock(lck);
		mutex_pause(0);
		lck_mtx_lock(lck);
	}
}

/*
 * Routine:	lck_rw_sleep
 */
wait_result_t
lck_rw_sleep(
	lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	lck_rw_type_t	lck_rw_type;
	thread_t	thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * Although we are dropping the RW lock, the intent in most cases
		 * is that this thread remains as an observer, since it may hold
		 * some secondary resource, but must yield to avoid deadlock.  In
		 * this situation, make sure that the thread is boosted to the
		 * RW lock ceiling while blocked, so that it can re-acquire the
		 * RW lock at that priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}
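
/*
 * Example (illustrative sketch): lck_rw_sleep() can change the lock mode
 * across the wait; here a shared hold is dropped and the lock is
 * re-acquired exclusive.  "my_rw" and "my_event" are hypothetical.
 *
 *	lck_rw_lock_shared(my_rw);
 *	... decide we must wait ...
 *	(void) lck_rw_sleep(my_rw, LCK_SLEEP_EXCLUSIVE,
 *	    (event_t) &my_event, THREAD_UNINT);
 *	... lock is now held exclusive ...
 *	lck_rw_done(my_rw);
 */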

/*
 * Routine:	lck_rw_sleep_deadline
 */
wait_result_t
lck_rw_sleep_deadline(
	lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;
	lck_rw_type_t	lck_rw_type;
	thread_t	thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}

/*
 * Reader-writer lock promotion
 *
 * We support a limited form of reader-writer
 * lock promotion whose effects are:
 *
 * * Qualifying threads have decay disabled
 * * Scheduler priority is reset to a floor
 *   of their statically assigned priority
 *   or BASEPRI_BACKGROUND
 *
 * The rationale is that lck_rw_ts do not have
 * a single owner, so we cannot apply a directed
 * priority boost from all waiting threads
 * to all holding threads without maintaining
 * lists of all shared owners and all waiting
 * threads for every lock.
 *
 * Instead (and to preserve the uncontended fast-
 * path), acquiring (or attempting to acquire)
 * a RW lock in shared or exclusive mode increments
 * a per-thread counter.  Only if that thread stops
 * making forward progress (for instance blocking
 * on a mutex, or being preempted) do we consult
 * the counter and apply the priority floor.
 * When the thread becomes runnable again (or in
 * the case of preemption it never stopped being
 * runnable), it has the priority boost and should
 * be in a good position to run on the CPU and
 * release all RW locks (at which point the priority
 * boost is cleared).
 *
 * Care must be taken to ensure that priority
 * boosts are not retained indefinitely, since unlike
 * mutex priority boosts (where the boost is tied
 * to the mutex lifecycle), the boost is tied
 * to the thread and independent of any particular
 * lck_rw_t.  Assertions are in place on return
 * to userspace so that the boost is not held
 * indefinitely.
 *
 * The routines that increment/decrement the
 * per-thread counter should err on the side of
 * incrementing any time a preemption is possible
 * and the lock would be visible to the rest of the
 * system as held (so it should be incremented before
 * interlocks are dropped/preemption is enabled, or
 * before a CAS is executed to acquire the lock).
 */
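
/*
 * Illustrative sketch of the counter protocol described above (simplified;
 * the real accounting lives in the lock paths themselves):
 *
 *	thread_t t = current_thread();
 *
 *	t->rwlock_count++;		// before the lock is visibly held
 *	lck_rw_lock_shared(lck);
 *	...
 *	lck_rw_done(lck);
 *	if ((t->rwlock_count-- == 1) && (t->sched_flags & TH_SFLAG_RW_PROMOTED))
 *		lck_rw_clear_promotion(t);	// last RW lock dropped
 */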

/*
 * lck_rw_clear_promotion: Undo priority promotions when the last RW
 * lock is released by a thread (if a promotion was active)
 */
void lck_rw_clear_promotion(thread_t thread)
{
	assert(thread->rwlock_count == 0);

	/* Cancel any promotions if the thread had actually blocked while holding a RW lock */
	spl_t s = splsched();

	thread_lock(thread);

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED;

		if (thread->sched_flags & TH_SFLAG_PROMOTED) {
			/* Thread still has a mutex promotion */
		} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      thread->sched_pri, DEPRESSPRI, 0, 0, 0);

			set_sched_pri(thread, DEPRESSPRI);
		} else {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      thread->sched_pri, thread->base_pri, 0, 0, 0);

			thread_recompute_sched_pri(thread, FALSE);
		}
	}

	thread_unlock(thread);
	splx(s);
}

kern_return_t
host_lockgroup_info(
	host_t			host,
	lockgroup_info_array_t	*lockgroup_infop,
	mach_msg_type_number_t	*lockgroup_infoCntp)
{
	lockgroup_info_t	*lockgroup_info_base;
	lockgroup_info_t	*lockgroup_info;
	vm_offset_t		lockgroup_info_addr;
	vm_size_t		lockgroup_info_size;
	lck_grp_t		*lck_grp;
	unsigned int		i;
	vm_size_t		used;
	vm_map_copy_t		copy;
	kern_return_t		kr;

	if (host == HOST_NULL)
		return KERN_INVALID_HOST;

	lck_mtx_lock(&lck_grp_lock);

	lockgroup_info_size = round_page(lck_grp_cnt * sizeof *lockgroup_info);
	kr = kmem_alloc_pageable(ipc_kernel_map,
				 &lockgroup_info_addr, lockgroup_info_size, VM_KERN_MEMORY_IPC);
	if (kr != KERN_SUCCESS) {
		lck_mtx_unlock(&lck_grp_lock);
		return(kr);
	}

	lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
	lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
	lockgroup_info = lockgroup_info_base;

	for (i = 0; i < lck_grp_cnt; i++) {

		lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
		lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt;
		lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt;
		lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt;
		lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max;
		lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum;

		lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
		lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt;
		lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt;
		lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt;
		lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt;
		lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max;
		lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum;
		lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max;
		lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum;

		lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
		lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt;
		lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt;
		lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt;
		lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt;
		lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max;
		lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum;
		lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max;
		lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum;

		(void) strncpy(lockgroup_info->lockgroup_name, lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);

		lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
		lockgroup_info++;
	}

	*lockgroup_infoCntp = lck_grp_cnt;
	lck_mtx_unlock(&lck_grp_lock);

	used = (*lockgroup_infoCntp) * sizeof *lockgroup_info;

	if (used != lockgroup_info_size)
		bzero((char *) lockgroup_info, lockgroup_info_size - used);

	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
			   (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
	assert(kr == KERN_SUCCESS);

	*lockgroup_infop = (lockgroup_info_t *) copy;

	return(KERN_SUCCESS);
}
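
/*
 * Example (illustrative sketch): these statistics can be read from user
 * space through the host_lockgroup_info() MIG call; error handling is
 * elided and the field choice is just one example.
 *
 *	#include <mach/mach.h>
 *	#include <stdio.h>
 *
 *	lockgroup_info_array_t	info;
 *	mach_msg_type_number_t	count;
 *
 *	if (host_lockgroup_info(mach_host_self(), &info, &count) == KERN_SUCCESS) {
 *		for (unsigned int i = 0; i < count; i++)
 *			printf("%s: %llu mutex acquisitions\n",
 *			    info[i].lockgroup_name,
 *			    (unsigned long long)info[i].lock_mtx_util_cnt);
 *		vm_deallocate(mach_task_self(), (vm_address_t)info,
 *		    count * sizeof(info[0]));
 *	}
 */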