/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
#include <mach_ldebug.h>
#include <debug.h>

#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach_debug/lockgroup_info.h>

#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <string.h>


#include <sys/kdebug.h>

#if CONFIG_DTRACE
/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe(). Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>
#endif

#define	LCK_MTX_SLEEP_CODE		0
#define	LCK_MTX_SLEEP_DEADLINE_CODE	1
#define	LCK_MTX_LCK_WAIT_CODE		2
#define	LCK_MTX_UNLCK_WAKEUP_CODE	3


static queue_head_t	lck_grp_queue;
static unsigned int	lck_grp_cnt;

decl_lck_mtx_data(static,lck_grp_lock)
static lck_mtx_ext_t lck_grp_lock_ext;

lck_grp_attr_t	LockDefaultGroupAttr;
lck_grp_t	LockCompatGroup;
lck_attr_t	LockDefaultLckAttr;

/*
 * Routine:	lck_mod_init
 */

void
lck_mod_init(
	void)
{
	/*
	 * Obtain "lcks" options: this currently controls lock statistics
	 */
	if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
		LcksOpts = 0;

	queue_init(&lck_grp_queue);

	/*
	 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
	 * grabbing the lck_grp_lock before it is initialized.
	 */

	bzero(&LockCompatGroup, sizeof(lck_grp_t));
	(void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);

	if (LcksOpts & enaLkStat)
		LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT;
	else
		LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;

	LockCompatGroup.lck_grp_refcnt = 1;

	enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
	lck_grp_cnt = 1;

	lck_grp_attr_setdefault(&LockDefaultGroupAttr);
	lck_attr_setdefault(&LockDefaultLckAttr);

	lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);

}

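/*
 * Illustrative sketch (not part of the original source): the typical
 * way a client subsystem uses the group/attribute API set up above.
 * The names example_grp/example_mtx are hypothetical, and the block is
 * kept under #if 0 so it does not change what this file compiles to.
 */
#if 0
static lck_grp_t	*example_grp;
static lck_mtx_t	*example_mtx;

static void
example_locks_bootstrap(void)
{
	/* Allocate a group with default attributes, then a mutex in it */
	example_grp = lck_grp_alloc_init("com.example.driver", LCK_GRP_ATTR_NULL);
	example_mtx = lck_mtx_alloc_init(example_grp, LCK_ATTR_NULL);
}
#endif
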
/*
 * Routine:	lck_grp_attr_alloc_init
 */

lck_grp_attr_t *
lck_grp_attr_alloc_init(
	void)
{
	lck_grp_attr_t	*attr;

	if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0)
		lck_grp_attr_setdefault(attr);

	return(attr);
}


/*
 * Routine:	lck_grp_attr_setdefault
 */

void
lck_grp_attr_setdefault(
	lck_grp_attr_t	*attr)
{
	if (LcksOpts & enaLkStat)
		attr->grp_attr_val = LCK_GRP_ATTR_STAT;
	else
		attr->grp_attr_val = 0;
}


/*
 * Routine:	lck_grp_attr_setstat
 */

void
lck_grp_attr_setstat(
	lck_grp_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT);
}


/*
 * Routine:	lck_grp_attr_free
 */

void
lck_grp_attr_free(
	lck_grp_attr_t	*attr)
{
	kfree(attr, sizeof(lck_grp_attr_t));
}


/*
 * Routine:	lck_grp_alloc_init
 */

lck_grp_t *
lck_grp_alloc_init(
	const char*	grp_name,
	lck_grp_attr_t	*attr)
{
	lck_grp_t	*grp;

	if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0)
		lck_grp_init(grp, grp_name, attr);

	return(grp);
}


/*
 * Routine:	lck_grp_init
 */

void
lck_grp_init(
	lck_grp_t	*grp,
	const char*	grp_name,
	lck_grp_attr_t	*attr)
{
	bzero((void *)grp, sizeof(lck_grp_t));

	(void) strncpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);

	if (attr != LCK_GRP_ATTR_NULL)
		grp->lck_grp_attr = attr->grp_attr_val;
	else if (LcksOpts & enaLkStat)
		grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
	else
		grp->lck_grp_attr = LCK_ATTR_NONE;

	grp->lck_grp_refcnt = 1;

	lck_mtx_lock(&lck_grp_lock);
	enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
	lck_grp_cnt++;
	lck_mtx_unlock(&lck_grp_lock);

}


/*
 * Routine:	lck_grp_free
 */

void
lck_grp_free(
	lck_grp_t	*grp)
{
	lck_mtx_lock(&lck_grp_lock);
	lck_grp_cnt--;
	(void)remque((queue_entry_t)grp);
	lck_mtx_unlock(&lck_grp_lock);
	lck_grp_deallocate(grp);
}


/*
 * Routine:	lck_grp_reference
 */

void
lck_grp_reference(
	lck_grp_t	*grp)
{
	(void)hw_atomic_add(&grp->lck_grp_refcnt, 1);
}


/*
 * Routine:	lck_grp_deallocate
 */

void
lck_grp_deallocate(
	lck_grp_t	*grp)
{
	if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0)
		kfree(grp, sizeof(lck_grp_t));
}

/*
 * Routine:	lck_grp_lckcnt_incr
 */

void
lck_grp_lckcnt_incr(
	lck_grp_t	*grp,
	lck_type_t	lck_type)
{
	unsigned int	*lckcnt;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	default:
		return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
	}

	(void)hw_atomic_add(lckcnt, 1);
}

/*
 * Routine:	lck_grp_lckcnt_decr
 */

void
lck_grp_lckcnt_decr(
	lck_grp_t	*grp,
	lck_type_t	lck_type)
{
	unsigned int	*lckcnt;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	default:
		return panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
	}

	(void)hw_atomic_sub(lckcnt, 1);
}

/*
 * Routine:	lck_attr_alloc_init
 */

lck_attr_t *
lck_attr_alloc_init(
	void)
{
	lck_attr_t	*attr;

	if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0)
		lck_attr_setdefault(attr);

	return(attr);
}


/*
 * Routine:	lck_attr_setdefault
 */

void
lck_attr_setdefault(
	lck_attr_t	*attr)
{
#if __i386__ || __x86_64__
#if !DEBUG
	if (LcksOpts & enaLkDeb)
		attr->lck_attr_val = LCK_ATTR_DEBUG;
	else
		attr->lck_attr_val = LCK_ATTR_NONE;
#else
	attr->lck_attr_val = LCK_ATTR_DEBUG;
#endif	/* !DEBUG */
#else
#error Unknown architecture.
#endif	/* __i386__ || __x86_64__ */
}


/*
 * Routine:	lck_attr_setdebug
 */
void
lck_attr_setdebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG);
}

/*
 * Routine:	lck_attr_cleardebug
 */
void
lck_attr_cleardebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG);
}


/*
 * Routine:	lck_attr_rw_shared_priority
 */
void
lck_attr_rw_shared_priority(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY);
}


/*
 * Routine:	lck_attr_free
 */
void
lck_attr_free(
	lck_attr_t	*attr)
{
	kfree(attr, sizeof(lck_attr_t));
}


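/*
 * Illustrative sketch (not part of the original source): requesting a
 * debug-checked mutex through the attribute routines above. The names
 * are hypothetical; kept under #if 0. The attribute value is read at
 * init time, so the lck_attr_t can be freed immediately afterward.
 */
#if 0
static lck_mtx_t *
example_alloc_debug_mutex(lck_grp_t *grp)
{
	lck_attr_t	*attr;
	lck_mtx_t	*mtx;

	attr = lck_attr_alloc_init();		/* defaults follow LcksOpts */
	lck_attr_setdebug(attr);		/* force LCK_ATTR_DEBUG on */
	mtx = lck_mtx_alloc_init(grp, attr);
	lck_attr_free(attr);
	return (mtx);
}
#endif
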
/*
 * Routine:	lck_spin_sleep
 */
wait_result_t
lck_spin_sleep(
	lck_spin_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
			lck_spin_lock(lck);
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_spin_unlock(lck);

	return res;
}


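/*
 * Illustrative sketch (not part of the original source): waiting for a
 * flag guarded by a spin lock via lck_spin_sleep() above. On wakeup the
 * lock has been re-taken (no LCK_SLEEP_UNLOCK), so the predicate is
 * re-tested in a loop. Names are hypothetical; kept under #if 0.
 */
#if 0
static void
example_spin_wait(lck_spin_t *lck, volatile int *flag)
{
	lck_spin_lock(lck);
	while (*flag == 0)
		(void) lck_spin_sleep(lck, LCK_SLEEP_DEFAULT,
		    (event_t)flag, THREAD_UNINT);
	lck_spin_unlock(lck);
}
#endif
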
/*
 * Routine:	lck_spin_sleep_deadline
 */
wait_result_t
lck_spin_sleep_deadline(
	lck_spin_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
			lck_spin_lock(lck);
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_spin_unlock(lck);

	return res;
}


/*
 * Routine:	lck_mtx_sleep
 */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	thread_t	thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
		     (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We overload the RW lock promotion to give us a priority ceiling
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}


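/*
 * Illustrative sketch (not part of the original source): the canonical
 * condition-wait loop built on lck_mtx_sleep(). The mutex is held on
 * entry; lck_mtx_sleep() drops it for the wait and takes it back before
 * returning, so the predicate is safely re-tested. Names are
 * hypothetical; kept under #if 0.
 */
#if 0
static wait_result_t
example_mtx_wait(lck_mtx_t *mtx, int *ready)
{
	wait_result_t res = THREAD_AWAKENED;

	lck_mtx_lock(mtx);
	while (*ready == 0) {
		res = lck_mtx_sleep(mtx, LCK_SLEEP_DEFAULT,
		    (event_t)ready, THREAD_UNINT);
	}
	lck_mtx_unlock(mtx);
	return (res);
}
#endif
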
/*
 * Routine:	lck_mtx_sleep_deadline
 */
wait_result_t
lck_mtx_sleep_deadline(
	lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;
	thread_t	thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
		     (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * See lck_mtx_sleep().
		 */
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}

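/*
 * Illustrative sketch (not part of the original source): computing an
 * absolute-time deadline for lck_mtx_sleep_deadline() with
 * clock_interval_to_deadline(), and telling a timeout apart from a
 * wakeup. Names are hypothetical; kept under #if 0.
 */
#if 0
static boolean_t
example_mtx_wait_msec(lck_mtx_t *mtx, int *ready, uint32_t msec)
{
	uint64_t	deadline;
	wait_result_t	res = THREAD_AWAKENED;

	clock_interval_to_deadline(msec, NSEC_PER_MSEC, &deadline);

	lck_mtx_lock(mtx);
	while (*ready == 0 && res != THREAD_TIMED_OUT) {
		res = lck_mtx_sleep_deadline(mtx, LCK_SLEEP_DEFAULT,
		    (event_t)ready, THREAD_UNINT, deadline);
	}
	lck_mtx_unlock(mtx);
	return (res != THREAD_TIMED_OUT);
}
#endif
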
/*
 * Routine:	lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
	lck_mtx_t	*lck,
	thread_t	holder)
{
	thread_t	self = current_thread();
	lck_mtx_t	*mutex;
	integer_t	priority;
	spl_t		s = splsched();
#if CONFIG_DTRACE
	uint64_t	sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);

	priority = self->sched_pri;
	if (priority < self->priority)
		priority = self->priority;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	/* Do not promote past promotion ceiling */
	priority = MIN(priority, MAXPRI_PROMOTE);

	thread_lock(holder);
	if (mutex->lck_mtx_pri == 0)
		holder->promotions++;
	holder->sched_flags |= TH_SFLAG_PROMOTED;
	if (mutex->lck_mtx_pri < priority &&
	    holder->sched_pri < priority) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
			holder->sched_pri, priority, holder, lck, 0);
		set_sched_pri(holder, priority);
	}
	thread_unlock(holder);
	splx(s);

	if (mutex->lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;
	if (self->pending_promoter[self->pending_promoter_index] == NULL) {
		self->pending_promoter[self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}
	else
	if (self->pending_promoter[self->pending_promoter_index] != mutex) {
		self->pending_promoter[++self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}

	assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if CONFIG_DTRACE
	/*
	 * Record the DTrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}

/*
 * Routine:	lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
	lck_mtx_t	*lck)
{
	thread_t	thread = current_thread();
	lck_mtx_t	*mutex;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
		thread->pending_promoter[thread->pending_promoter_index] = NULL;
		if (thread->pending_promoter_index > 0)
			thread->pending_promoter_index--;
		mutex->lck_mtx_waiters--;
	}

	if (mutex->lck_mtx_waiters > 0) {
		integer_t	priority = mutex->lck_mtx_pri;
		spl_t		s = splsched();

		thread_lock(thread);
		thread->promotions++;
		thread->sched_flags |= TH_SFLAG_PROMOTED;
		if (thread->sched_pri < priority) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
				thread->sched_pri, priority, 0, lck, 0);
			/* Do not promote past promotion ceiling */
			assert(priority <= MAXPRI_PROMOTE);
			set_sched_pri(thread, priority);
		}
		thread_unlock(thread);
		splx(s);
	}
	else
		mutex->lck_mtx_pri = 0;

#if CONFIG_DTRACE
	if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
		}
	}
#endif
	return (mutex->lck_mtx_waiters);
}

/*
 * Routine:	lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 */
void
lck_mtx_unlock_wakeup (
	lck_mtx_t	*lck,
	thread_t	holder)
{
	thread_t	thread = current_thread();
	lck_mtx_t	*mutex;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread != holder)
		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);

	assert(mutex->lck_mtx_waiters > 0);
	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

	if (thread->promotions > 0) {
		spl_t	s = splsched();

		thread_lock(thread);
		if (--thread->promotions == 0 &&
		    (thread->sched_flags & TH_SFLAG_PROMOTED)) {
			thread->sched_flags &= ~TH_SFLAG_PROMOTED;

			if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
				/* Thread still has a RW lock promotion */
			} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
				KERNEL_DEBUG_CONSTANT(
					MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
					thread->sched_pri, DEPRESSPRI, 0, lck, 0);

				set_sched_pri(thread, DEPRESSPRI);
			}
			else {
				if (thread->priority < thread->sched_pri) {
					KERNEL_DEBUG_CONSTANT(
						MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) |
						DBG_FUNC_NONE,
						thread->sched_pri, thread->priority,
						0, lck, 0);
				}

				SCHED(compute_priority)(thread, FALSE);
			}
		}
		thread_unlock(thread);
		splx(s);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

void
lck_mtx_unlockspin_wakeup (
	lck_mtx_t	*lck)
{
	assert(lck->lck_mtx_waiters > 0);
	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, (int)lck, 0, 0, 1, 0);
#if CONFIG_DTRACE
	/*
	 * When there are waiters, we skip the hot-patch spot in the
	 * fastpath, so we record it here.
	 */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}


/*
 * Routine:	mutex_pause
 *
 * Called by former callers of simple_lock_pause().
 */
#define MAX_COLLISION_COUNTS	32
#define MAX_COLLISION		8

unsigned int max_collision_count[MAX_COLLISION_COUNTS];

uint32_t collision_backoffs[MAX_COLLISION] = {
	10, 50, 100, 200, 400, 600, 800, 1000
};


void
mutex_pause(uint32_t collisions)
{
	wait_result_t	wait_result;
	uint32_t	back_off;

	if (collisions >= MAX_COLLISION_COUNTS)
		collisions = MAX_COLLISION_COUNTS - 1;
	max_collision_count[collisions]++;

	if (collisions >= MAX_COLLISION)
		collisions = MAX_COLLISION - 1;
	back_off = collision_backoffs[collisions];

	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
	assert(wait_result == THREAD_WAITING);

	wait_result = thread_block(THREAD_CONTINUE_NULL);
	assert(wait_result == THREAD_TIMED_OUT);
}


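/*
 * Illustrative sketch (not part of the original source): using
 * mutex_pause() as a bounded backoff in a try-lock retry loop. The
 * collision count indexes collision_backoffs[] above, so each retry
 * sleeps a little longer (10us up to 1000us). Names are hypothetical;
 * kept under #if 0.
 */
#if 0
static void
example_lock_with_backoff(lck_mtx_t *mtx)
{
	uint32_t collisions = 0;

	while (lck_mtx_try_lock(mtx) == FALSE)
		mutex_pause(collisions++);	/* back off, then retry */
}
#endif
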
unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;

void
lck_mtx_yield(
	lck_mtx_t	*lck)
{
	int	waiters;

#if DEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

	if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)
		waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
	else
		waiters = lck->lck_mtx_waiters;

	if (!waiters) {
		mutex_yield_no_wait++;
	} else {
		mutex_yield_wait++;
		lck_mtx_unlock(lck);
		mutex_pause(0);
		lck_mtx_lock(lck);
	}
}


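/*
 * Illustrative sketch (not part of the original source): calling
 * lck_mtx_yield() periodically from a long traversal so that waiters
 * blocked on the mutex get a chance to run; the yield is a no-op when
 * nobody is waiting. Names are hypothetical; kept under #if 0.
 */
#if 0
static void
example_long_scan(lck_mtx_t *mtx, int nitems)
{
	int i;

	lck_mtx_lock(mtx);
	for (i = 0; i < nitems; i++) {
		/* ... process one item under the lock ... */
		lck_mtx_yield(mtx);	/* drop and re-take only if contended */
	}
	lck_mtx_unlock(mtx);
}
#endif
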
/*
 * Routine:	lck_rw_sleep
 */
wait_result_t
lck_rw_sleep(
	lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	lck_rw_type_t	lck_rw_type;
	thread_t	thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * Although we are dropping the RW lock, the intent in most cases
		 * is that this thread remains as an observer, since it may hold
		 * some secondary resource, but must yield to avoid deadlock. In
		 * this situation, make sure that the thread is boosted to the
		 * RW lock ceiling while blocked, so that it can re-acquire the
		 * RW lock at that priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}


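/*
 * Illustrative sketch (not part of the original source): waiting on an
 * event while holding a RW lock. lck_rw_done() reports whether the lock
 * was held shared or exclusive, and lck_rw_sleep() re-acquires it in
 * the same mode unless LCK_SLEEP_SHARED/LCK_SLEEP_EXCLUSIVE says
 * otherwise. Names are hypothetical; kept under #if 0.
 */
#if 0
static void
example_rw_wait(lck_rw_t *rwlck, int *cond)
{
	lck_rw_lock_shared(rwlck);
	while (*cond == 0)
		(void) lck_rw_sleep(rwlck, LCK_SLEEP_DEFAULT,
		    (event_t)cond, THREAD_UNINT);
	lck_rw_done(rwlck);
}
#endif
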
/*
 * Routine:	lck_rw_sleep_deadline
 */
wait_result_t
lck_rw_sleep_deadline(
	lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;
	lck_rw_type_t	lck_rw_type;
	thread_t	thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}

/*
 * Reader-writer lock promotion
 *
 * We support a limited form of reader-writer
 * lock promotion whose effects are:
 *
 * * Qualifying threads have decay disabled
 * * Scheduler priority is reset to a floor of
 *   their statically assigned priority
 *   or BASEPRI_BACKGROUND
 *
 * The rationale is that lck_rw_ts do not have
 * a single owner, so we cannot apply a directed
 * priority boost from all waiting threads
 * to all holding threads without maintaining
 * lists of all shared owners and all waiting
 * threads for every lock.
 *
 * Instead (and to preserve the uncontended fast-
 * path), acquiring (or attempting to acquire)
 * a RW lock in shared or exclusive mode increments
 * a per-thread counter. Only if that thread stops
 * making forward progress (for instance blocking
 * on a mutex, or being preempted) do we consult
 * the counter and apply the priority floor.
 * When the thread becomes runnable again (or in
 * the case of preemption it never stopped being
 * runnable), it has the priority boost and should
 * be in a good position to run on the CPU and
 * release all RW locks (at which point the priority
 * boost is cleared).
 *
 * Care must be taken to ensure that priority
 * boosts are not retained indefinitely, since unlike
 * mutex priority boosts (where the boost is tied
 * to the mutex lifecycle), the boost is tied
 * to the thread and independent of any particular
 * lck_rw_t. Assertions are in place on return
 * to userspace so that the boost is not held
 * indefinitely.
 *
 * The routines that increment/decrement the
 * per-thread counter should err on the side of
 * incrementing any time a preemption is possible
 * and the lock would be visible to the rest of the
 * system as held (so it should be incremented before
 * interlocks are dropped/preemption is enabled, or
 * before a CAS is executed to acquire the lock).
 */

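/*
 * Illustrative sketch (not part of the original source): a condensed
 * view of the bookkeeping the real lck_rw_* acquire/release paths
 * perform internally, per the comment above. Callers do not write this
 * themselves; it is shown only to make the counting pattern concrete,
 * and is kept under #if 0.
 */
#if 0
static void
example_rw_promotion_pattern(lck_rw_t *lck)
{
	thread_t thread = current_thread();

	thread->rwlock_count++;		/* before the lock is visible as held */
	lck_rw_lock_exclusive(lck);

	/* ... critical section; blocking here may apply the priority floor ... */

	lck_rw_unlock_exclusive(lck);
	if (thread->rwlock_count-- == 1 /* field now 0 */ &&
	    (thread->sched_flags & TH_SFLAG_RW_PROMOTED))
		lck_rw_clear_promotion(thread);	/* last RW lock released */
}
#endif
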
/*
 * lck_rw_clear_promotion: Undo priority promotions when the last RW
 * lock is released by a thread (if a promotion was active)
 */
void lck_rw_clear_promotion(thread_t thread)
{
	assert(thread->rwlock_count == 0);

	/* Cancel any promotions if the thread had actually blocked while holding a RW lock */
	spl_t s = splsched();

	thread_lock(thread);

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED;

		if (thread->sched_flags & TH_SFLAG_PROMOTED) {
			/* Thread still has a mutex promotion */
		} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      thread->sched_pri, DEPRESSPRI, 0, 0, 0);

			set_sched_pri(thread, DEPRESSPRI);
		} else {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      thread->sched_pri, thread->priority, 0, 0, 0);

			SCHED(compute_priority)(thread, FALSE);
		}
	}

	thread_unlock(thread);
	splx(s);
}

kern_return_t
host_lockgroup_info(
	host_t			host,
	lockgroup_info_array_t	*lockgroup_infop,
	mach_msg_type_number_t	*lockgroup_infoCntp)
{
	lockgroup_info_t	*lockgroup_info_base;
	lockgroup_info_t	*lockgroup_info;
	vm_offset_t		lockgroup_info_addr;
	vm_size_t		lockgroup_info_size;
	lck_grp_t		*lck_grp;
	unsigned int		i;
	vm_size_t		used;
	vm_map_copy_t		copy;
	kern_return_t		kr;

	if (host == HOST_NULL)
		return KERN_INVALID_HOST;

	lck_mtx_lock(&lck_grp_lock);

	lockgroup_info_size = round_page(lck_grp_cnt * sizeof *lockgroup_info);
	kr = kmem_alloc_pageable(ipc_kernel_map,
				 &lockgroup_info_addr, lockgroup_info_size);
	if (kr != KERN_SUCCESS) {
		lck_mtx_unlock(&lck_grp_lock);
		return(kr);
	}

	lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
	lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
	lockgroup_info = lockgroup_info_base;

	for (i = 0; i < lck_grp_cnt; i++) {

		lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
		lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt;
		lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt;
		lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt;
		lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max;
		lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum;

		lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
		lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt;
		lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt;
		lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt;
		lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt;
		lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max;
		lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum;
		lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max;
		lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum;

		lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
		lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt;
		lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt;
		lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt;
		lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt;
		lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max;
		lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum;
		lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max;
		lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum;

		(void) strncpy(lockgroup_info->lockgroup_name, lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);

		lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
		lockgroup_info++;
	}

	*lockgroup_infoCntp = lck_grp_cnt;
	lck_mtx_unlock(&lck_grp_lock);

	used = (*lockgroup_infoCntp) * sizeof *lockgroup_info;

	if (used != lockgroup_info_size)
		bzero((char *) lockgroup_info, lockgroup_info_size - used);

	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
			   (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
	assert(kr == KERN_SUCCESS);

	*lockgroup_infop = (lockgroup_info_t *) copy;

	return(KERN_SUCCESS);
}

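/*
 * Illustrative sketch (not part of the original source): how a
 * user-space client would reach the routine above through the Mach
 * host interface. This is ordinary user code, not kernel code, so it
 * is kept under #if 0; it does not belong in this translation unit.
 */
#if 0
#include <mach/mach.h>
#include <mach_debug/lockgroup_info.h>
#include <stdio.h>

int
main(void)
{
	lockgroup_info_array_t	info;
	mach_msg_type_number_t	count;
	unsigned int		i;

	if (host_lockgroup_info(mach_host_self(), &info, &count) != KERN_SUCCESS)
		return 1;

	for (i = 0; i < count; i++)
		printf("%s\n", info[i].lockgroup_name);

	/* The array arrived as out-of-line memory; give it back */
	vm_deallocate(mach_task_self(), (vm_address_t)info,
	    count * sizeof(lockgroup_info_t));
	return 0;
}
#endif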