]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/locks_i386.c
xnu-2050.22.13.tar.gz
[apple/xnu.git] / osfmk / i386 / locks_i386.c
CommitLineData
91447636 1/*
b0d623f7 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
91447636 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
91447636 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
91447636
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
91447636
A
64#include <mach_ldebug.h>
65
66#include <kern/lock.h>
67#include <kern/locks.h>
68#include <kern/kalloc.h>
69#include <kern/misc_protos.h>
70#include <kern/thread.h>
71#include <kern/processor.h>
72#include <kern/cpu_data.h>
73#include <kern/cpu_number.h>
74#include <kern/sched_prim.h>
75#include <kern/xpr.h>
76#include <kern/debug.h>
77#include <string.h>
78
060df5ea 79#include <i386/machine_routines.h> /* machine_timeout_suspended() */
b0d623f7 80#include <machine/machine_cpu.h>
060df5ea 81#include <i386/mp.h>
91447636
A
82
83#include <sys/kdebug.h>
6d2010ae 84#include <mach/branch_predicates.h>
91447636 85
2d21ac55
A
86/*
87 * We need only enough declarations from the BSD-side to be able to
88 * test if our probe is active, and to call __dtrace_probe(). Setting
89 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
90 */
91#if CONFIG_DTRACE
92#define NEED_DTRACE_DEFS
93#include <../bsd/sys/lockstat.h>
94#endif
95
91447636
A
96#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
97#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
98#define LCK_RW_LCK_SHARED_CODE 0x102
99#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
100#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
101#define LCK_RW_LCK_EX_TO_SH_CODE 0x105
102
b0d623f7
A
103#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
104#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
105#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
106#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
107#define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
108#define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
109#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
110#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
111
91447636
A
112
113#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
114
115unsigned int LcksOpts=0;
91447636
A
116
117/* Forwards */
118
91447636
A
119#if USLOCK_DEBUG
120/*
121 * Perform simple lock checks.
122 */
123int uslock_check = 1;
124int max_lock_loops = 100000000;
125decl_simple_lock_data(extern , printf_lock)
126decl_simple_lock_data(extern , panic_lock)
91447636
A
127#endif /* USLOCK_DEBUG */
128
129
130/*
131 * We often want to know the addresses of the callers
132 * of the various lock routines. However, this information
133 * is only used for debugging and statistics.
134 */
135typedef void *pc_t;
136#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
137#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
138#if ANY_LOCK_DEBUG
b0d623f7 139#define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
91447636
A
140#define DECL_PC(pc) pc_t pc;
141#else /* ANY_LOCK_DEBUG */
142#define DECL_PC(pc)
143#ifdef lint
144/*
145 * Eliminate lint complaints about unused local pc variables.
146 */
b0d623f7 147#define OBTAIN_PC(pc) ++pc
91447636 148#else /* lint */
b0d623f7 149#define OBTAIN_PC(pc)
91447636
A
150#endif /* lint */
151#endif /* ANY_LOCK_DEBUG */
152
153
154/*
155 * Portable lock package implementation of usimple_locks.
156 */
157
158#if USLOCK_DEBUG
159#define USLDBG(stmt) stmt
160void usld_lock_init(usimple_lock_t, unsigned short);
161void usld_lock_pre(usimple_lock_t, pc_t);
162void usld_lock_post(usimple_lock_t, pc_t);
163void usld_unlock(usimple_lock_t, pc_t);
164void usld_lock_try_pre(usimple_lock_t, pc_t);
165void usld_lock_try_post(usimple_lock_t, pc_t);
166int usld_lock_common_checks(usimple_lock_t, char *);
167#else /* USLOCK_DEBUG */
168#define USLDBG(stmt)
169#endif /* USLOCK_DEBUG */
170
b0d623f7
A
171
172extern int lck_rw_grab_want(lck_rw_t *lck);
173extern int lck_rw_grab_shared(lck_rw_t *lck);
174extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);
175
176
2d21ac55
A
177/*
178 * Forward definitions
179 */
180
181void lck_rw_lock_shared_gen(
182 lck_rw_t *lck);
183
b0d623f7
A
184void lck_rw_lock_exclusive_gen(
185 lck_rw_t *lck);
186
187boolean_t lck_rw_lock_shared_to_exclusive_success(
2d21ac55
A
188 lck_rw_t *lck);
189
b0d623f7
A
190boolean_t lck_rw_lock_shared_to_exclusive_failure(
191 lck_rw_t *lck,
192 int prior_lock_state);
193
194void lck_rw_lock_exclusive_to_shared_gen(
195 lck_rw_t *lck,
196 int prior_lock_state);
197
198lck_rw_type_t lck_rw_done_gen(
199 lck_rw_t *lck,
200 int prior_lock_state);
201
91447636
A
202/*
203 * Routine: lck_spin_alloc_init
204 */
205lck_spin_t *
206lck_spin_alloc_init(
207 lck_grp_t *grp,
208 lck_attr_t *attr)
209{
210 lck_spin_t *lck;
211
212 if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
213 lck_spin_init(lck, grp, attr);
214
215 return(lck);
216}
217
218/*
219 * Routine: lck_spin_free
220 */
221void
222lck_spin_free(
223 lck_spin_t *lck,
224 lck_grp_t *grp)
225{
226 lck_spin_destroy(lck, grp);
227 kfree(lck, sizeof(lck_spin_t));
228}
229
230/*
231 * Routine: lck_spin_init
232 */
233void
234lck_spin_init(
235 lck_spin_t *lck,
236 lck_grp_t *grp,
237 __unused lck_attr_t *attr)
238{
239 usimple_lock_init((usimple_lock_t) lck, 0);
240 lck_grp_reference(grp);
241 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
242}
243
244/*
245 * Routine: lck_spin_destroy
246 */
247void
248lck_spin_destroy(
249 lck_spin_t *lck,
250 lck_grp_t *grp)
251{
b0d623f7 252 if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
91447636 253 return;
b0d623f7 254 lck->interlock = LCK_SPIN_TAG_DESTROYED;
91447636
A
255 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
256 lck_grp_deallocate(grp);
257 return;
258}
259
260/*
261 * Routine: lck_spin_lock
262 */
263void
264lck_spin_lock(
265 lck_spin_t *lck)
266{
267 usimple_lock((usimple_lock_t) lck);
268}
269
270/*
271 * Routine: lck_spin_unlock
272 */
273void
274lck_spin_unlock(
275 lck_spin_t *lck)
276{
277 usimple_unlock((usimple_lock_t) lck);
278}
279
280
281/*
282 * Routine: lck_spin_try_lock
283 */
284boolean_t
285lck_spin_try_lock(
286 lck_spin_t *lck)
287{
2d21ac55 288 return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
91447636
A
289}
290
291/*
292 * Initialize a usimple_lock.
293 *
294 * No change in preemption state.
295 */
296void
297usimple_lock_init(
298 usimple_lock_t l,
299 __unused unsigned short tag)
300{
301#ifndef MACHINE_SIMPLE_LOCK
302 USLDBG(usld_lock_init(l, tag));
303 hw_lock_init(&l->interlock);
304#else
305 simple_lock_init((simple_lock_t)l,tag);
306#endif
307}
308
060df5ea
A
309volatile uint32_t spinlock_owner_cpu = ~0;
310volatile usimple_lock_t spinlock_timed_out;
311
312static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
313 uint64_t deadline;
314 uint32_t i;
315
316 for (i = 0; i < real_ncpus; i++) {
317 if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
318 spinlock_owner_cpu = i;
6d2010ae 319 if ((uint32_t) cpu_number() == i)
060df5ea
A
320 break;
321 cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
322 cpu_NMI_interrupt(i);
323 deadline = mach_absolute_time() + (LockTimeOut * 2);
324 while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
325 cpu_pause();
326 break;
327 }
328 }
329
330 return spinlock_owner_cpu;
331}
91447636
A
332
333/*
334 * Acquire a usimple_lock.
335 *
336 * Returns with preemption disabled. Note
337 * that the hw_lock routines are responsible for
338 * maintaining preemption state.
339 */
340void
341usimple_lock(
342 usimple_lock_t l)
343{
344#ifndef MACHINE_SIMPLE_LOCK
2d21ac55 345 DECL_PC(pc);
91447636 346
b0d623f7 347 OBTAIN_PC(pc);
91447636 348 USLDBG(usld_lock_pre(l, pc));
6d2010ae
A
349
350 if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0)) {
b0d623f7 351 boolean_t uslock_acquired = FALSE;
060df5ea
A
352 while (machine_timeout_suspended()) {
353 enable_preemption();
354 if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
355 break;
6d2010ae
A
356 }
357
060df5ea
A
358 if (uslock_acquired == FALSE) {
359 uint32_t lock_cpu;
7ddcb079 360 uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
060df5ea 361 spinlock_timed_out = l;
7ddcb079
A
362 lock_cpu = spinlock_timeout_NMI(lowner);
363 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
060df5ea 364 }
b0d623f7 365 }
91447636
A
366 USLDBG(usld_lock_post(l, pc));
367#else
368 simple_lock((simple_lock_t)l);
369#endif
370}
371
372
373/*
374 * Release a usimple_lock.
375 *
376 * Returns with preemption enabled. Note
377 * that the hw_lock routines are responsible for
378 * maintaining preemption state.
379 */
380void
381usimple_unlock(
382 usimple_lock_t l)
383{
384#ifndef MACHINE_SIMPLE_LOCK
385 DECL_PC(pc);
386
b0d623f7 387 OBTAIN_PC(pc);
91447636
A
388 USLDBG(usld_unlock(l, pc));
389 hw_lock_unlock(&l->interlock);
390#else
391 simple_unlock_rwmb((simple_lock_t)l);
392#endif
393}
394
395
396/*
397 * Conditionally acquire a usimple_lock.
398 *
399 * On success, returns with preemption disabled.
400 * On failure, returns with preemption in the same state
401 * as when first invoked. Note that the hw_lock routines
402 * are responsible for maintaining preemption state.
403 *
404 * XXX No stats are gathered on a miss; I preserved this
405 * behavior from the original assembly-language code, but
406 * doesn't it make sense to log misses? XXX
407 */
408unsigned int
409usimple_lock_try(
410 usimple_lock_t l)
411{
412#ifndef MACHINE_SIMPLE_LOCK
91447636 413 unsigned int success;
2d21ac55 414 DECL_PC(pc);
91447636 415
b0d623f7 416 OBTAIN_PC(pc);
91447636
A
417 USLDBG(usld_lock_try_pre(l, pc));
418 if ((success = hw_lock_try(&l->interlock))) {
419 USLDBG(usld_lock_try_post(l, pc));
420 }
421 return success;
422#else
423 return(simple_lock_try((simple_lock_t)l));
424#endif
425}
426
427#if USLOCK_DEBUG
428/*
429 * States of a usimple_lock. The default when initializing
430 * a usimple_lock is setting it up for debug checking.
431 */
432#define USLOCK_CHECKED 0x0001 /* lock is being checked */
433#define USLOCK_TAKEN 0x0002 /* lock has been taken */
434#define USLOCK_INIT 0xBAA0 /* lock has been initialized */
435#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
436#define USLOCK_CHECKING(l) (uslock_check && \
437 ((l)->debug.state & USLOCK_CHECKED))
438
439/*
440 * Trace activities of a particularly interesting lock.
441 */
442void usl_trace(usimple_lock_t, int, pc_t, const char *);
443
444
445/*
446 * Initialize the debugging information contained
447 * in a usimple_lock.
448 */
449void
450usld_lock_init(
451 usimple_lock_t l,
452 __unused unsigned short tag)
453{
454 if (l == USIMPLE_LOCK_NULL)
455 panic("lock initialization: null lock pointer");
456 l->lock_type = USLOCK_TAG;
457 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
458 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
459 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
460 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
461 l->debug.duration[0] = l->debug.duration[1] = 0;
462 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
463 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
464 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
465}
466
467
468/*
469 * These checks apply to all usimple_locks, not just
470 * those with USLOCK_CHECKED turned on.
471 */
472int
473usld_lock_common_checks(
474 usimple_lock_t l,
475 char *caller)
476{
477 if (l == USIMPLE_LOCK_NULL)
478 panic("%s: null lock pointer", caller);
479 if (l->lock_type != USLOCK_TAG)
ebb1b9f4 480 panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
91447636 481 if (!(l->debug.state & USLOCK_INIT))
ebb1b9f4 482 panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
91447636
A
483 return USLOCK_CHECKING(l);
484}
485
486
487/*
488 * Debug checks on a usimple_lock just before attempting
489 * to acquire it.
490 */
491/* ARGSUSED */
492void
493usld_lock_pre(
494 usimple_lock_t l,
495 pc_t pc)
496{
497 char caller[] = "usimple_lock";
498
499
500 if (!usld_lock_common_checks(l, caller))
501 return;
502
503/*
504 * Note that we have a weird case where we are getting a lock when we are]
505 * in the process of putting the system to sleep. We are running with no
506 * current threads, therefore we can't tell if we are trying to retake a lock
507 * we have or someone on the other processor has it. Therefore we just
508 * ignore this test if the locking thread is 0.
509 */
510
511 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
512 l->debug.lock_thread == (void *) current_thread()) {
2d21ac55
A
513 printf("%s: lock %p already locked (at %p) by",
514 caller, l, l->debug.lock_pc);
515 printf(" current thread %p (new attempt at pc %p)\n",
91447636 516 l->debug.lock_thread, pc);
2d21ac55 517 panic("%s", caller);
91447636
A
518 }
519 mp_disable_preemption();
520 usl_trace(l, cpu_number(), pc, caller);
521 mp_enable_preemption();
522}
523
524
525/*
526 * Debug checks on a usimple_lock just after acquiring it.
527 *
528 * Pre-emption has been disabled at this point,
529 * so we are safe in using cpu_number.
530 */
531void
532usld_lock_post(
533 usimple_lock_t l,
534 pc_t pc)
535{
536 register int mycpu;
537 char caller[] = "successful usimple_lock";
538
539
540 if (!usld_lock_common_checks(l, caller))
541 return;
542
543 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7
A
544 panic("%s: lock %p became uninitialized",
545 caller, l);
91447636 546 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7
A
547 panic("%s: lock 0x%p became TAKEN by someone else",
548 caller, l);
91447636
A
549
550 mycpu = cpu_number();
551 l->debug.lock_thread = (void *)current_thread();
552 l->debug.state |= USLOCK_TAKEN;
553 l->debug.lock_pc = pc;
554 l->debug.lock_cpu = mycpu;
555
556 usl_trace(l, mycpu, pc, caller);
557}
558
559
560/*
561 * Debug checks on a usimple_lock just before
562 * releasing it. Note that the caller has not
563 * yet released the hardware lock.
564 *
565 * Preemption is still disabled, so there's
566 * no problem using cpu_number.
567 */
568void
569usld_unlock(
570 usimple_lock_t l,
571 pc_t pc)
572{
573 register int mycpu;
574 char caller[] = "usimple_unlock";
575
576
577 if (!usld_lock_common_checks(l, caller))
578 return;
579
580 mycpu = cpu_number();
581
582 if (!(l->debug.state & USLOCK_TAKEN))
b0d623f7
A
583 panic("%s: lock 0x%p hasn't been taken",
584 caller, l);
91447636 585 if (l->debug.lock_thread != (void *) current_thread())
b0d623f7
A
586 panic("%s: unlocking lock 0x%p, owned by thread %p",
587 caller, l, l->debug.lock_thread);
91447636 588 if (l->debug.lock_cpu != mycpu) {
b0d623f7
A
589 printf("%s: unlocking lock 0x%p on cpu 0x%x",
590 caller, l, mycpu);
91447636 591 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
2d21ac55 592 panic("%s", caller);
91447636
A
593 }
594 usl_trace(l, mycpu, pc, caller);
595
596 l->debug.unlock_thread = l->debug.lock_thread;
597 l->debug.lock_thread = INVALID_PC;
598 l->debug.state &= ~USLOCK_TAKEN;
599 l->debug.unlock_pc = pc;
600 l->debug.unlock_cpu = mycpu;
601}
602
603
604/*
605 * Debug checks on a usimple_lock just before
606 * attempting to acquire it.
607 *
608 * Preemption isn't guaranteed to be disabled.
609 */
610void
611usld_lock_try_pre(
612 usimple_lock_t l,
613 pc_t pc)
614{
615 char caller[] = "usimple_lock_try";
616
617 if (!usld_lock_common_checks(l, caller))
618 return;
619 mp_disable_preemption();
620 usl_trace(l, cpu_number(), pc, caller);
621 mp_enable_preemption();
622}
623
624
625/*
626 * Debug checks on a usimple_lock just after
627 * successfully attempting to acquire it.
628 *
629 * Preemption has been disabled by the
630 * lock acquisition attempt, so it's safe
631 * to use cpu_number.
632 */
633void
634usld_lock_try_post(
635 usimple_lock_t l,
636 pc_t pc)
637{
638 register int mycpu;
639 char caller[] = "successful usimple_lock_try";
640
641 if (!usld_lock_common_checks(l, caller))
642 return;
643
644 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7
A
645 panic("%s: lock 0x%p became uninitialized",
646 caller, l);
91447636 647 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7
A
648 panic("%s: lock 0x%p became TAKEN by someone else",
649 caller, l);
91447636
A
650
651 mycpu = cpu_number();
652 l->debug.lock_thread = (void *) current_thread();
653 l->debug.state |= USLOCK_TAKEN;
654 l->debug.lock_pc = pc;
655 l->debug.lock_cpu = mycpu;
656
657 usl_trace(l, mycpu, pc, caller);
658}
659
660
661/*
662 * For very special cases, set traced_lock to point to a
663 * specific lock of interest. The result is a series of
664 * XPRs showing lock operations on that lock. The lock_seq
665 * value is used to show the order of those operations.
666 */
667usimple_lock_t traced_lock;
668unsigned int lock_seq;
669
670void
671usl_trace(
672 usimple_lock_t l,
673 int mycpu,
674 pc_t pc,
675 const char * op_name)
676{
677 if (traced_lock == l) {
678 XPR(XPR_SLOCK,
679 "seq %d, cpu %d, %s @ %x\n",
b0d623f7
A
680 (uintptr_t) lock_seq, (uintptr_t) mycpu,
681 (uintptr_t) op_name, (uintptr_t) pc, 0);
91447636
A
682 lock_seq++;
683 }
684}
685
686
687#endif /* USLOCK_DEBUG */
688
689/*
690 * Routine: lock_alloc
691 * Function:
692 * Allocate a lock for external users who cannot
693 * hard-code the structure definition into their
694 * objects.
695 * For now just use kalloc, but a zone is probably
696 * warranted.
697 */
698lock_t *
699lock_alloc(
700 boolean_t can_sleep,
701 unsigned short tag,
702 unsigned short tag1)
703{
704 lock_t *l;
705
706 if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
707 lock_init(l, can_sleep, tag, tag1);
708 return(l);
709}
710
711/*
712 * Routine: lock_free
713 * Function:
714 * Free a lock allocated for external users.
715 * For now just use kfree, but a zone is probably
716 * warranted.
717 */
718void
719lock_free(
720 lock_t *l)
721{
722 kfree(l, sizeof(lock_t));
723}
724
725
726/*
727 * Routine: lock_init
728 * Function:
729 * Initialize a lock; required before use.
730 * Note that clients declare the "struct lock"
731 * variables and then initialize them, rather
732 * than getting a new one from this module.
733 */
734void
735lock_init(
736 lock_t *l,
737 boolean_t can_sleep,
738 __unused unsigned short tag,
0c530ab8 739 __unused unsigned short tag1)
91447636 740{
2d21ac55
A
741 hw_lock_byte_init(&l->lck_rw_interlock);
742 l->lck_rw_want_write = FALSE;
743 l->lck_rw_want_upgrade = FALSE;
744 l->lck_rw_shared_count = 0;
745 l->lck_rw_can_sleep = can_sleep;
0c530ab8 746 l->lck_rw_tag = tag;
2d21ac55 747 l->lck_rw_priv_excl = 1;
b0d623f7 748 l->lck_r_waiting = l->lck_w_waiting = 0;
91447636
A
749}
750
751
752/*
753 * Sleep locks. These use the same data structure and algorithm
754 * as the spin locks, but the process sleeps while it is waiting
755 * for the lock. These work on uniprocessor systems.
756 */
757
758#define DECREMENTER_TIMEOUT 1000000
759
760void
761lock_write(
762 register lock_t * l)
763{
0c530ab8 764 lck_rw_lock_exclusive(l);
91447636
A
765}
766
767void
768lock_done(
769 register lock_t * l)
770{
0c530ab8 771 (void) lck_rw_done(l);
91447636
A
772}
773
774void
775lock_read(
776 register lock_t * l)
777{
0c530ab8 778 lck_rw_lock_shared(l);
91447636
A
779}
780
781
782/*
783 * Routine: lock_read_to_write
784 * Function:
785 * Improves a read-only lock to one with
786 * write permission. If another reader has
787 * already requested an upgrade to a write lock,
788 * no lock is held upon return.
789 *
2d21ac55 790 * Returns FALSE if the upgrade *failed*.
91447636
A
791 */
792
793boolean_t
794lock_read_to_write(
795 register lock_t * l)
796{
0c530ab8 797 return lck_rw_lock_shared_to_exclusive(l);
91447636
A
798}
799
800void
801lock_write_to_read(
802 register lock_t * l)
803{
0c530ab8 804 lck_rw_lock_exclusive_to_shared(l);
8f6c56a5
A
805}
806
8f6c56a5 807
91447636
A
808
809/*
810 * Routine: lck_rw_alloc_init
811 */
812lck_rw_t *
813lck_rw_alloc_init(
814 lck_grp_t *grp,
815 lck_attr_t *attr) {
816 lck_rw_t *lck;
817
b0d623f7
A
818 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
819 bzero(lck, sizeof(lck_rw_t));
91447636 820 lck_rw_init(lck, grp, attr);
b0d623f7
A
821 }
822
91447636
A
823 return(lck);
824}
825
826/*
827 * Routine: lck_rw_free
828 */
829void
830lck_rw_free(
831 lck_rw_t *lck,
832 lck_grp_t *grp) {
833 lck_rw_destroy(lck, grp);
834 kfree(lck, sizeof(lck_rw_t));
835}
836
837/*
838 * Routine: lck_rw_init
839 */
840void
841lck_rw_init(
842 lck_rw_t *lck,
843 lck_grp_t *grp,
0c530ab8
A
844 lck_attr_t *attr)
845{
846 lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
847 attr : &LockDefaultLckAttr;
91447636 848
2d21ac55
A
849 hw_lock_byte_init(&lck->lck_rw_interlock);
850 lck->lck_rw_want_write = FALSE;
851 lck->lck_rw_want_upgrade = FALSE;
852 lck->lck_rw_shared_count = 0;
853 lck->lck_rw_can_sleep = TRUE;
b0d623f7 854 lck->lck_r_waiting = lck->lck_w_waiting = 0;
91447636 855 lck->lck_rw_tag = 0;
2d21ac55
A
856 lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
857 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
91447636
A
858
859 lck_grp_reference(grp);
860 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
861}
862
863/*
864 * Routine: lck_rw_destroy
865 */
866void
867lck_rw_destroy(
868 lck_rw_t *lck,
b0d623f7
A
869 lck_grp_t *grp)
870{
91447636
A
871 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
872 return;
873 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
874 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
875 lck_grp_deallocate(grp);
876 return;
877}
878
879/*
880 * Sleep locks. These use the same data structure and algorithm
881 * as the spin locks, but the process sleeps while it is waiting
882 * for the lock. These work on uniprocessor systems.
883 */
884
885#define DECREMENTER_TIMEOUT 1000000
886
2d21ac55
A
887#define RW_LOCK_READER_EVENT(x) \
888 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))
889
890#define RW_LOCK_WRITER_EVENT(x) \
891 ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
91447636
A
892
893/*
6d2010ae
A
894 * We disable interrupts while holding the RW interlock to prevent an
895 * interrupt from exacerbating hold time.
91447636
A
896 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
897 */
898static boolean_t
899lck_interlock_lock(lck_rw_t *lck)
900{
901 boolean_t istate;
902
903 istate = ml_set_interrupts_enabled(FALSE);
2d21ac55 904 hw_lock_byte_lock(&lck->lck_rw_interlock);
91447636
A
905
906 return istate;
907}
908
909static void
910lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
911{
2d21ac55 912 hw_lock_byte_unlock(&lck->lck_rw_interlock);
91447636
A
913 ml_set_interrupts_enabled(istate);
914}
915
0c530ab8
A
916/*
917 * This inline is used when busy-waiting for an rw lock.
918 * If interrupts were disabled when the lock primitive was called,
919 * we poll the IPI handler for pending tlb flushes.
920 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
921 */
922static inline void
923lck_rw_lock_pause(boolean_t interrupts_enabled)
924{
925 if (!interrupts_enabled)
926 handle_pending_TLB_flushes();
927 cpu_pause();
928}
929
b0d623f7
A
930
931/*
932 * compute the deadline to spin against when
933 * waiting for a change of state on a lck_rw_t
934 */
935static inline uint64_t
936lck_rw_deadline_for_spin(lck_rw_t *lck)
937{
938 if (lck->lck_rw_can_sleep) {
939 if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
940 /*
941 * there are already threads waiting on this lock... this
942 * implies that they have spun beyond their deadlines waiting for
943 * the desired state to show up so we will not bother spinning at this time...
944 * or
945 * the current number of threads sharing this lock exceeds our capacity to run them
946 * concurrently and since all states we're going to spin for require the rw_shared_count
947 * to be at 0, we'll not bother spinning since the latency for this to happen is
948 * unpredictable...
949 */
950 return (mach_absolute_time());
951 }
952 return (mach_absolute_time() + MutexSpin);
953 } else
954 return (mach_absolute_time() + (100000LL * 1000000000LL));
955}
956
957
91447636
A
958/*
959 * Routine: lck_rw_lock_exclusive
960 */
961void
b0d623f7 962lck_rw_lock_exclusive_gen(
91447636
A
963 lck_rw_t *lck)
964{
b0d623f7
A
965 uint64_t deadline = 0;
966 int slept = 0;
967 int gotlock = 0;
968 int lockheld = 0;
969 wait_result_t res = 0;
970 boolean_t istate = -1;
91447636 971
2d21ac55 972#if CONFIG_DTRACE
b0d623f7
A
973 boolean_t dtrace_ls_initialized = FALSE;
974 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
975 uint64_t wait_interval = 0;
976 int readers_at_sleep = 0;
2d21ac55 977#endif
91447636 978
91447636 979 /*
2d21ac55 980 * Try to acquire the lck_rw_want_write bit.
91447636 981 */
b0d623f7 982 while ( !lck_rw_grab_want(lck)) {
91447636 983
2d21ac55 984#if CONFIG_DTRACE
b0d623f7
A
985 if (dtrace_ls_initialized == FALSE) {
986 dtrace_ls_initialized = TRUE;
987 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
988 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
989 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
990 if (dtrace_ls_enabled) {
991 /*
992 * Either sleeping or spinning is happening,
993 * start a timing of our delay interval now.
994 */
995 readers_at_sleep = lck->lck_rw_shared_count;
996 wait_interval = mach_absolute_time();
997 }
91447636 998 }
2d21ac55 999#endif
b0d623f7
A
1000 if (istate == -1)
1001 istate = ml_get_interrupts_enabled();
91447636 1002
b0d623f7
A
1003 deadline = lck_rw_deadline_for_spin(lck);
1004
1005 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1006
1007 while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
1008 lck_rw_lock_pause(istate);
1009
1010 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);
1011
1012 if (gotlock)
1013 break;
1014 /*
1015 * if we get here, the deadline has expired w/o us
1016 * being able to grab the lock exclusively
1017 * check to see if we're allowed to do a thread_block
1018 */
1019 if (lck->lck_rw_can_sleep) {
2d21ac55 1020
91447636 1021 istate = lck_interlock_lock(lck);
91447636 1022
b0d623f7 1023 if (lck->lck_rw_want_write) {
91447636 1024
b0d623f7 1025 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
91447636 1026
b0d623f7 1027 lck->lck_w_waiting = TRUE;
91447636 1028
b0d623f7
A
1029 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1030 lck_interlock_unlock(lck, istate);
91447636 1031
b0d623f7
A
1032 if (res == THREAD_WAITING) {
1033 res = thread_block(THREAD_CONTINUE_NULL);
1034 slept++;
1035 }
1036 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
1037 } else {
1038 lck->lck_rw_want_write = TRUE;
1039 lck_interlock_unlock(lck, istate);
1040 break;
1041 }
1042 }
1043 }
1044 /*
1045 * Wait for readers (and upgrades) to finish...
1046 * the test for these conditions must be done simultaneously with
1047 * a check of the interlock not being held since
1048 * the rw_shared_count will drop to 0 first and then want_upgrade
1049 * will be set to 1 in the shared_to_exclusive scenario... those
1050 * adjustments are done behind the interlock and represent an
1051 * atomic change in state and must be considered as such
1052 * however, once we see the read count at 0, the want_upgrade not set
1053 * and the interlock not held, we are safe to proceed
1054 */
1055 while (lck_rw_held_read_or_upgrade(lck)) {
2d21ac55
A
1056
1057#if CONFIG_DTRACE
1058 /*
1059 * Either sleeping or spinning is happening, start
1060 * a timing of our delay interval now. If we set it
1061 * to -1 we don't have accurate data so we cannot later
1062 * decide to record a dtrace spin or sleep event.
1063 */
b0d623f7
A
1064 if (dtrace_ls_initialized == FALSE) {
1065 dtrace_ls_initialized = TRUE;
1066 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1067 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1068 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1069 if (dtrace_ls_enabled) {
1070 /*
1071 * Either sleeping or spinning is happening,
1072 * start a timing of our delay interval now.
1073 */
1074 readers_at_sleep = lck->lck_rw_shared_count;
1075 wait_interval = mach_absolute_time();
1076 }
2d21ac55
A
1077 }
1078#endif
b0d623f7
A
1079 if (istate == -1)
1080 istate = ml_get_interrupts_enabled();
1081
1082 deadline = lck_rw_deadline_for_spin(lck);
1083
1084 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1085
1086 while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
1087 lck_rw_lock_pause(istate);
1088
1089 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);
1090
1091 if ( !lockheld)
1092 break;
1093 /*
1094 * if we get here, the deadline has expired w/o us
1095 * being able to grab the lock exclusively
1096 * check to see if we're allowed to do a thread_block
1097 */
1098 if (lck->lck_rw_can_sleep) {
91447636 1099
91447636 1100 istate = lck_interlock_lock(lck);
91447636 1101
b0d623f7
A
1102 if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
1103 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
1104
1105 lck->lck_w_waiting = TRUE;
1106
1107 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
91447636 1108 lck_interlock_unlock(lck, istate);
b0d623f7
A
1109
1110 if (res == THREAD_WAITING) {
1111 res = thread_block(THREAD_CONTINUE_NULL);
1112 slept++;
1113 }
1114 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
1115 } else {
1116 lck_interlock_unlock(lck, istate);
1117 /*
1118 * must own the lock now, since we checked for
1119 * readers or upgrade owner behind the interlock
1120 * no need for a call to 'lck_rw_held_read_or_upgrade'
1121 */
1122 break;
91447636
A
1123 }
1124 }
91447636
A
1125 }
1126
2d21ac55
A
1127#if CONFIG_DTRACE
1128 /*
1129 * Decide what latencies we suffered that are Dtrace events.
1130 * If we have set wait_interval, then we either spun or slept.
1131 * At least we get out from under the interlock before we record
1132 * which is the best we can do here to minimize the impact
1133 * of the tracing.
1134 * If we have set wait_interval to -1, then dtrace was not enabled when we
1135 * started sleeping/spinning so we don't record this event.
1136 */
b0d623f7 1137 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1138 if (slept == 0) {
1139 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
1140 mach_absolute_time() - wait_interval, 1);
1141 } else {
1142 /*
1143 * For the blocking case, we also record if when we blocked
1144 * it was held for read or write, and how many readers.
1145 * Notice that above we recorded this before we dropped
1146 * the interlock so the count is accurate.
1147 */
1148 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
1149 mach_absolute_time() - wait_interval, 1,
1150 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1151 }
1152 }
1153 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1154#endif
91447636
A
1155}
1156
1157
1158/*
2d21ac55 1159 * Routine: lck_rw_done_gen
b0d623f7
A
1160 *
1161 * called from the assembly language wrapper...
1162 * prior_lock_state is the value in the 1st
1163 * word of the lock at the time of a successful
1164 * atomic compare and exchange with the new value...
1165 * it represents the state of the lock before we
1166 * decremented the rw_shared_count or cleared either
1167 * rw_want_upgrade or rw_want_write and
1168 * the lck_x_waiting bits... since the wrapper
1169 * routine has already changed the state atomically,
1170 * we just need to decide if we should
1171 * wake up anyone and what value to return... we do
1172 * this by examining the state of the lock before
1173 * we changed it
91447636
A
1174 */
1175lck_rw_type_t
2d21ac55 1176lck_rw_done_gen(
b0d623f7
A
1177 lck_rw_t *lck,
1178 int prior_lock_state)
91447636 1179{
b0d623f7
A
1180 lck_rw_t *fake_lck;
1181 lck_rw_type_t lock_type;
91447636
A
1182
1183 /*
b0d623f7
A
1184 * prior_lock state is a snapshot of the 1st word of the
1185 * lock in question... we'll fake up a pointer to it
1186 * and carefully not access anything beyond whats defined
1187 * in the first word of a lck_rw_t
91447636 1188 */
b0d623f7 1189 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1190
b0d623f7
A
1191 if (fake_lck->lck_rw_shared_count <= 1) {
1192 if (fake_lck->lck_w_waiting)
1193 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
91447636 1194
b0d623f7
A
1195 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1196 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1197 }
1198 if (fake_lck->lck_rw_shared_count)
1199 lock_type = LCK_RW_TYPE_SHARED;
1200 else
1201 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2d21ac55
A
1202
1203#if CONFIG_DTRACE
b0d623f7 1204 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2d21ac55
A
1205#endif
1206
b0d623f7 1207 return(lock_type);
91447636
A
1208}
1209
1210
91447636
A
1211/*
1212 * Routine: lck_rw_unlock
1213 */
1214void
1215lck_rw_unlock(
1216 lck_rw_t *lck,
1217 lck_rw_type_t lck_rw_type)
1218{
1219 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1220 lck_rw_unlock_shared(lck);
1221 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1222 lck_rw_unlock_exclusive(lck);
1223 else
1224 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
1225}
1226
1227
1228/*
1229 * Routine: lck_rw_unlock_shared
1230 */
1231void
1232lck_rw_unlock_shared(
1233 lck_rw_t *lck)
1234{
1235 lck_rw_type_t ret;
1236
1237 ret = lck_rw_done(lck);
1238
1239 if (ret != LCK_RW_TYPE_SHARED)
1240 panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
1241}
1242
1243
1244/*
1245 * Routine: lck_rw_unlock_exclusive
1246 */
1247void
1248lck_rw_unlock_exclusive(
1249 lck_rw_t *lck)
1250{
1251 lck_rw_type_t ret;
1252
1253 ret = lck_rw_done(lck);
1254
1255 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1256 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
1257}
1258
1259
1260/*
1261 * Routine: lck_rw_lock
1262 */
1263void
1264lck_rw_lock(
1265 lck_rw_t *lck,
1266 lck_rw_type_t lck_rw_type)
1267{
1268 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1269 lck_rw_lock_shared(lck);
1270 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1271 lck_rw_lock_exclusive(lck);
1272 else
1273 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
1274}
1275
1276
1277/*
2d21ac55 1278 * Routine: lck_rw_lock_shared_gen
b0d623f7
A
1279 * Function:
1280 * assembly fast path code has determined that this lock
1281 * is held exclusively... this is where we spin/block
1282 * until we can acquire the lock in the shared mode
91447636
A
1283 */
1284void
2d21ac55 1285lck_rw_lock_shared_gen(
91447636
A
1286 lck_rw_t *lck)
1287{
b0d623f7
A
1288 uint64_t deadline = 0;
1289 int gotlock = 0;
1290 int slept = 0;
1291 wait_result_t res = 0;
1292 boolean_t istate = -1;
1293
2d21ac55
A
1294#if CONFIG_DTRACE
1295 uint64_t wait_interval = 0;
b0d623f7
A
1296 int readers_at_sleep = 0;
1297 boolean_t dtrace_ls_initialized = FALSE;
1298 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
2d21ac55 1299#endif
91447636 1300
b0d623f7
A
1301 while ( !lck_rw_grab_shared(lck)) {
1302
2d21ac55 1303#if CONFIG_DTRACE
b0d623f7
A
1304 if (dtrace_ls_initialized == FALSE) {
1305 dtrace_ls_initialized = TRUE;
1306 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1307 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1308 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1309 if (dtrace_ls_enabled) {
1310 /*
1311 * Either sleeping or spinning is happening,
1312 * start a timing of our delay interval now.
1313 */
1314 readers_at_sleep = lck->lck_rw_shared_count;
1315 wait_interval = mach_absolute_time();
1316 }
1317 }
2d21ac55 1318#endif
b0d623f7
A
1319 if (istate == -1)
1320 istate = ml_get_interrupts_enabled();
91447636 1321
b0d623f7 1322 deadline = lck_rw_deadline_for_spin(lck);
0c530ab8 1323
b0d623f7
A
1324 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1325 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
91447636 1326
b0d623f7
A
1327 while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
1328 lck_rw_lock_pause(istate);
1329
1330 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1331 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
1332
1333 if (gotlock)
1334 break;
1335 /*
1336 * if we get here, the deadline has expired w/o us
1337 * being able to grab the lock for read
1338 * check to see if we're allowed to do a thread_block
1339 */
1340 if (lck->lck_rw_can_sleep) {
91447636 1341
91447636 1342 istate = lck_interlock_lock(lck);
91447636 1343
b0d623f7
A
1344 if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1345 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
1346
1347 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1348 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1349
1350 lck->lck_r_waiting = TRUE;
1351
1352 res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
91447636 1353 lck_interlock_unlock(lck, istate);
b0d623f7
A
1354
1355 if (res == THREAD_WAITING) {
1356 res = thread_block(THREAD_CONTINUE_NULL);
1357 slept++;
1358 }
1359 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1360 (int)lck, res, slept, 0, 0);
1361 } else {
1362 lck->lck_rw_shared_count++;
1363 lck_interlock_unlock(lck, istate);
1364 break;
91447636
A
1365 }
1366 }
91447636
A
1367 }
1368
2d21ac55 1369#if CONFIG_DTRACE
b0d623f7 1370 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1371 if (slept == 0) {
1372 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1373 } else {
1374 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1375 mach_absolute_time() - wait_interval, 0,
1376 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1377 }
1378 }
1379 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1380#endif
91447636
A
1381}
1382
1383
1384/*
b0d623f7 1385 * Routine: lck_rw_lock_shared_to_exclusive_failure
91447636 1386 * Function:
b0d623f7
A
1387 * assembly fast path code has already dropped our read
1388 * count and determined that someone else owns 'lck_rw_want_upgrade'
1389 * if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
1390 * all we need to do here is determine if a wakeup is needed
91447636 1391 */
91447636 1392boolean_t
b0d623f7
A
1393lck_rw_lock_shared_to_exclusive_failure(
1394 lck_rw_t *lck,
1395 int prior_lock_state)
91447636 1396{
b0d623f7 1397 lck_rw_t *fake_lck;
91447636 1398
b0d623f7
A
1399 /*
1400 * prior_lock state is a snapshot of the 1st word of the
1401 * lock in question... we'll fake up a pointer to it
1402 * and carefully not access anything beyond whats defined
1403 * in the first word of a lck_rw_t
1404 */
1405 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1406
b0d623f7 1407 if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
91447636
A
1408 /*
1409 * Someone else has requested upgrade.
b0d623f7
A
1410 * Since we've released the read lock, wake
1411 * him up if he's blocked waiting
91447636 1412 */
b0d623f7
A
1413 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1414 }
1415 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1416 (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
91447636 1417
b0d623f7
A
1418 return (FALSE);
1419}
91447636 1420
91447636 1421
b0d623f7
A
1422/*
1423 * Routine: lck_rw_lock_shared_to_exclusive_failure
1424 * Function:
1425 * assembly fast path code has already dropped our read
1426 * count and successfully acquired 'lck_rw_want_upgrade'
1427 * we just need to wait for the rest of the readers to drain
1428 * and then we can return as the exclusive holder of this lock
1429 */
1430boolean_t
1431lck_rw_lock_shared_to_exclusive_success(
1432 lck_rw_t *lck)
1433{
1434 uint64_t deadline = 0;
1435 int slept = 0;
1436 int still_shared = 0;
1437 wait_result_t res;
1438 boolean_t istate = -1;
91447636 1439
b0d623f7
A
1440#if CONFIG_DTRACE
1441 uint64_t wait_interval = 0;
1442 int readers_at_sleep = 0;
1443 boolean_t dtrace_ls_initialized = FALSE;
1444 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1445#endif
91447636 1446
2d21ac55 1447 while (lck->lck_rw_shared_count != 0) {
b0d623f7 1448
2d21ac55 1449#if CONFIG_DTRACE
b0d623f7
A
1450 if (dtrace_ls_initialized == FALSE) {
1451 dtrace_ls_initialized = TRUE;
1452 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1453 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1454 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1455 if (dtrace_ls_enabled) {
1456 /*
1457 * Either sleeping or spinning is happening,
1458 * start a timing of our delay interval now.
1459 */
1460 readers_at_sleep = lck->lck_rw_shared_count;
1461 wait_interval = mach_absolute_time();
1462 }
2d21ac55
A
1463 }
1464#endif
b0d623f7
A
1465 if (istate == -1)
1466 istate = ml_get_interrupts_enabled();
1467
1468 deadline = lck_rw_deadline_for_spin(lck);
1469
1470 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1471 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1472
1473 while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
1474 lck_rw_lock_pause(istate);
1475
1476 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1477 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1478
1479 if ( !still_shared)
1480 break;
1481 /*
1482 * if we get here, the deadline has expired w/o
1483 * the rw_shared_count having drained to 0
1484 * check to see if we're allowed to do a thread_block
1485 */
1486 if (lck->lck_rw_can_sleep) {
1487
91447636 1488 istate = lck_interlock_lock(lck);
b0d623f7
A
1489
1490 if (lck->lck_rw_shared_count != 0) {
1491 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1492 (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
1493
1494 lck->lck_w_waiting = TRUE;
91447636 1495
b0d623f7 1496 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
91447636 1497 lck_interlock_unlock(lck, istate);
b0d623f7
A
1498
1499 if (res == THREAD_WAITING) {
1500 res = thread_block(THREAD_CONTINUE_NULL);
1501 slept++;
1502 }
1503 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1504 (int)lck, res, slept, 0, 0);
1505 } else {
1506 lck_interlock_unlock(lck, istate);
1507 break;
91447636
A
1508 }
1509 }
91447636 1510 }
2d21ac55
A
1511#if CONFIG_DTRACE
1512 /*
1513 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1514 */
b0d623f7 1515 if (dtrace_ls_enabled == TRUE) {
2d21ac55
A
1516 if (slept == 0) {
1517 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1518 } else {
1519 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
1520 mach_absolute_time() - wait_interval, 1,
1521 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1522 }
1523 }
2d21ac55
A
1524 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1525#endif
1526 return (TRUE);
91447636
A
1527}
1528
b0d623f7 1529
91447636
A
1530/*
1531 * Routine: lck_rw_lock_exclusive_to_shared
b0d623f7
A
1532 * Function:
1533 * assembly fast path has already dropped
1534 * our exclusive state and bumped lck_rw_shared_count
1535 * all we need to do here is determine if anyone
1536 * needs to be awakened.
91447636
A
1537 */
1538void
b0d623f7
A
1539lck_rw_lock_exclusive_to_shared_gen(
1540 lck_rw_t *lck,
1541 int prior_lock_state)
91447636 1542{
b0d623f7 1543 lck_rw_t *fake_lck;
91447636 1544
b0d623f7
A
1545 /*
1546 * prior_lock state is a snapshot of the 1st word of the
1547 * lock in question... we'll fake up a pointer to it
1548 * and carefully not access anything beyond whats defined
1549 * in the first word of a lck_rw_t
1550 */
1551 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1552
b0d623f7
A
1553 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1554 (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
91447636 1555
b0d623f7
A
1556 /*
1557 * don't wake up anyone waiting to take the lock exclusively
1558 * since we hold a read count... when the read count drops to 0,
1559 * the writers will be woken.
1560 *
1561 * wake up any waiting readers if we don't have any writers waiting,
1562 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1563 */
1564 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
2d21ac55 1565 thread_wakeup(RW_LOCK_READER_EVENT(lck));
91447636
A
1566
1567 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
2d21ac55 1568 (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
91447636 1569
2d21ac55
A
1570#if CONFIG_DTRACE
1571 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1572#endif
91447636
A
1573}
1574
1575
1576/*
1577 * Routine: lck_rw_try_lock
1578 */
1579boolean_t
1580lck_rw_try_lock(
1581 lck_rw_t *lck,
1582 lck_rw_type_t lck_rw_type)
1583{
1584 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1585 return(lck_rw_try_lock_shared(lck));
1586 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1587 return(lck_rw_try_lock_exclusive(lck));
1588 else
1589 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
1590 return(FALSE);
1591}
1592
91447636 1593
2d21ac55
A
1594void
1595lck_rw_assert(
1596 lck_rw_t *lck,
1597 unsigned int type)
1598{
1599 switch (type) {
1600 case LCK_RW_ASSERT_SHARED:
1601 if (lck->lck_rw_shared_count != 0) {
1602 return;
1603 }
1604 break;
1605 case LCK_RW_ASSERT_EXCLUSIVE:
1606 if ((lck->lck_rw_want_write ||
1607 lck->lck_rw_want_upgrade) &&
1608 lck->lck_rw_shared_count == 0) {
1609 return;
1610 }
1611 break;
1612 case LCK_RW_ASSERT_HELD:
1613 if (lck->lck_rw_want_write ||
1614 lck->lck_rw_want_upgrade ||
1615 lck->lck_rw_shared_count != 0) {
1616 return;
1617 }
1618 break;
1619 default:
1620 break;
1621 }
1622
b0d623f7 1623 panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
2d21ac55
A
1624}
1625
6d2010ae
A
1626#ifdef MUTEX_ZONE
1627extern zone_t lck_mtx_zone;
1628#endif
91447636
A
1629/*
1630 * Routine: lck_mtx_alloc_init
1631 */
1632lck_mtx_t *
1633lck_mtx_alloc_init(
1634 lck_grp_t *grp,
1635 lck_attr_t *attr)
1636{
1637 lck_mtx_t *lck;
6d2010ae
A
1638#ifdef MUTEX_ZONE
1639 if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
1640 lck_mtx_init(lck, grp, attr);
1641#else
91447636
A
1642 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
1643 lck_mtx_init(lck, grp, attr);
6d2010ae 1644#endif
91447636
A
1645 return(lck);
1646}
1647
1648/*
1649 * Routine: lck_mtx_free
1650 */
1651void
1652lck_mtx_free(
1653 lck_mtx_t *lck,
1654 lck_grp_t *grp)
1655{
1656 lck_mtx_destroy(lck, grp);
6d2010ae
A
1657#ifdef MUTEX_ZONE
1658 zfree(lck_mtx_zone, lck);
1659#else
91447636 1660 kfree(lck, sizeof(lck_mtx_t));
6d2010ae 1661#endif
91447636
A
1662}
1663
1664/*
1665 * Routine: lck_mtx_ext_init
1666 */
1667static void
1668lck_mtx_ext_init(
1669 lck_mtx_ext_t *lck,
1670 lck_grp_t *grp,
1671 lck_attr_t *attr)
1672{
2d21ac55 1673 bzero((void *)lck, sizeof(lck_mtx_ext_t));
91447636
A
1674
1675 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636
A
1676 lck->lck_mtx_deb.type = MUTEX_TAG;
1677 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
1678 }
1679
1680 lck->lck_mtx_grp = grp;
2d21ac55
A
1681
1682 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
6d2010ae 1683 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
b0d623f7 1684
6d2010ae
A
1685 lck->lck_mtx.lck_mtx_is_ext = 1;
1686#if defined(__x86_64__)
1687 lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
1688#endif
91447636
A
1689}
1690
1691/*
1692 * Routine: lck_mtx_init
1693 */
1694void
1695lck_mtx_init(
1696 lck_mtx_t *lck,
1697 lck_grp_t *grp,
1698 lck_attr_t *attr)
1699{
1700 lck_mtx_ext_t *lck_ext;
2d21ac55
A
1701 lck_attr_t *lck_attr;
1702
1703 if (attr != LCK_ATTR_NULL)
1704 lck_attr = attr;
1705 else
1706 lck_attr = &LockDefaultLckAttr;
91447636 1707
2d21ac55 1708 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636 1709 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2d21ac55 1710 lck_mtx_ext_init(lck_ext, grp, lck_attr);
91447636
A
1711 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1712 lck->lck_mtx_ptr = lck_ext;
1713 }
1714 } else {
b0d623f7 1715 lck->lck_mtx_owner = 0;
6d2010ae 1716 lck->lck_mtx_state = 0;
91447636 1717 }
6d2010ae
A
1718#if defined(__x86_64__)
1719 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
1720#endif
91447636
A
1721 lck_grp_reference(grp);
1722 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1723}
1724
2d21ac55
A
1725/*
1726 * Routine: lck_mtx_init_ext
1727 */
1728void
1729lck_mtx_init_ext(
1730 lck_mtx_t *lck,
1731 lck_mtx_ext_t *lck_ext,
1732 lck_grp_t *grp,
1733 lck_attr_t *attr)
1734{
1735 lck_attr_t *lck_attr;
1736
1737 if (attr != LCK_ATTR_NULL)
1738 lck_attr = attr;
1739 else
1740 lck_attr = &LockDefaultLckAttr;
1741
1742 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
1743 lck_mtx_ext_init(lck_ext, grp, lck_attr);
1744 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1745 lck->lck_mtx_ptr = lck_ext;
1746 } else {
b0d623f7 1747 lck->lck_mtx_owner = 0;
6d2010ae 1748 lck->lck_mtx_state = 0;
2d21ac55 1749 }
6d2010ae
A
1750#if defined(__x86_64__)
1751 lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
1752#endif
1753
2d21ac55
A
1754 lck_grp_reference(grp);
1755 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
1756}
1757
91447636
A
1758/*
1759 * Routine: lck_mtx_destroy
1760 */
1761void
1762lck_mtx_destroy(
1763 lck_mtx_t *lck,
1764 lck_grp_t *grp)
1765{
1766 boolean_t lck_is_indirect;
1767
1768 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
1769 return;
1770 lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
b0d623f7
A
1771
1772 lck_mtx_lock_mark_destroyed(lck);
1773
91447636
A
1774 if (lck_is_indirect)
1775 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
1776 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
1777 lck_grp_deallocate(grp);
1778 return;
1779}
1780
b0d623f7
A
1781
1782#define LCK_MTX_LCK_WAIT_CODE 0x20
1783#define LCK_MTX_LCK_WAKEUP_CODE 0x21
1784#define LCK_MTX_LCK_SPIN_CODE 0x22
1785#define LCK_MTX_LCK_ACQUIRE_CODE 0x23
1786#define LCK_MTX_LCK_DEMOTE_CODE 0x24
1787
1788
1789/*
1790 * Routine: lck_mtx_unlock_wakeup_x86
1791 *
6d2010ae
A
1792 * Invoked on unlock when there is
1793 * contention (i.e. the assembly routine sees that
1794 * that mutex->lck_mtx_waiters != 0 or
1795 * that mutex->lck_mtx_promoted != 0...
b0d623f7 1796 *
6d2010ae 1797 * neither the mutex or interlock is held
b0d623f7
A
1798 */
1799void
1800lck_mtx_unlock_wakeup_x86 (
1801 lck_mtx_t *mutex,
6d2010ae 1802 int prior_lock_state)
b0d623f7 1803{
6d2010ae
A
1804 lck_mtx_t fake_lck;
1805
1806 /*
1807 * prior_lock state is a snapshot of the 2nd word of the
1808 * lock in question... we'll fake up a lock with the bits
1809 * copied into place and carefully not access anything
1810 * beyond whats defined in the second word of a lck_mtx_t
1811 */
1812 fake_lck.lck_mtx_state = prior_lock_state;
1813
1814 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
1815 mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
b0d623f7 1816
6d2010ae 1817 if (__probable(fake_lck.lck_mtx_waiters)) {
b0d623f7 1818
6d2010ae
A
1819 if (fake_lck.lck_mtx_waiters > 1)
1820 thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
1821 else
1822 thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
1823 }
b0d623f7 1824
6d2010ae 1825 if (__improbable(fake_lck.lck_mtx_promoted)) {
b0d623f7
A
1826 thread_t thread = current_thread();
1827
1828
6d2010ae
A
1829 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
1830 thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);
b0d623f7
A
1831
1832 if (thread->promotions > 0) {
1833 spl_t s = splsched();
1834
1835 thread_lock(thread);
1836
6d2010ae 1837 if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {
b0d623f7 1838
6d2010ae 1839 thread->sched_flags &= ~TH_SFLAG_PROMOTED;
b0d623f7 1840
6d2010ae
A
1841 if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
1842 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
1843 thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
b0d623f7
A
1844
1845 set_sched_pri(thread, DEPRESSPRI);
1846 }
1847 else {
1848 if (thread->priority < thread->sched_pri) {
6d2010ae
A
1849 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
1850 thread->sched_pri, thread->priority, 0, mutex, 0);
b0d623f7 1851
6d2010ae 1852 SCHED(compute_priority)(thread, FALSE);
b0d623f7
A
1853 }
1854 }
1855 }
1856 thread_unlock(thread);
1857 splx(s);
1858 }
1859 }
6d2010ae
A
1860 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
1861 mutex, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7
A
1862}
1863
1864
1865/*
1866 * Routine: lck_mtx_lock_acquire_x86
1867 *
1868 * Invoked on acquiring the mutex when there is
6d2010ae
A
1869 * contention (i.e. the assembly routine sees that
1870 * that mutex->lck_mtx_waiters != 0 or
1871 * thread->was_promoted_on_wakeup != 0)...
1872 *
1873 * mutex is owned... interlock is held... preemption is disabled
b0d623f7
A
1874 */
1875void
1876lck_mtx_lock_acquire_x86(
1877 lck_mtx_t *mutex)
1878{
6d2010ae 1879 thread_t thread;
b0d623f7 1880 integer_t priority;
6d2010ae 1881 spl_t s;
b0d623f7 1882
6d2010ae
A
1883 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
1884 mutex, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
b0d623f7 1885
6d2010ae
A
1886 if (mutex->lck_mtx_waiters)
1887 priority = mutex->lck_mtx_pri;
1888 else
1889 priority = 0;
b0d623f7 1890
6d2010ae 1891 thread = (thread_t)mutex->lck_mtx_owner; /* faster then current_thread() */
b0d623f7 1892
6d2010ae 1893 if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
b0d623f7 1894
6d2010ae
A
1895 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
1896 thread->sched_pri, priority, thread->was_promoted_on_wakeup, mutex, 0);
b0d623f7 1897
6d2010ae
A
1898 s = splsched();
1899 thread_lock(thread);
b0d623f7 1900
6d2010ae
A
1901 if (thread->sched_pri < priority)
1902 set_sched_pri(thread, priority);
b0d623f7 1903
6d2010ae
A
1904 if (mutex->lck_mtx_promoted == 0) {
1905 mutex->lck_mtx_promoted = 1;
1906
b0d623f7 1907 thread->promotions++;
6d2010ae 1908 thread->sched_flags |= TH_SFLAG_PROMOTED;
b0d623f7 1909 }
6d2010ae
A
1910 thread->was_promoted_on_wakeup = 0;
1911
1912 thread_unlock(thread);
1913 splx(s);
b0d623f7 1914 }
6d2010ae
A
1915 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
1916 mutex, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7
A
1917}
1918
1919
1920
91447636 1921/*
b0d623f7 1922 * Routine: lck_mtx_lock_spinwait_x86
0c530ab8
A
1923 *
1924 * Invoked trying to acquire a mutex when there is contention but
1925 * the holder is running on another processor. We spin for up to a maximum
1926 * time waiting for the lock to be released.
1927 *
1928 * Called with the interlock unlocked.
6d2010ae
A
1929 * returns 0 if mutex acquired
1930 * returns 1 if we spun
1931 * returns 2 if we didn't spin due to the holder not running
0c530ab8 1932 */
b0d623f7
A
1933int
1934lck_mtx_lock_spinwait_x86(
1935 lck_mtx_t *mutex)
0c530ab8 1936{
b0d623f7
A
1937 thread_t holder;
1938 uint64_t deadline;
1939 int retval = 1;
1940 int loopcount = 0;
0c530ab8 1941
6d2010ae
A
1942
1943 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1944 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
0c530ab8
A
1945
1946 deadline = mach_absolute_time() + MutexSpin;
b0d623f7 1947
0c530ab8
A
1948 /*
1949 * Spin while:
1950 * - mutex is locked, and
b0d623f7 1951 * - its locked as a spin lock, and
0c530ab8 1952 * - owner is running on another processor, and
2d21ac55 1953 * - owner (processor) is not idling, and
0c530ab8
A
1954 * - we haven't spun for long enough.
1955 */
b0d623f7 1956 do {
6d2010ae 1957 if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
b0d623f7
A
1958 retval = 0;
1959 break;
2d21ac55 1960 }
b0d623f7
A
1961 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
1962
1963 if ( !(holder->machine.specFlags & OnProc) ||
1964 (holder->state & TH_IDLE)) {
1965 if (loopcount == 0)
1966 retval = 2;
1967 break;
1968 }
1969 }
1970 cpu_pause();
1971
1972 loopcount++;
1973
1974 } while (mach_absolute_time() < deadline);
1975
1976
2d21ac55
A
1977#if CONFIG_DTRACE
1978 /*
1979 * We've already kept a count via deadline of how long we spun.
1980 * If dtrace is active, then we compute backwards to decide how
1981 * long we spun.
1982 *
1983 * Note that we record a different probe id depending on whether
1984 * this is a direct or indirect mutex. This allows us to
1985 * penalize only lock groups that have debug/stats enabled
1986 * with dtrace processing if desired.
1987 */
6d2010ae 1988 if (__probable(mutex->lck_mtx_is_ext == 0)) {
b0d623f7 1989 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
2d21ac55
A
1990 mach_absolute_time() - (deadline - MutexSpin));
1991 } else {
b0d623f7 1992 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
2d21ac55
A
1993 mach_absolute_time() - (deadline - MutexSpin));
1994 }
1995 /* The lockstat acquire event is recorded by the assembly code beneath us. */
1996#endif
b0d623f7 1997
6d2010ae
A
1998 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
1999 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
b0d623f7
A
2000
2001 return retval;
0c530ab8
A
2002}
2003
b0d623f7
A
2004
2005
0c530ab8 2006/*
b0d623f7
A
2007 * Routine: lck_mtx_lock_wait_x86
2008 *
2009 * Invoked in order to wait on contention.
2010 *
2011 * Called with the interlock locked and
6d2010ae
A
2012 * preemption disabled...
2013 * returns it unlocked and with preemption enabled
0c530ab8
A
2014 */
2015void
b0d623f7
A
2016lck_mtx_lock_wait_x86 (
2017 lck_mtx_t *mutex)
0c530ab8 2018{
b0d623f7
A
2019 thread_t self = current_thread();
2020 thread_t holder;
2021 integer_t priority;
b0d623f7
A
2022 spl_t s;
2023#if CONFIG_DTRACE
2024 uint64_t sleep_start = 0;
2025
2026 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
2027 sleep_start = mach_absolute_time();
2028 }
2029#endif
6d2010ae
A
2030 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
2031 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
b0d623f7
A
2032
2033 priority = self->sched_pri;
2034
2035 if (priority < self->priority)
2036 priority = self->priority;
2037 if (priority < BASEPRI_DEFAULT)
2038 priority = BASEPRI_DEFAULT;
2039
6d2010ae 2040 if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
b0d623f7 2041 mutex->lck_mtx_pri = priority;
6d2010ae 2042 mutex->lck_mtx_waiters++;
b0d623f7 2043
6d2010ae
A
2044 if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
2045 holder->sched_pri < mutex->lck_mtx_pri ) {
b0d623f7
A
2046
2047 s = splsched();
2048 thread_lock(holder);
2049
6d2010ae 2050 if (holder->sched_pri < mutex->lck_mtx_pri) {
b0d623f7
A
2051 KERNEL_DEBUG_CONSTANT(
2052 MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
6d2010ae 2053 holder->sched_pri, priority, thread_tid(holder), mutex, 0);
b0d623f7
A
2054
2055 set_sched_pri(holder, priority);
2056
2057 if (mutex->lck_mtx_promoted == 0) {
2058 holder->promotions++;
6d2010ae
A
2059 holder->sched_flags |= TH_SFLAG_PROMOTED;
2060
b0d623f7
A
2061 mutex->lck_mtx_promoted = 1;
2062 }
2063 }
2064 thread_unlock(holder);
2065 splx(s);
2066 }
b0d623f7
A
2067 assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
2068
2069 lck_mtx_ilk_unlock(mutex);
2070
2071 thread_block(THREAD_CONTINUE_NULL);
2072
6d2010ae
A
2073 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
2074 mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
b0d623f7
A
2075
2076#if CONFIG_DTRACE
2077 /*
2078 * Record the Dtrace lockstat probe for blocking, block time
2079 * measured from when we were entered.
2080 */
2081 if (sleep_start) {
6d2010ae 2082 if (mutex->lck_mtx_is_ext == 0) {
b0d623f7
A
2083 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
2084 mach_absolute_time() - sleep_start);
2085 } else {
2086 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
2087 mach_absolute_time() - sleep_start);
2088 }
2089 }
2090#endif
0c530ab8 2091}